#!/usr/bin/env python2.7 # -*- coding: UTF-8 -*- from __future__ import print_function from __future__ import unicode_literals import re import regex import sys import os if not (("PYTHONIOENCODING" in os.environ) and re.search("^utf-?8$", os.environ["PYTHONIOENCODING"], re.I)): sys.stderr.write(sys.argv[0] + ": Please set your PYTHONIOENCODING envariable to utf8\n") sys.exit(1) import unicodedata if unicodedata.unidata_version < "6.0.0": print("WARNING: Your old UCD is out of date, expected 6.0.0 but got", unicodedata.unidata_version) wide_enough = (sys.maxunicode > 65536) if not wide_enough: print("WARNING: Narrow build detected, your Python lacks full Unicode support!!") data_rows = [ [ "\N{COMBINING GREEK YPOGEGRAMMENI}", "\N{GREEK CAPITAL LETTER IOTA}" ], [ "\N{MICRO SIGN}", "\N{GREEK CAPITAL LETTER MU}" ], [ "\N{LATIN SMALL LETTER S}", "\N{LATIN SMALL LETTER LONG S}" ], [ "στιγμας", "ΣΤΙΓΜΑΣ" ], [ "poſt", "POST" ], ] re_pass, re_fail = 0, 0 regex_pass, regex_fail = 0, 0 for lc, uc in data_rows: if re.match(uc, lc, re.IGNORECASE + re.UNICODE): print("PASS: re pattern", uc, "is indeed the same as string", lc) re_pass = re_pass + 1 else: print("FAIL: re pattern", uc, "is not the same as string", lc) re_fail = re_fail + 1 if regex.match(uc, lc, regex.IGNORECASE + regex.UNICODE): print("PASS: regex pattern", uc, "is indeed the same as string", lc) regex_pass = regex_pass + 1 else: print("FAIL: regex pattern", uc, "is not the same as string", lc) regex_fail = regex_fail + 1 re_total = re_pass + re_fail regex_total = regex_pass + regex_fail print("") print("re lib passed", re_pass, "of", re_total, "tests") print("regex lib passed", regex_pass, "of", regex_total, "tests") ########################################## # FAIL: re pattern Ι is not the same as string ͅ # PASS: regex pattern Ι is indeed the same as string ͅ # FAIL: re pattern Μ is not the same as string µ # PASS: regex pattern Μ is indeed the same as string µ # FAIL: re pattern ſ is not the same as string s # PASS: regex pattern ſ is indeed the same as string s # FAIL: re pattern ΣΤΙΓΜΑΣ is not the same as string στιγμας # PASS: regex pattern ΣΤΙΓΜΑΣ is indeed the same as string στιγμας # FAIL: re pattern POST is not the same as string poſt # PASS: regex pattern POST is indeed the same as string poſt # # re lib passed 0 of 5 tests # regex lib passed 5 of 5 tests