#!/usr/bin/env python2.7
# -*- coding: UTF-8 -*-

from __future__ import print_function
from __future__ import unicode_literals

import re
import regex
import sys
import os

# Refuse to run unless PYTHONIOENCODING is set to utf8 / utf-8 (any case);
# the rest of the script prints non-ASCII test strings.
io_encoding = os.environ.get("PYTHONIOENCODING")
if io_encoding is None or re.search("^utf-?8$", io_encoding, re.I) is None:
    complaint = sys.argv[0] + ": Please set your PYTHONIOENCODING envariable to utf8\n"
    sys.stderr.write(complaint)
    sys.exit(1)


import unicodedata


def ucd_version_key(version):
    """Return a dotted version string such as "6.0.0" as a tuple of ints.

    Comparing the raw strings is lexicographic, so "10.0.0" would sort
    before "6.0.0" and a newer database would be reported as out of date.
    Tuples of ints compare numerically, component by component.

    Args:
        version: dotted numeric version string, e.g. "5.2.0".

    Returns:
        Tuple of ints, e.g. (5, 2, 0).
    """
    return tuple(int(part) for part in version.split("."))


# Warn (but keep running) when the interpreter's Unicode Character Database
# is older than 6.0.0.
if ucd_version_key(unicodedata.unidata_version) < ucd_version_key("6.0.0"):
    print("WARNING: Your old UCD is out of date, expected 6.0.0 but got",
           unicodedata.unidata_version)

# The build counts as wide only when sys.maxunicode exceeds 65536;
# otherwise warn, but keep running.
narrow_build = sys.maxunicode <= 0x10000
wide_enough = not narrow_build
if narrow_build:
    print("WARNING: Narrow build detected, your Python lacks full Unicode support!!")

# Strings that the checks further down expect `^\w+$` to match in full:
# a match is reported as "pass", a miss as "FAIL".
pass_data = [
    "po\N{LATIN SMALL LETTER LONG S}t", 

    "caf\N{LATIN SMALL LETTER E WITH ACUTE}",
    "cafe\N{COMBINING ACUTE ACCENT}",     # marks are word chars!!
    "\N{CIRCLED LATIN CAPITAL LETTER K}",   # Other_Uppercase, GC=So

    "\N{COMBINING GREEK YPOGEGRAMMENI}",  # Other_Lowercase
    "\N{HEBREW POINT SHEVA}",

    "\N{MATHEMATICAL DOUBLE-STRUCK DIGIT ZERO}", # astral digit

    # wide digits
    "１２３",

    "\N{ROMAN NUMERAL TWELVE}",    # GC=Nl
    "\N{RUNIC TVIMADUR SYMBOL}",   # GC=Nl
    "\N{GOTHIC LETTER NINETY}",   # astral and  GC=Nl

    "𝔘𝔫𝔦𝔠𝔬𝔡𝔢", "𐐔𐐯𐑅𐐨𐑉𐐯𐐻",   # astral
    "\u0526\u0527",         # Unicode 6

  # all connectors are valid word chars
    "under_score", "connector‿punctuation",

  # lc/tc/uc: lowercase, titlecase and uppercase spellings
    "ᾲ_στο_διάολο", "Ὰͅ_Στο_Διάολο", "ᾺΙ_ΣΤΟ_ΔΙΆΟΛΟ",

  # go goth: Latin-letter text, then Gothic-script text joined by connectors
    "ATTA_UNSAR_ÞU_IN_HIMINAM", "𐌰𐍄𐍄𐌰‿𐌿𐌽𐍃𐌰𐍂‿𐌸𐌿‿𐌹𐌽‿𐌷𐌹𐌼𐌹𐌽𐌰𐌼",
]

# Strings that the checks further down expect `^\w+$` NOT to match:
# a miss is reported as "pass", a match as "FAIL".
fail_data = [

  # these are all GC=No, not GC=Nd or GC=Nl, so *not* word chars per UTS#18 RL1.2a:
    "¹²³",
    "₁₂₃",
    "¼½¾",
    "\N{PARENTHESIZED DIGIT THREE}",

]

# Running tallies for each library; the summary after the loops reads them.
re_pass,    re_fail    = 0, 0
regex_pass, regex_fail = 0, 0

# Matches only when the string consists entirely of word characters.
WORD_ONLY_PATTERN = r'^\w+$'


def check_word_string(lib, lib_name, text, expect_match):
    """Run the word-only pattern over ``text`` with ``lib`` and print a verdict.

    Args:
        lib: regex module to use (``re`` or ``regex``); it must provide
            ``search``, ``IGNORECASE`` and ``UNICODE``.
        lib_name: label printed for the library ("re" or "regex").
        text: the string under test.
        expect_match: True when the whole string is expected to consist of
            word characters, False when it is expected not to.

    Returns:
        True when the library's answer agrees with ``expect_match``.
    """
    # Flags are bit masks, so combine them with | rather than +.
    matched = lib.search(WORD_ONLY_PATTERN, text,
                         lib.IGNORECASE | lib.UNICODE) is not None
    ok = (matched == expect_match)
    # The wording reports what the library actually found ("all" when the
    # pattern matched, "non" when it did not), identically for both
    # libraries; "%-5s" pads "re" so the columns line up with "regex".
    message = "%s lib %-5s found %s alphanumeric string" % (
        "pass" if ok else "FAIL", lib_name, "all" if matched else "non")
    print(message, text)
    return ok


# pass_data entries must match the pattern; fail_data entries must not.
for expect_match, samples in ((True, pass_data), (False, fail_data)):
    for text in samples:  # deliberately not named `str` (shadows the builtin)
        if check_word_string(re, "re", text, expect_match):
            re_pass += 1
        else:
            re_fail += 1

        if check_word_string(regex, "regex", text, expect_match):
            regex_pass += 1
        else:
            regex_fail += 1

        # Blank line between the reports for successive strings.
        print("")


# Each string was checked once per library, so pass + fail is the number of
# tests that library ran.
re_total, regex_total = re_pass + re_fail, regex_pass + regex_fail

print("")

# One summary line per library, in the same order as the per-string reports.
for lib_label, passed, total in (
        ("re    lib passed", re_pass, re_total),
        ("regex lib passed", regex_pass, regex_total)):
    print(lib_label, passed, "of", total, "tests")


##########################################################
# Sample output from running this script on a narrow build:
# WARNING: Narrow build detected, your Python lacks full Unicode support!!
# pass lib re    found all alphanumeric string poſt
# pass lib regex found non alphanumeric string poſt
# 
# pass lib re    found all alphanumeric string café
# pass lib regex found non alphanumeric string café
# 
# FAIL lib re    found non alphanumeric string café
# pass lib regex found non alphanumeric string café
# 
# FAIL lib re    found non alphanumeric string Ⓚ
# pass lib regex found non alphanumeric string Ⓚ
# 
# FAIL lib re    found non alphanumeric string ͅ
# pass lib regex found non alphanumeric string ͅ
# 
# FAIL lib re    found non alphanumeric string ְ
# pass lib regex found non alphanumeric string ְ
# 
# FAIL lib re    found non alphanumeric string 𝟘
# FAIL lib regex found non alphanumeric string 𝟘
# 
# pass lib re    found all alphanumeric string １２３
# pass lib regex found non alphanumeric string １２３
# 
# pass lib re    found all alphanumeric string Ⅻ
# pass lib regex found non alphanumeric string Ⅻ
# 
# pass lib re    found all alphanumeric string ᛯ
# pass lib regex found non alphanumeric string ᛯ
# 
# FAIL lib re    found non alphanumeric string 𐍁
# FAIL lib regex found non alphanumeric string 𐍁
# 
# FAIL lib re    found non alphanumeric string 𝔘𝔫𝔦𝔠𝔬𝔡𝔢
# FAIL lib regex found non alphanumeric string 𝔘𝔫𝔦𝔠𝔬𝔡𝔢
# 
# FAIL lib re    found non alphanumeric string 𐐔𐐯𐑅𐐨𐑉𐐯𐐻
# FAIL lib regex found non alphanumeric string 𐐔𐐯𐑅𐐨𐑉𐐯𐐻
# 
# pass lib re    found all alphanumeric string Ԧԧ
# pass lib regex found non alphanumeric string Ԧԧ
# 
# pass lib re    found all alphanumeric string under_score
# pass lib regex found non alphanumeric string under_score
# 
# FAIL lib re    found non alphanumeric string connector‿punctuation
# pass lib regex found non alphanumeric string connector‿punctuation
# 
# pass lib re    found all alphanumeric string ᾲ_στο_διάολο
# pass lib regex found non alphanumeric string ᾲ_στο_διάολο
# 
# FAIL lib re    found non alphanumeric string Ὰͅ_Στο_Διάολο
# pass lib regex found non alphanumeric string Ὰͅ_Στο_Διάολο
# 
# pass lib re    found all alphanumeric string ᾺΙ_ΣΤΟ_ΔΙΆΟΛΟ
# pass lib regex found non alphanumeric string ᾺΙ_ΣΤΟ_ΔΙΆΟΛΟ
# 
# pass lib re    found all alphanumeric string ATTA_UNSAR_ÞU_IN_HIMINAM
# pass lib regex found non alphanumeric string ATTA_UNSAR_ÞU_IN_HIMINAM
# 
# FAIL lib re    found non alphanumeric string 𐌰𐍄𐍄𐌰‿𐌿𐌽𐍃𐌰𐍂‿𐌸𐌿‿𐌹𐌽‿𐌷𐌹𐌼𐌹𐌽𐌰𐌼
# FAIL lib regex found non alphanumeric string 𐌰𐍄𐍄𐌰‿𐌿𐌽𐍃𐌰𐍂‿𐌸𐌿‿𐌹𐌽‿𐌷𐌹𐌼𐌹𐌽𐌰𐌼
# 
# FAIL lib re    found all alphanumeric string ¹²³
# pass lib regex found non alphanumeric string ¹²³
# 
# FAIL lib re    found all alphanumeric string ₁₂₃
# pass lib regex found non alphanumeric string ₁₂₃
# 
# FAIL lib re    found all alphanumeric string ¼½¾
# pass lib regex found non alphanumeric string ¼½¾
# 
# FAIL lib re    found all alphanumeric string ⑶
# pass lib regex found non alphanumeric string ⑶
# 
# 
# re    lib passed 10 of 25 tests
# regex lib passed 20 of 25 tests