curses.ascii: make isblank() match TAB and SPACE; add predicate tests

isblank() previously tested for code points 8 and 32 (BACKSPACE and SPACE).
It now tests for 9 (TAB) and 32 (SPACE).

The new Lib/test/test_curses_ascii.py generates one test per is*() predicate
from a table of expected character classes.  The generated iscntrl and
ispunct tests are marked as expected failures (see issue #9770).

diff --git a/Lib/curses/ascii.py b/Lib/curses/ascii.py
--- a/Lib/curses/ascii.py
+++ b/Lib/curses/ascii.py
@@ -54,7 +54,7 @@
 def isalnum(c): return isalpha(c) or isdigit(c)
 def isalpha(c): return isupper(c) or islower(c)
 def isascii(c): return _ctoi(c) <= 127 # ?
-def isblank(c): return _ctoi(c) in (8,32)
+def isblank(c): return _ctoi(c) in (9, 32) # tab, space
 def iscntrl(c): return _ctoi(c) <= 31
 def isdigit(c): return _ctoi(c) >= 48 and _ctoi(c) <= 57
 def isgraph(c): return _ctoi(c) >= 33 and _ctoi(c) <= 126
diff --git a/Lib/test/test_curses_ascii.py b/Lib/test/test_curses_ascii.py
new file mode 100644
--- /dev/null
+++ b/Lib/test/test_curses_ascii.py
@@ -0,0 +1,121 @@
+"""Test character classification functions from curses.ascii module."""
+# tests are written to be run on both Python 2 and 3 from the same source
+import sys
+import unittest
+
+try:
+    from test.support import import_module, run_with_locale
+except ImportError: # Python 2
+    from test.test_support import import_module, run_with_locale
+
+ascii = import_module('curses.ascii') # skip tests if there is no curses.ascii
+
+PY2 = sys.version_info < (3,)
+
+# like corresponding ctype.h character classes from c11 but
+# locale-independent -- "C" locale (ascii-only)
+character_classes = C = {
+    'blank': '\t ', # standard blank characters
+    'cntrl': ''.join(map(chr, list(range(0x1f + 1)) + [0x7f])), # control
+                                                                # characters
+    'digit': '0123456789', # string.digits
+    'lower': 'abcdefghijklmnopqrstuvwxyz', # string.ascii_lowercase
+    'print': ''.join(map(chr, range(0x20, 0x7e + 1))), # printing
+                                                       # characters
+    'space': '\t\n\v\f\r ', # standard white-space characters,
+                            # string.whitespace in C locale
+    'upper': 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', # string.ascii_uppercase
+    'xdigit': '0123456789ABCDEFabcdef', # string.hexdigits
+}
+C['alpha'] = C['upper'] + C['lower']
+C['alnum'] = C['digit'] + C['alpha']
+# all printing characters except space
+C['graph'] = ''.join(sorted(set(C['print']) - set(' ')))
+# string.punctuation in C locale
+C['punct'] = ''.join(sorted(set(C['print']) - set(C['space'] + C['alnum'])))
+
+def add_ctype_h_tests(testcase_class):
+    """Generate tests for all character classes in character_classes table."""
+    for char_class_name, member_chars in sorted(character_classes.items()):
+        predicate_name = 'is' + char_class_name
+        def test(self,
+                 expected_chars=set(member_chars),
+                 ismember=getattr(ascii, predicate_name)):
+            # test 0...255 characters
+            all_chars = list(map(chr, range(0x100)))
+            self.assertEqual(set(filter(ismember, all_chars)), expected_chars)
+
+            # test 0...255 ints
+            expected_ints = set(map(ord, expected_chars))
+            member_ints = set(filter(ismember, map(ord, all_chars)))
+            self.assertEqual(member_ints, expected_ints)
+
+            # test negative ints
+            self.assertFalse(list(filter(ismember, [-257, -2, -1]))) #XXX
+
+            # test a byte in ascii range
+            c = next(iter(expected_chars))
+            self.assertTrue(ismember(c))
+            b = c.encode('ascii')
+            if not PY2:
+                if ismember not in (ascii.isspace, ascii.isblank): #XXX
+                                                                   #see
+                                                                   #issue
+                                                                   ##9770
+                    self.assertRaises(TypeError, ismember, [b])
+                else:
+                    self.assertFalse(ismember(b))
+            else: # Python 2
+                self.assertTrue(ismember(b))
+
+            # test a Unicode character in ascii range
+            u = b.decode('ascii')
+            if not PY2:
+                self.assertTrue(ismember(u))
+            else: # Python 2
+                self.assertFalse(ismember(u))
+
+            # test surrogate and non-bmp codepoints
+            for s in ['\udead', '\U0001F382']:
+                if isinstance(s, bytes): # Python 2
+                    s = s.decode('unicode_escape')
+                self.assertFalse(ismember(s))
+
+            # test non-int, non-char
+            value = None
+            if not PY2:
+                if ismember not in (ascii.isspace, ascii.isblank): #XXX
+                    self.assertRaises(TypeError, ismember, [value])
+                else:
+                    self.assertFalse(ismember(value))
+            else: # Python 2
+                self.assertFalse(ismember(value))
+
+        if char_class_name in ('cntrl', 'punct'): #XXX see issue #9770
+            test = unittest.expectedFailure(test)
+
+        test.__name__ = 'test_' + predicate_name
+        setattr(testcase_class, test.__name__, test)
+    return testcase_class
+
+@add_ctype_h_tests
+class TestCharacterClassificationFunctions(unittest.TestCase):
+    pass
+
+@run_with_locale("LC_ALL", "C")
+def _test_test_data():
+    # test that character classes correspond to constants from string module
+    import string
+
+    assert set(string.ascii_lowercase) == set(C['lower'])
+    assert set(string.ascii_uppercase) == set(C['upper'])
+    assert set(string.digits) == set(C['digit'])
+    assert set(string.hexdigits) == set(C['xdigit'])
+    # these constants may be locale-dependent on Python 2
+    assert set(string.punctuation) == set(C['punct'])
+    assert set(string.printable) == set(C['print'] + C['space']) #!!!
+    assert set(string.whitespace) == set(C['space'])
+
+if __name__ == '__main__':
+    _test_test_data()
+    unittest.main()