"""Verify whitespace handling in str.split and .strip methods.""" import string import unicodedata as ud whitespace = frozenset(string.whitespace) def iswhitespace(c): # According to the documentation, this is the definitive # test for whitespaceness. return ud.category(c) == 'Zs' or ud.bidirectional(c) in ('WS', 'B', 'S') def fmt(c): """Format character c as U+XXXX.""" return 'U+%04X {%s}' % (ord(c), ud.name(c, '')) def every_unicode(): """Iterate over the entire Unicode character set.""" i = 0 try: while True: yield chr(i) i += 1 except ValueError: assert i == 0x110000 # =========================================================================== # compare_* functions return None on success and an error message on failure. def compare_whitespace_tests(c): if c.isspace(): if not iswhitespace(c): return 'str.isspace() returns True on non-whitespace character' else: if iswhitespace(c): return 'str.isspace() returns False on whitespace character' def compare_string_const(c): if c in whitespace: if not c.isspace(): return "non-whitespace character in string.whitespace" else: if c.isspace(): return "whitespace character missing from string.whitespace" def compare_split(c): s = 'a' + c + 'b' if c.isspace(): if s.split() != ['a', 'b']: return "string.split() fails to split on whitespace" else: if s.split() == ['a', 'b']: return "string.split() splits on non-whitespace" def compare_strip(c): s = c + 'a' + c if c.isspace(): if s.strip() != 'a': return "string.strip() fails to strip whitespace" else: if s.strip() == 'a': return "string.strip() strips non-whitespace" # =========================================================================== def main(): for c in every_unicode(): for test in (compare_whitespace_tests, compare_string_const, compare_split, compare_strip ): err = test(c) if err is not None: print(fmt(c), err) if __name__ == '__main__': main()