diff -r 6cdb7981eb0f Lib/sre_compile.py --- a/Lib/sre_compile.py Sun Sep 14 16:21:27 2014 +0300 +++ b/Lib/sre_compile.py Sun Sep 14 17:30:29 2014 +0300 @@ -121,10 +121,10 @@ emit(OPCODES[op]) if flags & SRE_FLAG_MULTILINE: av = AT_MULTILINE.get(av, av) + if flags & SRE_FLAG_UNICODE: + av = AT_UNICODE.get(av, av) if flags & SRE_FLAG_LOCALE: av = AT_LOCALE.get(av, av) - elif flags & SRE_FLAG_UNICODE: - av = AT_UNICODE.get(av, av) emit(ATCODES[av]) elif op is BRANCH: emit(OPCODES[op]) @@ -142,10 +142,10 @@ code[tail] = _len(code) - tail elif op is CATEGORY: emit(OPCODES[op]) + if flags & SRE_FLAG_UNICODE: + av = CH_UNICODE.get(av, av) if flags & SRE_FLAG_LOCALE: - av = CH_LOCALE[av] - elif flags & SRE_FLAG_UNICODE: - av = CH_UNICODE[av] + av = CH_LOCALE.get(av, av) emit(CHCODES[av]) elif op is GROUPREF: if flags & SRE_FLAG_IGNORECASE: @@ -188,12 +188,11 @@ elif op is BIGCHARSET: code.extend(av) elif op is CATEGORY: + if flags & SRE_FLAG_UNICODE: + av = CH_UNICODE.get(av, av) if flags & SRE_FLAG_LOCALE: - emit(CHCODES[CH_LOCALE[av]]) - elif flags & SRE_FLAG_UNICODE: - emit(CHCODES[CH_UNICODE[av]]) - else: - emit(CHCODES[av]) + av = CH_LOCALE.get(av, av) + emit(CHCODES[av]) else: raise error("internal: unsupported set operator") emit(OPCODES[FAILURE]) diff -r 6cdb7981eb0f Lib/sre_constants.py --- a/Lib/sre_constants.py Sun Sep 14 16:21:27 2014 +0300 +++ b/Lib/sre_constants.py Sun Sep 14 17:30:29 2014 +0300 @@ -13,7 +13,7 @@ # update when constants are added or removed -MAGIC = 20031017 +MAGIC = 20140914 from _sre import MAXREPEAT @@ -70,10 +70,15 @@ AT_END = "at_end" AT_END_LINE = "at_end_line" AT_END_STRING = "at_end_string" + +AT_UNI_BOUNDARY = "at_uni_boundary" +AT_UNI_NON_BOUNDARY = "at_uni_non_boundary" + AT_LOC_BOUNDARY = "at_loc_boundary" AT_LOC_NON_BOUNDARY = "at_loc_non_boundary" -AT_UNI_BOUNDARY = "at_uni_boundary" -AT_UNI_NON_BOUNDARY = "at_uni_non_boundary" + +AT_UNI_LOC_BOUNDARY = "at_uni_loc_boundary" +AT_UNI_LOC_NON_BOUNDARY = "at_uni_loc_non_boundary" # 
categories CATEGORY_DIGIT = "category_digit" @@ -84,8 +89,7 @@ CATEGORY_NOT_WORD = "category_not_word" CATEGORY_LINEBREAK = "category_linebreak" CATEGORY_NOT_LINEBREAK = "category_not_linebreak" -CATEGORY_LOC_WORD = "category_loc_word" -CATEGORY_LOC_NOT_WORD = "category_loc_not_word" + CATEGORY_UNI_DIGIT = "category_uni_digit" CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit" CATEGORY_UNI_SPACE = "category_uni_space" @@ -95,6 +99,20 @@ CATEGORY_UNI_LINEBREAK = "category_uni_linebreak" CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak" +CATEGORY_LOC_DIGIT = "category_loc_digit" +CATEGORY_LOC_NOT_DIGIT = "category_loc_not_digit" +CATEGORY_LOC_SPACE = "category_loc_space" +CATEGORY_LOC_NOT_SPACE = "category_loc_not_space" +CATEGORY_LOC_WORD = "category_loc_word" +CATEGORY_LOC_NOT_WORD = "category_loc_not_word" + +CATEGORY_UNI_LOC_DIGIT = "category_uni_loc_digit" +CATEGORY_UNI_LOC_NOT_DIGIT = "category_uni_loc_not_digit" +CATEGORY_UNI_LOC_SPACE = "category_uni_loc_space" +CATEGORY_UNI_LOC_NOT_SPACE = "category_uni_loc_not_space" +CATEGORY_UNI_LOC_WORD = "category_uni_loc_word" +CATEGORY_UNI_LOC_NOT_WORD = "category_uni_loc_not_word" + OPCODES = [ # failure=0 success=1 (just because it looks better that way :-) @@ -128,8 +146,9 @@ ATCODES = [ AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY, AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING, - AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY, - AT_UNI_NON_BOUNDARY + AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, + AT_UNI_BOUNDARY, AT_UNI_NON_BOUNDARY, + AT_UNI_LOC_BOUNDARY, AT_UNI_LOC_NON_BOUNDARY, ] CHCODES = [ @@ -139,16 +158,16 @@ CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT, CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD, CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK, - CATEGORY_UNI_NOT_LINEBREAK + CATEGORY_UNI_NOT_LINEBREAK, + CATEGORY_LOC_DIGIT, CATEGORY_LOC_NOT_DIGIT, + CATEGORY_LOC_SPACE, CATEGORY_LOC_NOT_SPACE, + CATEGORY_UNI_LOC_DIGIT, 
CATEGORY_UNI_LOC_NOT_DIGIT, + CATEGORY_UNI_LOC_SPACE, CATEGORY_UNI_LOC_NOT_SPACE, + CATEGORY_UNI_LOC_WORD, CATEGORY_UNI_LOC_NOT_WORD, ] def makedict(list): - d = {} - i = 0 - for item in list: - d[item] = i - i = i + 1 - return d + return {item: i for i, item in enumerate(list)} OPCODES = makedict(OPCODES) ATCODES = makedict(ATCODES) @@ -169,12 +188,15 @@ AT_LOCALE = { AT_BOUNDARY: AT_LOC_BOUNDARY, - AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY + AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY, + + AT_UNI_BOUNDARY: AT_UNI_LOC_BOUNDARY, + AT_UNI_NON_BOUNDARY: AT_UNI_LOC_NON_BOUNDARY, } AT_UNICODE = { AT_BOUNDARY: AT_UNI_BOUNDARY, - AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY + AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY, } CH_LOCALE = { @@ -184,8 +206,13 @@ CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE, CATEGORY_WORD: CATEGORY_LOC_WORD, CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD, - CATEGORY_LINEBREAK: CATEGORY_LINEBREAK, - CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK + + CATEGORY_UNI_DIGIT: CATEGORY_UNI_LOC_DIGIT, + CATEGORY_UNI_NOT_DIGIT: CATEGORY_UNI_LOC_NOT_DIGIT, + CATEGORY_UNI_SPACE: CATEGORY_UNI_LOC_SPACE, + CATEGORY_UNI_NOT_SPACE: CATEGORY_UNI_LOC_NOT_SPACE, + CATEGORY_UNI_WORD: CATEGORY_UNI_LOC_WORD, + CATEGORY_UNI_NOT_WORD: CATEGORY_UNI_LOC_NOT_WORD, } CH_UNICODE = { @@ -196,7 +223,7 @@ CATEGORY_WORD: CATEGORY_UNI_WORD, CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD, CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK, - CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK + CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK, } # flags diff -r 6cdb7981eb0f Lib/sre_parse.py --- a/Lib/sre_parse.py Sun Sep 14 16:21:27 2014 +0300 +++ b/Lib/sre_parse.py Sun Sep 14 17:30:29 2014 +0300 @@ -730,9 +730,15 @@ flags |= SRE_FLAG_UNICODE elif flags & SRE_FLAG_UNICODE: raise ValueError("ASCII and UNICODE flags are incompatible") + elif flags & SRE_FLAG_LOCALE: + raise ValueError("ASCII and LOCALE flags are incompatible") else: if flags & SRE_FLAG_UNICODE: raise ValueError("can't use UNICODE flag with a bytes pattern") + if not 
flags & SRE_FLAG_ASCII: + flags |= SRE_FLAG_ASCII + elif flags & SRE_FLAG_LOCALE: + raise ValueError("ASCII and LOCALE flags are incompatible") return flags def parse(str, flags=0, pattern=None): diff -r 6cdb7981eb0f Lib/test/test_re.py --- a/Lib/test/test_re.py Sun Sep 14 16:21:27 2014 +0300 +++ b/Lib/test/test_re.py Sun Sep 14 17:30:29 2014 +0300 @@ -590,9 +590,12 @@ self.assertEqual(_sre.getlower(ord('A'), 0), ord('a')) self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a')) self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a')) + self.assertEqual(_sre.getlower(ord('A'), re.LOCALE|re.UNICODE), ord('a')) self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC") + self.assertEqual(re.match("abc", "ABC", re.I|re.L).group(0), "ABC") + self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC") def test_not_literal(self): self.assertEqual(re.search("\s([^a])", " b").group(1), "b") @@ -1037,6 +1040,70 @@ self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE) self.assertRaises(ValueError, re.compile, '(?au)\w') + def test_locale_flag(self): + import locale + _, enc = locale.getlocale(locale.LC_CTYPE) + # Search non-ASCII letter + for i in range(128, 256): + try: + c = bytes([i]).decode(enc) + sletter = c.lower() + if sletter == c: continue + bletter = sletter.encode(enc) + if len(bletter) != 1: continue + if bletter.decode(enc) != sletter: continue + spat = re.escape(c) + bpat = re.escape(bytes([i])) + break + except (UnicodeError, TypeError): + pass + else: + sletter = bletter = None + spat = 'A' + bpat = b'A' + # String patterns + for flags in (0, re.UNICODE): + pat = re.compile(spat, flags | re.LOCALE | re.IGNORECASE) + if sletter: + self.assertTrue(pat.match(sletter)) + pat = re.compile('(?L)' + spat, flags | re.IGNORECASE) + if sletter: + self.assertTrue(pat.match(sletter)) + pat = re.compile('\w', flags | re.LOCALE) + if sletter: + 
self.assertTrue(pat.match(sletter)) + pat = re.compile('(?L)\w', flags) + if sletter: + self.assertTrue(pat.match(sletter)) + # Bytes patterns + pat = re.compile(bpat, re.LOCALE | re.IGNORECASE) + if bletter: + self.assertTrue(pat.match(bletter)) + pat = re.compile(b'(?L)' + bpat, re.IGNORECASE) + if bletter: + self.assertTrue(pat.match(bletter)) + pat = re.compile(bpat, re.IGNORECASE) + if bletter: + self.assertIsNone(pat.match(bletter)) + pat = re.compile(b'\w', re.LOCALE) + if bletter: + self.assertTrue(pat.match(bletter)) + pat = re.compile(b'(?L)\w') + if bletter: + self.assertTrue(pat.match(bletter)) + pat = re.compile(b'\w') + if bletter: + self.assertIsNone(pat.match(bletter)) + # Incompatibilities + self.assertRaises(ValueError, re.compile, b'\w', re.LOCALE | re.ASCII) + self.assertRaises(ValueError, re.compile, b'(?L)\w', re.ASCII) + self.assertRaises(ValueError, re.compile, b'(?a)\w', re.LOCALE) + self.assertRaises(ValueError, re.compile, b'(?aL)\w') + self.assertRaises(ValueError, re.compile, '\w', re.LOCALE | re.ASCII) + self.assertRaises(ValueError, re.compile, '(?L)\w', re.ASCII) + self.assertRaises(ValueError, re.compile, '(?a)\w', re.LOCALE) + self.assertRaises(ValueError, re.compile, '(?aL)\w') + def test_bug_6509(self): # Replacement strings of both types must parse properly. 
# all strings @@ -1275,7 +1342,13 @@ self.check(b'bytes pattern', "re.compile(b'bytes pattern')") self.check_flags(b'bytes pattern', re.A, - "re.compile(b'bytes pattern', re.ASCII)") + "re.compile(b'bytes pattern')") + + def test_locale(self): + self.check_flags('random pattern', re.L, + "re.compile('random pattern', re.LOCALE)") + self.check_flags(b'bytes pattern', re.L, + "re.compile(b'bytes pattern', re.LOCALE)") def test_quotes(self): self.check('random "double quoted" pattern', diff -r 6cdb7981eb0f Modules/_sre.c --- a/Modules/_sre.c Sun Sep 14 16:21:27 2014 +0300 +++ b/Modules/_sre.c Sun Sep 14 17:30:29 2014 +0300 @@ -48,6 +48,7 @@ #define SRE_CODE_BITS (8 * sizeof(SRE_CODE)) #include <ctype.h> +#include <wctype.h> /* name of this module, minus the leading underscore */ #if !defined(SRE_MODULE) @@ -144,6 +145,8 @@ /* locale-specific character predicates */ /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids * warnings when c's type supports only numbers < N+1 */ +#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0) +#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0) #define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0) #define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_') @@ -165,6 +168,18 @@ return (unsigned int) Py_UNICODE_TOLOWER(ch); } +/* unicode locale-specific character predicates */ + +#define SRE_UNI_LOC_IS_DIGIT(ch) ((ch) <= WCHAR_MAX ? iswdigit((ch)) : 0) +#define SRE_UNI_LOC_IS_SPACE(ch) ((ch) <= WCHAR_MAX ? iswspace((ch)) : 0) +#define SRE_UNI_LOC_IS_ALNUM(ch) ((ch) <= WCHAR_MAX ? iswalnum((ch)) : 0) +#define SRE_UNI_LOC_IS_WORD(ch) (SRE_UNI_LOC_IS_ALNUM((ch)) || (ch) == '_') + +static unsigned int sre_lower_unicode_locale(unsigned int ch) +{ + return ((ch) <= WCHAR_MAX ? 
(unsigned int)towlower((ch)) : ch); +} + LOCAL(int) sre_category(SRE_CODE category, unsigned int ch) { @@ -187,6 +202,14 @@ case SRE_CATEGORY_NOT_LINEBREAK: return !SRE_IS_LINEBREAK(ch); + case SRE_CATEGORY_LOC_DIGIT: + return SRE_LOC_IS_DIGIT(ch); + case SRE_CATEGORY_LOC_NOT_DIGIT: + return !SRE_LOC_IS_DIGIT(ch); + case SRE_CATEGORY_LOC_SPACE: + return SRE_LOC_IS_SPACE(ch); + case SRE_CATEGORY_LOC_NOT_SPACE: + return !SRE_LOC_IS_SPACE(ch); case SRE_CATEGORY_LOC_WORD: return SRE_LOC_IS_WORD(ch); case SRE_CATEGORY_LOC_NOT_WORD: @@ -208,6 +231,19 @@ return SRE_UNI_IS_LINEBREAK(ch); case SRE_CATEGORY_UNI_NOT_LINEBREAK: return !SRE_UNI_IS_LINEBREAK(ch); + + case SRE_CATEGORY_UNI_LOC_DIGIT: + return SRE_UNI_LOC_IS_DIGIT(ch); + case SRE_CATEGORY_UNI_LOC_NOT_DIGIT: + return !SRE_UNI_LOC_IS_DIGIT(ch); + case SRE_CATEGORY_UNI_LOC_SPACE: + return SRE_UNI_LOC_IS_SPACE(ch); + case SRE_CATEGORY_UNI_LOC_NOT_SPACE: + return !SRE_UNI_LOC_IS_SPACE(ch); + case SRE_CATEGORY_UNI_LOC_WORD: + return SRE_UNI_LOC_IS_WORD(ch); + case SRE_CATEGORY_UNI_LOC_NOT_WORD: + return !SRE_UNI_LOC_IS_WORD(ch); } return 0; } @@ -285,11 +321,17 @@ int character, flags; if (!PyArg_ParseTuple(args, "ii", &character, &flags)) return NULL; - if (flags & SRE_FLAG_LOCALE) - return Py_BuildValue("i", sre_lower_locale(character)); - if (flags & SRE_FLAG_UNICODE) - return Py_BuildValue("i", sre_lower_unicode(character)); - return Py_BuildValue("i", sre_lower(character)); + if (flags & SRE_FLAG_UNICODE) { + if (flags & SRE_FLAG_LOCALE) + return PyLong_FromLong(sre_lower_unicode_locale(character)); + else + return PyLong_FromLong(sre_lower_unicode(character)); + } else { + if (flags & SRE_FLAG_LOCALE) + return PyLong_FromLong(sre_lower_locale(character)); + else + return PyLong_FromLong(sre_lower(character)); + } } LOCAL(void) @@ -400,12 +442,18 @@ state->pos = start; state->endpos = end; - if (pattern->flags & SRE_FLAG_LOCALE) - state->lower = sre_lower_locale; - else if (pattern->flags & SRE_FLAG_UNICODE) - 
state->lower = sre_lower_unicode; - else - state->lower = sre_lower; + if (pattern->flags & SRE_FLAG_UNICODE) { + if (pattern->flags & SRE_FLAG_LOCALE) + state->lower = sre_lower_unicode_locale; + else + state->lower = sre_lower_unicode; + } + else { + if (pattern->flags & SRE_FLAG_LOCALE) + state->lower = sre_lower_locale; + else + state->lower = sre_lower; + } return string; err: @@ -1239,11 +1287,11 @@ size_t i; int flags = obj->flags; - /* Omit re.UNICODE for valid string patterns. */ - if (obj->isbytes == 0 && - (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) == - SRE_FLAG_UNICODE) + /* Omit default flag. */ + if (obj->isbytes == 0) flags &= ~SRE_FLAG_UNICODE; + else if (obj->isbytes == 1) + flags &= ~SRE_FLAG_ASCII; flag_items = PyList_New(0); if (!flag_items) @@ -1616,6 +1664,10 @@ case SRE_CATEGORY_NOT_WORD: case SRE_CATEGORY_LINEBREAK: case SRE_CATEGORY_NOT_LINEBREAK: + case SRE_CATEGORY_LOC_DIGIT: + case SRE_CATEGORY_LOC_NOT_DIGIT: + case SRE_CATEGORY_LOC_SPACE: + case SRE_CATEGORY_LOC_NOT_SPACE: case SRE_CATEGORY_LOC_WORD: case SRE_CATEGORY_LOC_NOT_WORD: case SRE_CATEGORY_UNI_DIGIT: @@ -1626,6 +1678,12 @@ case SRE_CATEGORY_UNI_NOT_WORD: case SRE_CATEGORY_UNI_LINEBREAK: case SRE_CATEGORY_UNI_NOT_LINEBREAK: + case SRE_CATEGORY_UNI_LOC_DIGIT: + case SRE_CATEGORY_UNI_LOC_NOT_DIGIT: + case SRE_CATEGORY_UNI_LOC_SPACE: + case SRE_CATEGORY_UNI_LOC_NOT_SPACE: + case SRE_CATEGORY_UNI_LOC_WORD: + case SRE_CATEGORY_UNI_LOC_NOT_WORD: break; default: FAIL; @@ -1697,6 +1755,8 @@ case SRE_AT_LOC_NON_BOUNDARY: case SRE_AT_UNI_BOUNDARY: case SRE_AT_UNI_NON_BOUNDARY: + case SRE_AT_UNI_LOC_BOUNDARY: + case SRE_AT_UNI_LOC_NON_BOUNDARY: break; default: FAIL; diff -r 6cdb7981eb0f Modules/sre_constants.h --- a/Modules/sre_constants.h Sun Sep 14 16:21:27 2014 +0300 +++ b/Modules/sre_constants.h Sun Sep 14 17:30:29 2014 +0300 @@ -11,7 +11,7 @@ * See the _sre.c file for information on usage and redistribution. 
*/ -#define SRE_MAGIC 20031017 +#define SRE_MAGIC 20140914 #define SRE_OP_FAILURE 0 #define SRE_OP_SUCCESS 1 #define SRE_OP_ANY 2 @@ -56,6 +56,8 @@ #define SRE_AT_LOC_NON_BOUNDARY 9 #define SRE_AT_UNI_BOUNDARY 10 #define SRE_AT_UNI_NON_BOUNDARY 11 +#define SRE_AT_UNI_LOC_BOUNDARY 12 +#define SRE_AT_UNI_LOC_NON_BOUNDARY 13 #define SRE_CATEGORY_DIGIT 0 #define SRE_CATEGORY_NOT_DIGIT 1 #define SRE_CATEGORY_SPACE 2 @@ -74,6 +76,16 @@ #define SRE_CATEGORY_UNI_NOT_WORD 15 #define SRE_CATEGORY_UNI_LINEBREAK 16 #define SRE_CATEGORY_UNI_NOT_LINEBREAK 17 +#define SRE_CATEGORY_LOC_DIGIT 18 +#define SRE_CATEGORY_LOC_NOT_DIGIT 19 +#define SRE_CATEGORY_LOC_SPACE 20 +#define SRE_CATEGORY_LOC_NOT_SPACE 21 +#define SRE_CATEGORY_UNI_LOC_DIGIT 22 +#define SRE_CATEGORY_UNI_LOC_NOT_DIGIT 23 +#define SRE_CATEGORY_UNI_LOC_SPACE 24 +#define SRE_CATEGORY_UNI_LOC_NOT_SPACE 25 +#define SRE_CATEGORY_UNI_LOC_WORD 26 +#define SRE_CATEGORY_UNI_LOC_NOT_WORD 27 #define SRE_FLAG_TEMPLATE 1 #define SRE_FLAG_IGNORECASE 2 #define SRE_FLAG_LOCALE 4 diff -r 6cdb7981eb0f Modules/sre_lib.h --- a/Modules/sre_lib.h Sun Sep 14 16:21:27 2014 +0300 +++ b/Modules/sre_lib.h Sun Sep 14 17:30:29 2014 +0300 @@ -95,6 +95,24 @@ SRE_UNI_IS_WORD((int) ptr[0]) : 0; return thisp == thatp; + case SRE_AT_UNI_LOC_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_UNI_LOC_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_UNI_LOC_IS_WORD((int) ptr[0]) : 0; + return thisp != thatp; + + case SRE_AT_UNI_LOC_NON_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_UNI_LOC_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_UNI_LOC_IS_WORD((int) ptr[0]) : 0; + return thisp == thatp; + } return 0;