Index: Parser/tokenizer.c =================================================================== --- Parser/tokenizer.c (revision 58993) +++ Parser/tokenizer.c (working copy) @@ -1253,24 +1253,21 @@ /* Identifier (most frequent token!) */ nonascii = 0; + int is_raw_string = 0; if (is_potential_identifier_start(c)) { - /* Process r"", u"" and ur"" */ - switch (c) { - case 'r': - case 'R': + /* Process b"", r"" and br"" */ + if (c == 'b' || c == 'B') { c = tok_nextc(tok); if (c == '"' || c == '\'') goto letter_quote; - break; - case 'b': - case 'B': + } + if (c == 'r' || c == 'R') { c = tok_nextc(tok); - if (c == 'r' || c == 'R') - c = tok_nextc(tok); - if (c == '"' || c == '\'') + if (c == '"' || c == '\'') { + is_raw_string = 1; goto letter_quote; - break; - } + } + } while (is_potential_identifier_char(c)) { if (c >= 128) nonascii = 1; @@ -1417,59 +1414,51 @@ *p_end = tok->cur; return NUMBER; } - + letter_quote: /* String */ if (c == '\'' || c == '"') { - Py_ssize_t quote2 = tok->cur - tok->start + 1; - int quote = c; - int triple = 0; - int tripcount = 0; - for (;;) { - c = tok_nextc(tok); - if (c == '\n') { - if (!triple) { - tok->done = E_EOLS; - tok_backup(tok, c); - return ERRORTOKEN; - } - tripcount = 0; - tok->cont_line = 1; /* multiline string. 
*/ - } - else if (c == EOF) { - if (triple) - tok->done = E_EOFS; - else - tok->done = E_EOLS; - tok->cur = tok->inp; - return ERRORTOKEN; - } - else if (c == quote) { - tripcount++; - if (tok->cur - tok->start == quote2) { - c = tok_nextc(tok); - if (c == quote) { - triple = 1; - tripcount = 0; - continue; - } - tok_backup(tok, c); - } - if (!triple || tripcount == 3) - break; - } - else if (c == '\\') { - tripcount = 0; - c = tok_nextc(tok); - if (c == EOF) { - tok->done = E_EOLS; - tok->cur = tok->inp; - return ERRORTOKEN; - } - } + int quote = c; + int quote_size = 1; /* 1 or 3 */ + int end_quote_size = 0; + + /* Find the quote size and start of string */ + c = tok_nextc(tok); + if (c == quote) { + c = tok_nextc(tok); + if (c == quote) + quote_size = 3; else - tripcount = 0; + end_quote_size = 1; /* empty string found */ } + if (c != quote) + tok_backup(tok, c); + + /* Get rest of string */ + while (end_quote_size != quote_size) { + c = tok_nextc(tok); + if (c == EOF) { + if (quote_size == 3) + tok->done = E_EOFS; + else + tok->done = E_EOLS; + tok->cur = tok->inp; + return ERRORTOKEN; + } + if (quote_size == 1 && c == '\n') { + tok->done = E_EOLS; + tok->cur = tok->inp; + return ERRORTOKEN; + } + if (c == quote) + end_quote_size += 1; + else { + end_quote_size = 0; + if (c == '\\' && is_raw_string == 0) + c = tok_nextc(tok); /* skip escaped char */ + } + } + *p_start = tok->start; *p_end = tok->cur; return STRING; Index: Lib/sgmllib.py =================================================================== --- Lib/sgmllib.py (revision 58993) +++ Lib/sgmllib.py (working copy) @@ -33,7 +33,7 @@ tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*') attrfind = re.compile( r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*' - r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?') + r'''(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?''') class SGMLParseError(RuntimeError): Index: Lib/markupbase.py 
=================================================================== --- Lib/markupbase.py (revision 58993) +++ Lib/markupbase.py (working copy) @@ -9,7 +9,7 @@ import re _declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match -_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match +_declstringlit_match = re.compile(r'''(\'[^\']*\'|"[^"]*")\s*''').match _commentclose = re.compile(r'--\s*>') _markedsectionclose = re.compile(r']\s*]\s*>') Index: Lib/textwrap.py =================================================================== --- Lib/textwrap.py (revision 58993) +++ Lib/textwrap.py (working copy) @@ -73,12 +73,12 @@ wordsep_re = re.compile( r'(\s+|' # any whitespace r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words - r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash + r'''(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))''') # em-dash # XXX this is not locale-aware sentence_end_re = re.compile(r'[a-z]' # lowercase letter r'[\.\!\?]' # sentence-ending punct. - r'[\"\']?' # optional end-of-quote + r'''[\"\']?''' # optional end-of-quote ) Index: Lib/tokenize.py =================================================================== --- Lib/tokenize.py (revision 58993) +++ Lib/tokenize.py (working copy) @@ -61,6 +61,11 @@ Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]') Number = group(Imagnumber, Floatnumber, Intnumber) +# Tail end of ' raw string. +rSingle = r"[^']*(?:.[^']*)*'" +# Tail end of " raw string. +rDouble = r'[^"]*(?:.[^"]*)*"' + # Tail end of ' string. Single = r"[^'\\]*(?:\\.[^'\\]*)*'" # Tail end of " string. @@ -69,7 +74,15 @@ Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" # Tail end of """ string. Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' + +# Tail end of ''' raw string. +rSingle3 = r"[^']*(?:(?:\\.|'(?!''))[^']*)*'''" +# Tail end of """ raw string. +rDouble3 = r'[^"]*(?:(?:\\.|"(?!""))[^"]*)*"""' + + Triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""') + # Single-line ' or " string. 
String = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'", r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"') @@ -90,25 +103,43 @@ Token = Ignore + PlainToken # First (or only) line of ' or " string. -ContStr = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" + - group("'", r'\\\r?\n'), - r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' + - group('"', r'\\\r?\n')) +ContStr = group( \ + r"[bB]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r'\\\r?\n'), + r'[bB]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r'\\\r?\n'), + r"[bB]?[rR]+'[^\n']*(?:\\.[^\n']*)*" + group("'", r'\\\r?\n'), + r'[bB]?[rR]+"[^\n"]*(?:\\.[^\n"]*)*' + group('"', r'\\\r?\n')) + PseudoExtras = group(r'\\\r?\n', Comment, Triple) PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) -tokenprog, pseudoprog, single3prog, double3prog = map( - re.compile, (Token, PseudoToken, Single3, Double3)) -endprogs = {"'": re.compile(Single), '"': re.compile(Double), +(tokenprog, pseudoprog, + singleprog, doubleprog, rsingleprog, rdoubleprog, + single3prog, double3prog, rsingle3prog, rdouble3prog) \ + = map(re.compile, (Token, PseudoToken, + Single, Double, + rSingle, rDouble, + Single3, Double3, + rSingle3, rDouble3)) + +endprogs = {"'": singleprog, '"': doubleprog, + "r'": rsingleprog, 'r"': rdoubleprog, + "b'": singleprog, 'b"': doubleprog, + "br'": rsingleprog, 'br"': rdoubleprog, + "R'": rsingleprog, 'R"': rdoubleprog, + "B'": singleprog, 'B"': doubleprog, + "bR'": rsingleprog, 'bR"': rdoubleprog, + "Br'": rsingleprog, 'Br"': rdoubleprog, + "BR'": rsingleprog, 'BR"': rdoubleprog, + "'''": single3prog, '"""': double3prog, - "r'''": single3prog, 'r"""': double3prog, + "r'''": rsingle3prog, 'r"""': rdouble3prog, "b'''": single3prog, 'b"""': double3prog, - "br'''": single3prog, 'br"""': double3prog, - "R'''": single3prog, 'R"""': double3prog, + "br'''": rsingle3prog, 'br"""': rdouble3prog, + "R'''": rsingle3prog, 'R"""': rdouble3prog, "B'''": single3prog, 'B"""': double3prog, - "bR'''": single3prog, 'bR"""': 
double3prog, - "Br'''": single3prog, 'Br"""': double3prog, - "BR'''": single3prog, 'BR"""': double3prog, + "bR'''": rsingle3prog, 'bR"""': rdouble3prog, + "Br'''": rsingle3prog, 'Br"""': rdouble3prog, + "BR'''": rsingle3prog, 'BR"""': rdouble3prog, 'r': None, 'R': None, 'b': None, 'B': None} triple_quoted = {} Index: Lib/distutils/util.py =================================================================== --- Lib/distutils/util.py (revision 58993) +++ Lib/distutils/util.py (working copy) @@ -278,7 +278,7 @@ _wordchars_re = _squote_re = _dquote_re = None def _init_regex(): global _wordchars_re, _squote_re, _dquote_re - _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace) + _wordchars_re = re.compile(r'''[^\\\'\"%s ]*''' % string.whitespace) _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'") _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"') Index: Lib/cookielib.py =================================================================== --- Lib/cookielib.py (revision 58993) +++ Lib/cookielib.py (working copy) @@ -319,7 +319,7 @@ return match.string[:start]+match.string[end:] HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)") -HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") +HEADER_QUOTED_VALUE_RE = re.compile(r'^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"') HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)") HEADER_ESCAPE_RE = re.compile(r"\\(.)") def split_header_words(header_values): @@ -407,7 +407,7 @@ if pairs: result.append(pairs) return result -HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") +HEADER_JOIN_ESCAPE_RE = re.compile(r'([\"\\])') def join_header_words(lists): """Do the inverse (almost) of the conversion done by split_header_words. 
@@ -1202,7 +1202,7 @@ """ non_word_re = re.compile(r"\W") - quote_re = re.compile(r"([\"\\])") + quote_re = re.compile(r'([\"\\])') strict_domain_re = re.compile(r"\.?[^.]*") domain_re = re.compile(r"[^.]*") dots_re = re.compile(r"^\.+") Index: Lib/pydoc.py =================================================================== --- Lib/pydoc.py (revision 58993) +++ Lib/pydoc.py (working copy) @@ -389,7 +389,7 @@ # Backslashes are only literal in the string and are never # needed to make any special characters, so show a raw string. return 'r' + testrepr[0] + self.escape(test) + testrepr[0] - return re.sub(r'((\\[\\abfnrtv\'"]|\\[0-9]..|\\x..|\\u....)+)', + return re.sub(r'''((\\[\\abfnrtv\'"]|\\[0-9]..|\\x..|\\u....)+)''', r'<font color="#c040c0">\1</font>', self.escape(testrepr)) Index: Lib/doctest.py =================================================================== --- Lib/doctest.py (revision 58993) +++ Lib/doctest.py (working copy) @@ -639,7 +639,7 @@ # "#doctest:". Eliminating these false positives would require # actually parsing the string; but we limit them by ignoring any # line containing "#doctest:" that is *followed* by a quote mark. 
- _OPTION_DIRECTIVE_RE = re.compile(r'#\s*doctest:\s*([^\n\'"]*)$', + _OPTION_DIRECTIVE_RE = re.compile(r'''#\s*doctest:\s*([^\n\'"]*)$''', re.MULTILINE) def _find_options(self, source, name, lineno): Index: Lib/test/tokenize_tests.txt =================================================================== --- Lib/test/tokenize_tests.txt (revision 58993) +++ Lib/test/tokenize_tests.txt (working copy) @@ -100,8 +100,8 @@ jumps over\n\ the \'lazy\' dog.\n\ '; -x = r'\\' + R'\\' -x = r'\'' + '' +x = r'\' + R'\' +x = r'\' + '' y = r''' foo bar \\ baz''' + R''' @@ -114,8 +114,8 @@ y = b"abc" + B"ABC" x = br'abc' + Br'ABC' + bR'ABC' + BR'ABC' y = br"abc" + Br"ABC" + bR"ABC" + BR"ABC" -x = br'\\' + BR'\\' -x = br'\'' + '' +x = br'\' + BR'\' +x = br'\' + '' y = br''' foo bar \\ baz''' + BR''' Index: Lib/test/output/test_tokenize =================================================================== --- Lib/test/output/test_tokenize (revision 58993) +++ Lib/test/output/test_tokenize (working copy) @@ -318,16 +318,16 @@ 102,2-102,3: NEWLINE '\n' 103,0-103,1: NAME 'x' 103,2-103,3: OP '=' -103,4-103,9: STRING "r'\\\\'" -103,10-103,11: OP '+' -103,12-103,17: STRING "R'\\\\'" -103,17-103,18: NEWLINE '\n' +103,4-103,8: STRING "r'\\'" +103,9-103,10: OP '+' +103,11-103,15: STRING "R'\\'" +103,15-103,16: NEWLINE '\n' 104,0-104,1: NAME 'x' 104,2-104,3: OP '=' -104,4-104,9: STRING "r'\\''" -104,10-104,11: OP '+' -104,12-104,14: STRING "''" -104,14-104,15: NEWLINE '\n' +104,4-104,8: STRING "r'\\'" +104,9-104,10: OP '+' +104,11-104,13: STRING "''" +104,13-104,14: NEWLINE '\n' 105,0-105,1: NAME 'y' 105,2-105,3: OP '=' 105,4-107,6: STRING "r'''\nfoo bar \\\\\nbaz'''" @@ -374,16 +374,16 @@ 116,41-116,42: NEWLINE '\n' 117,0-117,1: NAME 'x' 117,2-117,3: OP '=' -117,4-117,10: STRING "br'\\\\'" -117,11-117,12: OP '+' -117,13-117,19: STRING "BR'\\\\'" -117,19-117,20: NEWLINE '\n' +117,4-117,9: STRING "br'\\'" +117,10-117,11: OP '+' +117,12-117,17: STRING "BR'\\'" +117,17-117,18: NEWLINE '\n' 
118,0-118,1: NAME 'x' 118,2-118,3: OP '=' -118,4-118,10: STRING "br'\\''" -118,11-118,12: OP '+' -118,13-118,15: STRING "''" -118,15-118,16: NEWLINE '\n' +118,4-118,9: STRING "br'\\'" +118,10-118,11: OP '+' +118,12-118,14: STRING "''" +118,14-118,15: NEWLINE '\n' 119,0-119,1: NAME 'y' 119,2-119,3: OP '=' 119,4-121,6: STRING "br'''\nfoo bar \\\\\nbaz'''" Index: Lib/xml/etree/ElementTree.py =================================================================== --- Lib/xml/etree/ElementTree.py (revision 58993) +++ Lib/xml/etree/ElementTree.py (working copy) @@ -726,7 +726,7 @@ else: return s -_escape = re.compile(r"[&<>\"\u0080-\uffff]+") +_escape = re.compile(r'[&<>\"\u0080-\uffff]+') _escape_map = { "&": "&amp;", Index: Lib/HTMLParser.py =================================================================== --- Lib/HTMLParser.py (revision 58993) +++ Lib/HTMLParser.py (working copy) @@ -26,7 +26,7 @@ tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*') attrfind = re.compile( r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*' - r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?') + r'''(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?''') locatestarttagend = re.compile(r""" <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name