Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(29104)

Side by Side Diff: Lib/tokenize.py

Issue 18873: "Encoding" detected in non-comment lines
Patch Set: Created 6 years ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Lib/test/test_tokenize.py ('k') | Tools/scripts/findnocoding.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLDNEW
1 """Tokenization help for Python programs. 1 """Tokenization help for Python programs.
2 2
3 tokenize(readline) is a generator that breaks a stream of bytes into 3 tokenize(readline) is a generator that breaks a stream of bytes into
4 Python tokens. It decodes the bytes according to PEP-0263 for 4 Python tokens. It decodes the bytes according to PEP-0263 for
5 determining source file encoding. 5 determining source file encoding.
6 6
7 It accepts a readline-like method which is called repeatedly to get the 7 It accepts a readline-like method which is called repeatedly to get the
8 next line of input (or b"" for EOF). It generates 5-tuples with these 8 next line of input (or b"" for EOF). It generates 5-tuples with these
9 members: 9 members:
10 10
(...skipping 13 matching lines...) Expand all
24 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, ' 24 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
25 'Skip Montanaro, Raymond Hettinger, Trent Nelson, ' 25 'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
26 'Michael Foord') 26 'Michael Foord')
27 import builtins 27 import builtins
28 import re 28 import re
29 import sys 29 import sys
30 from token import * 30 from token import *
31 from codecs import lookup, BOM_UTF8 31 from codecs import lookup, BOM_UTF8
32 import collections 32 import collections
33 from io import TextIOWrapper 33 from io import TextIOWrapper
34 cookie_re = re.compile("coding[:=]\s*([-\w.]+)") 34 cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
35 35
36 import token 36 import token
37 __all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding", 37 __all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
38 "NL", "untokenize", "ENCODING", "TokenInfo"] 38 "NL", "untokenize", "ENCODING", "TokenInfo"]
39 del token 39 del token
40 40
41 COMMENT = N_TOKENS 41 COMMENT = N_TOKENS
42 tok_name[COMMENT] = 'COMMENT' 42 tok_name[COMMENT] = 'COMMENT'
43 NL = N_TOKENS + 1 43 NL = N_TOKENS + 1
44 tok_name[NL] = 'NL' 44 tok_name[NL] = 'NL'
(...skipping 320 matching lines...) Expand 10 before | Expand all | Expand 10 after
365 # Decode as UTF-8. Either the line is an encoding declaration, 365 # Decode as UTF-8. Either the line is an encoding declaration,
366 # in which case it should be pure ASCII, or it must be UTF-8 366 # in which case it should be pure ASCII, or it must be UTF-8
367 # per default encoding. 367 # per default encoding.
368 line_string = line.decode('utf-8') 368 line_string = line.decode('utf-8')
369 except UnicodeDecodeError: 369 except UnicodeDecodeError:
370 msg = "invalid or missing encoding declaration" 370 msg = "invalid or missing encoding declaration"
371 if filename is not None: 371 if filename is not None:
372 msg = '{} for {!r}'.format(msg, filename) 372 msg = '{} for {!r}'.format(msg, filename)
373 raise SyntaxError(msg) 373 raise SyntaxError(msg)
374 374
375 matches = cookie_re.findall(line_string) 375 match = cookie_re.match(line_string)
376 if not matches: 376 if not match:
377 return None 377 return None
378 encoding = _get_normal_name(matches[0]) 378 encoding = _get_normal_name(match.group(1))
379 try: 379 try:
380 codec = lookup(encoding) 380 codec = lookup(encoding)
381 except LookupError: 381 except LookupError:
382 # This behaviour mimics the Python interpreter 382 # This behaviour mimics the Python interpreter
383 if filename is None: 383 if filename is None:
384 msg = "unknown encoding: " + encoding 384 msg = "unknown encoding: " + encoding
385 else: 385 else:
386 msg = "unknown encoding for {!r}: {}".format(filename, 386 msg = "unknown encoding for {!r}: {}".format(filename,
387 encoding) 387 encoding)
388 raise SyntaxError(msg) 388 raise SyntaxError(msg)
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after
673 except OSError as err: 673 except OSError as err:
674 error(err) 674 error(err)
675 except KeyboardInterrupt: 675 except KeyboardInterrupt:
676 print("interrupted\n") 676 print("interrupted\n")
677 except Exception as err: 677 except Exception as err:
678 perror("unexpected error: %s" % err) 678 perror("unexpected error: %s" % err)
679 raise 679 raise
680 680
681 if __name__ == "__main__": 681 if __name__ == "__main__":
682 main() 682 main()
OLDNEW
« no previous file with comments | « Lib/test/test_tokenize.py ('k') | Tools/scripts/findnocoding.py » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+