diff -r fcd29041c991 Doc/library/tokenize.rst
--- a/Doc/library/tokenize.rst	Sun Sep 18 07:40:20 2011 +0200
+++ b/Doc/library/tokenize.rst	Mon Sep 19 08:12:37 2011 -0500
@@ -15,6 +15,9 @@
 as well, making it useful for implementing "pretty-printers," including
 colorizers for on-screen displays.
 
+Tokenizing Input
+----------------
+
 The primary entry point is a :term:`generator`:
 
 .. function:: tokenize(readline)
@@ -116,6 +119,26 @@
 
    .. versionadded:: 3.2
 
+.. _tokenize-cli:
+
+Command-Line Usage
+------------------
+
+.. versionadded:: 3.3
+
+The :mod:`tokenize` module can be executed as a script from the command line.
+It is as simple as:
+
+.. code-block:: sh
+
+   python -m tokenize [somefile.py]
+
+If :file:`somefile.py` is specified, its contents are tokenized to stdout.
+Otherwise, tokenization is performed on stdin.
+
+Examples
+--------
+
 Example of a script rewriter that transforms float literals into Decimal
 objects::
 
@@ -158,3 +181,37 @@
             result.append((toknum, tokval))
     return untokenize(result).decode('utf-8')
 
+Example of tokenizing from the command line.  The script::
+
+    def say_hello():
+        print("Hello, World!")
+
+    say_hello()
+
+will be tokenized to the following output, where the first column is the range
+of the line/column coordinates where the token is found, the second column is
+the name of the token, and the final column is the value of the token (if any):
+
+.. code-block:: sh
+
+    $ python -m tokenize hello.py
+    0,0-0,0:            ENCODING       'utf-8'
+    1,0-1,3:            NAME           'def'
+    1,4-1,13:           NAME           'say_hello'
+    1,13-1,14:          OP             '('
+    1,14-1,15:          OP             ')'
+    1,15-1,16:          OP             ':'
+    1,16-1,17:          NEWLINE        '\n'
+    2,0-2,4:            INDENT         '    '
+    2,4-2,9:            NAME           'print'
+    2,9-2,10:           OP             '('
+    2,10-2,25:          STRING         '"Hello, World!"'
+    2,25-2,26:          OP             ')'
+    2,26-2,27:          NEWLINE        '\n'
+    3,0-3,1:            NL             '\n'
+    4,0-4,0:            DEDENT         ''
+    4,0-4,9:            NAME           'say_hello'
+    4,9-4,10:           OP             '('
+    4,10-4,11:          OP             ')'
+    4,11-4,12:          NEWLINE        '\n'
+    5,0-5,0:            ENDMARKER      ''
diff -r fcd29041c991 Lib/tokenize.py
--- a/Lib/tokenize.py	Sun Sep 18 07:40:20 2011 +0200
+++ b/Lib/tokenize.py	Mon Sep 19 08:12:37 2011 -0500
@@ -531,26 +531,16 @@
     return _tokenize(readline, None)
 
 if __name__ == "__main__":
-    # Quick sanity check
-    s = b'''def parseline(self, line):
-            """Parse the line into a command name and a string containing
-            the arguments.  Returns a tuple containing (command, args, line).
-            'command' and 'args' may be None if the line couldn't be parsed.
-            """
-            line = line.strip()
-            if not line:
-                return None, None, line
-            elif line[0] == '?':
-                line = 'help ' + line[1:]
-            elif line[0] == '!':
-                if hasattr(self, 'do_shell'):
-                    line = 'shell ' + line[1:]
-                else:
-                    return None, None, line
-            i, n = 0, len(line)
-            while i < n and line[i] in self.identchars: i = i+1
-            cmd, arg = line[:i], line[i:].strip()
-            return cmd, arg, line
-            '''
-    for tok in tokenize(iter(s.splitlines()).__next__):
-        print(tok)
+    if len(sys.argv) == 2:
+        with builtins.open(sys.argv[1], 'rb') as f:
+            tokens = list(tokenize(f.readline))
+    elif len(sys.argv) > 2:
+        sys.stderr.write("usage: python -m tokenize [somefile.py]\n")
+        sys.exit(2)
+    else:
+        tokens = _tokenize(sys.stdin.readline, None)
+
+    for token in tokens:
+        token_range = "%d,%d-%d,%d:" % (token.start + token.end)
+        print("%-20s%-15s%-15r" %
+              (token_range, tok_name[token.type], token.string))
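
A minimal sketch (not part of the patch) showing the same tokenization and
output formatting that the new ``__main__`` block performs, using only the
documented :func:`tokenize.tokenize` generator API; the ``hello.py`` source
from the docs example above is assumed and fed in through ``io.BytesIO``::

    import io
    import tokenize

    # The same source as the hello.py example in the docs above.
    source = b'def say_hello():\n    print("Hello, World!")\n\nsay_hello()\n'

    # tokenize() expects a readline callable that returns bytes lines.
    for token in tokenize.tokenize(io.BytesIO(source).readline):
        # Each token is a named tuple: type, string, start, end, line.
        token_range = "%d,%d-%d,%d:" % (token.start + token.end)
        print("%-20s%-15s%-15r" %
              (token_range, tokenize.tok_name[token.type], token.string))

Running this prints the same ENCODING/NAME/OP listing shown in the
documentation example above.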