diff -r f51921883f50 Doc/library/tokenize.rst
--- a/Doc/library/tokenize.rst	Sun Oct 04 01:19:36 2015 -0400
+++ b/Doc/library/tokenize.rst	Mon Oct 05 09:04:22 2015 +0300
@@ -29,8 +29,9 @@ The primary entry point is a :term:`gene
 
    The :func:`tokenize` generator requires one argument, *readline*, which
    must be a callable object which provides the same interface as the
-   :meth:`io.IOBase.readline` method of file objects. Each call to the
-   function should return one line of input as bytes.
+   :meth:`io.IOBase.readline` or the :meth:`io.TextIOBase.readline` method
+   of file objects. Each call to the function should return one line of input
+   as bytes or text.
 
    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple ``(srow, scol)`` of ints specifying the row and
@@ -52,8 +53,11 @@ The primary entry point is a :term:`gene
    .. versionchanged:: 3.3
       Added support for ``exact_type``.
 
-   :func:`tokenize` determines the source encoding of the file by looking for a
-   UTF-8 BOM or encoding cookie, according to :pep:`263`.
+   .. versionchanged:: 3.6
+      Added support for text input.
+
+   :func:`tokenize` determines the source encoding of binary files by
+   looking for a UTF-8 BOM or encoding cookie, according to :pep:`263`.
 
 
 All constants from the :mod:`token` module are also exported from
@@ -74,8 +78,8 @@ All constants from the :mod:`token` modu
 .. data:: ENCODING
 
    Token value that indicates the encoding used to decode the source bytes
-   into text. The first token returned by :func:`tokenize` will always be an
-   ENCODING token.
+   into text. For binary files, the first token returned by :func:`tokenize`
+   will always be an ENCODING token. For text files, this token is not used.
 
 
 Another function is provided to reverse the tokenization process. This is
@@ -89,15 +93,14 @@ write back the modified script.
    sequences with at least two elements, the token type and the token string.
    Any additional sequence elements are ignored.
 
-   The reconstructed script is returned as a single string. The result is
+   The reconstructed script is returned as a single string or bytes, encoded
+   using the ENCODING token if it is the first token sequence output by
+   :func:`tokenize`. The result is
    guaranteed to tokenize back to match the input so that the conversion is
    lossless and round-trips are assured. The guarantee applies only to the
    token type and token string as the spacing between tokens (column
    positions) may change.
 
-   It returns bytes, encoded using the ENCODING token, which is the first
-   token sequence output by :func:`tokenize`.
-
 
 :func:`tokenize` needs to detect the encoding of source files it tokenizes. The
 function it uses to do this is available:
@@ -108,9 +111,12 @@ function it uses to do this is available
    should be used to decode a Python source file. It requires one argument,
    readline, in the same way as the :func:`tokenize` generator.
 
-   It will call readline a maximum of twice, and return the encoding used
-   (as a string) and a list of any lines (not decoded from bytes) it has read
-   in.
+   If readline returns a string, ``detect_encoding`` returns ``None`` as the
+   encoding and a list containing this string.
+
+   Otherwise, it will call readline a maximum of twice, and return the
+   encoding used (as a string) and a list of any lines (not decoded from
+   bytes) it has read in.
 
    It detects the encoding from the presence of a UTF-8 BOM or an encoding
   cookie as specified in :pep:`263`. If both a BOM and a cookie are present,
@@ -123,6 +129,9 @@ function it uses to do this is available
    Use :func:`open` to open Python source files: it uses
    :func:`detect_encoding` to detect the file encoding.
 
+   .. versionchanged:: 3.6
+      Added support for text input.
+
 
 .. function:: open(filename)
 
@@ -211,7 +220,7 @@ objects::
             -3.217160342717258261933904529E-7
         """
         result = []
-        g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
+        g = tokenize(StringIO(s).readline)  # tokenize the string
        for toknum, tokval, _, _, _ in g:
            if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
                result.extend([
@@ -222,7 +231,7 @@ objects::
                ])
            else:
                result.append((toknum, tokval))
-        return untokenize(result).decode('utf-8')
+        return untokenize(result)
 
 Example of tokenizing from the command line. The script::
 
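(Illustration only, not part of the patch: a minimal sketch of the behaviour the documentation changes above describe, assuming the patched tokenize module. On an unpatched Python, tokenize.tokenize() still requires a bytes readline.)

    import io
    import tokenize

    source = "x = 1 + 2\n"

    # Text readline: no ENCODING token is emitted and untokenize() returns str.
    text_tokens = list(tokenize.tokenize(io.StringIO(source).readline))
    assert text_tokens[0].type != tokenize.ENCODING
    assert isinstance(tokenize.untokenize(text_tokens), str)

    # Bytes readline: unchanged behaviour -- ENCODING comes first and
    # untokenize() returns bytes encoded with that encoding.
    byte_tokens = list(tokenize.tokenize(io.BytesIO(source.encode('utf-8')).readline))
    assert byte_tokens[0].type == tokenize.ENCODING
    assert isinstance(tokenize.untokenize(byte_tokens), bytes)
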
diff -r f51921883f50 Lib/cgitb.py
--- a/Lib/cgitb.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Lib/cgitb.py	Mon Oct 05 09:04:22 2015 +0300
@@ -80,7 +80,7 @@ def lookup(name, frame, locals):
 def scanvars(reader, frame, locals):
     """Scan one logical line of Python and look up values of variables used."""
     vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__
-    for ttype, token, start, end, line in tokenize.generate_tokens(reader):
+    for ttype, token, start, end, line in tokenize.tokenize(reader):
         if ttype == tokenize.NEWLINE: break
         if ttype == tokenize.NAME and token not in keyword.kwlist:
             if lasttoken == '.':
diff -r f51921883f50 Lib/gettext.py
--- a/Lib/gettext.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Lib/gettext.py	Mon Oct 05 09:04:22 2015 +0300
@@ -66,7 +66,7 @@ def c2py(plural):
     """
     # Security check, allow only the "n" identifier
     import token, tokenize
-    tokens = tokenize.generate_tokens(io.StringIO(plural).readline)
+    tokens = tokenize.tokenize(io.StringIO(plural).readline)
     try:
         danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
     except tokenize.TokenError:
diff -r f51921883f50 Lib/idlelib/EditorWindow.py
--- a/Lib/idlelib/EditorWindow.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Lib/idlelib/EditorWindow.py	Mon Oct 05 09:04:22 2015 +0300
@@ -1626,7 +1626,7 @@ class IndentSearcher(object):
         _tokenize.tabsize = self.tabwidth
         try:
             try:
-                tokens = _tokenize.generate_tokens(self.readline)
+                tokens = _tokenize.tokenize(self.readline)
                 for token in tokens:
                     self.tokeneater(*token)
             except (_tokenize.TokenError, SyntaxError):
diff -r f51921883f50 Lib/idlelib/ScriptBinding.py
--- a/Lib/idlelib/ScriptBinding.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Lib/idlelib/ScriptBinding.py	Mon Oct 05 09:04:22 2015 +0300
@@ -67,7 +67,7 @@ class ScriptBinding:
         # XXX: tabnanny should work on binary files as well
         with tokenize.open(filename) as f:
             try:
-                tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
+                tabnanny.process_tokens(tokenize.tokenize(f.readline))
             except tokenize.TokenError as msg:
                 msgtxt, (lineno, start) = msg.args
                 self.editwin.gotoline(lineno)
diff -r f51921883f50 Lib/inspect.py
--- a/Lib/inspect.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Lib/inspect.py	Mon Oct 05 09:04:22 2015 +0300
@@ -894,7 +894,7 @@ def getblock(lines):
     """Extract the block of code at the top of the given list of lines."""
     blockfinder = BlockFinder()
     try:
-        tokens = tokenize.generate_tokens(iter(lines).__next__)
+        tokens = tokenize.tokenize(iter(lines).__next__)
         for _token in tokens:
             blockfinder.tokeneater(*_token)
     except (EndOfBlock, IndentationError):
diff -r f51921883f50 Lib/pyclbr.py
--- a/Lib/pyclbr.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Lib/pyclbr.py	Mon Oct 05 09:04:22 2015 +0300
@@ -158,7 +158,7 @@ def _readmodule(module, path, inpackage=
 
     stack = [] # stack of (class, indent) pairs
 
-    g = tokenize.generate_tokens(f.readline)
+    g = tokenize.tokenize(f.readline)
     try:
         for tokentype, token, start, _end, _line in g:
             if tokentype == DEDENT:
diff -r f51921883f50 Lib/tabnanny.py
--- a/Lib/tabnanny.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Lib/tabnanny.py	Mon Oct 05 09:04:22 2015 +0300
@@ -103,7 +103,7 @@ def check(file):
         print("checking %r ..." % file)
 
     try:
-        process_tokens(tokenize.generate_tokens(f.readline))
+        process_tokens(tokenize.tokenize(f.readline))
 
     except tokenize.TokenError as msg:
         errprint("%r: Token Error: %s" % (file, msg))
diff -r f51921883f50 Lib/tokenize.py
--- a/Lib/tokenize.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Lib/tokenize.py	Mon Oct 05 09:04:22 2015 +0300
@@ -318,8 +318,8 @@ class Untokenizer:
 
 def untokenize(iterable):
     """Transform tokens back into Python source code.
-    It returns a bytes object, encoded using the ENCODING
-    token, which is the first token sequence output by tokenize.
+    It returns a string or a bytes object, encoded using the ENCODING
+    token, if it is the first token sequence output by tokenize.
 
     Each element returned by the iterable must be a token sequence
     with at least two elements, a token number and token value. If
@@ -423,6 +423,8 @@ def detect_encoding(readline):
         return encoding
 
     first = read_or_stop()
+    if isinstance(first, str):
+        return None, [first]
     if first.startswith(BOM_UTF8):
         bom_found = True
         first = first[3:]
@@ -468,7 +470,7 @@ def tokenize(readline):
     The tokenize() generator requires one argment, readline, which
     must be a callable object which provides the same interface as the
     readline() method of built-in file objects. Each call to the function
-    should return one line of input as bytes. Alternately, readline
+    should return one line of input as bytes or text. Alternately, readline
     can be a callable function terminating with StopIteration:
         readline = open(myfile, 'rb').__next__  # Example of alternate readline
 
@@ -479,16 +481,16 @@ def tokenize(readline):
     and the line on which the token was found. The line passed is the
     logical line; continuation lines are included.
 
-    The first token sequence will always be an ENCODING token
-    which tells you which encoding was used to decode the bytes stream.
+    If readline() returns bytes the first token sequence will always be an
+    ENCODING token which tells you which encoding was used to decode the bytes
+    stream.
     """
     # This import is here to avoid problems when the itertools module is not
     # built yet and tokenize is imported.
     from itertools import chain, repeat
     encoding, consumed = detect_encoding(readline)
-    rl_gen = iter(readline, b"")
-    empty = repeat(b"")
-    return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
+    rl_gen = iter(readline, consumed[:0])
+    return _tokenize(chain(consumed, rl_gen).__next__, encoding)
 
 
 def _tokenize(readline, encoding):
@@ -710,6 +712,8 @@ def _tokenize(readline, encoding):
 # An undocumented, backwards compatible, API for all the places in the standard
 # library that expect to be able to use tokenize with strings
 def generate_tokens(readline):
+    import warnings
+    warnings.warn("use tokenize()", DeprecationWarning, stacklevel=2)
     return _tokenize(readline, None)
 
 def main():
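(Illustration only, not part of the patch: a quick check of the two Lib/tokenize.py behaviour changes above, again assuming the patched module. detect_encoding() short-circuits on text input, and generate_tokens() now emits a DeprecationWarning.)

    import io
    import tokenize
    import warnings

    # Text readline: no encoding is detected; the line already read is handed back undecoded.
    encoding, lines = tokenize.detect_encoding(io.StringIO("pass\n").readline)
    assert encoding is None
    assert lines == ["pass\n"]

    # generate_tokens() still works but warns that tokenize() should be used instead.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        list(tokenize.generate_tokens(io.StringIO("pass\n").readline))
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)
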
diff -r f51921883f50 Lib/trace.py
--- a/Lib/trace.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Lib/trace.py	Mon Oct 05 09:04:22 2015 +0300
@@ -418,7 +418,7 @@ def _find_strings(filename, encoding=Non
     # Add this special case so that the test in the loop passes.
     prev_ttype = token.INDENT
     with open(filename, encoding=encoding) as f:
-        tok = tokenize.generate_tokens(f.readline)
+        tok = tokenize.tokenize(f.readline)
         for ttype, tstr, start, end, line in tok:
             if ttype == token.STRING:
                 if prev_ttype == token.INDENT:
diff -r f51921883f50 Tools/scripts/cleanfuture.py
--- a/Tools/scripts/cleanfuture.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Tools/scripts/cleanfuture.py	Mon Oct 05 09:04:22 2015 +0300
@@ -162,7 +162,7 @@ class FutureFinder:
         OP = tokenize.OP
         changed = self.changed
-        get = tokenize.generate_tokens(self.getline).__next__
+        get = tokenize.tokenize(self.getline).__next__
         type, token, (srow, scol), (erow, ecol), line = get()
 
         # Chew up initial comments and blank lines (if any).
diff -r f51921883f50 Tools/scripts/finddiv.py
--- a/Tools/scripts/finddiv.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Tools/scripts/finddiv.py	Mon Oct 05 09:04:22 2015 +0300
@@ -55,7 +55,7 @@ def process(filename, listnames):
     except IOError as msg:
         sys.stderr.write("Can't open: %s\n" % msg)
         return 1
-    g = tokenize.generate_tokens(fp.readline)
+    g = tokenize.tokenize(fp.readline)
     lastrow = None
     for type, token, (row, col), end, line in g:
         if token in ("/", "/="):
diff -r f51921883f50 Tools/scripts/fixdiv.py
--- a/Tools/scripts/fixdiv.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Tools/scripts/fixdiv.py	Mon Oct 05 09:04:22 2015 +0300
@@ -214,7 +214,7 @@ def process(filename, list):
     f = FileContext(fp)
     list.sort()
     index = 0 # list[:index] has been processed, list[index:] is still to do
-    g = tokenize.generate_tokens(f.readline)
+    g = tokenize.tokenize(f.readline)
     while 1:
         startlineno, endlineno, slashes = lineinfo = scanline(g)
         if startlineno is None:
diff -r f51921883f50 Tools/scripts/highlight.py
--- a/Tools/scripts/highlight.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Tools/scripts/highlight.py	Mon Oct 05 09:04:22 2015 +0300
@@ -34,7 +34,7 @@ def analyze_python(source):
     kind = tok_str = ''
     tok_type = tokenize.COMMENT
     written = (1, 0)
-    for tok in tokenize.generate_tokens(readline):
+    for tok in tokenize.tokenize(readline):
         prev_tok_type, prev_tok_str = tok_type, tok_str
         tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok
         kind = ''
diff -r f51921883f50 Tools/scripts/reindent.py
--- a/Tools/scripts/reindent.py	Sun Oct 04 01:19:36 2015 -0400
+++ b/Tools/scripts/reindent.py	Mon Oct 05 09:04:22 2015 +0300
@@ -195,7 +195,7 @@ class Reindenter:
         self.newlines = f.newlines
 
     def run(self):
-        tokens = tokenize.generate_tokens(self.getline)
+        tokens = tokenize.tokenize(self.getline)
         for _token in tokens:
             self.tokeneater(*_token)
         # Remove trailing empty lines.
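(Illustration only, not part of the patch: the Lib/ and Tools/scripts/ changes above are all the same mechanical substitution. With the patch applied, a caller that used to feed a text readline to the undocumented generate_tokens() can pass it straight to tokenize(); the count_names() helper below is a made-up example of that migration.)

    import io
    import tokenize

    def count_names(source_text):
        """Count NAME tokens in a string of Python source (assumes the patched tokenize)."""
        readline = io.StringIO(source_text).readline
        return sum(1 for tok in tokenize.tokenize(readline)  # was: tokenize.generate_tokens(readline)
                   if tok.type == tokenize.NAME)

    print(count_names("spam = eggs + ham\n"))  # -> 3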