diff --git a/Doc/c-api/tokenizer.rst b/Doc/c-api/tokenizer.rst new file mode 100644 --- /dev/null +++ b/Doc/c-api/tokenizer.rst @@ -0,0 +1,61 @@ +.. highlightlang:: c + +.. _tokenizer: + +Tokenizing Python Code +====================== + +.. sectionauthor:: Dustin J. Mitchell + +.. index:: + tokenizer + +These routines allow C code to break Python code into a stream of tokens. +The token constants match those defined in :mod:`token`. + +.. c:type:: PyTokenizer_State + + The C structure used to represent the state of a tokenizer. + +.. c:function:: PyTokenizer_State *PyTokenizer_FromString(const char *string, int exec_input) + + :param string: string to convert to tokens + :param exec_input: true if the input is from an ``exec`` call + + Initialize a tokenizer to read from a C string. + If ``exec_input`` is true, then an implicit newline will be added to the end of the string. + +.. c:function:: PyTokenizer_State *PyTokenizer_FromUTF8(const char *string, int exec_input) + + :param string: UTF-8 encoded string to convert to tokens + :param exec_input: true if the input is from an ``exec`` call + + Initialize a tokenizer to read from a UTF-8 encoded C string. + If ``exec_input`` is true, then an implicit newline will be added to the end of the string. + +.. c:function:: PyTokenizer_State *PyTokenizer_FromFile(FILE *fp, const char *encoding, const char *ps1, const char *ps2) + + :param fp: file to tokenize + :param encoding: encoding of the file contents + :param ps1: initial-line interactive prompt + :param ps2: subsequent-line interactive prompt + + Initialize a tokenizer to read from a file. + The file data is decoded using ``encoding``, if given. + If ``ps1`` and ``ps2`` are not NULL, the tokenizer will operate in interactive mode. + +.. c:function:: void PyTokenizer_Free(PyTokenizer_State *state) + + :param state: tokenizer state + + Free the given tokenizer. + +.. 
c:function:: int PyTokenizer_Get(PyTokenizer_State *state, char **p_start, char **p_end) + + :param state: tokenizer state + :param p_start: (output) first character of the returned token + :param p_end: (output) first character following the returned token + :return: token + + Get the next token from the tokenizer. + The ``p_start`` and ``p_end`` output parameters give the boundaries of the returned token. diff --git a/Doc/c-api/utilities.rst b/Doc/c-api/utilities.rst --- a/Doc/c-api/utilities.rst +++ b/Doc/c-api/utilities.rst @@ -19,3 +19,4 @@ and parsing function arguments and const conversion.rst reflection.rst codec.rst + tokenizer.rst diff --git a/Include/tokenizer.h b/Include/tokenizer.h new file mode 100644 --- /dev/null +++ b/Include/tokenizer.h @@ -0,0 +1,83 @@ +#ifndef Py_TOKENIZER_H +#define Py_TOKENIZER_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "object.h" + +/* Tokenizer interface */ + +#include "token.h" /* For token types */ + +typedef struct PyTokenizer_State PyTokenizer_State; + +#ifndef Py_LIMITED_API +#define MAXINDENT 100 /* Max indentation level */ + +enum decoding_state { + STATE_INIT, + STATE_RAW, + STATE_NORMAL /* have a codec associated with input */ +}; + +struct PyTokenizer_State { + /* Input state; buf <= cur <= inp <= end */ + /* NB an entire line is held in the buffer */ + char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ + char *cur; /* Next character in buffer */ + char *inp; /* End of data in buffer */ + char *end; /* End of input buffer if buf != NULL */ + char *start; /* Start of current token if not NULL */ + int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ + /* NB If done != E_OK, cur must be == inp!!! 
*/ + FILE *fp; /* Rest of input; NULL if tokenizing a string */ + int tabsize; /* Tab spacing */ + int indent; /* Current indentation index */ + int indstack[MAXINDENT]; /* Stack of indents */ + int atbol; /* Nonzero if at begin of new line */ + int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ + const char *prompt, *nextprompt; /* For interactive prompting */ + int lineno; /* Current line number */ + int level; /* () [] {} Parentheses nesting level */ + /* Used to allow free continuations inside them */ + /* Stuff for checking on different tab sizes */ +#ifndef PGEN + /* pgen doesn't have access to Python codecs, it cannot decode the input + filename. The bytes filename might be kept, but it is only used by + indenterror() and it is not really needed: pgen only compiles one file + (Grammar/Grammar). */ + PyObject *filename; +#endif + int altwarning; /* Issue warning if alternate tabs don't match */ + int alterror; /* Issue error if alternate tabs don't match */ + int alttabsize; /* Alternate tab spacing */ + int altindstack[MAXINDENT]; /* Stack of alternate indents */ + /* Stuff for PEP 0263 */ + enum decoding_state decoding_state; + int decoding_erred; /* whether erred in decoding */ + int read_coding_spec; /* whether 'coding:...' has been read */ + char *encoding; /* Source encoding. */ + int cont_line; /* whether we are in a continuation line. */ + const char* line_start; /* pointer to start of current line */ +#ifndef PGEN + PyObject *decoding_readline; /* open(...).readline */ + PyObject *decoding_buffer; +#endif + const char* enc; /* Encoding for the current str. */ + const char* str; + const char* input; /* Tokenizer's newline translated copy of the string. 
*/ +}; +#endif + +PyAPI_FUNC(PyTokenizer_State *)PyTokenizer_FromString(const char *, int); +PyAPI_FUNC(PyTokenizer_State *)PyTokenizer_FromUTF8(const char *, int); +PyAPI_FUNC(PyTokenizer_State *)PyTokenizer_FromFile(FILE *, const char*, + const char *, const char *); +PyAPI_FUNC(void) PyTokenizer_Free(PyTokenizer_State *); +PyAPI_FUNC(int) PyTokenizer_Get(PyTokenizer_State *, char **, char **); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_TOKENIZER_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -322,7 +322,7 @@ PGOBJS= \ PARSER_HEADERS= \ $(srcdir)/Parser/parser.h \ $(srcdir)/Include/parsetok.h \ - $(srcdir)/Parser/tokenizer.h + $(srcdir)/Include/tokenizer.h PGENSRCS= $(PSRCS) $(PGSRCS) PGENOBJS= $(POBJS) $(PGOBJS) diff --git a/Parser/parsetok.c b/Parser/parsetok.c --- a/Parser/parsetok.c +++ b/Parser/parsetok.c @@ -12,7 +12,7 @@ /* Forward */ -static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); +static node *parsetok(PyTokenizer_State *, grammar *, int, perrdetail *, int *); static int initerr(perrdetail *err_ret, PyObject * filename); /* Parse input coming from a string. Return error code, print some errors. */ @@ -45,7 +45,7 @@ PyParser_ParseStringObject(const char *s grammar *g, int start, perrdetail *err_ret, int *flags) { - struct tok_state *tok; + PyTokenizer_State *tok; int exec_input = start == file_input; if (initerr(err_ret, filename) < 0) @@ -118,7 +118,7 @@ PyParser_ParseFileObject(FILE *fp, PyObj const char *ps1, const char *ps2, perrdetail *err_ret, int *flags) { - struct tok_state *tok; + PyTokenizer_State *tok; if (initerr(err_ret, filename) < 0) return NULL; @@ -181,7 +181,7 @@ warn(const char *msg, const char *filena Return error code. 
*/ static node * -parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, +parsetok(PyTokenizer_State *tok, grammar *g, int start, perrdetail *err_ret, int *flags) { parser_state *ps; diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -40,9 +40,9 @@ extern char *PyOS_Readline(FILE *, FILE #define TABSIZE 8 /* Forward */ -static struct tok_state *tok_new(void); -static int tok_nextc(struct tok_state *tok); -static void tok_backup(struct tok_state *tok, int c); +static PyTokenizer_State *tok_new(void); +static int tok_nextc(PyTokenizer_State *tok); +static void tok_backup(PyTokenizer_State *tok, int c); /* Token names */ @@ -110,11 +110,11 @@ const char *_PyParser_TokenNames[] = { /* Create and initialize a new tok_state structure */ -static struct tok_state * +static PyTokenizer_State * tok_new(void) { - struct tok_state *tok = (struct tok_state *)PyMem_MALLOC( - sizeof(struct tok_state)); + PyTokenizer_State *tok = (PyTokenizer_State *)PyMem_MALLOC( + sizeof(PyTokenizer_State)); if (tok == NULL) return NULL; tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; @@ -148,7 +148,7 @@ tok_new(void) } static char * -new_string(const char *s, Py_ssize_t len, struct tok_state *tok) +new_string(const char *s, Py_ssize_t len, PyTokenizer_State *tok) { char* result = (char *)PyMem_MALLOC(len + 1); if (!result) { @@ -163,19 +163,19 @@ new_string(const char *s, Py_ssize_t len #ifdef PGEN static char * -decoding_fgets(char *s, int size, struct tok_state *tok) +decoding_fgets(char *s, int size, PyTokenizer_State *tok) { return fgets(s, size, tok->fp); } static int -decoding_feof(struct tok_state *tok) +decoding_feof(PyTokenizer_State *tok) { return feof(tok->fp); } static char * -decode_str(const char *str, int exec_input, struct tok_state *tok) +decode_str(const char *str, int exec_input, PyTokenizer_State *tok) { return new_string(str, strlen(str), tok); } @@ -183,7 +183,7 @@ decode_str(const 
char *str, int exec_inp #else /* PGEN */ static char * -error_ret(struct tok_state *tok) /* XXX */ +error_ret(PyTokenizer_State *tok) /* XXX */ { tok->decoding_erred = 1; if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ @@ -225,7 +225,7 @@ get_normal_name(char *s) /* for u /* Return the coding spec in S, or NULL if none is found. */ static int -get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok) +get_coding_spec(const char *s, char **spec, Py_ssize_t size, PyTokenizer_State *tok) { Py_ssize_t i; *spec = NULL; @@ -278,8 +278,8 @@ get_coding_spec(const char *s, char **sp Return 1 on success, 0 on failure. */ static int -check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, - int set_readline(struct tok_state *, const char *)) +check_coding_spec(const char* line, Py_ssize_t size, PyTokenizer_State *tok, + int set_readline(PyTokenizer_State *, const char *)) { char *cs; int r = 1; @@ -337,10 +337,10 @@ check_coding_spec(const char* line, Py_s Return 1 on success, 0 on failure. */ static int -check_bom(int get_char(struct tok_state *), - void unget_char(int, struct tok_state *), - int set_readline(struct tok_state *, const char *), - struct tok_state *tok) +check_bom(int get_char(PyTokenizer_State *), + void unget_char(int, PyTokenizer_State *), + int set_readline(PyTokenizer_State *, const char *), + PyTokenizer_State *tok) { int ch1, ch2, ch3; ch1 = get_char(tok); @@ -414,7 +414,7 @@ check_bom(int get_char(struct tok_state */ static char * -fp_readl(char *s, int size, struct tok_state *tok) +fp_readl(char *s, int size, PyTokenizer_State *tok) { PyObject* bufobj; const char *buf; @@ -485,7 +485,7 @@ error: Return 1 on success, 0 on failure. 
*/ static int -fp_setreadl(struct tok_state *tok, const char* enc) +fp_setreadl(PyTokenizer_State *tok, const char* enc) { PyObject *readline = NULL, *stream = NULL, *io = NULL; _Py_IDENTIFIER(open); @@ -533,13 +533,13 @@ fp_setreadl(struct tok_state *tok, const /* Fetch the next byte from TOK. */ -static int fp_getc(struct tok_state *tok) { +static int fp_getc(PyTokenizer_State *tok) { return getc(tok->fp); } /* Unfetch the last byte back into TOK. */ -static void fp_ungetc(int c, struct tok_state *tok) { +static void fp_ungetc(int c, PyTokenizer_State *tok) { ungetc(c, tok->fp); } @@ -575,7 +575,7 @@ static int valid_utf8(const unsigned cha if necessary. */ static char * -decoding_fgets(char *s, int size, struct tok_state *tok) +decoding_fgets(char *s, int size, PyTokenizer_State *tok) { char *line = NULL; int badchar = 0; @@ -632,7 +632,7 @@ decoding_fgets(char *s, int size, struct } static int -decoding_feof(struct tok_state *tok) +decoding_feof(PyTokenizer_State *tok) { if (tok->decoding_state != STATE_NORMAL) { return feof(tok->fp); @@ -654,14 +654,14 @@ decoding_feof(struct tok_state *tok) /* Fetch a byte from TOK, using the string buffer. */ static int -buf_getc(struct tok_state *tok) { +buf_getc(PyTokenizer_State *tok) { return Py_CHARMASK(*tok->str++); } /* Unfetch a byte from TOK, using the string buffer. */ static void -buf_ungetc(int c, struct tok_state *tok) { +buf_ungetc(int c, PyTokenizer_State *tok) { tok->str--; assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */ } @@ -670,7 +670,7 @@ buf_ungetc(int c, struct tok_state *tok) tokenizer, this means to just record the encoding. 
*/ static int -buf_setreadl(struct tok_state *tok, const char* enc) { +buf_setreadl(PyTokenizer_State *tok, const char* enc) { tok->enc = enc; return 1; } @@ -691,7 +691,7 @@ translate_into_utf8(const char* str, con static char * -translate_newlines(const char *s, int exec_input, struct tok_state *tok) { +translate_newlines(const char *s, int exec_input, PyTokenizer_State *tok) { int skip_next_lf = 0; size_t needed_length = strlen(s) + 2, final_length; char *buf, *current; @@ -736,7 +736,7 @@ translate_newlines(const char *s, int ex inside TOK. */ static const char * -decode_str(const char *input, int single, struct tok_state *tok) +decode_str(const char *input, int single, PyTokenizer_State *tok) { PyObject* utf8 = NULL; const char *str; @@ -795,10 +795,10 @@ decode_str(const char *input, int single /* Set up tokenizer for string */ -struct tok_state * +PyTokenizer_State * PyTokenizer_FromString(const char *str, int exec_input) { - struct tok_state *tok = tok_new(); + PyTokenizer_State *tok = tok_new(); if (tok == NULL) return NULL; str = decode_str(str, exec_input, tok); @@ -812,10 +812,10 @@ PyTokenizer_FromString(const char *str, return tok; } -struct tok_state * +PyTokenizer_State * PyTokenizer_FromUTF8(const char *str, int exec_input) { - struct tok_state *tok = tok_new(); + PyTokenizer_State *tok = tok_new(); if (tok == NULL) return NULL; #ifndef PGEN @@ -843,11 +843,11 @@ PyTokenizer_FromUTF8(const char *str, in /* Set up tokenizer for file */ -struct tok_state * +PyTokenizer_State * PyTokenizer_FromFile(FILE *fp, const char* enc, const char *ps1, const char *ps2) { - struct tok_state *tok = tok_new(); + PyTokenizer_State *tok = tok_new(); if (tok == NULL) return NULL; if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) { @@ -877,7 +877,7 @@ PyTokenizer_FromFile(FILE *fp, const cha /* Free a tok_state structure */ void -PyTokenizer_Free(struct tok_state *tok) +PyTokenizer_Free(PyTokenizer_State *tok) { if (tok->encoding != NULL) 
PyMem_FREE(tok->encoding); @@ -896,7 +896,7 @@ PyTokenizer_Free(struct tok_state *tok) /* Get next char, updating state; error code goes into tok->done */ static int -tok_nextc(struct tok_state *tok) +tok_nextc(PyTokenizer_State *tok) { for (;;) { if (tok->cur != tok->inp) { @@ -1093,7 +1093,7 @@ tok_nextc(struct tok_state *tok) /* Back-up one character */ static void -tok_backup(struct tok_state *tok, int c) +tok_backup(PyTokenizer_State *tok, int c) { if (c != EOF) { if (--tok->cur < tok->buf) @@ -1276,7 +1276,7 @@ PyToken_ThreeChars(int c1, int c2, int c } static int -indenterror(struct tok_state *tok) +indenterror(PyTokenizer_State *tok) { if (tok->alterror) { tok->done = E_TABSPACE; @@ -1303,7 +1303,7 @@ indenterror(struct tok_state *tok) All identifier strings are guaranteed to be "ready" unicode objects. */ static int -verify_identifier(struct tok_state *tok) +verify_identifier(PyTokenizer_State *tok) { PyObject *s; int result; @@ -1328,7 +1328,7 @@ verify_identifier(struct tok_state *tok) /* Get next token, after space stripping etc. 
*/ static int -tok_get(struct tok_state *tok, char **p_start, char **p_end) +tok_get(PyTokenizer_State *tok, char **p_start, char **p_end) { int c; int blankline, nonascii; @@ -1739,7 +1739,7 @@ tok_get(struct tok_state *tok, char **p_ } int -PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) +PyTokenizer_Get(PyTokenizer_State *tok, char **p_start, char **p_end) { int result = tok_get(tok, p_start, p_end); if (tok->decoding_erred) { @@ -1762,7 +1762,7 @@ PyTokenizer_Get(struct tok_state *tok, c char * PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) { - struct tok_state *tok; + PyTokenizer_State *tok; FILE *fp; char *p_start =NULL , *p_end =NULL , *encoding = NULL; diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h deleted file mode 100644 --- a/Parser/tokenizer.h +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef Py_TOKENIZER_H -#define Py_TOKENIZER_H -#ifdef __cplusplus -extern "C" { -#endif - -#include "object.h" - -/* Tokenizer interface */ - -#include "token.h" /* For token types */ - -#define MAXINDENT 100 /* Max indentation level */ - -enum decoding_state { - STATE_INIT, - STATE_RAW, - STATE_NORMAL /* have a codec associated with input */ -}; - -/* Tokenizer state */ -struct tok_state { - /* Input state; buf <= cur <= inp <= end */ - /* NB an entire line is held in the buffer */ - char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ - char *cur; /* Next character in buffer */ - char *inp; /* End of data in buffer */ - char *end; /* End of input buffer if buf != NULL */ - char *start; /* Start of current token if not NULL */ - int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ - /* NB If done != E_OK, cur must be == inp!!! 
*/ - FILE *fp; /* Rest of input; NULL if tokenizing a string */ - int tabsize; /* Tab spacing */ - int indent; /* Current indentation index */ - int indstack[MAXINDENT]; /* Stack of indents */ - int atbol; /* Nonzero if at begin of new line */ - int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ - const char *prompt, *nextprompt; /* For interactive prompting */ - int lineno; /* Current line number */ - int level; /* () [] {} Parentheses nesting level */ - /* Used to allow free continuations inside them */ - /* Stuff for checking on different tab sizes */ -#ifndef PGEN - /* pgen doesn't have access to Python codecs, it cannot decode the input - filename. The bytes filename might be kept, but it is only used by - indenterror() and it is not really needed: pgen only compiles one file - (Grammar/Grammar). */ - PyObject *filename; -#endif - int altwarning; /* Issue warning if alternate tabs don't match */ - int alterror; /* Issue error if alternate tabs don't match */ - int alttabsize; /* Alternate tab spacing */ - int altindstack[MAXINDENT]; /* Stack of alternate indents */ - /* Stuff for PEP 0263 */ - enum decoding_state decoding_state; - int decoding_erred; /* whether erred in decoding */ - int read_coding_spec; /* whether 'coding:...' has been read */ - char *encoding; /* Source encoding. */ - int cont_line; /* whether we are in a continuation line. */ - const char* line_start; /* pointer to start of current line */ -#ifndef PGEN - PyObject *decoding_readline; /* open(...).readline */ - PyObject *decoding_buffer; -#endif - const char* enc; /* Encoding for the current str. */ - const char* str; - const char* input; /* Tokenizer's newline translated copy of the string. 
*/ -}; - -extern struct tok_state *PyTokenizer_FromString(const char *, int); -extern struct tok_state *PyTokenizer_FromUTF8(const char *, int); -extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*, - const char *, const char *); -extern void PyTokenizer_Free(struct tok_state *); -extern int PyTokenizer_Get(struct tok_state *, char **, char **); -extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, - int len, int *offset); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_TOKENIZER_H */