Index: Makefile.pre.in
===================================================================
RCS file: /cvsroot/python/python/dist/src/Makefile.pre.in,v
retrieving revision 1.83
diff -u -r1.83 Makefile.pre.in
--- Makefile.pre.in	8 May 2002 08:59:59 -0000	1.83
+++ Makefile.pre.in	9 May 2002 13:36:24 -0000
@@ -190,15 +190,15 @@
 		Parser/node.o \
 		Parser/parser.o \
 		Parser/parsetok.o \
-		Parser/tokenizer.o \
 		Parser/bitset.o \
 		Parser/metagrammar.o
 
-PARSER_OBJS=	$(POBJS) Parser/myreadline.o
+PARSER_OBJS=	$(POBJS) Parser/myreadline.o Parser/tokenizer.o
 
 PGOBJS=		\
 		Objects/obmalloc.o \
 		Python/mysnprintf.o \
+		Parser/tokenizer_pgen.o \
 		Parser/firstsets.o \
 		Parser/grammar.o \
 		Parser/pgen.o \
@@ -415,6 +415,8 @@
 		$(srcdir)/Include/token.h \
 		$(srcdir)/Include/grammar.h
 Parser/metagrammar.o:	$(srcdir)/Parser/metagrammar.c
+
+Parser/tokenizer_pgen.o:	$(srcdir)/Parser/tokenizer.c
 
 Python/compile.o Python/symtable.o: $(GRAMMAR_H)
 
Index: Grammar/Grammar
===================================================================
RCS file: /cvsroot/python/python/dist/src/Grammar/Grammar,v
retrieving revision 1.45
diff -u -r1.45 Grammar
--- Grammar/Grammar	15 Oct 2001 15:44:04 -0000	1.45
+++ Grammar/Grammar	9 May 2002 13:36:24 -0000
@@ -100,3 +100,6 @@
 list_iter: list_for | list_if
 list_for: 'for' exprlist 'in' testlist_safe [list_iter]
 list_if: 'if' test [list_iter]
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
Index: Include/errcode.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/errcode.h,v
retrieving revision 2.14
diff -u -r2.14 errcode.h
--- Include/errcode.h	1 Sep 2000 23:29:26 -0000	2.14
+++ Include/errcode.h	9 May 2002 13:36:24 -0000
@@ -25,6 +25,7 @@
 #define E_OVERFLOW	19	/* Node had too many children */
 #define E_TOODEEP	20	/* Too many indentation levels */
 #define E_DEDENT	21	/* No matching outer block for dedent */
+#define E_DECODE	22	/* Error in decoding into Unicode */
 
 #ifdef __cplusplus
 }
Index: Include/graminit.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/graminit.h,v
retrieving revision 2.18
diff -u -r2.18 graminit.h
--- Include/graminit.h	15 Oct 2001 15:44:04 -0000	2.18
+++ Include/graminit.h	9 May 2002 13:36:24 -0000
@@ -64,3 +64,4 @@
 #define list_iter 319
 #define list_for 320
 #define list_if 321
+#define encoding_decl 322
Index: Parser/parsetok.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Parser/parsetok.c,v
retrieving revision 2.30
diff -u -r2.30 parsetok.c
--- Parser/parsetok.c	22 Mar 2002 23:53:03 -0000	2.30
+++ Parser/parsetok.c	9 May 2002 13:36:26 -0000
@@ -8,6 +8,7 @@
 #include "parser.h"
 #include "parsetok.h"
 #include "errcode.h"
+#include "graminit.h"
 
 int Py_TabcheckFlag;
 
@@ -36,8 +37,8 @@
 		return NULL;
 	}
 
+	tok->filename = "<string>";
 	if (Py_TabcheckFlag || Py_VerboseFlag) {
-		tok->filename = "<string>";
 		tok->altwarning = (tok->filename != NULL);
 		if (Py_TabcheckFlag >= 2)
 			tok->alterror++;
@@ -69,8 +70,8 @@
 		err_ret->error = E_NOMEM;
 		return NULL;
 	}
+	tok->filename = filename;
 	if (Py_TabcheckFlag || Py_VerboseFlag) {
-		tok->filename = filename;
 		tok->altwarning = (filename != NULL);
 		if (Py_TabcheckFlag >= 2)
 			tok->alterror++;
@@ -176,6 +177,13 @@
 				err_ret->text[len] = '\0';
 			}
 		}
-	}
+	} else if (tok->encoding != NULL) {
+		node* r = PyNode_New(encoding_decl);
+		r->n_str = tok->encoding;
+		r->n_nchildren = 1;
+		r->n_child = n;
+		tok->encoding = NULL;
+		n = r;
+	}
 
 	PyTokenizer_Free(tok);
Index: Parser/tokenizer.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Parser/tokenizer.c,v
retrieving revision 2.54
diff -u -r2.54 tokenizer.c
--- Parser/tokenizer.c	14 Apr 2002 20:12:41 -0000	2.54
+++ Parser/tokenizer.c	9 May 2002 13:36:26 -0000
@@ -5,10 +5,19 @@
 #include "pgenheaders.h"
 
 #include <ctype.h>
+#include <assert.h>
 #include "tokenizer.h"
 #include "errcode.h"
 
+#ifndef PGEN
+#include "unicodeobject.h"
+#include "stringobject.h"
+#include "fileobject.h"
+#include "codecs.h"
+#include "abstract.h"
+#endif /* PGEN */
+
 extern char *PyOS_Readline(char *);
 /* Return malloc'ed string including trailing \n;
    empty malloc'ed string for EOF;
@@ -114,9 +123,351 @@
 	tok->alterror = 0;
 	tok->alttabsize = 1;
 	tok->altindstack[0] = 0;
+	tok->decoding_state = 0;
+	tok->decoding_erred = 0;
+	tok->read_coding_spec = 0;
+	tok->issued_encoding_warning = 0;
+	tok->encoding = NULL;
+	tok->decoding_readline = NULL;
+	tok->decoding_buffer = NULL;
 	return tok;
 }
 
+#ifdef PGEN
+
+static char *
+decoding_fgets(char *s, int size, struct tok_state *tok)
+{
+	return fgets(s, size, tok->fp);
+}
+
+static int
+decoding_feof(struct tok_state *tok)
+{
+	return feof(tok->fp);
+}
+
+static const char *
+decode_str(const char *str, struct tok_state *tok)
+{
+	return str;
+}
+
+#else /* PGEN */
+
+static char *
+error_ret(struct tok_state *tok) /* XXX */
+{
+	tok->decoding_erred = 1;
+	if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
+		PyMem_DEL(tok->buf);
+	tok->buf = NULL;
+	return NULL;		/* as if it were EOF */
+}
+
+static char *
+new_string(const char *s, int len)
+{
+	char* result = PyMem_NEW(char, len + 1);
+	if (result != NULL) {
+		memcpy(result, s, len);
+		result[len] = '\0';
+	}
+	return result;
+}
+
+static char *
+get_normal_name(char *s)	/* for utf-8 and latin-1 */
+{
+	char buf[13];
+	int i;
+	for (i = 0; i < 12; i++) {
+		int c = s[i];
+		if (c == '\0') break;
+		else if (c == '_') buf[i] = '-';
+		else buf[i] = tolower(c);
+	}
+	buf[i] = '\0';
+	if (strcmp(buf, "utf-8") == 0 ||
+	    strncmp(buf, "utf-8-", 6) == 0) return "utf-8";
+	else if (strcmp(buf, "latin-1") == 0 ||
+		 strcmp(buf, "iso-8859-1") == 0 ||
+		 strcmp(buf, "iso-latin-1") == 0 ||
+		 strncmp(buf, "latin-1-", 8) == 0 ||
+		 strncmp(buf, "iso-8859-1-", 11) == 0 ||
+		 strncmp(buf, "iso-latin-1-", 12) == 0) return "iso-8859-1";
+	else return s;
+}
+
+static char *
+get_coding_spec(const char *s, int size)
+{
+	int i;
+	for (i = 0; i < size - 6; i++) { /* XXX inefficient search */
+		const char* t = s + i;
+		if (strncmp(t, "coding", 6) == 0) {
+			const char* begin = NULL;
+			t += 6;
+			if (t[0] != ':' && t[0] != '=') continue;
+			do t++; while (t[0] == '\x20' || t[0] == '\t');
+			begin = t;
+			while (isalnum(t[0]) || t[0] == '-' || t[0] == '_' ||
+			       t[0] == '.') t++;
+			if (begin < t) {
+				char* r = new_string(begin, t - begin);
+				char* q = get_normal_name(r);
+				if (r != q) {
+					assert(strlen(r) >= strlen(q));
+					strcpy(r, q);
+				}
+				return r;
+			}
+		}
+	}
+	return NULL;
+}
+
+static int
+check_coding_spec(const char* line, int size, struct tok_state *tok,
+		  int set_readline(struct tok_state *, const char *))
+{
+	int r = 1;
+	char* cs = get_coding_spec(line, size);
+	if (cs != NULL) {
+		tok->read_coding_spec = 1;
+		if (tok->encoding == NULL) {
+			assert(tok->decoding_state == 1); /* raw */
+			if (strcmp(cs, "utf-8") == 0 ||
+			    strcmp(cs, "iso-8859-1") == 0) {
+				tok->encoding = cs;
+			} else {
+				r = set_readline(tok, cs);
+				if (r) {
+					tok->encoding = cs;
+					tok->decoding_state = -1;
+				}
+			}
+		} else {	/* then, compare cs with BOM */
+			r = (strcmp(tok->encoding, cs) == 0);
+			PyMem_DEL(cs);
+		}
+	}
+	return r;
+}
+
+static int
+check_bom(int get_char(struct tok_state *),
+	  void unget_char(int, struct tok_state *),
+	  int set_readline(struct tok_state *, const char *),
+	  struct tok_state *tok)
+{
+	int ch = get_char(tok);
+	tok->decoding_state = 1;
+	if (ch == EOF) {
+		return 1;
+	} else if (ch == 0xEF) {
+		ch = get_char(tok); if (ch != 0xBB) goto NON_BOM;
+		ch = get_char(tok); if (ch != 0xBF) goto NON_BOM;
+	} else if (ch == 0xFE) {
+		ch = get_char(tok); if (ch != 0xFF) goto NON_BOM;
+		if (!set_readline(tok, "utf-16-be")) return 0;
+		tok->decoding_state = -1;
+	} else if (ch == 0xFF) {
+		ch = get_char(tok); if (ch != 0xFE) goto NON_BOM;
+		if (!set_readline(tok, "utf-16-le")) return 0;
+		tok->decoding_state = -1;
+	} else {
+		unget_char(ch, tok);
+		return 1;
+	}
+	tok->encoding = new_string("utf-8", 5);	/* resulting string is in utf-8 */
+	return 1;
+  NON_BOM:
+	/* any token beginning with '\xEF', '\xFE', '\xFF' is a bad token */
+	unget_char(0xFF, tok);	/* XXX this will cause a syntax error */
+	return 1;
+}
+
+static char *
+fp_readl(char *s, int size, struct tok_state *tok)
+{
+	PyObject* utf8;
+	PyObject* bf = tok->decoding_buffer;
+	if (bf == NULL) {
+		bf = PyObject_CallObject(tok->decoding_readline, NULL);
+		if (bf == NULL) return error_ret(tok);
+	} else {
+		tok->decoding_buffer = NULL;
+	}
+	utf8 = PyUnicode_AsUTF8String(bf);
+	Py_DECREF(bf);
+	if (utf8 == NULL) return error_ret(tok);
+	else {
+		const char* str = PyString_AsString(utf8);
+		assert(strlen(str) < size); /* XXX */
+		strcpy(s, str);
+		Py_DECREF(utf8);
+		if (s[0] == '\0') return NULL; /* EOF */
+		return s;
+	}
+}
+
+static int
+fp_setreadl(struct tok_state *tok, const char* enc)
+{
+	PyObject *reader, *stream, *readline;
+
+	stream = PyFile_FromFile(tok->fp, tok->filename, "rb", NULL);
+	if (stream == NULL) return 0;
+
+	reader = PyCodec_StreamReader(enc, stream, NULL);
+	Py_DECREF(stream);
+	if (reader == NULL) return 0;
+
+	readline = PyObject_GetAttrString(reader, "readline");
+	Py_DECREF(reader);
+	if (readline == NULL) return 0;
+
+	tok->decoding_readline = readline;
+	return 1;
+}
+
+static int fp_getc(struct tok_state *tok) {
+	return getc(tok->fp);
+}
+
+static void fp_ungetc(int c, struct tok_state *tok) {
+	ungetc(c, tok->fp);
+}
+
+static char *
+decoding_fgets(char *s, int size, struct tok_state *tok)
+{
+	char *line;
+	int warn = 0, badchar = 0;
+	for (;;)
+		if (tok->decoding_state < 0) {
+			line = fp_readl(s, size, tok);
+			break;
+		} else if (tok->decoding_state > 0) {
+			line = Py_UniversalNewlineFgets(s, size,
+							tok->fp, NULL);
+			warn = 1;
+			break;
+		} else {
+			if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
+				return error_ret(tok);
+			assert(tok->decoding_state != 0);
+		}
+	if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
+		if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
+			return error_ret(tok);
+		}
+	}
+#ifndef PGEN
+	if (warn && line && !tok->issued_encoding_warning && !tok->encoding) {
+		unsigned char *c;
+		for (c = line; *c; c++)
+			if (*c > 127) {
+				badchar = *c;
+				break;
+			}
+	}
+	if (badchar) {
+		char buf[200];
+		sprintf(buf, "Non-ASCII character '\\x%.2x', "
+			"but no declared encoding", badchar);
+		PyErr_WarnExplicit(PyExc_DeprecationWarning,
+				   buf, tok->filename, tok->lineno,
+				   NULL, NULL);
+		tok->issued_encoding_warning = 1;
+	}
+#endif
+	return line;
+}
+
+static int
+decoding_feof(struct tok_state *tok)
+{
+	if (tok->decoding_state >= 0) {
+		return feof(tok->fp);
+	} else {
+		PyObject* bf = tok->decoding_buffer;
+		if (bf == NULL) {
+			bf = PyObject_CallObject(tok->decoding_readline, NULL);
+			if (bf == NULL) {
+				error_ret(tok);
+				return 1;
+			} else {
+				tok->decoding_buffer = bf;
+			}
+		}
+		return PyObject_Length(bf) == 0;
+	}
+}
+
+static int bf_getc(struct tok_state *tok) {
+	return *tok->str++;
+}
+static void bf_ungetc(int c, struct tok_state *tok) {
+	tok->str--;
+	assert(*tok->str == c);	/* tok->cur may point to read-only segment */
+}
+static int bf_setreadl(struct tok_state *tok, const char* enc) {
+	tok->enc = enc;
+	return 1;
+}
+static PyObject *
+translate_into_utf8(const char* str, const char* enc) {
+	PyObject *utf8;
+	PyObject* bf = PyUnicode_Decode(str, strlen(str), enc, NULL);
+	if (bf == NULL)
+		return NULL;
+	utf8 = PyUnicode_AsUTF8String(bf);
+	Py_DECREF(bf);
+	return utf8;
+}
+static const char *
+decode_str(const char *str, struct tok_state *tok)
+{
+	PyObject* utf8 = NULL;
+	const char *s;
+	int lineno = 0;
+	tok->enc = NULL;
+	tok->str = str;
+	if (!check_bom(bf_getc, bf_ungetc, bf_setreadl, tok))
+		return NULL;
+	str = tok->str;		/* string after BOM if any */
+	assert(str);
+	if (tok->enc != NULL) {
+		utf8 = translate_into_utf8(str, tok->enc);
+		if (utf8 == NULL)
+			return NULL;
+		str = PyString_AsString(utf8);
+	}
+	for (s = str;; s++) {
+		if (*s == '\0') break;
+		else if (*s == '\n') {
+			lineno++;
+			if (lineno == 2) break;
+		}
+	}
+	tok->enc = NULL;
+	if (!check_coding_spec(str, s - str, tok, bf_setreadl))
+		return NULL;
+	if (tok->enc != NULL) {
+		assert(utf8 == NULL);
+		utf8 = translate_into_utf8(str, tok->enc);
+		if (utf8 == NULL)
+			return NULL;
+		str = PyString_AsString(utf8);
+	}
+	assert(tok->decoding_buffer == NULL);
+	tok->decoding_buffer = utf8; /* CAUTION */
+	return str;
+}
+
+#endif /* PGEN */
 
 /* Set up tokenizer for string */
 
@@ -126,6 +477,9 @@
 	struct tok_state *tok = tok_new();
 	if (tok == NULL)
 		return NULL;
+	str = (char *)decode_str(str, tok);
+	if (str == NULL)
+		return NULL;
 	tok->buf = tok->cur = tok->end = tok->inp = str;
 	return tok;
 }
@@ -157,6 +511,10 @@
 void
 PyTokenizer_Free(struct tok_state *tok)
 {
+	if (tok->encoding != NULL)
+		PyMem_DEL(tok->encoding);
+	Py_XDECREF(tok->decoding_readline);
+	Py_XDECREF(tok->decoding_buffer);
 	if (tok->fp != NULL && tok->buf != NULL)
 		PyMem_DEL(tok->buf);
 	PyMem_DEL(tok);
@@ -246,8 +604,8 @@
 			}
 			tok->end = tok->buf + BUFSIZ;
 		}
-		if (Py_UniversalNewlineFgets(tok->buf, (int)(tok->end - tok->buf),
-					  tok->fp, NULL) == NULL) {
+		if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
+			       tok) == NULL) {
 			tok->done = E_EOF;
 			done = 1;
 		}
@@ -259,7 +617,7 @@
 		}
 		else {
 			cur = tok->cur - tok->buf;
-			if (feof(tok->fp)) {
+			if (decoding_feof(tok)) {
 				tok->done = E_EOF;
 				done = 1;
 			}
@@ -285,9 +643,9 @@
 			tok->end = tok->buf + newsize;
 			tok->start = curstart < 0 ? NULL :
 				     tok->buf + curstart;
-			if (Py_UniversalNewlineFgets(tok->inp,
+			if (decoding_fgets(tok->inp,
 				       (int)(tok->end - tok->inp),
-				       tok->fp, NULL) == NULL) {
+				       tok) == NULL) {
 				/* Last line does not end in \n,
 				   fake one */
 				strcpy(tok->inp, "\n");
@@ -506,9 +864,8 @@
 
 /* Get next token, after space stripping etc. */
 
-int
-PyTokenizer_Get(register struct tok_state *tok, char **p_start,
-	   char **p_end)
+static int
+tok_get(register struct tok_state *tok, char **p_start, char **p_end)
 {
 	register int c;
 	int blankline;
@@ -915,6 +1272,16 @@
 	return PyToken_OneChar(c);
 }
 
+int
+PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
+{
+	int result = tok_get(tok, p_start, p_end);
+	if (tok->decoding_erred) {
+		result = ERRORTOKEN;
+		tok->done = E_DECODE;
+	}
+	return result;
+}
 
 #ifdef Py_DEBUG
 
Index: Parser/tokenizer.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Parser/tokenizer.h,v
retrieving revision 2.16
diff -u -r2.16 tokenizer.h
--- Parser/tokenizer.h	1 Sep 2000 23:29:28 -0000	2.16
+++ Parser/tokenizer.h	9 May 2002 13:36:26 -0000
@@ -4,6 +4,7 @@
 extern "C" {
 #endif
 
+#include "object.h"
 
 /* Tokenizer interface */
 
@@ -38,6 +39,16 @@
 	int alterror;	/* Issue error if alternate tabs don't match */
 	int alttabsize;	/* Alternate tab spacing */
 	int altindstack[MAXINDENT];	/* Stack of alternate indents */
+	/* Stuff for PEP 0263 */
+	int decoding_state;	/* -1:decoding, 0:init, 1:raw */
+	int decoding_erred;	/* whether erred in decoding */
+	int read_coding_spec;	/* whether 'coding:...' has been read */
+	int issued_encoding_warning; /* whether non-ASCII warning was issued */
+	char *encoding;
+	PyObject *decoding_readline; /* codecs.open(...).readline */
+	PyObject *decoding_buffer;
+	const char* enc;
+	const char* str;
 };
 
 extern struct tok_state *PyTokenizer_FromString(char *);
Index: Python/bltinmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/bltinmodule.c,v
retrieving revision 2.254
diff -u -r2.254 bltinmodule.c
--- Python/bltinmodule.c	29 Apr 2002 21:27:32 -0000	2.254
+++ Python/bltinmodule.c	9 May 2002 13:36:27 -0000
@@ -1453,19 +1453,26 @@
 			result = PyString_FromStringAndSize(s, (int)(len-1));
 		}
 	}
-		PyMem_FREE(s);
-		return result;
-	}
-	if (v != NULL) {
 		f = PySys_GetObject("stdout");
 		if (f == NULL) {
 			PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
 			return NULL;
 		}
+		PyFile_SoftSpace(f, 0);
+		PyMem_FREE(s);
+		return result;
+	}
+	f = PySys_GetObject("stdout");
+	if (f == NULL) {
+		PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
+		return NULL;
+	}
+	if (v != NULL) {
 		if (Py_FlushLine() != 0 ||
 		    PyFile_WriteObject(v, f, Py_PRINT_RAW) != 0)
 			return NULL;
 	}
+	PyFile_SoftSpace(f, 0);
 	f = PySys_GetObject("stdin");
 	if (f == NULL) {
 		PyErr_SetString(PyExc_RuntimeError, "lost sys.stdin");
 		return NULL;
 	}
Index: Python/compile.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v
retrieving revision 2.242
diff -u -r2.242 compile.c
--- Python/compile.c	26 Apr 2002 01:57:19 -0000	2.242
+++ Python/compile.c	9 May 2002 13:36:29 -0000
@@ -422,6 +422,7 @@
 	int c_closure;		/* Is nested w/freevars? */
 	struct symtable *c_symtable; /* pointer to module symbol table */
 	PyFutureFeatures *c_future; /* pointer to module's __future__ */
+	char *c_encoding;	/* source encoding (a borrowed reference) */
 };
 
 static int
@@ -1119,6 +1120,23 @@
 }
 
 static PyObject *
+decode_utf8(char **sPtr, char *end, char* encoding)
+{
+	PyObject *u, *v;
+	char *s, *t;
+	t = s = *sPtr;
+	/* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
+	while (s < end && (*s & 0x80)) s++;
+	*sPtr = s;
+	u = PyUnicode_DecodeUTF8(t, s - t, NULL);
+	if (u == NULL)
+		return NULL;
+	v = PyUnicode_AsEncodedString(u, encoding, NULL);
+	Py_DECREF(u);
+	return v;
+}
+
+static PyObject *
 parsestr(struct compiling *com, char *s)
 {
 	PyObject *v;
@@ -1130,6 +1148,8 @@
 	int first = *s;
 	int quote = first;
 	int rawmode = 0;
+	char* encoding = ((com == NULL) ? NULL : com->c_encoding);
+	int need_encoding;
 #ifdef Py_USING_UNICODE
 	int unicode = 0;
 #endif
@@ -1174,28 +1194,101 @@
 	}
 #ifdef Py_USING_UNICODE
 	if (unicode || Py_UnicodeFlag) {
+		PyObject *u, *w;
+		if (encoding == NULL) {
+			buf = s;
+			u = NULL;
+		} else if (strcmp(encoding, "iso-8859-1") == 0) {
+			buf = s;
+			u = NULL;
+		} else {
+			/* "\XX" may become "\u005c\uHHLL" (12 bytes) */
+			u = PyString_FromStringAndSize((char *)NULL, len * 4);
+			if (u == NULL)
+				return NULL;
+			p = buf = PyString_AsString(u);
+			end = s + len;
+			while (s < end) {
+				if (*s == '\\') {
+					*p++ = *s++;
+					if (*s & 0x80) {
+						strcpy(p, "u005c");
+						p += 5;
+					}
+				}
+				if (*s & 0x80) { /* XXX inefficient */
+					char *r;
+					int rn, i;
+					w = decode_utf8(&s, end, "utf-16-be");
+					if (w == NULL) {
+						Py_DECREF(u);
+						return NULL;
+					}
+					r = PyString_AsString(w);
+					rn = PyString_Size(w);
+					assert(rn % 2 == 0);
+					for (i = 0; i < rn; i += 2) {
+						sprintf(p, "\\u%02x%02x",
+							r[i + 0] & 0xFF,
+							r[i + 1] & 0xFF);
+						p += 6;
+					}
+					Py_DECREF(w);
+				} else {
+					*p++ = *s++;
+				}
+			}
+			len = p - buf;
+		}
 		if (rawmode)
-			v = PyUnicode_DecodeRawUnicodeEscape(
-				s, len, NULL);
+			v = PyUnicode_DecodeRawUnicodeEscape(buf, len, NULL);
 		else
-			v = PyUnicode_DecodeUnicodeEscape(
-				s, len, NULL);
+			v = PyUnicode_DecodeUnicodeEscape(buf, len, NULL);
+		Py_XDECREF(u);
 		if (v == NULL)
 			PyErr_SyntaxLocation(com->c_filename, com->c_lineno);
 		return v;
 	}
 #endif
-	if (rawmode || strchr(s, '\\') == NULL)
-		return PyString_FromStringAndSize(s, len);
-	v = PyString_FromStringAndSize((char *)NULL, len);
+	need_encoding = (encoding != NULL &&
+			 strcmp(encoding, "utf-8") != 0 &&
+			 strcmp(encoding, "iso-8859-1") != 0);
+	if (rawmode || strchr(s, '\\') == NULL) {
+		if (need_encoding) {
+			PyObject* u = PyUnicode_DecodeUTF8(s, len, NULL);
+			if (u == NULL)
+				return NULL;
+			v = PyUnicode_AsEncodedString(u, encoding, NULL);
+			Py_DECREF(u);
+			return v;
+		} else {
+			return PyString_FromStringAndSize(s, len);
+		}
+	}
+	v = PyString_FromStringAndSize((char *)NULL, /* XXX 4 is enough? */
+				       need_encoding ? len * 4 : len);
 	if (v == NULL)
 		return NULL;
 	p = buf = PyString_AsString(v);
 	end = s + len;
 	while (s < end) {
 		if (*s != '\\') {
-			*p++ = *s++;
+		  ORDINAL:
+			if (need_encoding && (*s & 0x80)) {
+				char *r;
+				int rn;
+				PyObject* w = decode_utf8(&s, end, encoding);
+				if (w == NULL)
+					return NULL;
+				r = PyString_AsString(w);
+				rn = PyString_Size(w);
+				memcpy(p, r, rn);
+				p += rn;
+				Py_DECREF(w);
+			} else {
+				*p++ = *s++;
+			}
 			continue;
 		}
 		s++;
@@ -1252,8 +1345,8 @@
 			return NULL;
 		default:
 			*p++ = '\\';
-			*p++ = s[-1];
-			break;
+			s--;
+			goto ORDINAL;
 		}
 	}
 	_PyString_Resize(&v, (int)(p - buf));
@@ -4075,6 +4168,12 @@
 	PyCodeObject *co;
 	if (!com_init(&sc, filename))
 		return NULL;
+	if (TYPE(n) == encoding_decl) {
+		sc.c_encoding = STR(n);
+		n = CHILD(n, 0);
+	} else {
+		sc.c_encoding = NULL;
+	}
 	if (base) {
 		sc.c_private = base->c_private;
 		sc.c_symtable = base->c_symtable;
@@ -4083,6 +4182,10 @@
 		    || (sc.c_symtable->st_cur->ste_type == TYPE_FUNCTION))
 			sc.c_nested = 1;
 		sc.c_flags |= base->c_flags & PyCF_MASK;
+		if (base->c_encoding != NULL) {
+			assert(sc.c_encoding == NULL);
+			sc.c_encoding = base->c_encoding;
+		}
 	} else {
 		sc.c_private = NULL;
 		sc.c_future = PyNode_Future(n, filename);
Index: Python/pythonrun.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/pythonrun.c,v
retrieving revision 2.160
diff -u -r2.160 pythonrun.c
--- Python/pythonrun.c	23 Apr 2002 20:31:01 -0000	2.160
+++ Python/pythonrun.c	9 May 2002 13:36:30 -0000
@@ -1201,6 +1201,7 @@
 err_input(perrdetail *err)
 {
 	PyObject *v, *w, *errtype;
+	PyObject* u = NULL;
 	char *msg = NULL;
 	errtype = PyExc_SyntaxError;
 	v = Py_BuildValue("(ziiz)", err->filename,
@@ -1252,12 +1253,24 @@
 		errtype = PyExc_IndentationError;
 		msg = "too many levels of indentation";
 		break;
+	case E_DECODE: {	/* XXX */
+		PyThreadState* tstate = PyThreadState_Get();
+		PyObject* value = tstate->curexc_value;
+		if (value != NULL) {
+			u = PyObject_Repr(value);
+			if (u != NULL) {
+				msg = PyString_AsString(u);
+				break;
+			}
+		}
+	}
 	default:
 		fprintf(stderr, "error=%d\n", err->error);
 		msg = "unknown parsing error";
 		break;
 	}
 	w = Py_BuildValue("(sO)", msg, v);
+	Py_XDECREF(u);
 	Py_XDECREF(v);
 	PyErr_SetObject(errtype, w);
 	Py_XDECREF(w);
--- /dev/null	Sat Mar 23 20:46:34 2002
+++ Parser/tokenizer_pgen.c	Thu May  9 13:03:27 2002
@@ -0,0 +1,2 @@
+#define PGEN
+#include "tokenizer.c"
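
For illustration only (not part of the patch): the kind of source this change is meant to accept, written as a small Python 2 module. The file contents and literals below are made up; the comments only summarize the mechanism in the patch, under the assumption that iso-8859-1 is the declared coding.

# -*- coding: iso-8859-1 -*-
# The tokenizer finds the "coding:" spec in one of the first two lines
# (get_coding_spec/check_coding_spec), parsetok.c wraps the parse tree in an
# encoding_decl node carrying the name, and compile.c's parsestr() then uses
# it: u"..." literals are decoded via the declared coding, while plain 8-bit
# literals are left untouched for utf-8/iso-8859-1 and re-encoded otherwise.

title  = "Fußball"        # plain string: kept as iso-8859-1 bytes
utitle = u"Fußball"       # Unicode literal: decoded using the declared coding
print title, utitle

A UTF-8 or UTF-16 byte order mark at the start of the file (handled by check_bom) would have the same effect as an explicit utf-8 declaration; without either, non-ASCII bytes only trigger the DeprecationWarning added in decoding_fgets.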