Index: Python/pythonrun.c =================================================================== --- Python/pythonrun.c (revision 75201) +++ Python/pythonrun.c (working copy) @@ -1296,7 +1296,7 @@ if (arena == NULL) return NULL; - mod = PyParser_ASTFromString(str, "", start, flags, arena); + mod = PyParser_ASTFromString(str, "", NULL, start, flags, arena); if (mod != NULL) ret = run_mod(mod, "", globals, locals, flags, arena); PyArena_Free(arena); @@ -1373,7 +1373,8 @@ } PyObject * -Py_CompileStringFlags(const char *str, const char *filename, int start, +Py_CompileStringFlags(const char *str, const char *filename, + const char *encoding, int start, PyCompilerFlags *flags) { PyCodeObject *co; @@ -1382,7 +1383,7 @@ if (arena == NULL) return NULL; - mod = PyParser_ASTFromString(str, filename, start, flags, arena); + mod = PyParser_ASTFromString(str, filename, encoding, start, flags, arena); if (mod == NULL) { PyArena_Free(arena); return NULL; @@ -1409,7 +1410,7 @@ flags.cf_flags = 0; - mod = PyParser_ASTFromString(str, filename, start, &flags, arena); + mod = PyParser_ASTFromString(str, filename, NULL, start, &flags, arena); if (mod == NULL) { PyArena_Free(arena); return NULL; @@ -1421,7 +1422,8 @@ /* Preferred access to parser is through AST. 
*/ mod_ty -PyParser_ASTFromString(const char *s, const char *filename, int start, +PyParser_ASTFromString(const char *s, const char *filename, + const char *encoding, int start, PyCompilerFlags *flags, PyArena *arena) { mod_ty mod; @@ -1429,7 +1431,7 @@ perrdetail err; int iflags = PARSER_FLAGS(flags); - node *n = PyParser_ParseStringFlagsFilenameEx(s, filename, + node *n = PyParser_ParseStringFlagsFilenameEx(s, filename, encoding, &_PyParser_Grammar, start, &err, &iflags); if (flags == NULL) { @@ -1941,7 +1943,7 @@ PyAPI_FUNC(PyObject *) Py_CompileString(const char *str, const char *p, int s) { - return Py_CompileStringFlags(str, p, s, NULL); + return Py_CompileStringFlags(str, p, NULL, s, NULL); } #undef PyRun_InteractiveOne Index: Python/bltinmodule.c =================================================================== --- Python/bltinmodule.c (revision 75201) +++ Python/bltinmodule.c (working copy) @@ -467,14 +467,16 @@ int supplied_flags = 0; PyCompilerFlags cf; PyObject *result = NULL, *cmd, *tmp = NULL; + char *encoding = NULL; Py_ssize_t length; static char *kwlist[] = {"source", "filename", "mode", "flags", - "dont_inherit", NULL}; + "dont_inherit", "encoding", NULL}; int start[] = {Py_file_input, Py_eval_input, Py_single_input}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oss|ii:compile", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oss|iis:compile", kwlist, &cmd, &filename, &startstr, - &supplied_flags, &dont_inherit)) + &supplied_flags, &dont_inherit, + &encoding)) return NULL; cf.cf_flags = supplied_flags; @@ -528,11 +530,19 @@ #ifdef Py_USING_UNICODE if (PyUnicode_Check(cmd)) { - tmp = PyUnicode_AsUTF8String(cmd); - if (tmp == NULL) - return NULL; - cmd = tmp; - cf.cf_flags |= PyCF_SOURCE_IS_UTF8; + if (encoding) { + tmp = PyUnicode_AsEncodedString(cmd, encoding, "strict"); + if (tmp == NULL) + return NULL; + cmd = tmp; + } + else { + tmp = PyUnicode_AsUTF8String(cmd); + if (tmp == NULL) + return NULL; + cmd = tmp; + cf.cf_flags |= 
PyCF_SOURCE_IS_UTF8; + } } #endif @@ -543,14 +553,14 @@ "compile() expected string without null bytes"); goto cleanup; } - result = Py_CompileStringFlags(str, filename, start[mode], &cf); + result = Py_CompileStringFlags(str, filename, encoding, start[mode], &cf); cleanup: Py_XDECREF(tmp); return result; } PyDoc_STRVAR(compile_doc, -"compile(source, filename, mode[, flags[, dont_inherit]]) -> code object\n\ +"compile(source, filename, mode[, flags[, dont_inherit[, encoding]]]) -> code object\n\ \n\ Compile the source string (a Python module, statement or expression)\n\ into a code object that can be executed by the exec statement or eval().\n\ @@ -562,7 +572,8 @@ The dont_inherit argument, if non-zero, stops the compilation inheriting\n\ the effects of any future statements in effect in the code calling\n\ compile; if absent or zero these statements do influence the compilation,\n\ -in addition to any features explicitly specified."); +in addition to any features explicitly specified.\n\ +The encoding argument specifies context encoding."); static PyObject * builtin_dir(PyObject *self, PyObject *args) Index: Include/parsetok.h =================================================================== --- Include/parsetok.h (revision 75201) +++ Include/parsetok.h (working copy) @@ -52,8 +52,9 @@ perrdetail *, int); PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx(const char *, const char *, + const char *, grammar *, int, - perrdetail *, int *); + perrdetail *, int *); /* Note that he following function is defined in pythonrun.c not parsetok.c. 
*/ PyAPI_FUNC(void) PyParser_SetError(perrdetail *); Index: Include/pythonrun.h =================================================================== --- Include/pythonrun.h (revision 75201) +++ Include/pythonrun.h (working copy) @@ -39,7 +39,8 @@ PyAPI_FUNC(int) PyRun_InteractiveOneFlags(FILE *, const char *, PyCompilerFlags *); PyAPI_FUNC(int) PyRun_InteractiveLoopFlags(FILE *, const char *, PyCompilerFlags *); -PyAPI_FUNC(struct _mod *) PyParser_ASTFromString(const char *, const char *, +PyAPI_FUNC(struct _mod *) PyParser_ASTFromString(const char *, const char *, + const char *, int, PyCompilerFlags *flags, PyArena *); PyAPI_FUNC(struct _mod *) PyParser_ASTFromFile(FILE *, const char *, int, @@ -62,8 +63,9 @@ PyObject *, PyObject *, int, PyCompilerFlags *); -#define Py_CompileString(str, p, s) Py_CompileStringFlags(str, p, s, NULL) -PyAPI_FUNC(PyObject *) Py_CompileStringFlags(const char *, const char *, int, +#define Py_CompileString(str, p, s) Py_CompileStringFlags(str, p, NULL, s, NULL) +PyAPI_FUNC(PyObject *) Py_CompileStringFlags(const char *, const char *, + const char *, int, PyCompilerFlags *); PyAPI_FUNC(struct symtable *) Py_SymtableString(const char *, const char *, int); Index: Parser/tokenizer.c =================================================================== --- Parser/tokenizer.c (revision 75201) +++ Parser/tokenizer.c (working copy) @@ -130,6 +130,17 @@ return tok; } +static char * +new_string(const char *s, Py_ssize_t len) +{ + char* result = (char *)PyMem_MALLOC(len + 1); + if (result != NULL) { + memcpy(result, s, len); + result[len] = '\0'; + } + return result; +} + #ifdef PGEN static char * @@ -163,17 +174,6 @@ } static char * -new_string(const char *s, Py_ssize_t len) -{ - char* result = (char *)PyMem_MALLOC(len + 1); - if (result != NULL) { - memcpy(result, s, len); - result[len] = '\0'; - } - return result; -} - -static char * get_normal_name(char *s) /* for utf-8 and latin-1 */ { char buf[13]; @@ -587,8 +587,8 @@ str = tok->str; /* 
string after BOM if any */ assert(str); #ifdef Py_USING_UNICODE - if (tok->enc != NULL) { - utf8 = translate_into_utf8(str, tok->enc); + if (tok->encoding != NULL) { + utf8 = translate_into_utf8(str, tok->encoding); if (utf8 == NULL) return error_ret(tok); str = PyString_AsString(utf8); @@ -634,11 +634,15 @@ /* Set up tokenizer for string */ struct tok_state * -PyTokenizer_FromString(const char *str) +PyTokenizer_FromString(const char *str, const char *encoding) { struct tok_state *tok = tok_new(); if (tok == NULL) return NULL; + if (encoding) { + tok->enc = encoding; + tok->encoding = new_string(encoding, strlen(encoding)); + } str = (char *)decode_str(str, tok); if (str == NULL) { PyTokenizer_Free(tok); @@ -646,7 +650,7 @@ } /* XXX: constify members. */ - tok->buf = tok->cur = tok->end = tok->inp = (char*)str; + tok->buf = tok->cur = tok->end = tok->inp = (char*)str; return tok; } Index: Parser/tokenizer.h =================================================================== --- Parser/tokenizer.h (revision 75201) +++ Parser/tokenizer.h (working copy) @@ -54,7 +54,7 @@ const char* str; }; -extern struct tok_state *PyTokenizer_FromString(const char *); +extern struct tok_state *PyTokenizer_FromString(const char *, const char *); extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *); extern void PyTokenizer_Free(struct tok_state *); extern int PyTokenizer_Get(struct tok_state *, char **, char **); Index: Parser/parsetok.c =================================================================== --- Parser/parsetok.c (revision 75201) +++ Parser/parsetok.c (working copy) @@ -38,20 +38,20 @@ perrdetail *err_ret, int flags) { int iflags = flags; - return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start, + return PyParser_ParseStringFlagsFilenameEx(s, filename, NULL, g, start, err_ret, &iflags); } node * PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, - grammar *g, int start, + const char *encoding, grammar *g, int start, 
perrdetail *err_ret, int *flags) { struct tok_state *tok; initerr(err_ret, filename); - if ((tok = PyTokenizer_FromString(s)) == NULL) { + if ((tok = PyTokenizer_FromString(s, encoding)) == NULL) { err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; return NULL; } @@ -257,7 +257,7 @@ goto done; } strcpy(r->n_str, tok->encoding); - PyMem_FREE(tok->encoding); + PyMem_FREE((char*)tok->encoding); tok->encoding = NULL; r->n_nchildren = 1; r->n_child = n; Index: Lib/distutils/archive_util.py =================================================================== --- Lib/distutils/archive_util.py (revision 75201) +++ Lib/distutils/archive_util.py (working copy) @@ -21,7 +21,7 @@ try: from grp import getgrnam -except AttributeError: +except (AttributeError, ImportError): getgrnam = None def _get_gid(name): Index: Lib/test/test_builtin.py =================================================================== --- Lib/test/test_builtin.py (revision 75201) +++ Lib/test/test_builtin.py (working copy) @@ -222,6 +222,18 @@ self.assertRaises(ValueError, compile, unicode('a = 1'), 'f', 'bad') + def test_compile_encoding(self): + code = compile("s = u'\x82\xa0'", "test1", "single", encoding='Shift_JIS') + d = {} + exec code in d + self.assertEqual(d['s'], u'\u3042') + + code = compile(u"s = b'\u3042'", "test2", "single", encoding='Shift_JIS') + d = {} + exec code in d + self.assertEqual(d['s'], '\x82\xa0') + + def test_delattr(self): import sys sys.spam = 1 Index: Modules/parsermodule.c =================================================================== --- Modules/parsermodule.c (revision 75201) +++ Modules/parsermodule.c (working copy) @@ -549,7 +549,7 @@ static char *keywords[] = {"source", NULL}; if (PyArg_ParseTupleAndKeywords(args, kw, argspec, keywords, &string)) { - node* n = PyParser_ParseStringFlagsFilenameEx(string, NULL, + node* n = PyParser_ParseStringFlagsFilenameEx(string, NULL, NULL, &_PyParser_Grammar, (type == PyST_EXPR) ? eval_input : file_input,