Index: Grammar/Grammar
===================================================================
RCS file: /cvsroot/python/python/dist/src/Grammar/Grammar,v
retrieving revision 1.45
diff -c -r1.45 Grammar
*** Grammar/Grammar	15 Oct 2001 15:44:04 -0000	1.45
--- Grammar/Grammar	21 Mar 2002 10:16:54 -0000
***************
*** 100,102 ****
--- 100,105 ----
  list_iter: list_for | list_if
  list_for: 'for' exprlist 'in' testlist_safe [list_iter]
  list_if: 'if' test [list_iter]
+ 
+ # not used in grammar, but may appear as the root of the tree
+ encoding_decl: NAME
\ No newline at end of file
Index: Include/unicodeobject.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/unicodeobject.h,v
retrieving revision 2.36
diff -c -r2.36 unicodeobject.h
*** Include/unicodeobject.h	19 Oct 2001 02:01:31 -0000	2.36
--- Include/unicodeobject.h	21 Mar 2002 10:16:55 -0000
***************
*** 718,723 ****
--- 718,730 ----
      const char *errors		/* error handling */
      );
  
+ extern DL_IMPORT(PyObject*) PyUnicode_DecodeUnicodeEscapeSize(
+     const char *string, 	/* Unicode-Escape encoded string */
+     int length,	 		/* number of items (not bytes) in string */
+     const char *errors,		/* error handling */
+     int itemsize		/* bytes per item: 1 or sizeof(Py_UNICODE) */
+     );
+ 
  extern DL_IMPORT(PyObject*) PyUnicode_AsUnicodeEscapeString(
      PyObject *unicode	 	/* Unicode object */
      );
***************
*** 733,738 ****
--- 740,752 ----
      const char *string, 	/* Raw-Unicode-Escape encoded string */
      int length,	 		/* size of string */
      const char *errors		/* error handling */
+     );
+ 
+ extern DL_IMPORT(PyObject*) PyUnicode_DecodeRawUnicodeEscapeSize(
+     const char *string, 	/* Raw-Unicode-Escape encoded string */
+     int length,	 		/* number of items (not bytes) in string */
+     const char *errors,		/* error handling */
+     int itemsize		/* bytes per item: 1 or sizeof(Py_UNICODE) */
      );
  
  extern DL_IMPORT(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
Index: Objects/unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.131
diff -c -r2.131 unicodeobject.c
*** Objects/unicodeobject.c	21 Mar 2002 08:55:28 -0000	2.131
--- Objects/unicodeobject.c	21 Mar 2002 10:16:58 -0000
***************
*** 1548,1559 ****
--- 1548,1572 ----
  					int size,
  					const char *errors)
  {
+     return PyUnicode_DecodeUnicodeEscapeSize(s, size, errors, 1);
+ }
+ 
+ PyObject *PyUnicode_DecodeUnicodeEscapeSize(const char *s,
+ 					    int size,
+ 					    const char *errors,
+ 					    int itemsize)
+ {
      PyUnicodeObject *v;
      Py_UNICODE *p, *buf;
      const char *end;
      char* message;
+     char* narrow = NULL;
      Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
  
+ #define ITEM(p)	 ((itemsize == 1) ? (*(unsigned char*)(p)) : (*(Py_UNICODE*)(p)))
+ #define ITEMINC(p) (((p)+=itemsize),ITEM((p)-itemsize))
+ 
+     assert(itemsize == 1 || itemsize == sizeof(Py_UNICODE));
      /* Escaped strings will always be longer than the resulting
         Unicode string, so we start with size here and then reduce the
         length after conversion to the true value. */
***************
*** 1564,1585 ****
          return (PyObject *)v;
  
      p = buf = PyUnicode_AS_UNICODE(v);
!     end = s + size;
  
      while (s < end) {
!         unsigned char c;
          Py_UNICODE x;
          int i, digits;
  
          /* Non-escape characters are interpreted as Unicode ordinals */
!         if (*s != '\\') {
!             *p++ = (unsigned char) *s++;
              continue;
          }
  
          /* \ - Escapes */
!         s++;
!         switch (*s++) {
  
          /* \x escapes */
          case '\n': break;
--- 1577,1598 ----
          return (PyObject *)v;
  
      p = buf = PyUnicode_AS_UNICODE(v);
!     end = s + size * itemsize;
  
      while (s < end) {
!         Py_UNICODE c;
          Py_UNICODE x;
          int i, digits;
  
          /* Non-escape characters are interpreted as Unicode ordinals */
!         if (ITEM(s) != '\\') {
!             *p++ = ITEMINC(s);
              continue;
          }
  
          /* \ - Escapes */
!         s += itemsize;
!         switch (ITEMINC(s)) {
  
          /* \x escapes */
          case '\n': break;
***************
*** 1597,1607 ****
          /* \OOO (octal) escapes */
          case '0': case '1': case '2': case '3':
          case '4': case '5': case '6': case '7':
!             x = s[-1] - '0';
!             if ('0' <= *s && *s <= '7') {
!                 x = (x<<3) + *s++ - '0';
!                 if ('0' <= *s && *s <= '7')
!                     x = (x<<3) + *s++ - '0';
              }
              *p++ = x;
              break;
--- 1610,1620 ----
          /* \OOO (octal) escapes */
          case '0': case '1': case '2': case '3':
          case '4': case '5': case '6': case '7':
!             x = ITEM(s-itemsize) - '0';
!             if ('0' <= ITEM(s) && ITEM(s) <= '7') {
!                 x = (x<<3) + ITEMINC(s) - '0';
!                 if ('0' <= ITEM(s) && ITEM(s) <= '7')
!                     x = (x<<3) + ITEMINC(s) - '0';
              }
              *p++ = x;
              break;
***************
*** 1626,1648 ****
          hexescape:
              chr = 0;
              for (i = 0; i < digits; i++) {
!                 c = (unsigned char) s[i];
!                 if (!isxdigit(c)) {
!                     if (unicodeescape_decoding_error(&p, errors, message))
!                         goto onError;
!                     chr = 0xffffffff;
!                     i++;
!                     break;
!                 }
                  chr = (chr<<4) & ~0xF;
                  if (c >= '0' && c <= '9')
                      chr += c - '0';
                  else if (c >= 'a' && c <= 'f')
                      chr += 10 + c - 'a';
!                 else
                      chr += 10 + c - 'A';
!             }
!             s += i;
  	    if (chr == 0xffffffff)
  		    /* _decoding_error will have already written into the
  		       target buffer. */
--- 1639,1661 ----
          hexescape:
              chr = 0;
              for (i = 0; i < digits; i++) {
!                 c = ITEM(s + i * itemsize); /* s[i]; classified by the range tests below */
                  chr = (chr<<4) & ~0xF;
                  if (c >= '0' && c <= '9')
                      chr += c - '0';
                  else if (c >= 'a' && c <= 'f')
                      chr += 10 + c - 'a';
!                 else if (c >= 'A' && c <= 'F')
                      chr += 10 + c - 'A';
! 		else { /* not a hex digit: report it and abandon this escape */
! 		    if (unicodeescape_decoding_error(&p, errors, message))
!                         goto onError;
!                     chr = 0xffffffff;
!                     i++;
!                     break;
!                 }
!             }
!             s += i * itemsize;
  	    if (chr == 0xffffffff)
  		    /* _decoding_error will have already written into the
  		       target buffer. */
***************
*** 1690,1707 ****
                      goto ucnhashError;
              }
              if (*s == '{') {
!                 const char *start = s+1;
                  /* look for the closing brace */
                  while (*s != '}' && s < end)
!                     s++;
                  if (s > start && s < end && *s == '}') {
                      /* found a name.  look it up in the unicode database */
                      message = "unknown Unicode character name";
!                     s++;
!                     if (ucnhash_CAPI->getcode(start, s-start-1, &chr))
                          goto store;
                  }
              }
              if (unicodeescape_decoding_error(&p, errors, message))
                  goto onError;
              break;
--- 1703,1738 ----
                      goto ucnhashError;
              }
              if (*s == '{') {
!                 const char *start = s + itemsize; /* NOTE(review): the brace tests around here read *s, not ITEM(s) */
                  /* look for the closing brace */
                  while (*s != '}' && s < end)
!                     s += itemsize;
                  if (s > start && s < end && *s == '}') {
                      /* found a name.  look it up in the unicode database */
+ 		    if (itemsize != 1) {
+ 			/* getcode() takes a char name: narrow the wide input */
+ 			i = (s-start)/itemsize;
+ 			if (narrow)
+ 			    PyMem_DEL(narrow);
+ 			narrow = PyMem_NEW(char, i);
+ 			if (!narrow) {
+ 			    message = "out of memory";
+ 			    goto malformed_ucn;
+ 			}
+ 			while (i-- > 0) {
+ 			    if (ITEM(start + i * itemsize) > 127)
+ 				goto malformed_ucn;
+ 			    narrow[i] = ITEM(start + i * itemsize);
+ 			}
+ 		    }
                      message = "unknown Unicode character name";
! 		    i = (s - start) / itemsize; /* start still addresses the input */
!                     s += itemsize;
!                     if (ucnhash_CAPI->getcode((itemsize == 1) ? start : narrow, i, &chr))
                          goto store;
                  }
              }
+  	  malformed_ucn:
              if (unicodeescape_decoding_error(&p, errors, message))
                  goto onError;
              break;
***************
*** 1713,1725 ****
  	    }
  	    else {
  		*p++ = '\\';
! 		*p++ = (unsigned char)s[-1];
  	    }
              break;
          }
      }
      if (_PyUnicode_Resize(&v, (int)(p - buf)))
! 		goto onError;
      return (PyObject *)v;
      
  ucnhashError:
--- 1744,1758 ----
  	    }
  	    else {
  		*p++ = '\\';
! 		*p++ = ITEM(s - itemsize);
  	    }
              break;
          }
      }
+     if (narrow)
+ 	PyMem_DEL(narrow);
      if (_PyUnicode_Resize(&v, (int)(p - buf)))
! 	goto onError;
      return (PyObject *)v;
      
  ucnhashError:
***************
*** 1731,1737 ****
--- 1764,1773 ----
  
  onError:
      Py_XDECREF(v);
+     if (narrow)
+ 	PyMem_DEL(narrow);
      return NULL;
+ #undef ITEM
  }
  
  /* Return a Unicode-Escape string version of the Unicode object.
***************
*** 1900,1910 ****
--- 1936,1957 ----
  					   int size,
  					   const char *errors)
  {
+     return PyUnicode_DecodeRawUnicodeEscapeSize(s, size, errors, 1);
+ }
+ 
+ 
+ PyObject *PyUnicode_DecodeRawUnicodeEscapeSize(const char *s,
+ 					       int size,
+ 					       const char *errors,
+ 					       int itemsize)
+ {
      PyUnicodeObject *v;
      Py_UNICODE *p, *buf;
      const char *end;
      const char *bs;
+ #define ITEM(p)	((itemsize == 1) ? (*(unsigned char*)(p)) : (*(Py_UNICODE*)(p)))
      
+     assert(itemsize == 1 || itemsize == sizeof(Py_UNICODE));
      /* Escaped strings will always be longer than the resulting
         Unicode string, so we start with size here and then reduce the
         length after conversion to the true value. */
***************
*** 1914,1928 ****
      if (size == 0)
  	return (PyObject *)v;
      p = buf = PyUnicode_AS_UNICODE(v);
!     end = s + size;
      while (s < end) {
! 	unsigned char c;
  	Py_UCS4 x;
  	int i;
  
  	/* Non-escape characters are interpreted as Unicode ordinals */
! 	if (*s != '\\') {
! 	    *p++ = (unsigned char)*s++;
  	    continue;
  	}
  
--- 1961,1976 ----
      if (size == 0)
  	return (PyObject *)v;
      p = buf = PyUnicode_AS_UNICODE(v);
!     end = s + size * itemsize;
      while (s < end) {
! 	Py_UNICODE c;
  	Py_UCS4 x;
  	int i;
  
  	/* Non-escape characters are interpreted as Unicode ordinals */
! 	if (ITEM(s) != '\\') {
! 	    *p++ = ITEM(s);
! 	    s += itemsize;
  	    continue;
  	}
  
***************
*** 1930,1951 ****
  	   backslashes if odd */
  	bs = s;
  	for (;s < end;) {
! 	    if (*s != '\\')
  		break;
! 	    *p++ = (unsigned char)*s++;
  	}
! 	if (((s - bs) & 1) == 0 ||
  	    s >= end ||
! 	    *s != 'u') {
  	    continue;
  	}
  	p--;
! 	s++;
  
  	/* \uXXXX with 4 hex digits */
  	for (x = 0, i = 0; i < 4; i++) {
! 	    c = (unsigned char)s[i];
! 	    if (!isxdigit(c)) {
  		if (unicodeescape_decoding_error(&p, errors,
  						 "truncated \\uXXXX"))
  		    goto onError;
--- 1978,2007 ----
  	   backslashes if odd */
  	bs = s;
  	for (;s < end;) {
! 	    if (ITEM(s) != '\\')
  		break;
! 	    *p++ = ITEM(s);
! 	    s += itemsize;
  	}
! 	if ((((s - bs) / itemsize) & 1) == 0 ||
  	    s >= end ||
! 	    ITEM(s) != 'u') {
  	    continue;
  	}
  	p--;
! 	s += itemsize;
  
  	/* \uXXXX with 4 hex digits */
  	for (x = 0, i = 0; i < 4; i++) {
! 	    c = ITEM(s + i*itemsize); /* s[i] */
! 	    x = (x<<4) & ~0xF;
! 	    if (c >= '0' && c <= '9')
! 		x += c - '0';
! 	    else if (c >= 'a' && c <= 'f')
! 		x += 10 + c - 'a';
! 	    else if (c >= 'A' && c <= 'F')
! 		x += 10 + c - 'A';
! 	    else {
  		if (unicodeescape_decoding_error(&p, errors,
  						 "truncated \\uXXXX"))
  		    goto onError;
***************
*** 1953,1967 ****
  		i++;
  		break;
  	    }
- 	    x = (x<<4) & ~0xF;
- 	    if (c >= '0' && c <= '9')
- 		x += c - '0';
- 	    else if (c >= 'a' && c <= 'f')
- 		x += 10 + c - 'a';
- 	    else
- 		x += 10 + c - 'A';
  	}
! 	s += i;
  	if (x != 0xffffffff)
  		*p++ = x;
      }
--- 2009,2016 ----
  		i++;
  		break;
  	    }
  	}
! 	s += i * itemsize;
  	if (x != 0xffffffff)
  		*p++ = x;
      }
***************
*** 1972,1977 ****
--- 2021,2027 ----
   onError:
      Py_XDECREF(v);
      return NULL;
+ #undef ITEM
  }
  
  PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
Index: Parser/parsetok.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Parser/parsetok.c,v
retrieving revision 2.29
diff -c -r2.29 parsetok.c
*** Parser/parsetok.c	5 Jan 2002 21:40:08 -0000	2.29
--- Parser/parsetok.c	21 Mar 2002 10:16:58 -0000
***************
*** 36,43 ****
  		return NULL;
  	}
  
  	if (Py_TabcheckFlag || Py_VerboseFlag) {
- 		tok->filename = "<string>";
  		tok->altwarning = (tok->filename != NULL);
  		if (Py_TabcheckFlag >= 2)
  			tok->alterror++;
--- 36,43 ----
  		return NULL;
  	}
  
+ 	tok->filename = "<string>";
  	if (Py_TabcheckFlag || Py_VerboseFlag) {
  		tok->altwarning = (tok->filename != NULL);
  		if (Py_TabcheckFlag >= 2)
  			tok->alterror++;
***************
*** 69,76 ****
  		err_ret->error = E_NOMEM;
  		return NULL;
  	}
  	if (Py_TabcheckFlag || Py_VerboseFlag) {
- 		tok->filename = filename;
  		tok->altwarning = (filename != NULL);
  		if (Py_TabcheckFlag >= 2)
  			tok->alterror++;
--- 69,76 ----
  		err_ret->error = E_NOMEM;
  		return NULL;
  	}
+ 	tok->filename = filename;
  	if (Py_TabcheckFlag || Py_VerboseFlag) {
  		tok->altwarning = (filename != NULL);
  		if (Py_TabcheckFlag >= 2)
  			tok->alterror++;
***************
*** 170,175 ****
--- 170,180 ----
  				err_ret->text[len] = '\0';
  			}
  		}
+ 	}
+ 	else if (tok->encoding) {
+ 		node* _Py_AddEncoding(node*, char *);
+ 		n = _Py_AddEncoding(n, tok->encoding);
+ 		tok->encoding = 0; /* XXX PyMem */
  	}
  
  	PyTokenizer_Free(tok);
Index: Parser/pgenmain.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Parser/pgenmain.c,v
retrieving revision 2.24
diff -c -r2.24 pgenmain.c
*** Parser/pgenmain.c	11 Sep 2001 16:43:16 -0000	2.24
--- Parser/pgenmain.c	21 Mar 2002 10:16:58 -0000
***************
*** 22,27 ****
--- 22,39 ----
  int Py_DebugFlag;
  int Py_VerboseFlag;
  
+ struct node*
+ _Py_AddEncoding(struct node* n, char* encoding)
+ {
+ 	return n;	/* pgen stub: tree is returned as is, encoding is unused */
+ }
+ 
+ int
+ _Py_WarnBadEncoding(char* file, int line, char* start, char* end, char* enc)
+ {
+ 	return 0;	/* pgen stub: never warns, inspects none of its arguments */
+ }
+ 
  /* Forward */
  grammar *getgrammar(char *filename);
  #ifdef THINK_C
Index: Parser/tokenizer.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Parser/tokenizer.c,v
retrieving revision 2.53
diff -c -r2.53 tokenizer.c
*** Parser/tokenizer.c	30 Aug 2001 20:51:59 -0000	2.53
--- Parser/tokenizer.c	21 Mar 2002 10:16:58 -0000
***************
*** 9,14 ****
--- 9,16 ----
  #include "errcode.h"
  
  extern char *PyOS_Readline(char *);
+ extern int _Py_WarnBadEncoding(char *file, int line, 
+ 			       char *text, char *end, char *encoding);
  /* Return malloc'ed string including trailing \n;
     empty malloc'ed string for EOF;
     NULL if interrupted */
***************
*** 28,33 ****
--- 30,36 ----
  static struct tok_state *tok_new(void);
  static int tok_nextc(struct tok_state *tok);
  static void tok_backup(struct tok_state *tok, int c);
+ static void check_encoding(struct tok_state *tok);
  
  /* Token names */
  
***************
*** 113,118 ****
--- 116,123 ----
  	tok->alterror = 0;
  	tok->alttabsize = 1;
  	tok->altindstack[0] = 0;
+ 	tok->encoding = 0;
+ 	tok->issued_encoding_warning = 0;
  	return tok;
  }
  
***************
*** 294,299 ****
--- 299,305 ----
  				tok->inp = strchr(tok->inp, '\0');
  				done = tok->inp[-1] == '\n';
  			}
+ 			check_encoding(tok);
  			tok->cur = tok->buf + cur;
  #ifndef macintosh
  			/* replace "\r\n" with "\n" */
***************
*** 502,507 ****
--- 508,525 ----
  	return 0;
  }
  
+ static void
+ check_encoding(struct tok_state *tok)
+ {	/* Run the bad-encoding check on tok's buffer until it reports once. */
+ 	int res;
+ 	if (tok->issued_encoding_warning)
+ 		return;	/* an earlier call already got a nonzero result */
+ 	res = _Py_WarnBadEncoding(tok->filename, tok->lineno,
+ 				  tok->buf, tok->end, tok->encoding); /* NOTE(review): range ends at tok->end; the BOM test uses tok->inp - confirm */
+ 	if (res) {
+ 		tok->issued_encoding_warning = 1;	/* suppress all later checks */
+ 	}
+ }
  
  /* Get next token, after space stripping etc. */
  
***************
*** 522,527 ****
--- 540,556 ----
  		register int col = 0;
  		register int altcol = 0;
  		tok->atbol = 0;
+ 		/* UTF-8 signature: EF BB BF */
+ 		c = tok_nextc(tok);
+ 		if (c == Py_CHARMASK('\xef') && tok->lineno == 1 &&
+ 		    ((tok->inp - tok->cur) > 2) &&
+ 		    tok->cur[0] == '\xbb' && tok->cur[1] == '\xbf') {
+ 			tok->encoding = strdup("utf-8");
+ 			tok->cur += 2;
+ 		} 
+ 		else {
+ 			tok_backup(tok, c);
+ 		}
  		for (;;) {
  			c = tok_nextc(tok);
  			if (c == ' ')
***************
*** 626,633 ****
  			"set tabsize=",		/* will vi never die? */
  		/* more templates can be added here to support other editors */
  		};
  		char cbuf[80];
! 		char *tp, **cp;
  		tp = cbuf;
  		do {
  			*tp++ = c = tok_nextc(tok);
--- 655,667 ----
  			"set tabsize=",		/* will vi never die? */
  		/* more templates can be added here to support other editors */
  		};
+ 		static char *codings[] = {
+ 			/* From PEP 263 */
+ 			"coding:",
+ 			"coding=",
+ 		};
  		char cbuf[80];
! 		char *tp, **cp, *end, *coding;
  		tp = cbuf;
  		do {
  			*tp++ = c = tok_nextc(tok);
***************
*** 649,656 ****
  				}
  			}
  		}
! 		while (c != EOF && c != '\n')
  			c = tok_nextc(tok);
  	}
  	
  	/* Check for EOF and errors now */
--- 683,723 ----
  				}
  			}
  		}
! 		coding = NULL;
! 		for (cp = codings; 
! 		     cp < codings + sizeof(codings)/sizeof(codings[0]);
! 		     cp++) {
! 			if ((tp = strstr(cbuf, *cp))) {
! 				tp += strlen(*cp);
! 				while (*tp == ' ' || *tp == '\t')
! 					tp++;
! 				end = coding = tp;
! 				while ((*end >= 'a' && *end <='z') ||
! 				       (*end >= 'A' && *end <='Z') ||
! 				       (*end >= '0' && *end <='9') ||
! 				       *end == '-' || *end == '_' || 
! 				       *end == '.')
! 					end++;
! 				if (end == tp)
! 					coding = 0;
! 				else
! 					*end = '\0';
! 			}
! 		}
! 		/* Accept coding declarations only on the first two lines. */
! 		if (coding && tok->lineno < 3) {
! 			for (tp = coding; *tp; tp++)
! 				if (*tp >= 'A' && *tp <= 'Z')
! 					*tp = *tp - 'A' + 'a';
! 			if (tok->encoding 
! 			    && strcmp(tok->encoding, coding)!=0) {
! 				/* XXX: error: duplicate encoding */
! 			}
! 			tok->encoding = strdup(coding);
! 		}
! 		while (c != EOF && c != '\n') {
  			c = tok_nextc(tok);
+ 		}
  	}
  	
  	/* Check for EOF and errors now */
Index: Parser/tokenizer.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Parser/tokenizer.h,v
retrieving revision 2.16
diff -c -r2.16 tokenizer.h
*** Parser/tokenizer.h	1 Sep 2000 23:29:28 -0000	2.16
--- Parser/tokenizer.h	21 Mar 2002 10:16:58 -0000
***************
*** 38,43 ****
--- 38,46 ----
  	int alterror;	/* Issue error if alternate tabs don't match */
  	int alttabsize;	/* Alternate tab spacing */
  	int altindstack[MAXINDENT];	/* Stack of alternate indents */
+ 	/* Source encodings. */
+ 	char *encoding;
+ 	int issued_encoding_warning;
  };
  
  extern struct tok_state *PyTokenizer_FromString(char *);
Index: Python/compile.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v
retrieving revision 2.239
diff -c -r2.239 compile.c
*** Python/compile.c	3 Mar 2002 21:30:27 -0000	2.239
--- Python/compile.c	21 Mar 2002 10:17:00 -0000
***************
*** 426,431 ****
--- 426,432 ----
  	int c_closure;		/* Is nested w/freevars? */
  	struct symtable *c_symtable; /* pointer to module symbol table */
          PyFutureFeatures *c_future; /* pointer to module's __future__ */
+ 	const char *c_encoding; /* Source encoding, if any; borrowed memory. */
  };
  
  static int
***************
*** 1178,1191 ****
  	}
  #ifdef Py_USING_UNICODE
  	if (unicode || Py_UnicodeFlag) {
! 		if (rawmode)
! 			v = PyUnicode_DecodeRawUnicodeEscape(
! 				 s, len, NULL);
! 		else
! 			v = PyUnicode_DecodeUnicodeEscape(
! 				s, len, NULL);
  		if (v == NULL)
  			PyErr_SyntaxLocation(com->c_filename, com->c_lineno);
  		return v;
  			
  	}
--- 1179,1237 ----
  	}
  #ifdef Py_USING_UNICODE
  	if (unicode || Py_UnicodeFlag) {
! 		int i, allascii = 1, noescapes = 1, elemsize = 0;
! 		PyObject *u = NULL;
! 		for (i = 0; i < len; i++) {
! 			if (s[i] == '\\')
! 				noescapes = 0;
! 			if (Py_CHARMASK(s[i]) > 127) {
! 				allascii = 0;
! 			}
! 		}
! 		if (allascii && noescapes) {
! 			v = PyUnicode_DecodeASCII (s, len, NULL);
! 		}
! 		else if (allascii) {
! 			/* Decode escapes from ASCII buffer. */
! 			elemsize = 1;
! 		}
! 		else {
! 			/* Need to decode to charset first,
! 			   then decode escapes. */
! 			u = PyUnicode_Decode(s, len, com->c_encoding, NULL);
! 			if (u) {
! 				if(noescapes) {
! 					v = u;	/* no escapes: the decoded text is the result */
! 					u = NULL;	/* reference now held by v */
! 				} else {
! 					elemsize = sizeof(Py_UNICODE);
! 					s = (char*)PyUnicode_AS_UNICODE(u);
! 					len = PyUnicode_GET_SIZE(u); /* items in u, not source bytes */
! 				}
! 			}
! 			else {
! 				/* If we got an ASCII decoding error,
! 				   make the error message more precise. */
! 				if (strcmp(com->c_encoding, "ascii") == 0 &&
! 				    PyErr_ExceptionMatches(PyExc_UnicodeError)) {
! 					PyErr_Clear();
! 					PyErr_SetString(PyExc_UnicodeError,
! 							"Non-ASCII characters but no declared encoding");
! 				}
! 				v = NULL;
! 			}
! 		}
! 		if (elemsize) {
! 			if (rawmode)
! 				v = PyUnicode_DecodeRawUnicodeEscapeSize(
! 					s, len, NULL, elemsize);
! 			else
! 				v = PyUnicode_DecodeUnicodeEscapeSize(
! 					s, len, NULL, elemsize);
! 		}
  		if (v == NULL)
  			PyErr_SyntaxLocation(com->c_filename, com->c_lineno);
+ 		Py_XDECREF(u);
  		return v;
  			
  	}
***************
*** 4087,4092 ****
--- 4133,4145 ----
  	PyCodeObject *co;
  	if (!com_init(&sc, filename))
  		return NULL;
+ 	if (TYPE(n) == encoding_decl) {
+ 		sc.c_encoding = STR(n);
+ 		n = CHILD(n, 0);
+ 	}
+ 	else {
+ 		sc.c_encoding = "ascii";
+ 	}
  	if (base) {
  		sc.c_private = base->c_private;
  		sc.c_symtable = base->c_symtable;
***************
*** 4095,4100 ****
--- 4148,4157 ----
  		    || (sc.c_symtable->st_cur->ste_type == TYPE_FUNCTION))
  			sc.c_nested = 1;
  		sc.c_flags |= base->c_flags & PyCF_MASK;
+ 		if (base->c_encoding) {
+ 			/* inherit the enclosing encoding; sc.c_encoding was already set above, so it is never NULL here */
+ 			sc.c_encoding = base->c_encoding;
+ 		}
  	} else {
  		sc.c_private = NULL;
  		sc.c_future = PyNode_Future(n, filename);
***************
*** 5514,5517 ****
--- 5571,5618 ----
  			if (TYPE(CHILD(n, i)) >= single_input)
  				symtable_assign(st, CHILD(n, i), def_flag);
  	}
+ }
+ 
+ node*
+ _Py_AddEncoding(node* n, char *encoding)
+ {	/* Wrap tree n in a new encoding_decl root whose string is encoding. */
+ 	node *r = PyNode_New(encoding_decl); /* NOTE(review): result is dereferenced without a NULL check - verify PyNode_New's failure contract */
+ 	r->n_str = encoding;	/* pointer is stored as is, not copied */
+ 	r->n_nchildren = 1;
+ 	r->n_child = n;	/* n becomes the only child */
+ 	return r;
+ }
+ 
+ int
+ _Py_WarnBadEncoding(char* file, int line, 
+ 		    char *start, char *end, char *encoding)
+ {	/* Check bytes [start, end); returns 1 after calling PyErr_WarnExplicit, else 0. */
+ 	char *c;
+ 	char buf[200];
+ 	if (encoding != NULL) {
+ #ifdef Py_USING_UNICODE
+ 		PyObject *uni = PyUnicode_Decode(start, end-start,
+ 						 encoding, "strict");	/* trial decode only */
+ 		if (!uni) {
+ 			PyErr_Clear();	/* the decode error is replaced by a warning */
+ 			PyErr_WarnExplicit(PyExc_DeprecationWarning,
+ 					   "line violates declared encoding",
+ 					   file, line, NULL, NULL);	/* NOTE(review): return value is ignored */
+ 			return 1;
+ 		}
+ 		Py_DECREF(uni);	/* the decoded object itself is not needed */
+ #endif
+ 		return 0;
+ 	}
+ 	/* Default to ASCII. */
+ 	for (c = start; c < end; c++)
+ 		if (Py_CHARMASK(*c) > 127)
+ 			break;	/* c stops at the first byte above 127 */
+ 	if (c == end)
+ 		return 0;	/* no byte above 127 was found */
+ 	sprintf(buf, "Non-ASCII character '\\x%2x', but no declared encoding",
+ 		Py_CHARMASK(*c));
+ 	PyErr_WarnExplicit(PyExc_DeprecationWarning,
+ 			   buf, file, line, NULL, NULL);	/* NOTE(review): return value is ignored */
+ 	return 1;
+ }