Hmm, supporting UTF-16* or UTF-32* is not so easy because most functions in the parser suppose that the string does not contain any nul byte (e.g. they use strlen(str)). With the attached patch, it's possible to parse UTF-16LE, UTF-16BE and UTF-32LE... but not UTF-32BE — the compile() builtin rejects strings containing a nul byte.

Index: Parser/tokenizer.c
===================================================================
--- Parser/tokenizer.c	(révision 70501)
+++ Parser/tokenizer.c	(copie de travail)
@@ -286,8 +286,10 @@
         else
             PyMem_FREE(cs);
     }
-    } else { /* then, compare cs with BOM */
-        r = (strcmp(tok->encoding, cs) == 0);
+    } else {
+        /* then, compare cs with BOM */
+        /*r = (strcmp(tok->encoding, cs) == 0);*/
+        r = 1;
         PyMem_FREE(cs);
     }
 }
@@ -330,26 +332,56 @@
             /* any token beginning with '\xEF' is a bad token */
             return 1;
         }
-#if 0
-        /* Disable support for UTF-16 BOMs until a decision
-           is made whether this needs to be supported. */
+
+        if (tok->encoding != NULL)
+            PyMem_FREE(tok->encoding);
+        tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */
+        /* No need to set_readline: input is already utf-8 */
     } else if (ch == 0xFE) {
-        ch = get_char(tok); if (ch != 0xFF) goto NON_BOM;
-        if (!set_readline(tok, "utf-16-be")) return 0;
+        ch = get_char(tok);
+        if (ch != 0xFF) {
+            unget_char(ch, tok);
+            unget_char(0xFE, tok);
+            /* any token beginning with '\xFE' is a bad token */
+            return 1;
+        }
+        /* 0xFE 0xFF: UTF-16-BE BOM, but use UTF-16 to read the BOM */
+        if (!set_readline(tok, "utf-16")) return 0;
+        tok->encoding = new_string("UTF-16BE", 8);
         tok->decoding_state = STATE_NORMAL;
     } else if (ch == 0xFF) {
-        ch = get_char(tok); if (ch != 0xFE) goto NON_BOM;
-        if (!set_readline(tok, "utf-16-le")) return 0;
+        ch = get_char(tok);
+        if (ch != 0xFE) {
+            unget_char(ch, tok);
+            unget_char(0xFF, tok);
+            /* any token beginning with '\xFF' is a bad token */
+            return 1;
+        }
+        ch = get_char(tok);
+        if (ch != 0x00) {
+            unget_char(ch, tok);
+            /* 0xFF 0xFE: UTF-16-LE BOM, but use UTF-16
+               to read the BOM */
+            if (!set_readline(tok, "utf-16")) return 0;
+            tok->encoding = new_string("UTF16-LE", 8);
         tok->decoding_state = STATE_NORMAL;
-#endif
+            return 1;
+        }
+        ch = get_char(tok);
+        if (ch != 0x00) {
+            unget_char(ch, tok);
+            unget_char(0x00, tok);
+            unget_char(0xFE, tok);
+            unget_char(0xFF, tok);
+            return 1;
+        }
+        if (!set_readline(tok, "utf-32")) return 0;
+        tok->encoding = new_string("UTF32-LE", 8);
+        tok->decoding_state = STATE_NORMAL;
+        return 1;
     } else {
         unget_char(ch, tok);
         return 1;
     }
-    if (tok->encoding != NULL)
-        PyMem_FREE(tok->encoding);
-    tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */
-    /* No need to set_readline: input is already utf-8 */
     return 1;
 }
Index: Lib/test/test_pep263.py
===================================================================
--- Lib/test/test_pep263.py	(révision 70501)
+++ Lib/test/test_pep263.py	(copie de travail)
@@ -2,6 +2,9 @@
 
 import unittest
 from test import support
+import codecs
+import sys
+import subprocess
 
 class PEP263Test(unittest.TestCase):
 
@@ -36,6 +39,24 @@
         exec(c, d)
         self.assertEquals(d['\xc6'], '\xc6')
 
+    def test_bom(self):
+        source = "# coding: %s\nx = '\u0a20'\nprint(ascii(x))"
+        for bom, encoding in (
+            (codecs.BOM_UTF8, "utf-8"),
+            (codecs.BOM_LE, "utf-16-le"),
+            (codecs.BOM_BE, "utf-16-be"),
+            (codecs.BOM_UTF32_LE, "utf-32-le"),
+#            (codecs.BOM_UTF32_BE, "utf-32-be"),
+        ):
+            source_bytes = bom + source.encode(encoding)
+            filename = "test.py"
+            with open(filename, "wb") as fp:
+                fp.write(source_bytes)
+            p = subprocess.Popen([sys.executable, filename],
+                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+            stdout, stderr = p.communicate()
+            self.assertEquals(stdout, b"'\\u0a20'\n")
+
 
 def test_main():
     support.run_unittest(PEP263Test)