Index: Lib/encodings/utf_8_sig.py
===================================================================
RCS file: Lib/encodings/utf_8_sig.py
diff -N Lib/encodings/utf_8_sig.py
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ Lib/encodings/utf_8_sig.py	9 Aug 2005 13:37:53 -0000
@@ -0,0 +1,83 @@
+""" Python 'utf-8-sig' Codec
+This works like UTF-8 with the following changes:
+
+* On encoding/writing a UTF-8 encoded BOM will be prepended/written as the
+  first three bytes.
+
+* On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these
+  bytes will be skipped.
+"""
+import codecs
+
+### Codec APIs
+
+def encode(input, errors='strict'):
+    """Stateless encoder: return (BOM + UTF-8 encoded input, len(input))."""
+    return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input))
+
+def decode(input, errors='strict'):
+    """Stateless decoder: skip one leading UTF-8 BOM and decode the rest.
+
+    The input is decoded as complete data (final=True).  The returned byte
+    count includes the three BOM bytes when a BOM was skipped.
+    """
+    prefix = 0
+    if input.startswith(codecs.BOM_UTF8):
+        input = input[3:]
+        prefix = 3
+    (output, consumed) = codecs.utf_8_decode(input, errors, True)
+    return (output, consumed+prefix)
+
+class StreamWriter(codecs.StreamWriter):
+    """Stream writer that prepends the BOM to the first chunk only."""
+
+    def reset(self):
+        codecs.StreamWriter.reset(self)
+        # Drop the instance attribute set by encode() so that the class-level
+        # encode() runs again and the next write starts with a BOM.
+        try:
+            del self.encode
+        except AttributeError:
+            pass
+
+    def encode(self, input, errors='strict'):
+        # Only the first call after creation or reset() gets here: rebinding
+        # self.encode sends every later call to the plain UTF-8 encoder.
+        self.encode = codecs.utf_8_encode
+        return encode(input, errors)
+
+class StreamReader(codecs.StreamReader):
+    """Stream reader that skips a BOM at the start of the data."""
+
+    def reset(self):
+        codecs.StreamReader.reset(self)
+        # Drop the instance attribute set by decode() so that the class-level
+        # decode() runs again and the next read checks for a BOM.
+        try:
+            del self.decode
+        except AttributeError:
+            pass
+
+    def decode(self, input, errors='strict'):
+        if len(input) < 3 and codecs.BOM_UTF8.startswith(input):
+            # not enough data to decide if this is a BOM
+            # => try again on the next call
+            return (u"", 0)
+        # The BOM question is settled now: later calls go straight to the
+        # plain UTF-8 decoder.
+        self.decode = codecs.utf_8_decode
+        prefix = 0
+        if input.startswith(codecs.BOM_UTF8):
+            input = input[3:]
+            prefix = 3
+        # Decode non-final (unlike the stateless decode()): more data may
+        # follow, so an incomplete trailing sequence has to stay unconsumed
+        # for the next call instead of being treated as an error.
+        (output, consumed) = codecs.utf_8_decode(input, errors)
+        return (output, consumed+prefix)
+
+### encodings module API
+
+def getregentry():
+    """Return the (encoder, decoder, StreamReader, StreamWriter) tuple."""
+    return (encode,decode,StreamReader,StreamWriter)
Index: Lib/test/test_codecs.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/test/test_codecs.py,v
retrieving revision 1.24
diff -u -r1.24 test_codecs.py
--- Lib/test/test_codecs.py	20 Jul 2005 22:15:39 -0000	1.24
+++ Lib/test/test_codecs.py	9 Aug 2005 13:37:54 -0000
@@ -317,6 +317,33 @@
             ]
         )
 
+class UTF8SigTest(ReadTest):
+    encoding = "utf-8-sig"
+
+    def test_partial(self):
+        self.check_partial(
+            u"\ufeff\x00\xff\u07ff\u0800\uffff",
+            [
+                u"", # First byte of the BOM read
+                u"", # Second byte of the BOM read
+                u"", # First BOM has been read and skipped
+                u"", # First byte of encoded u"\ufeff" read
+                u"", # Second byte of encoded u"\ufeff" read
+                u"\ufeff", # Second BOM has been read and emitted
+                u"\ufeff\x00", # "\x00" read and emitted
+                u"\ufeff\x00", # First byte of encoded u"\xff" read
+                u"\ufeff\x00\xff", # Second byte of encoded u"\xff" read
+                u"\ufeff\x00\xff", # First byte of encoded u"\u07ff" read
+                u"\ufeff\x00\xff\u07ff", # Second byte of encoded u"\u07ff" read
+                u"\ufeff\x00\xff\u07ff", # First byte of encoded u"\u0800" read
+                u"\ufeff\x00\xff\u07ff", # Second byte of encoded u"\u0800" read
+                u"\ufeff\x00\xff\u07ff\u0800", # Third byte of encoded u"\u0800" read
+                u"\ufeff\x00\xff\u07ff\u0800", # First byte of encoded u"\uffff" read
+                u"\ufeff\x00\xff\u07ff\u0800", # Second byte of encoded u"\uffff" read
+                u"\ufeff\x00\xff\u07ff\u0800\uffff", # Third byte of encoded u"\uffff" read
+            ]
+        )
+
 class EscapeDecodeTest(unittest.TestCase):
     def test_empty_escape_decode(self):
         self.assertEquals(codecs.escape_decode(""), ("", 0))
@@ -876,6 +903,7 @@
         UTF16LETest,
         UTF16BETest,
         UTF8Test,
+        UTF8SigTest,
         EscapeDecodeTest,
         RecodingTest,
         PunycodeTest,
Index: Misc/NEWS
===================================================================
RCS file: /cvsroot/python/python/dist/src/Misc/NEWS,v
retrieving revision 1.1320
diff -u -r1.1320 NEWS
--- Misc/NEWS	18 Jul 2005 08:53:17 -0000	1.1320
+++ Misc/NEWS	9 Aug 2005 13:38:00 -0000
@@ -370,6 +370,7 @@
   line ending. Remove the special handling of a "\r\n" that has been split
   between two lines.
 
+- Patch #1177307: Added a new codec utf_8_sig for UTF-8 with a BOM signature.
 
 Build
 -----
Index: Doc/lib/libcodecs.tex
===================================================================
RCS file: /cvsroot/python/python/dist/src/Doc/lib/libcodecs.tex,v
retrieving revision 1.35
diff -u -r1.35 libcodecs.tex
--- Doc/lib/libcodecs.tex	1 Jan 2005 00:28:34 -0000	1.35
+++ Doc/lib/libcodecs.tex	9 Aug 2005 13:38:01 -0000
@@ -886,6 +886,10 @@
         {U8, UTF, utf8}
         {all languages}
 
+\lineiii{utf_8_sig}
+        {}
+        {all languages}
+
 \end{longtableiii}
 
 A number of codecs are specific to Python, so their codec names have
@@ -1054,3 +1058,20 @@
 \begin{funcdesc}{ToUnicode}{label}
 Convert a label to Unicode, as specified in \rfc{3490}.
 \end{funcdesc}
+
+\subsection{\module{encodings.utf_8_sig} ---
+            UTF-8 codec with BOM signature}
+
+\declaremodule{standard}{encodings.utf_8_sig}
+\modulesynopsis{UTF-8 codec with BOM signature}
+% XXX The next line triggers a formatting bug, so it's commented out
+% until that can be fixed.
+%\moduleauthor{Walter D\"orwald}
+
+\versionadded{2.5}
+
+This module implements a variant of the UTF-8 codec: on encoding, a UTF-8
+encoded BOM will be prepended to the UTF-8 encoded bytes. For the stateful
+encoder this is only done once (on the first write to the byte stream).
+On decoding, an optional UTF-8 encoded BOM at the start of the data will be
+skipped.