Index: Lib/_pyio.py =================================================================== --- Lib/_pyio.py (révision 77361) +++ Lib/_pyio.py (copie de travail) @@ -16,6 +16,8 @@ from io import __all__ from io import SEEK_SET, SEEK_CUR, SEEK_END +from codecs import BOMS + # open() uses st_blksize whenever we can DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes @@ -1428,21 +1430,7 @@ raise TypeError("illegal newline type: %r" % (type(newline),)) if newline not in (None, "", "\n", "\r", "\r\n"): raise ValueError("illegal newline value: %r" % (newline,)) - if encoding is None: - try: - encoding = os.device_encoding(buffer.fileno()) - except (AttributeError, UnsupportedOperation): - pass - if encoding is None: - try: - import locale - except ImportError: - # Importing locale may fail if Python is being built - encoding = "ascii" - else: - encoding = locale.getpreferredencoding() - - if not isinstance(encoding, str): + if encoding is not None and not isinstance(encoding, str): raise ValueError("invalid encoding: %r" % encoding) if errors is None: @@ -1453,7 +1441,20 @@ self.buffer = buffer self._line_buffering = line_buffering - self._encoding = encoding + if encoding == "BOM": + if self.writable(): + raise ValueError( + "BOM encoding can only be used to read a text file, " + "not for writing") + self._encoding = None + self._bom_checked = False + else: + if encoding is None: + self._encoding = self._create_encoding() + else: + self._encoding = encoding + self._bom_checked = True + self._has_bom = False self._errors = errors self._readuniversal = not newline self._readtranslate = newline is None @@ -1562,6 +1563,41 @@ self._decoder.reset() return length + def _create_encoding(self): + try: + encoding = os.device_encoding(self.buffer.fileno()) + except (AttributeError, UnsupportedOperation): + encoding = None + + if not encoding: + try: + import locale + except ImportError: + # Importing locale may fail if Python is being built + encoding = "ascii" + else: + encoding = 
locale.getpreferredencoding() + return encoding + + def _search_bom(self, chunk): + for encoding, bom in sorted(BOMS.items(), key=lambda item: len(item[1]), reverse=True): + if chunk.startswith(bom): + self._encoding, self._has_bom = encoding, True + self._get_decoder() + return chunk[len(bom):] + self._has_bom = False + self._encoding = self._create_encoding() + self._get_decoder() + return chunk + + def _check_bom(self, chunk): + if not self._bom_checked: + self._bom_checked = True + chunk = self._search_bom(chunk) + elif self._decoder is None: + self._get_decoder() + return chunk + def _get_encoder(self): make_encoder = codecs.getincrementalencoder(self._encoding) self._encoder = make_encoder(self._errors) @@ -1610,9 +1646,12 @@ # some of it may remain buffered in the decoder, yet to be # converted. - if self._decoder is None: - raise ValueError("no decoder") + # Read a chunk, decode it, and put the result in self._decoded_chars. + input_chunk = self.buffer.read1(self._CHUNK_SIZE) + eof = not input_chunk + input_chunk = self._check_bom(input_chunk) + if self._telling: # To prepare for tell(), we need to snapshot a point in the # file where the decoder's input buffer is empty. @@ -1621,9 +1660,6 @@ # Given this, we know there was a valid snapshot point # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). - # Read a chunk, decode it, and put the result in self._decoded_chars. - input_chunk = self.buffer.read1(self._CHUNK_SIZE) - eof = not input_chunk self._set_decoded_chars(self._decoder.decode(input_chunk, eof)) if self._telling: @@ -1769,6 +1805,8 @@ # Restore the decoder to its state from the safe start point. if cookie == 0 and self._decoder: + if self._bom_checked and self._has_bom: + self._bom_checked = False self._decoder.reset() elif self._decoder or dec_flags or chars_to_skip: self._decoder = self._decoder or self._get_decoder() @@ -1804,11 +1842,12 @@ self._checkReadable() if n is None: n = -1 - decoder = self._decoder or self._get_decoder() if n < 0: # Read everything. 
- result = (self._get_decoded_chars() + - decoder.decode(self.buffer.read(), final=True)) + result = self._get_decoded_chars() + chunk = self.buffer.read() + chunk = self._check_bom(chunk) + result += self._decoder.decode(chunk, final=True) self._set_decoded_chars('') self._snapshot = None return result @@ -1842,10 +1881,6 @@ line = self._get_decoded_chars() start = 0 - # Make the decoder if it doesn't already exist. - if not self._decoder: - self._get_decoder() - pos = endpos = None while True: if self._readtranslate: Index: Lib/test/test_io.py =================================================================== --- Lib/test/test_io.py (révision 77361) +++ Lib/test/test_io.py (copie de travail) @@ -1946,6 +1946,29 @@ self.assertEquals(f.read(), data * 2) self.assertEquals(buf.getvalue(), (data * 2).encode(encoding)) + def test_encoding_bom(self): + filename = support.TESTFN + text = "abc\ndef\n123" + lines = text.splitlines(True) + tests = ("utf-8-sig", + "utf-16", + "utf-32") + for encoding in tests: + with self.open(filename, 'w', encoding=encoding) as f: + f.write(text) + + with self.open(filename, encoding="BOM") as f: + self.assertEquals(f.read(), text) + f.seek(0) + self.assertEquals(f.read(), text) + + with self.open(filename, encoding="BOM") as f: + self.assertEquals(f.readlines(), lines) + f.seek(0) + self.assertEquals(f.readlines(), lines) + + self.assertRaises(ValueError, self.open, filename, "w", encoding="BOM") + def test_unreadable(self): class UnReadable(self.BytesIO): def readable(self): Index: Lib/codecs.py =================================================================== --- Lib/codecs.py (révision 77361) +++ Lib/codecs.py (copie de travail) @@ -47,6 +47,14 @@ # UTF-32, big endian BOM_UTF32_BE = b'\x00\x00\xfe\xff' +BOMS = { + "UTF_32_LE": BOM_UTF32_LE, + "UTF_32_BE": BOM_UTF32_BE, + "UTF_8": BOM_UTF8, + "UTF_16_LE": BOM_UTF16_LE, + "UTF_16_BE": BOM_UTF16_BE, +} + if sys.byteorder == 'little': # UTF-16, native endianness