diff -r e532937914fc -r fa4c6160c518 Doc/library/difflib.rst
--- a/Doc/library/difflib.rst	Thu Apr 16 18:54:56 2015 -0400
+++ b/Doc/library/difflib.rst	Thu Apr 16 21:19:10 2015 -0400
@@ -315,6 +315,20 @@
 
    See :ref:`difflib-interface` for a more detailed example.
 
+.. function:: diff_bytes(dfunc, a, b, fromfile=b'', tofile=b'', fromfiledate=b'', tofiledate=b'', n=3, lineterm=b'\\n')
+
+   Compare *a* and *b* (lists of bytes objects) using *dfunc*; yield a
+   sequence of delta lines (also bytes) in the format returned by *dfunc*.
+   *dfunc* must be a callable, typically either :func:`unified_diff` or
+   :func:`context_diff`.
+
+   Allows you to compare data with unknown or inconsistent encoding. All
+   inputs except *n* must be bytes objects, not str. Works by losslessly
+   converting all inputs (except *n*) to str, and calling ``dfunc(a, b,
+   fromfile, tofile, fromfiledate, tofiledate, n, lineterm)``. The output of
+   *dfunc* is then converted back to bytes, so the delta lines that you
+   receive have the same unknown/inconsistent encodings as *a* and *b*.
+
 
 .. function:: IS_LINE_JUNK(line)
 
diff -r e532937914fc -r fa4c6160c518 Doc/whatsnew/3.5.rst
--- a/Doc/whatsnew/3.5.rst	Thu Apr 16 18:54:56 2015 -0400
+++ b/Doc/whatsnew/3.5.rst	Thu Apr 16 21:19:10 2015 -0400
@@ -302,6 +302,9 @@
   charset of HTML document changed from ``'ISO-8859-1'`` to ``'utf-8'``.
   (Contributed by Berker Peksag in :issue:`2052`.)
 
+* It's now possible to compare lists of byte strings with
+  :func:`difflib.diff_bytes` (fixes a regression from Python 2).
+
 distutils
 ---------
 
diff -r e532937914fc -r fa4c6160c518 Lib/difflib.py
--- a/Lib/difflib.py	Thu Apr 16 18:54:56 2015 -0400
+++ b/Lib/difflib.py	Thu Apr 16 21:19:10 2015 -0400
@@ -28,7 +28,7 @@
 
 __all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
            'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
-           'unified_diff', 'HtmlDiff', 'Match']
+           'unified_diff', 'diff_bytes', 'HtmlDiff', 'Match']
 
 from heapq import nlargest as _nlargest
 from collections import namedtuple as _namedtuple
@@ -1174,6 +1174,7 @@
      four
     """
 
+    _check_types(a, b, fromfile, tofile, fromfiledate, tofiledate, lineterm)
     started = False
     for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
         if not started:
@@ -1261,6 +1262,7 @@
       four
     """
 
+    _check_types(a, b, fromfile, tofile, fromfiledate, tofiledate, lineterm)
     prefix = dict(insert='+ ', delete='- ', replace='! ', equal='  ')
     started = False
     for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
@@ -1292,6 +1294,53 @@
                     for line in b[j1:j2]:
                         yield prefix[tag] + line
 
+def _check_types(a, b, *args):
+    # Type checks are unusual here, but they beat the alternative: handing
+    # a mix of bytes and str to {unified,context}_diff() would let
+    # str.format() embed the repr of the bytes, producing headers such as
+    #   --- b'oldfile.txt'
+    #   +++ b'newfile.txt'
+    # Only the first line of each (non-empty) sequence is inspected.
+    for lines in (a, b):
+        if lines and not isinstance(lines[0], str):
+            first = lines[0]
+            raise TypeError('lines to compare must be str, not %s (%r)' %
+                            (type(first).__name__, first))
+    for extra in args:
+        if isinstance(extra, str):
+            continue
+        raise TypeError('all arguments must be str, not: %r' % (extra,))
+
+def diff_bytes(dfunc, a, b, fromfile=b'', tofile=b'',
+               fromfiledate=b'', tofiledate=b'', n=3, lineterm=b'\n'):
+    r"""
+    Compare `a` and `b`, two sequences of lines held as bytes instead of
+    str, by delegating to `dfunc` (normally unified_diff() or
+    context_diff()) and yielding its delta lines as bytes. The line
+    sequences, file names, dates and `lineterm` are losslessly decoded to
+    str before the call and every result line is encoded back, which makes
+    it possible to diff data of unknown or inconsistent encoding. Apart
+    from `dfunc` and `n`, every argument must be bytes; a str argument
+    raises TypeError when the result is iterated.
+    """
+    def to_str(raw):
+        try:
+            return raw.decode('ascii', 'surrogateescape')
+        except AttributeError as err:
+            raise TypeError('all arguments must be bytes, not %s (%r)' %
+                            (type(raw).__name__, raw)) from err
+
+    a = [to_str(line) for line in a]
+    b = [to_str(line) for line in b]
+    fromfile, tofile, fromfiledate, tofiledate, lineterm = map(
+        to_str, (fromfile, tofile, fromfiledate, tofiledate, lineterm))
+
+    # dfunc only ever sees str; its output goes back to bytes through the
+    # same codec and error handler that were used for decoding.
+    for delta in dfunc(a, b, fromfile, tofile, fromfiledate, tofiledate,
+                       n, lineterm):
+        yield delta.encode('ascii', 'surrogateescape')
+
 def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
     r"""
     Compare `a` and `b` (lists of strings); return a `Differ`-style delta.
diff -r e532937914fc -r fa4c6160c518 Lib/test/test_difflib.py
--- a/Lib/test/test_difflib.py	Thu Apr 16 18:54:56 2015 -0400
+++ b/Lib/test/test_difflib.py	Thu Apr 16 21:19:10 2015 -0400
@@ -322,12 +322,157 @@
         self.assertEqual(fmt(0,0), '0')
 
 
+class TestBytes(unittest.TestCase):
+    # don't really care about the content of the output, just the fact
+    # that it's bytes and we don't crash
+    def check(self, diff):
+        diff = list(diff)   # trigger exceptions first
+        for line in diff:
+            self.assertIsInstance(
+                line, bytes,
+                "all lines of diff should be bytes, but got: %r" % line)
+
+    def test_byte_content(self):
+        # if we receive byte strings, we return byte strings
+        a = [b'hello', b'andr\xe9']     # iso-8859-1 bytes
+        b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes
+
+        unified = difflib.unified_diff
+        context = difflib.context_diff
+
+        check = self.check
+        check(difflib.diff_bytes(unified, a, a))
+        check(difflib.diff_bytes(unified, a, b))
+
+        # now with filenames (content and filenames are all bytes!)
+        check(difflib.diff_bytes(unified, a, a, b'a', b'a'))
+        check(difflib.diff_bytes(unified, a, b, b'a', b'b'))
+
+        # and with filenames and dates
+        check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013'))
+        check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013'))
+
+        # same all over again, with context diff
+        check(difflib.diff_bytes(context, a, a))
+        check(difflib.diff_bytes(context, a, b))
+        check(difflib.diff_bytes(context, a, a, b'a', b'a'))
+        check(difflib.diff_bytes(context, a, b, b'a', b'b'))
+        check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
+        check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))
+
+    def test_byte_filenames(self):
+        # somebody renamed a file from ISO-8859-2 to UTF-8
+        fna = b'\xb3odz.txt'    # "łodz.txt"
+        fnb = b'\xc5\x82odz.txt'
+
+        # they transcoded the content at the same time
+        a = [b'\xa3odz is a city in Poland.']
+        b = [b'\xc5\x81odz is a city in Poland.']
+
+        check = self.check
+        unified = difflib.unified_diff
+        context = difflib.context_diff
+        check(difflib.diff_bytes(unified, a, b, fna, fnb))
+        check(difflib.diff_bytes(context, a, b, fna, fnb))
+
+        def assertDiff(expect, actual):
+            # do not compare expect and actual as lists, because unittest
+            # uses difflib to report difference between lists
+            actual = list(actual)
+            self.assertEqual(len(expect), len(actual))
+            for e, a in zip(expect, actual):
+                self.assertEqual(e, a)
+
+        expect = [
+            b'--- \xb3odz.txt',
+            b'+++ \xc5\x82odz.txt',
+            b'@@ -1 +1 @@',
+            b'-\xa3odz is a city in Poland.',
+            b'+\xc5\x81odz is a city in Poland.',
+        ]
+        actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
+        assertDiff(expect, actual)
+
+        # with dates (plain ASCII)
+        datea = b'2005-03-18'
+        dateb = b'2005-03-19'
+        check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
+        check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))
+
+        expect = [
+            # note the mixed encodings here: this is deeply wrong by every
+            # tenet of Unicode, but it doesn't crash, it's parseable by
+            # patch, and it's how UNIX(tm) diff behaves
+            b'--- \xb3odz.txt\t2005-03-18',
+            b'+++ \xc5\x82odz.txt\t2005-03-19',
+            b'@@ -1 +1 @@',
+            b'-\xa3odz is a city in Poland.',
+            b'+\xc5\x81odz is a city in Poland.',
+        ]
+        actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
+                                    lineterm=b'')
+        assertDiff(expect, actual)
+
+    def test_mixed_types_content(self):
+        # type of input content must be consistent: all str or all bytes
+        a = [b'hello']
+        b = ['hello']
+
+        unified = difflib.unified_diff
+        context = difflib.context_diff
+
+        expect = "lines to compare must be str, not bytes (b'hello')"
+        self._assert_type_error(expect, unified, a, b)
+        self._assert_type_error(expect, unified, b, a)
+        self._assert_type_error(expect, context, a, b)
+        self._assert_type_error(expect, context, b, a)
+
+        expect = "all arguments must be bytes, not str ('hello')"
+        self._assert_type_error(expect, difflib.diff_bytes, unified, a, b)
+        self._assert_type_error(expect, difflib.diff_bytes, unified, b, a)
+        self._assert_type_error(expect, difflib.diff_bytes, context, a, b)
+        self._assert_type_error(expect, difflib.diff_bytes, context, b, a)
+
+    def test_mixed_types_filenames(self):
+        # cannot pass filenames as bytes if content is str (this may not be
+        # the right behaviour, but at least the test demonstrates how
+        # things work)
+        a = ['hello\n']
+        b = ['ohell\n']
+        fna = b'ol\xe9.txt'      # filename transcoded from ISO-8859-1
+        fnb = b'ol\xc3\xa9.txt'  # to UTF-8
+        self._assert_type_error(
+            "all arguments must be str, not: b'ol\\xe9.txt'",
+            difflib.unified_diff, a, b, fna, fnb)
+
+    def test_mixed_types_dates(self):
+        # type of dates must be consistent with type of contents
+        a = [b'foo\n']
+        b = [b'bar\n']
+        datea = '1 fév'
+        dateb = '3 fév'
+        self._assert_type_error(
+            "all arguments must be bytes, not str ('1 fév')",
+            difflib.diff_bytes, difflib.unified_diff,
+            a, b, b'a', b'b', datea, dateb)
+
+        # if input is str, non-ASCII dates are fine
+        a = ['foo\n']
+        b = ['bar\n']
+        list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))
+
+    def _assert_type_error(self, msg, generator, *args):
+        with self.assertRaises(TypeError) as ctx:
+            list(generator(*args))
+        self.assertEqual(msg, str(ctx.exception))
+
+
 def test_main():
     difflib.HtmlDiff._default_prefix = 0
     Doctests = doctest.DocTestSuite(difflib)
     run_unittest(
         TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
-        TestOutputFormat, Doctests)
+        TestOutputFormat, TestBytes, Doctests)
 
 if __name__ == '__main__':
     test_main()
diff -r e532937914fc -r fa4c6160c518 Misc/NEWS
--- a/Misc/NEWS	Thu Apr 16 18:54:56 2015 -0400
+++ b/Misc/NEWS	Thu Apr 16 21:19:10 2015 -0400
@@ -173,6 +173,10 @@
 - Issue #23310: Fix MagicMock's initializer to work with __methods__, just
   like configure_mock().  Patch by Kasia Jachim.
 
+- Issue #17445: Add difflib.diff_bytes() to support comparison of
+  byte strings (fixes a regression from Python 2).
+
+
 Build
 -----