diff -r e55cc0834e9c -r 13161c1d9c5f Lib/difflib.py
--- a/Lib/difflib.py	Wed Apr 15 17:08:45 2015 -0400
+++ b/Lib/difflib.py	Wed Apr 15 09:26:22 2015 -0400
@@ -1173,7 +1173,7 @@
     +tree
      four
     """
-
+    _check_types(a, b, fromfile, tofile, fromfiledate, tofiledate, lineterm)
     started = False
     for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
         if not started:
@@ -1260,7 +1260,7 @@
     ! tree
       four
     """
-
+    _check_types(a, b, fromfile, tofile, fromfiledate, tofiledate, lineterm)
     prefix = dict(insert='+ ', delete='- ', replace='! ', equal='  ')
     started = False
     for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
@@ -1292,6 +1292,49 @@
                     for line in b[j1:j2]:
                         yield prefix[tag] + line
 
+def _check_types(a, b, *args):
+    """Raise TypeError unless the first line of `a` and of `b` (when
+    non-empty) and every extra argument is a str."""
+    # Checking types is weird, but it's better than producing garbled
+    # output: str.format() would render a bytes filename as a header
+    # line like "--- b'oldfile.txt'".
+    for lines in (a, b):
+        if lines and not isinstance(lines[0], str):
+            # 1-tuple: %-formatting must not unpack a tuple-valued line
+            raise TypeError('lines to compare must be str, not %r'
+                            % (lines[0],))
+    for arg in args:
+        if not isinstance(arg, str):
+            raise TypeError('all arguments must be str, not: %r' % (arg,))
+
+def diff_bytes(dfunc, a, b, fromfile=b'', tofile=b'',
+               fromfiledate=b'', tofiledate=b'', n=3, lineterm=b'\n'):
+    r"""
+    Compare `a` and `b`, two sequences of lines represented as bytes rather
+    than strings. This is a wrapper for `dfunc`, which is typically either
+    unified_diff() or context_diff() (the function object, not the name).
+    All inputs are losslessly converted to strings so that `dfunc` only has
+    to worry about strings, and encoded back to bytes on return. This is
+    necessary to compare files with unknown or inconsistent encoding. All
+    other inputs (except `n`) must be bytes rather than str; a str argument
+    raises TypeError when the returned generator is first advanced.
+    """
+    def decode(s):
+        try:
+            return s.decode('ascii', 'surrogateescape')
+        except AttributeError:
+            # 1-tuple: a tuple-valued `s` must not be unpacked by '%'
+            raise TypeError('all arguments must be bytes, not %r' % (s,))
+    a = list(map(decode, a))
+    b = list(map(decode, b))
+    fromfile, tofile = decode(fromfile), decode(tofile)
+    fromfiledate, tofiledate = decode(fromfiledate), decode(tofiledate)
+    lineterm = decode(lineterm)
+
+    lines = dfunc(a, b, fromfile, tofile, fromfiledate, tofiledate, n, lineterm)
+    for line in lines:
+        yield line.encode('ascii', 'surrogateescape')
+
 def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
     r"""
     Compare `a` and `b` (lists of strings); return a `Differ`-style delta.
diff -r e55cc0834e9c -r 13161c1d9c5f Lib/test/test_difflib.py
--- a/Lib/test/test_difflib.py	Wed Apr 15 17:08:45 2015 -0400
+++ b/Lib/test/test_difflib.py	Wed Apr 15 09:26:22 2015 -0400
@@ -322,12 +322,158 @@
         self.assertEqual(fmt(0,0), '0')
 
 
+class TestBytes(unittest.TestCase):
+    """Tests for difflib.diff_bytes() and for the str/bytes type checks
+    made by unified_diff() and context_diff()."""
+
+    # don't really care about the content of the output, just the fact
+    # that it's bytes and we don't crash
+    def check(self, diff):
+        """Consume `diff` and assert that every line of it is bytes."""
+        diff = list(diff)   # trigger exceptions first
+        for line in diff:
+            self.assertIsInstance(
+                line, bytes,
+                "all lines of diff should be bytes, but got: %r" % (line,))
+
+    def test_byte_content(self):
+        "if we receive byte strings, we return byte strings"
+        a = [b'hello', b'andrew']
+        b = [b'hello', b'andr\xe9'] # latin-1 bytes
+
+        unified = difflib.unified_diff
+        context = difflib.context_diff
+
+        check = self.check
+        check(difflib.diff_bytes(unified, a, a))
+        check(difflib.diff_bytes(unified, a, b))
+
+        # now with filenames (content and filenames are all bytes!)
+        check(difflib.diff_bytes(unified, a, a, b'a', b'a'))
+        check(difflib.diff_bytes(unified, a, b, b'a', b'b'))
+
+        # and with filenames and dates
+        check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013'))
+        check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013'))
+
+        # same all over again, with context diff
+        check(difflib.diff_bytes(context, a, a))
+        check(difflib.diff_bytes(context, a, b))
+        check(difflib.diff_bytes(context, a, a, b'a', b'a'))
+        check(difflib.diff_bytes(context, a, b, b'a', b'b'))
+        check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
+        check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))
+
+    def test_byte_filenames(self):
+        # somebody renamed a file from ISO-8859-2 to UTF-8
+        fna = b'\xb3odz.txt'    # "łodz.txt"
+        fnb = b'\xc5\x82odz.txt'
+
+        # they transcoded the content at the same time
+        a = [b'\xa3odz is a city in Poland.']
+        b = [b'\xc5\x81odz is a city in Poland.']
+
+        check = self.check
+        unified = difflib.unified_diff
+        context = difflib.context_diff
+        check(difflib.diff_bytes(unified, a, b, fna, fnb))
+        check(difflib.diff_bytes(context, a, b, fna, fnb))
+
+        def assertDiff(expect, actual):
+            # do not compare expect and actual as lists, because unittest
+            # uses difflib to report difference between lists
+            actual = list(actual)
+            self.assertEqual(len(expect), len(actual))
+            for exp_line, act_line in zip(expect, actual):
+                self.assertEqual(exp_line, act_line)
+
+        expect = [
+            b'--- \xb3odz.txt',
+            b'+++ \xc5\x82odz.txt',
+            b'@@ -1 +1 @@',
+            b'-\xa3odz is a city in Poland.',
+            b'+\xc5\x81odz is a city in Poland.',
+            ]
+        actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
+        assertDiff(expect, actual)
+
+        # with dates (plain ASCII)
+        datea = b'2005-03-18'
+        dateb = b'2005-03-19'
+        check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
+        check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))
+
+        expect = [
+            # note the mixed encodings here: this is deeply wrong by every
+            # tenet of Unicode, but it doesn't crash, it's parseable by
+            # patch, and it's how UNIX(tm) diff behaves
+            b'--- \xb3odz.txt\t2005-03-18',
+            b'+++ \xc5\x82odz.txt\t2005-03-19',
+            b'@@ -1 +1 @@',
+            b'-\xa3odz is a city in Poland.',
+            b'+\xc5\x81odz is a city in Poland.',
+            ]
+        actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
+                                    lineterm=b'')
+        assertDiff(expect, actual)
+
+    def test_mixed_types_content(self):
+        'type of input content must be consistent: all str or all bytes'
+        a = [b'hello']
+        b = ['hello']
+
+        def assertTypeError(generator, *args):
+            # diff_bytes() is a generator, so its TypeError only surfaces
+            # once the result is consumed; list() forces that for every
+            # function under test
+            with self.assertRaises(TypeError):
+                list(generator(*args))
+
+        assertTypeError(difflib.unified_diff, a, b)
+        assertTypeError(difflib.unified_diff, b, a)
+        assertTypeError(difflib.context_diff, a, b)
+        assertTypeError(difflib.context_diff, b, a)
+
+        assertTypeError(difflib.diff_bytes, difflib.unified_diff, a, b)
+        assertTypeError(difflib.diff_bytes, difflib.unified_diff, b, a)
+        assertTypeError(difflib.diff_bytes, difflib.context_diff, a, b)
+        assertTypeError(difflib.diff_bytes, difflib.context_diff, b, a)
+
+    def test_mixed_types_filenames(self):
+        'cannot pass filenames as bytes if content is str'
+        # this may not be the right behaviour, but at least the test
+        # demonstrates how things work
+        a = ['hello\n']
+        b = ['ohell\n']
+        fna = b'ol\xe9.txt'     # filename transcoded from ISO-8859-1
+        fnb = b'ol\xc3\xa9.txt' # to UTF-8
+        with self.assertRaises(TypeError):
+            list(difflib.unified_diff(a, b, fna, fnb))
+
+    def test_mixed_types_dates(self):
+        'type of dates must be consistent with type of contents'
+        a = [b'foo\n']
+        b = [b'bar\n']
+        datea = '1 fév'
+        dateb = '3 fév'
+        # dfunc goes first and the filenames are bytes, so that only the
+        # str dates can be what triggers the TypeError
+        with self.assertRaises(TypeError):
+            list(difflib.diff_bytes(difflib.unified_diff,
+                                    a, b, b'a', b'b', datea, dateb))
+
+        # if input is str, non-ASCII dates are fine
+        a = ['foo\n']
+        b = ['bar\n']
+        list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))
+
+
 def test_main():
     difflib.HtmlDiff._default_prefix = 0
     Doctests = doctest.DocTestSuite(difflib)
     run_unittest(
         TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
-        TestOutputFormat, Doctests)
+        TestOutputFormat, TestBytes, Doctests)  # TestBytes covers diff_bytes()
 
 if __name__ == '__main__':
     test_main()