diff --git a/Lib/tarfile.py b/Lib/tarfile.py
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -175,10 +175,9 @@
 # Some useful functions
 #---------------------------------------------------------
 
-def stn(s, length, encoding, errors):
+def stn(s, length):
     """Convert a string to a null-terminated bytes object.
     """
-    s = s.encode(encoding, errors)
     return s[:length] + (length - len(s)) * NUL
 
 def nts(s, encoding, errors):
@@ -831,6 +830,8 @@
     def create_ustar_header(self, info, encoding, errors):
         """Return the object as a ustar header block.
         """
+        self._apply_encoding(info, encoding, errors)
+
         info["magic"] = POSIX_MAGIC
 
         if len(info["linkname"]) > LENGTH_LINK:
@@ -839,21 +840,23 @@
         if len(info["name"]) > LENGTH_NAME:
             info["prefix"], info["name"] = self._posix_split_name(info["name"])
 
-        return self._create_header(info, USTAR_FORMAT, encoding, errors)
+        return self._create_header(info, USTAR_FORMAT)
 
     def create_gnu_header(self, info, encoding, errors):
         """Return the object as a GNU header block sequence.
         """
+        self._apply_encoding(info, encoding, errors)
+
         info["magic"] = GNU_MAGIC
 
         buf = b""
         if len(info["linkname"]) > LENGTH_LINK:
-            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
+            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
 
         if len(info["name"]) > LENGTH_NAME:
-            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
+            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
 
-        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
+        return buf + self._create_header(info, GNU_FORMAT)
 
     def create_pax_header(self, info, encoding):
         """Return the object as a ustar header block. If it cannot be
@@ -902,7 +905,9 @@
         else:
             buf = b""
 
-        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
+        self._apply_encoding(info, "ascii", "replace")
+
+        return buf + self._create_header(info, USTAR_FORMAT)
 
     @classmethod
     def create_pax_global_header(cls, pax_headers):
@@ -915,7 +920,7 @@
            and a name part.
         """
         prefix = name[:LENGTH_PREFIX + 1]
-        while prefix and prefix[-1] != "/":
+        while prefix and not prefix.endswith(b"/"):
             prefix = prefix[:-1]
 
         name = name[len(prefix):]
@@ -925,13 +930,19 @@
             raise ValueError("name is too long")
         return prefix, name
 
+    def _apply_encoding(self, info, encoding, errors):
+        """Apply encoding to all name values of a dictionary.
+        """
+        for key in ("name", "linkname", "uname", "gname"):
+            info[key] = info[key].encode(encoding, errors)
+
     @staticmethod
-    def _create_header(info, format, encoding, errors):
+    def _create_header(info, format):
         """Return a header block. info is a dictionary with file
            information, format must be one of the *_FORMAT constants.
         """
         parts = [
-            stn(info.get("name", ""), 100, encoding, errors),
+            stn(info.get("name", b""), 100),
             itn(info.get("mode", 0) & 0o7777, 8, format),
             itn(info.get("uid", 0), 8, format),
             itn(info.get("gid", 0), 8, format),
@@ -939,13 +950,13 @@
             itn(info.get("mtime", 0), 12, format),
             b"        ", # checksum field
             info.get("type", REGTYPE),
-            stn(info.get("linkname", ""), 100, encoding, errors),
+            stn(info.get("linkname", b""), 100),
             info.get("magic", POSIX_MAGIC),
-            stn(info.get("uname", ""), 32, encoding, errors),
-            stn(info.get("gname", ""), 32, encoding, errors),
+            stn(info.get("uname", b""), 32),
+            stn(info.get("gname", b""), 32),
             itn(info.get("devmajor", 0), 8, format),
             itn(info.get("devminor", 0), 8, format),
-            stn(info.get("prefix", ""), 155, encoding, errors)
+            stn(info.get("prefix", b""), 155)
         ]
 
         buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
@@ -964,20 +975,20 @@
         return payload
 
     @classmethod
-    def _create_gnu_long_header(cls, name, type, encoding, errors):
+    def _create_gnu_long_header(cls, name, type):
         """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
            for name.
         """
-        name = name.encode(encoding, errors) + NUL
+        name += NUL
 
         info = {}
-        info["name"] = "././@LongLink"
+        info["name"] = b"././@LongLink"
         info["type"] = type
         info["size"] = len(name)
         info["magic"] = GNU_MAGIC
 
         # create extended header + name blocks.
-        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
+        return cls._create_header(info, USTAR_FORMAT) + \
                 cls._create_payload(name)
 
     @classmethod
@@ -1022,13 +1033,13 @@
         # We use a hardcoded "././@PaxHeader" name like star does
         # instead of the one that POSIX recommends.
         info = {}
-        info["name"] = "././@PaxHeader"
+        info["name"] = b"././@PaxHeader"
         info["type"] = type
         info["size"] = len(records)
         info["magic"] = POSIX_MAGIC
 
         # Create pax header + record blocks.
-        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
+        return cls._create_header(info, USTAR_FORMAT) + \
                 cls._create_payload(records)
 
     @classmethod
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1174,18 +1174,18 @@
             tarinfo.linkname = link
             tarinfo.type = tarfile.LNKTYPE
 
-        tar = tarfile.open(tmpname, "w")
+        tar = tarfile.open(tmpname, "w", encoding="utf8")
         try:
             tar.format = tarfile.GNU_FORMAT
             tar.addfile(tarinfo)
 
-            v1 = self._calc_size(name, link)
+            v1 = self._calc_size(name.encode("utf8"), link)
             v2 = tar.offset
             self.assertTrue(v1 == v2, "GNU longname/longlink creation failed")
         finally:
             tar.close()
 
-        tar = tarfile.open(tmpname)
+        tar = tarfile.open(tmpname, encoding="utf8")
         try:
             member = tar.next()
             self.assertIsNotNone(member,
@@ -1227,6 +1227,27 @@
         self._test(("longnam/" * 127) + "longname_",
                    ("longlnk/" * 127) + "longlink_")
 
+    # Issue 15875: If the filename contained special characters
+    # tarfile failed to correctly use a longname extended header
+    # which resulted in the filename being truncated.
+    def test_longname_special_99(self):
+        self._test("0" * 98 + "ä")
+
+    def test_longname_special_100(self):
+        self._test("0" * 99 + "ä")
+
+    def test_longname_special_101(self):
+        self._test("0" * 100 + "ä")
+
+    def test_longname_special_1023(self):
+        self._test(("longnam/" * 127) + "000000ä")
+
+    def test_longname_special_1024(self):
+        self._test(("longnam/" * 127) + "0000000ä")
+
+    def test_longname_special_1025(self):
+        self._test(("longnam/" * 127) + "00000000ä")
+
 
 class HardlinkTest(unittest.TestCase):
     # Test the creation of LNKTYPE (hardlink) members in an archive.
@@ -1622,8 +1643,8 @@
 class MiscTest(unittest.TestCase):
 
     def test_char_fields(self):
-        self.assertEqual(tarfile.stn("foo", 8, "ascii", "strict"), b"foo\0\0\0\0\0")
-        self.assertEqual(tarfile.stn("foobar", 3, "ascii", "strict"), b"foo")
+        self.assertEqual(tarfile.stn(b"foo", 8), b"foo\0\0\0\0\0")
+        self.assertEqual(tarfile.stn(b"foobar", 3), b"foo")
         self.assertEqual(tarfile.nts(b"foo\0\0\0\0\0", "ascii", "strict"), "foo")
         self.assertEqual(tarfile.nts(b"foo\0bar\0", "ascii", "strict"), "foo")
 