diff -r a88310d86455 Lib/encodings/idna.py
--- a/Lib/encodings/idna.py Sun Apr 14 19:22:47 2013 +0200
+++ b/Lib/encodings/idna.py Mon Apr 22 21:04:13 2013 -0400
@@ -59,6 +59,33 @@
 
     return label
 
+def validate_max_length(label):
+    """Raise UnicodeError if label is 64 bytes or longer.
+
+    :param label: ascii encoded
+    """
+    if len(label) >= 64:
+        raise UnicodeError("label %r is too long" % label.decode())
+
+def validate_min_length(label, labels=None):
+    """Raise UnicodeError if label is empty.
+
+    :param label: ascii encoded
+    :param labels: ascii encoded; if given, it is quoted in the message
+    """
+    if not label:
+        value = ' in %r' % labels.decode() if labels else ''
+        raise UnicodeError("empty label%s" % value)
+
+def validate_label(label, labels=None):
+    """Raise UnicodeError unless 0 < len(label) < 64.
+
+    :param label: ascii encoded
+    :param labels: ascii encoded; if given, it is quoted in the message
+    """
+    validate_max_length(label)
+    validate_min_length(label, labels)
+
 def ToASCII(label):
     try:
         # Step 1: try ASCII
@@ -68,9 +95,8 @@
     else:
         # Skip to step 3: UseSTD3ASCIIRules is false, so
         # Skip to step 8.
-        if 0 < len(label) < 64:
-            return label
-        raise UnicodeError("label empty or too long")
+        validate_label(label)
+        return label
 
     # Step 2: nameprep
     label = nameprep(label)
@@ -83,9 +109,8 @@
         pass
     else:
         # Skip to step 8.
-        if 0 < len(label) < 64:
-            return label
-        raise UnicodeError("label empty or too long")
+        validate_label(label)
+        return label
 
     # Step 5: Check ACE prefix
     if label.startswith(sace_prefix):
@@ -98,9 +123,8 @@
     label = ace_prefix + label
 
     # Step 8: Check size
-    if 0 < len(label) < 64:
-        return label
-    raise UnicodeError("label empty or too long")
+    validate_max_length(label)
+    return label
 
 def ToUnicode(label):
     # Step 1: Check for ASCII
@@ -161,10 +185,10 @@
             # ASCII name: fast path
             labels = result.split(b'.')
             for label in labels[:-1]:
-                if not (0 < len(label) < 64):
-                    raise UnicodeError("label empty or too long")
-            if len(labels[-1]) >= 64:
-                raise UnicodeError("label too long")
+                validate_label(label, result)
+
+            validate_max_length(labels[-1])
+
             return result, len(input)
 
         result = bytearray()
diff -r a88310d86455 Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py Sun Apr 14 19:22:47 2013 +0200
+++ b/Lib/test/test_unicode.py Mon Apr 22 21:04:13 2013 -0400
@@ -10,9 +10,15 @@
 import itertools
 import struct
 import sys
 import unittest
+from encodings.idna import (
+    ToASCII,
+    validate_max_length,
+    validate_min_length,
+)
 import warnings
 from test import support, string_tests
+from unittest import mock
 
 # Error handling (bad decoder return)
 def search_function(encoding):
@@ -1673,6 +1679,98 @@
         # Test whether trailing dot is preserved
         self.assertEqual("www.python.org.".encode("idna"), b"www.python.org.")
 
+    def test_max_length_validation(self):
+        expected = "Z" * 64
+        label = expected.encode('ascii')
+        with self.assertRaisesRegex(UnicodeError, expected):
+            validate_max_length(label)
+
+    def test_min_length_validation(self):
+        label = "".encode('ascii')
+        labels = "abcdefg".encode('ascii')
+        with self.assertRaisesRegex(UnicodeError, "empty label in 'abcdefg'"):
+            validate_min_length(label, labels)
+
+        with self.assertRaisesRegex(UnicodeError, "empty label$"):
+            validate_min_length(label)
+
+    def test_min_codecs_error_messages(self):
+        # Test empty inner label
+        self.assertRaisesRegex(
+            UnicodeError,
+            r"empty label in '1\.\.com'",
+            '1..com'.encode,
+            'idna')
+
+    def test_max_codecs_errors_messages(self):
+        # Test inner label of length 64 (one more than the 63 allowed)
+        label = "A" * 64
+        value = "first." + label + ".last"
+        self.assertRaisesRegex(
+            UnicodeError,
+            label,
+            value.encode,
+            'idna')
+        # Test last label of length 64
+        value = "first.second." + label
+        self.assertRaisesRegex(
+            UnicodeError,
+            label,
+            value.encode,
+            'idna')
+        # Test last label of length 65
+        label += "A"
+        value += "A"
+        self.assertRaisesRegex(
+            UnicodeError,
+            label,
+            value.encode,
+            'idna')
+
+    def test_min_label_encode_ascii(self):
+        with self.assertRaisesRegex(UnicodeError, "empty label"):
+            ToASCII('')
+
+    def test_max_label_encode_ascii(self):
+        label = "Z" * 64
+        with self.assertRaisesRegex(UnicodeError, label):
+            ToASCII(label)
+
+    @mock.patch('encodings.idna.nameprep')
+    def test_escaped_min_label(self, nameprep):
+        label = mock.Mock()
+        label.encode.side_effect = UnicodeError
+        nameprep.return_value = ''
+        with self.assertRaisesRegex(UnicodeError, "empty label"):
+            ToASCII(label)
+
+    @mock.patch('encodings.idna.nameprep')
+    def test_escaped_max_label(self, nameprep):
+        mock_label = mock.Mock()
+        mock_label.encode.side_effect = UnicodeError
+        label = "Z" * 64
+        nameprep.return_value = label
+        with self.assertRaisesRegex(UnicodeError, label):
+            ToASCII(mock_label)
+
+    @mock.patch('encodings.idna.nameprep')
+    def test_escape_max_punycode_ascii(self, nameprep):
+        mock_label = mock.Mock()
+        mock_label.encode.side_effect = UnicodeError
+        name_space_label = mock.Mock()
+        expected_label = "Z" * 64
+        expected_label = expected_label.encode('ascii')
+        def encode(value):
+            if value == "ascii":
+                raise UnicodeError
+            return expected_label
+
+        name_space_label.encode = encode
+        name_space_label.startswith.return_value = False
+        nameprep.return_value = name_space_label
+        with self.assertRaisesRegex(UnicodeError, "Z" * 64):
+            ToASCII(mock_label)
+
     def test_codecs_errors(self):
         # Error handling (encoding)
         self.assertRaises(UnicodeError, 'Andr\202 x'.encode, 'ascii')