Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(3)

Side by Side Diff: Lib/encodings/idna.py

Issue 9682: socket.create_connection error message for domain subpart with invalid length is very confusing
Patch Set: Created 6 years, 10 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | Lib/test/test_unicode.py » ('j') | Lib/test/test_unicode.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLDNEW
1 # This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep) 1 # This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
2 2
3 import stringprep, re, codecs 3 import stringprep, re, codecs
4 from unicodedata import ucd_3_2_0 as unicodedata 4 from unicodedata import ucd_3_2_0 as unicodedata
5 5
6 # IDNA section 3.1 6 # IDNA section 3.1
7 dots = re.compile("[\u002E\u3002\uFF0E\uFF61]") 7 dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
8 8
9 # IDNA section 5 9 # IDNA section 5
10 ace_prefix = b"xn--" 10 ace_prefix = b"xn--"
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
51 raise UnicodeError("Violation of BIDI requirement 2") 51 raise UnicodeError("Violation of BIDI requirement 2")
52 52
53 # 3) If a string contains any RandALCat character, a 53 # 3) If a string contains any RandALCat character, a
54 # RandALCat character MUST be the first character of the 54 # RandALCat character MUST be the first character of the
55 # string, and a RandALCat character MUST be the last 55 # string, and a RandALCat character MUST be the last
56 # character of the string. 56 # character of the string.
57 if not RandAL[0] or not RandAL[-1]: 57 if not RandAL[0] or not RandAL[-1]:
58 raise UnicodeError("Violation of BIDI requirement 3") 58 raise UnicodeError("Violation of BIDI requirement 3")
59 59
60 return label 60 return label
61
62 def validate_max_length(label):
63 """
64 :param label: ascii encoded
r.david.murray 2013/04/27 03:04:44 We don't use this style of docstring in the stdlib
mmilkin 2013/04/27 16:04:56 Good to know, will fix. On 2013/04/27 03:04:44, r.da…
65 """
66 if len(label) >= 64:
67 raise UnicodeError("label %r is too long" % label.decode())
68
69 def validate_min_length(label, labels=None):
70 """
71 :param label: ascii encoded
72 :param labels: ascii encoded
73 """
74 if not label:
75 value = ' in %r' % labels.decode() if labels else ''
76 raise UnicodeError("empty label%r" % value)
77
78 def validate_label(label, labels=None):
79 """
80 labels supplied must be of size 0 < label < 64
81 :param label: ascii encoded
82 :param labels: ascii encoded
83 """
84 validate_max_length(label)
85 validate_min_length(label, labels)
61 86
62 def ToASCII(label): 87 def ToASCII(label):
63 try: 88 try:
64 # Step 1: try ASCII 89 # Step 1: try ASCII
65 label = label.encode("ascii") 90 label = label.encode("ascii")
66 except UnicodeError: 91 except UnicodeError:
67 pass 92 pass
68 else: 93 else:
69 # Skip to step 3: UseSTD3ASCIIRules is false, so 94 # Skip to step 3: UseSTD3ASCIIRules is false, so
70 # Skip to step 8. 95 # Skip to step 8.
71 if 0 < len(label) < 64: 96 validate_label(label)
r.david.murray 2013/04/27 03:04:44 I may get overruled on this :), but there is a que…
mmilkin 2013/04/27 16:06:04 Fair enough, I can break the function down into jus…
72 return label 97 return label
73 raise UnicodeError("label empty or too long")
74 98
75 # Step 2: nameprep 99 # Step 2: nameprep
76 label = nameprep(label) 100 label = nameprep(label)
77 101
78 # Step 3: UseSTD3ASCIIRules is false 102 # Step 3: UseSTD3ASCIIRules is false
79 # Step 4: try ASCII 103 # Step 4: try ASCII
80 try: 104 try:
81 label = label.encode("ascii") 105 label = label.encode("ascii")
82 except UnicodeError: 106 except UnicodeError:
83 pass 107 pass
84 else: 108 else:
85 # Skip to step 8. 109 # Skip to step 8.
86 if 0 < len(label) < 64: 110 validate_label(label)
87 return label 111 return label
88 raise UnicodeError("label empty or too long")
89 112
90 # Step 5: Check ACE prefix 113 # Step 5: Check ACE prefix
91 if label.startswith(sace_prefix): 114 if label.startswith(sace_prefix):
92 raise UnicodeError("Label starts with ACE prefix") 115 raise UnicodeError("Label starts with ACE prefix")
93 116
94 # Step 6: Encode with PUNYCODE 117 # Step 6: Encode with PUNYCODE
95 label = label.encode("punycode") 118 label = label.encode("punycode")
96 119
97 # Step 7: Prepend ACE prefix 120 # Step 7: Prepend ACE prefix
98 label = ace_prefix + label 121 label = ace_prefix + label
99 122
100 # Step 8: Check size 123 # Step 8: Check size
101 if 0 < len(label) < 64: 124 validate_max_length(label)
102 return label 125 return label
103 raise UnicodeError("label empty or too long")
104 126
105 def ToUnicode(label): 127 def ToUnicode(label):
106 # Step 1: Check for ASCII 128 # Step 1: Check for ASCII
107 if isinstance(label, bytes): 129 if isinstance(label, bytes):
108 pure_ascii = True 130 pure_ascii = True
109 else: 131 else:
110 try: 132 try:
111 label = label.encode("ascii") 133 label = label.encode("ascii")
112 pure_ascii = True 134 pure_ascii = True
113 except UnicodeError: 135 except UnicodeError:
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
154 return b'', 0 176 return b'', 0
155 177
156 try: 178 try:
157 result = input.encode('ascii') 179 result = input.encode('ascii')
158 except UnicodeEncodeError: 180 except UnicodeEncodeError:
159 pass 181 pass
160 else: 182 else:
161 # ASCII name: fast path 183 # ASCII name: fast path
162 labels = result.split(b'.') 184 labels = result.split(b'.')
163 for label in labels[:-1]: 185 for label in labels[:-1]:
164 if not (0 < len(label) < 64): 186 validate_label(label, result)
165 raise UnicodeError("label empty or too long") 187
166 if len(labels[-1]) >= 64: 188 validate_max_length(labels[-1])
167 raise UnicodeError("label too long") 189
168 return result, len(input) 190 return result, len(input)
169 191
170 result = bytearray() 192 result = bytearray()
171 labels = dots.split(input) 193 labels = dots.split(input)
172 if labels and not labels[-1]: 194 if labels and not labels[-1]:
173 trailing_dot = b'.' 195 trailing_dot = b'.'
174 del labels[-1] 196 del labels[-1]
175 else: 197 else:
176 trailing_dot = b'' 198 trailing_dot = b''
177 for label in labels: 199 for label in labels:
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
300 def getregentry(): 322 def getregentry():
301 return codecs.CodecInfo( 323 return codecs.CodecInfo(
302 name='idna', 324 name='idna',
303 encode=Codec().encode, 325 encode=Codec().encode,
304 decode=Codec().decode, 326 decode=Codec().decode,
305 incrementalencoder=IncrementalEncoder, 327 incrementalencoder=IncrementalEncoder,
306 incrementaldecoder=IncrementalDecoder, 328 incrementaldecoder=IncrementalDecoder,
307 streamwriter=StreamWriter, 329 streamwriter=StreamWriter,
308 streamreader=StreamReader, 330 streamreader=StreamReader,
309 ) 331 )
OLDNEW
« no previous file with comments | « no previous file | Lib/test/test_unicode.py » ('j') | Lib/test/test_unicode.py » ('J')

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+