Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(19)

Side by Side Diff: Lib/encodings/idna.py

Issue 9682: socket.create_connection error message for domain subpart with invalid length is very confusing
Patch Set: Created 6 years, 9 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | Lib/test/test_unicode.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 # This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep) 1 # This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
2 2
3 import stringprep, re, codecs 3 import stringprep, re, codecs
4 from unicodedata import ucd_3_2_0 as unicodedata 4 from unicodedata import ucd_3_2_0 as unicodedata
5 5
6 # IDNA section 3.1 6 # IDNA section 3.1
7 dots = re.compile("[\u002E\u3002\uFF0E\uFF61]") 7 dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
8 8
9 # IDNA section 5 9 # IDNA section 5
10 ace_prefix = b"xn--" 10 ace_prefix = b"xn--"
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
52 52
53 # 3) If a string contains any RandALCat character, a 53 # 3) If a string contains any RandALCat character, a
54 # RandALCat character MUST be the first character of the 54 # RandALCat character MUST be the first character of the
55 # string, and a RandALCat character MUST be the last 55 # string, and a RandALCat character MUST be the last
56 # character of the string. 56 # character of the string.
57 if not RandAL[0] or not RandAL[-1]: 57 if not RandAL[0] or not RandAL[-1]:
58 raise UnicodeError("Violation of BIDI requirement 3") 58 raise UnicodeError("Violation of BIDI requirement 3")
59 59
60 return label 60 return label
61 61
62 def _raise_max_length_error(label):
63 raise UnicodeError("label %r is too long" % label.decode())
64
65 def _raise_min_length_error(label, labels=None):
66 value = ' in %r' % labels.decode() if labels else ''
67 raise UnicodeError("empty label%r" % value)
68
62 def ToASCII(label): 69 def ToASCII(label):
70
63 try: 71 try:
64 # Step 1: try ASCII 72 # Step 1: try ASCII
65 label = label.encode("ascii") 73 label = label.encode("ascii")
66 except UnicodeError: 74 except UnicodeError:
67 pass 75 pass
68 else: 76 else:
69 # Skip to step 3: UseSTD3ASCIIRules is false, so 77 # Skip to step 3: UseSTD3ASCIIRules is false, so
70 # Skip to step 8. 78 # Skip to step 8.
71 if 0 < len(label) < 64: 79 if not label:
72 return label 80 _raise_min_length_error(label)
73 raise UnicodeError("label empty or too long") 81 if len(label) >=64 :
82 _raise_max_length_error(label)
83 return label
74 84
75 # Step 2: nameprep 85 # Step 2: nameprep
76 label = nameprep(label) 86 label = nameprep(label)
77 87
78 # Step 3: UseSTD3ASCIIRules is false 88 # Step 3: UseSTD3ASCIIRules is false
79 # Step 4: try ASCII 89 # Step 4: try ASCII
80 try: 90 try:
81 label = label.encode("ascii") 91 label = label.encode("ascii")
82 except UnicodeError: 92 except UnicodeError:
83 pass 93 pass
84 else: 94 else:
85 # Skip to step 8. 95 # Skip to step 8.
86 if 0 < len(label) < 64: 96 if not label:
87 return label 97 _raise_min_length_error(label)
88 raise UnicodeError("label empty or too long") 98 if len(label) >= 64:
99 _raise_max_length_error(label)
100 return label
89 101
90 # Step 5: Check ACE prefix 102 # Step 5: Check ACE prefix
91 if label.startswith(sace_prefix): 103 if label.startswith(sace_prefix):
92 raise UnicodeError("Label starts with ACE prefix") 104 raise UnicodeError("Label starts with ACE prefix")
93 105
94 # Step 6: Encode with PUNYCODE 106 # Step 6: Encode with PUNYCODE
95 label = label.encode("punycode") 107 label = label.encode("punycode")
96 108
97 # Step 7: Prepend ACE prefix 109 # Step 7: Prepend ACE prefix
98 label = ace_prefix + label 110 label = ace_prefix + label
99 111
100 # Step 8: Check size 112 # Step 8: Check size
101 if 0 < len(label) < 64: 113 if len(label) >= 64:
102 return label 114 _raise_max_length_error(label)
103 raise UnicodeError("label empty or too long") 115 return label
104 116
105 def ToUnicode(label): 117 def ToUnicode(label):
106 # Step 1: Check for ASCII 118 # Step 1: Check for ASCII
107 if isinstance(label, bytes): 119 if isinstance(label, bytes):
108 pure_ascii = True 120 pure_ascii = True
109 else: 121 else:
110 try: 122 try:
111 label = label.encode("ascii") 123 label = label.encode("ascii")
112 pure_ascii = True 124 pure_ascii = True
113 except UnicodeError: 125 except UnicodeError:
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
154 return b'', 0 166 return b'', 0
155 167
156 try: 168 try:
157 result = input.encode('ascii') 169 result = input.encode('ascii')
158 except UnicodeEncodeError: 170 except UnicodeEncodeError:
159 pass 171 pass
160 else: 172 else:
161 # ASCII name: fast path 173 # ASCII name: fast path
162 labels = result.split(b'.') 174 labels = result.split(b'.')
163 for label in labels[:-1]: 175 for label in labels[:-1]:
164 if not (0 < len(label) < 64): 176 if not label:
165 raise UnicodeError("label empty or too long") 177 _raise_min_length_error(label, result)
178 if len(label) >= 64:
179 _raise_max_length_error(label)
166 if len(labels[-1]) >= 64: 180 if len(labels[-1]) >= 64:
167 raise UnicodeError("label too long") 181 _raise_max_length_error(labels[-1])
182
168 return result, len(input) 183 return result, len(input)
169 184
170 result = bytearray() 185 result = bytearray()
171 labels = dots.split(input) 186 labels = dots.split(input)
172 if labels and not labels[-1]: 187 if labels and not labels[-1]:
173 trailing_dot = b'.' 188 trailing_dot = b'.'
174 del labels[-1] 189 del labels[-1]
175 else: 190 else:
176 trailing_dot = b'' 191 trailing_dot = b''
177 for label in labels: 192 for label in labels:
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
300 def getregentry(): 315 def getregentry():
301 return codecs.CodecInfo( 316 return codecs.CodecInfo(
302 name='idna', 317 name='idna',
303 encode=Codec().encode, 318 encode=Codec().encode,
304 decode=Codec().decode, 319 decode=Codec().decode,
305 incrementalencoder=IncrementalEncoder, 320 incrementalencoder=IncrementalEncoder,
306 incrementaldecoder=IncrementalDecoder, 321 incrementaldecoder=IncrementalDecoder,
307 streamwriter=StreamWriter, 322 streamwriter=StreamWriter,
308 streamreader=StreamReader, 323 streamreader=StreamReader,
309 ) 324 )
OLD | NEW
« no previous file with comments | « no previous file | Lib/test/test_unicode.py » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+