Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(15)

Delta Between Two Patch Sets: Lib/encodings/idna.py

Issue 9682: socket.create_connection error message for domain subpart with invalid length is very confusing
Left Patch Set: Created 6 years, 10 months ago
Right Patch Set: Created 6 years, 9 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | Lib/test/test_unicode.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
LEFT | RIGHT
1 # This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep) 1 # This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
2 2
3 import stringprep, re, codecs 3 import stringprep, re, codecs
4 from unicodedata import ucd_3_2_0 as unicodedata 4 from unicodedata import ucd_3_2_0 as unicodedata
5 5
6 # IDNA section 3.1 6 # IDNA section 3.1
7 dots = re.compile("[\u002E\u3002\uFF0E\uFF61]") 7 dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
8 8
9 # IDNA section 5 9 # IDNA section 5
10 ace_prefix = b"xn--" 10 ace_prefix = b"xn--"
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
52 52
53 # 3) If a string contains any RandALCat character, a 53 # 3) If a string contains any RandALCat character, a
54 # RandALCat character MUST be the first character of the 54 # RandALCat character MUST be the first character of the
55 # string, and a RandALCat character MUST be the last 55 # string, and a RandALCat character MUST be the last
56 # character of the string. 56 # character of the string.
57 if not RandAL[0] or not RandAL[-1]: 57 if not RandAL[0] or not RandAL[-1]:
58 raise UnicodeError("Violation of BIDI requirement 3") 58 raise UnicodeError("Violation of BIDI requirement 3")
59 59
60 return label 60 return label
61 61
62 def validate_max_length(label): 62 def _raise_max_length_error(label):
63 """ 63 raise UnicodeError("label %r is too long" % label.decode())
64 :param label: ascii encoded 64
r.david.murray 2013/04/27 03:04:44 We don't use this style of docstring in the stdlib
mmilkin 2013/04/27 16:04:56 Good to know, will fix. On 2013/04/27 03:04:44, r.da…
65 """ 65 def _raise_min_length_error(label, labels=None):
66 if len(label) >= 64: 66 value = ' in %r' % labels.decode() if labels else ''
67 raise UnicodeError("label %r is too long" % label.decode()) 67 raise UnicodeError("empty label%r" % value)
68
69 def validate_min_length(label, labels=None):
70 """
71 :param label: ascii encoded
72 :param labels: ascii encoded
73 """
74 if not label:
75 value = ' in %r' % labels.decode() if labels else ''
76 raise UnicodeError("empty label%r" % value)
77
78 def validate_label(label, labels=None):
79 """
80 labels supplied must be of size 0 < label < 64
81 :param label: ascii encoded
82 :param labels: ascii encoded
83 """
84 validate_max_length(label)
85 validate_min_length(label, labels)
86 68
87 def ToASCII(label): 69 def ToASCII(label):
70
88 try: 71 try:
89 # Step 1: try ASCII 72 # Step 1: try ASCII
90 label = label.encode("ascii") 73 label = label.encode("ascii")
91 except UnicodeError: 74 except UnicodeError:
92 pass 75 pass
93 else: 76 else:
94 # Skip to step 3: UseSTD3ASCIIRules is false, so 77 # Skip to step 3: UseSTD3ASCIIRules is false, so
95 # Skip to step 8. 78 # Skip to step 8.
96 validate_label(label) 79 if not label:
r.david.murray 2013/04/27 03:04:44 I may get overruled on this :), but there is a que…
mmilkin 2013/04/27 16:06:04 Fair enough, I can break the function down into jus…
80 _raise_min_length_error(label)
81 if len(label) >=64 :
82 _raise_max_length_error(label)
97 return label 83 return label
98 84
99 # Step 2: nameprep 85 # Step 2: nameprep
100 label = nameprep(label) 86 label = nameprep(label)
101 87
102 # Step 3: UseSTD3ASCIIRules is false 88 # Step 3: UseSTD3ASCIIRules is false
103 # Step 4: try ASCII 89 # Step 4: try ASCII
104 try: 90 try:
105 label = label.encode("ascii") 91 label = label.encode("ascii")
106 except UnicodeError: 92 except UnicodeError:
107 pass 93 pass
108 else: 94 else:
109 # Skip to step 8. 95 # Skip to step 8.
110 validate_label(label) 96 if not label:
97 _raise_min_length_error(label)
98 if len(label) >= 64:
99 _raise_max_length_error(label)
111 return label 100 return label
112 101
113 # Step 5: Check ACE prefix 102 # Step 5: Check ACE prefix
114 if label.startswith(sace_prefix): 103 if label.startswith(sace_prefix):
115 raise UnicodeError("Label starts with ACE prefix") 104 raise UnicodeError("Label starts with ACE prefix")
116 105
117 # Step 6: Encode with PUNYCODE 106 # Step 6: Encode with PUNYCODE
118 label = label.encode("punycode") 107 label = label.encode("punycode")
119 108
120 # Step 7: Prepend ACE prefix 109 # Step 7: Prepend ACE prefix
121 label = ace_prefix + label 110 label = ace_prefix + label
122 111
123 # Step 8: Check size 112 # Step 8: Check size
124 validate_max_length(label) 113 if len(label) >= 64:
114 _raise_max_length_error(label)
125 return label 115 return label
126 116
127 def ToUnicode(label): 117 def ToUnicode(label):
128 # Step 1: Check for ASCII 118 # Step 1: Check for ASCII
129 if isinstance(label, bytes): 119 if isinstance(label, bytes):
130 pure_ascii = True 120 pure_ascii = True
131 else: 121 else:
132 try: 122 try:
133 label = label.encode("ascii") 123 label = label.encode("ascii")
134 pure_ascii = True 124 pure_ascii = True
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
176 return b'', 0 166 return b'', 0
177 167
178 try: 168 try:
179 result = input.encode('ascii') 169 result = input.encode('ascii')
180 except UnicodeEncodeError: 170 except UnicodeEncodeError:
181 pass 171 pass
182 else: 172 else:
183 # ASCII name: fast path 173 # ASCII name: fast path
184 labels = result.split(b'.') 174 labels = result.split(b'.')
185 for label in labels[:-1]: 175 for label in labels[:-1]:
186 validate_label(label, result) 176 if not label:
187 177 _raise_min_length_error(label, result)
188 validate_max_length(labels[-1]) 178 if len(label) >= 64:
179 _raise_max_length_error(label)
180 if len(labels[-1]) >= 64:
181 _raise_max_length_error(labels[-1])
189 182
190 return result, len(input) 183 return result, len(input)
191 184
192 result = bytearray() 185 result = bytearray()
193 labels = dots.split(input) 186 labels = dots.split(input)
194 if labels and not labels[-1]: 187 if labels and not labels[-1]:
195 trailing_dot = b'.' 188 trailing_dot = b'.'
196 del labels[-1] 189 del labels[-1]
197 else: 190 else:
198 trailing_dot = b'' 191 trailing_dot = b''
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after
322 def getregentry(): 315 def getregentry():
323 return codecs.CodecInfo( 316 return codecs.CodecInfo(
324 name='idna', 317 name='idna',
325 encode=Codec().encode, 318 encode=Codec().encode,
326 decode=Codec().decode, 319 decode=Codec().decode,
327 incrementalencoder=IncrementalEncoder, 320 incrementalencoder=IncrementalEncoder,
328 incrementaldecoder=IncrementalDecoder, 321 incrementaldecoder=IncrementalDecoder,
329 streamwriter=StreamWriter, 322 streamwriter=StreamWriter,
330 streamreader=StreamReader, 323 streamreader=StreamReader,
331 ) 324 )
LEFT | RIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+