Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(50)

Delta Between Two Patch Sets: Lib/idlelib/HyperParser.py

Issue 21686: IDLE - Test hyperparser
Left Patch Set: Created 5 years, 8 months ago
Right Patch Set: Created 5 years, 8 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | Lib/idlelib/idle_test/test_hyperparser.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
LEFT | RIGHT
1 """ 1 """Provide advanced parsing abilities for ParenMatch and other extensions.
2 HyperParser 2
3 =========== 3 HyperParser uses PyParser. PyParser mostly gives information on the
4 This module defines the HyperParser class, which provides advanced parsing 4 proper indentation of code. HyperParser gives additional information on
5 abilities for the ParenMatch and other extensions. 5 the structure of code.
6 The HyperParser uses PyParser. PyParser is intended mostly to give information
7 on the proper indentation of code. HyperParser gives some information on the
8 structure of code, used by extensions to help the user.
9 """ 6 """
10 7
11 import string 8 import string
12 import keyword 9 import keyword
13 from idlelib import PyParse 10 from idlelib import PyParse
14 11
15 class HyperParser: 12 class HyperParser:
16 13
17 def __init__(self, editwin, index): 14 def __init__(self, editwin, index):
18 """Initialize the HyperParser to analyze the surroundings of the given 15 "To initialize, analyze the surroundings of the given index."
19 index.
20 """
21 16
22 self.editwin = editwin 17 self.editwin = editwin
23 self.text = text = editwin.text 18 self.text = text = editwin.text
24 19
25 parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth) 20 parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)
26 21
27 def index2line(index): 22 def index2line(index):
28 return int(float(index)) 23 return int(float(index))
29 lno = index2line(text.index(index)) 24 lno = index2line(text.index(index))
30 25
31 if not editwin.context_use_ps1: 26 if not editwin.context_use_ps1:
32 for context in editwin.num_context_lines: 27 for context in editwin.num_context_lines:
33 startat = max(lno - context, 1) 28 startat = max(lno - context, 1)
34 startatindex = repr(startat) + ".0" 29 startatindex = repr(startat) + ".0"
35 stopatindex = "%d.end" % lno 30 stopatindex = "%d.end" % lno
36 # We add the newline because PyParse requires a newline at end. 31 # We add the newline because PyParse requires a newline
37 # We add a space so that index won't be at end of line, so that 32 # at end. We add a space so that index won't be at end
38 # its status will be the same as the char before it, if should. 33 # of line, so that its status will be the same as the
34 # char before it, if should.
39 parser.set_str(text.get(startatindex, stopatindex)+' \n') 35 parser.set_str(text.get(startatindex, stopatindex)+' \n')
40 bod = parser.find_good_parse_start( 36 bod = parser.find_good_parse_start(
41 editwin._build_char_in_string_func(startatindex)) 37 editwin._build_char_in_string_func(startatindex))
42 if bod is not None or startat == 1: 38 if bod is not None or startat == 1:
43 break 39 break
44 parser.set_lo(bod or 0) 40 parser.set_lo(bod or 0)
45 else: 41 else:
46 r = text.tag_prevrange("console", index) 42 r = text.tag_prevrange("console", index)
47 if r: 43 if r:
48 startatindex = r[1] 44 startatindex = r[1]
49 else: 45 else:
50 startatindex = "1.0" 46 startatindex = "1.0"
51 stopatindex = "%d.end" % lno 47 stopatindex = "%d.end" % lno
52 # We add the newline because PyParse requires a newline at end. 48 # We add the newline because PyParse requires it. We add a
53 # We add a space so that index won't be at end of line, so that 49 # space so that index won't be at end of line, so that its
54 # its status will be the same as the char before it, if should. 50 # status will be the same as the char before it, if should.
55 parser.set_str(text.get(startatindex, stopatindex)+' \n') 51 parser.set_str(text.get(startatindex, stopatindex)+' \n')
56 parser.set_lo(0) 52 parser.set_lo(0)
57 53
58 # We want what the parser has, except for the last newline and space. 54 # We want what the parser has, minus the last newline and space.
59 self.rawtext = parser.str[:-2] 55 self.rawtext = parser.str[:-2]
60 # As far as I can see, parser.str preserves the statement we are in, 56 # Parser.str apparently preserves the statement we are in, so
61 # so that stopatindex can be used to synchronize the string with the 57 # that stopatindex can be used to synchronize the string with
62 # text box indices. 58 # the text box indices.
63 self.stopatindex = stopatindex 59 self.stopatindex = stopatindex
64 self.bracketing = parser.get_last_stmt_bracketing() 60 self.bracketing = parser.get_last_stmt_bracketing()
65 # find which pairs of bracketing are openers. These always correspond 61 # find which pairs of bracketing are openers. These always
66 # to a character of rawtext. 62 # correspond to a character of rawtext.
67 self.isopener = [i>0 and self.bracketing[i][1]>self.bracketing[i-1][1] 63 self.isopener = [i>0 and self.bracketing[i][1] >
64 self.bracketing[i-1][1]
68 for i in range(len(self.bracketing))] 65 for i in range(len(self.bracketing))]
69 66
70 self.set_index(index) 67 self.set_index(index)
71 68
72 def set_index(self, index): 69 def set_index(self, index):
73 """Set the index to which the functions relate. Note that it must be 70 """Set the index to which the functions relate.
74 in the same statement. 71
72 The index must be in the same statement.
75 """ 73 """
76 indexinrawtext = \ 74 indexinrawtext = (len(self.rawtext) -
77 len(self.rawtext) - len(self.text.get(index, self.stopatindex)) 75 len(self.text.get(index, self.stopatindex)))
78 if indexinrawtext < 0: 76 if indexinrawtext < 0:
79 raise ValueError("The index given is before the analyzed " 77 raise ValueError("Index %s precedes the analyzed statement"
80 "statement") 78 % index)
81 self.indexinrawtext = indexinrawtext 79 self.indexinrawtext = indexinrawtext
82 # find the rightmost bracket to which index belongs 80 # find the rightmost bracket to which index belongs
83 self.indexbracket = 0 81 self.indexbracket = 0
84 while self.indexbracket < len(self.bracketing)-1 and \ 82 while (self.indexbracket < len(self.bracketing)-1 and
85 self.bracketing[self.indexbracket+1][0] < self.indexinrawtext: 83 self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
86 self.indexbracket += 1 84 self.indexbracket += 1
87 if self.indexbracket < len(self.bracketing)-1 and \ 85 if (self.indexbracket < len(self.bracketing)-1 and
88 self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and \ 86 self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and
89 not self.isopener[self.indexbracket+1]: 87 not self.isopener[self.indexbracket+1]):
90 self.indexbracket += 1 88 self.indexbracket += 1
91 89
92 def is_in_string(self): 90 def is_in_string(self):
93 """Is the index given to the HyperParser is in a string?""" 91 """Is the index given to the HyperParser in a string?"""
94 # The bracket to which we belong should be an opener. 92 # The bracket to which we belong should be an opener.
95 # If it's an opener, it has to have a character. 93 # If it's an opener, it has to have a character.
96 return self.isopener[self.indexbracket] and \ 94 return (self.isopener[self.indexbracket] and
97 self.rawtext[self.bracketing[self.indexbracket][0]] in ('"', "'") 95 self.rawtext[self.bracketing[self.indexbracket][0]]
96 in ('"', "'"))
98 97
99 def is_in_code(self): 98 def is_in_code(self):
100 """Is the index given to the HyperParser is in a normal code?""" 99 """Is the index given to the HyperParser in normal code?"""
101 return not self.isopener[self.indexbracket] or \ 100 return (not self.isopener[self.indexbracket] or
102 self.rawtext[self.bracketing[self.indexbracket][0]] not in \ 101 self.rawtext[self.bracketing[self.indexbracket][0]]
103 ('#', '"', "'") 102 not in ('#', '"', "'"))
104 103
105 def get_surrounding_brackets(self, openers='([{', mustclose=False): 104 def get_surrounding_brackets(self, openers='([{', mustclose=False):
106 """If the index given to the HyperParser is surrounded by a bracket 105 """Return bracket indexes or None.
107 defined in openers (or at least has one before it), return the 106
108 indices of the opening bracket and the closing bracket (or the 107 If the index given to the HyperParser is surrounded by a
109 end of line, whichever comes first). 108 bracket defined in openers (or at least has one before it),
110 If it is not surrounded by brackets, or the end of line comes before 109 return the indices of the opening bracket and the closing
111 the closing bracket and mustclose is True, returns None. 110 bracket (or the end of line, whichever comes first).
111
112 If it is not surrounded by brackets, or the end of line comes
113 before the closing bracket and mustclose is True, returns None.
112 """ 114 """
115
113 bracketinglevel = self.bracketing[self.indexbracket][1] 116 bracketinglevel = self.bracketing[self.indexbracket][1]
114 before = self.indexbracket 117 before = self.indexbracket
115 while not self.isopener[before] or \ 118 while (not self.isopener[before] or
116 self.rawtext[self.bracketing[before][0]] not in openers or \ 119 self.rawtext[self.bracketing[before][0]] not in openers or
117 self.bracketing[before][1] > bracketinglevel: 120 self.bracketing[before][1] > bracketinglevel):
118 before -= 1 121 before -= 1
119 if before < 0: 122 if before < 0:
120 return None 123 return None
121 bracketinglevel = min(bracketinglevel, self.bracketing[before][1]) 124 bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
122 after = self.indexbracket + 1 125 after = self.indexbracket + 1
123 while after < len(self.bracketing) and \ 126 while (after < len(self.bracketing) and
124 self.bracketing[after][1] >= bracketinglevel: 127 self.bracketing[after][1] >= bracketinglevel):
125 after += 1 128 after += 1
126 129
127 beforeindex = self.text.index("%s-%dc" % 130 beforeindex = self.text.index("%s-%dc" %
128 (self.stopatindex, len(self.rawtext)-self.bracketing[before][0])) 131 (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
129 if after >= len(self.bracketing) or \ 132 if (after >= len(self.bracketing) or
130 self.bracketing[after][0] > len(self.rawtext): 133 self.bracketing[after][0] > len(self.rawtext)):
131 if mustclose: 134 if mustclose:
132 return None 135 return None
133 afterindex = self.stopatindex 136 afterindex = self.stopatindex
134 else: 137 else:
135 # We are after a real char, so it is a ')' and we give the index 138 # We are after a real char, so it is a ')' and we give the
136 # before it. 139 # index before it.
137 afterindex = self.text.index("%s-%dc" % 140 afterindex = self.text.index(
138 (self.stopatindex, 141 "%s-%dc" % (self.stopatindex,
139 len(self.rawtext)-(self.bracketing[after][0]-1))) 142 len(self.rawtext)-(self.bracketing[after][0]-1)))
140 143
141 return beforeindex, afterindex 144 return beforeindex, afterindex
142 145
143 # This string includes all chars that may be in a white space 146 # Ascii chars that may be in a white space
144 _whitespace_chars = " \t\n\\" 147 _whitespace_chars = " \t\n\\"
145 # This string includes all chars that may be in an identifier 148 # Ascii chars that may be in an identifier
146 _id_chars = string.ascii_letters + string.digits + "_" 149 _id_chars = string.ascii_letters + string.digits + "_"
147 # This string includes all chars that may be the 1st char of an identifier 150 # Ascii chars that may be the first char of an identifier
148 _id_first_chars = string.ascii_letters + "_" 151 _id_first_chars = string.ascii_letters + "_"
149 152
150 # Given a string and pos, return the number of chars in the identifier 153 # Given a string and pos, return the number of chars in the
151 # which ends at pos, or 0 if there is no such one. Saved words are not 154 # identifier which ends at pos, or 0 if there is no such one. Saved
152 # identifiers. 155 # words are not identifiers.
153 def _eat_identifier(self, str, limit, pos): 156 def _eat_identifier(self, str, limit, pos):
154 i = pos 157 i = pos
155 while i > limit and str[i-1] in self._id_chars: 158 while i > limit and str[i-1] in self._id_chars:
156 i -= 1 159 i -= 1
157 if i < pos and (str[i] not in self._id_first_chars or \ 160 if (i < pos and (str[i] not in self._id_first_chars or
158 keyword.iskeyword(str[i:pos])): 161 (keyword.iskeyword(str[i:pos]) and
162 str[i:pos] not in {'None', 'False', 'True'}))):
159 i = pos 163 i = pos
160 return pos - i 164 return pos - i
161 165
162 def get_expression(self): 166 def get_expression(self):
163 """Return a string with the Python expression which ends at the given 167 """Return a string with the Python expression which ends at the
164 index, which is empty if there is no real one. 168 given index, which is empty if there is no real one.
165 """ 169 """
166 if not self.is_in_code(): 170 if not self.is_in_code():
167 raise ValueError("get_expression should only be called if index "\ 171 raise ValueError("get_expression should only be called"
168 "is inside a code.") 172 "if index is inside a code.")
169 173
170 rawtext = self.rawtext 174 rawtext = self.rawtext
171 bracketing = self.bracketing 175 bracketing = self.bracketing
172 176
173 brck_index = self.indexbracket 177 brck_index = self.indexbracket
174 brck_limit = bracketing[brck_index][0] 178 brck_limit = bracketing[brck_index][0]
175 pos = self.indexinrawtext 179 pos = self.indexinrawtext
176 180
177 last_identifier_pos = pos 181 last_identifier_pos = pos
178 postdot_phase = True 182 postdot_phase = True
179 183
180 while 1: 184 while 1:
181 # Eat whitespaces, comments, and if postdot_phase is False - a dot 185 # Eat whitespaces, comments, and if postdot_phase is False - a dot
182 while 1: 186 while 1:
183 if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars: 187 if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
184 # Eat a whitespace 188 # Eat a whitespace
185 pos -= 1 189 pos -= 1
186 elif not postdot_phase and \ 190 elif (not postdot_phase and
187 pos > brck_limit and rawtext[pos-1] == '.': 191 pos > brck_limit and rawtext[pos-1] == '.'):
188 # Eat a dot 192 # Eat a dot
189 pos -= 1 193 pos -= 1
190 postdot_phase = True 194 postdot_phase = True
191 # The next line will fail if we are *inside* a comment, but we 195 # The next line will fail if we are *inside* a comment,
192 # shouldn't be. 196 # but we shouldn't be.
193 elif pos == brck_limit and brck_index > 0 and \ 197 elif (pos == brck_limit and brck_index > 0 and
194 rawtext[bracketing[brck_index-1][0]] == '#': 198 rawtext[bracketing[brck_index-1][0]] == '#'):
195 # Eat a comment 199 # Eat a comment
196 brck_index -= 2 200 brck_index -= 2
197 brck_limit = bracketing[brck_index][0] 201 brck_limit = bracketing[brck_index][0]
198 pos = bracketing[brck_index+1][0] 202 pos = bracketing[brck_index+1][0]
199 else: 203 else:
200 # If we didn't eat anything, quit. 204 # If we didn't eat anything, quit.
201 break 205 break
202 206
203 if not postdot_phase: 207 if not postdot_phase:
204 # We didn't find a dot, so the expression end at the last 208 # We didn't find a dot, so the expression end at the
205 # identifier pos. 209 # last identifier pos.
206 break 210 break
207 211
208 ret = self._eat_identifier(rawtext, brck_limit, pos) 212 ret = self._eat_identifier(rawtext, brck_limit, pos)
209 if ret: 213 if ret:
210 # There is an identifier to eat 214 # There is an identifier to eat
211 pos = pos - ret 215 pos = pos - ret
212 last_identifier_pos = pos 216 last_identifier_pos = pos
213 # Now, in order to continue the search, we must find a dot. 217 # Now, to continue the search, we must find a dot.
214 postdot_phase = False 218 postdot_phase = False
215 # (the loop continues now) 219 # (the loop continues now)
216 220
217 elif pos == brck_limit: 221 elif pos == brck_limit:
218 # We are at a bracketing limit. If it is a closing bracket, 222 # We are at a bracketing limit. If it is a closing
219 # eat the bracket, otherwise, stop the search. 223 # bracket, eat the bracket, otherwise, stop the search.
220 level = bracketing[brck_index][1] 224 level = bracketing[brck_index][1]
221 while brck_index > 0 and bracketing[brck_index-1][1] > level: 225 while brck_index > 0 and bracketing[brck_index-1][1] > level:
222 brck_index -= 1 226 brck_index -= 1
223 if bracketing[brck_index][0] == brck_limit: 227 if bracketing[brck_index][0] == brck_limit:
224 # We were not at the end of a closing bracket 228 # We were not at the end of a closing bracket
225 break 229 break
226 pos = bracketing[brck_index][0] 230 pos = bracketing[brck_index][0]
227 brck_index -= 1 231 brck_index -= 1
228 brck_limit = bracketing[brck_index][0] 232 brck_limit = bracketing[brck_index][0]
229 last_identifier_pos = pos 233 last_identifier_pos = pos
230 if rawtext[pos] in "([": 234 if rawtext[pos] in "([":
231 # [] and () may be used after an identifier, so we 235 # [] and () may be used after an identifier, so we
232 # continue. postdot_phase is True, so we don't allow a dot. 236 # continue. postdot_phase is True, so we don't allow a dot.
233 pass 237 pass
234 else: 238 else:
235 # We can't continue after other types of brackets 239 # We can't continue after other types of brackets
236 if rawtext[pos] in "'\"": 240 if rawtext[pos] in "'\"":
237 # Scan a string prefix 241 # Scan a string prefix
238 while pos > 0 and rawtext[pos - 1] in "rRbBuU": 242 while pos > 0 and rawtext[pos - 1] in "rRbBuU":
239 pos -= 1 243 pos -= 1
240 last_identifier_pos = pos 244 last_identifier_pos = pos
241 break 245 break
242 246
243 else: 247 else:
244 # We've found an operator or something. 248 # We've found an operator or something.
245 break 249 break
246 250
247 return rawtext[last_identifier_pos:self.indexinrawtext] 251 return rawtext[last_identifier_pos:self.indexinrawtext]
248 252
253
249 if __name__ == '__main__': 254 if __name__ == '__main__':
250 import unittest 255 import unittest
251 unittest.main('idlelib.idle_test.test_hyperparser', verbosity=2) 256 unittest.main('idlelib.idle_test.test_hyperparser', verbosity=2)
LEFT | RIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+