|
"""
HyperParser
===========
This module defines the HyperParser class, which provides advanced parsing
abilities for the ParenMatch and other extensions.
The HyperParser uses PyParser. PyParser is intended mostly to give information
on the proper indentation of code. HyperParser gives some information on the
structure of code, used by extensions to help the user.
"""
|
10 |
|
11 import string |
|
12 import keyword |
|
13 import PyParse |
|
14 |
|
15 class HyperParser: |
|
16 |
|
17 def __init__(self, editwin, index): |
|
18 """Initialize the HyperParser to analyze the surroundings of the given |
|
19 index. |
|
20 """ |
|
21 |
|
22 self.editwin = editwin |
|
23 self.text = text = editwin.text |
|
24 |
|
25 parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth) |
|
26 |
|
27 def index2line(index): |
|
28 return int(float(index)) |
|
29 lno = index2line(text.index(index)) |
|
30 |
|
31 if not editwin.context_use_ps1: |
|
32 for context in editwin.num_context_lines: |
|
33 startat = max(lno - context, 1) |
|
34 startatindex = `startat` + ".0" |
|
35 stopatindex = "%d.end" % lno |
|
36 # We add the newline because PyParse requires a newline at end. |
|
37 # We add a space so that index won't be at end of line, so that |
|
38 # its status will be the same as the char before it, if should. |
|
39 parser.set_str(text.get(startatindex, stopatindex)+' \n') |
|
40 bod = parser.find_good_parse_start( |
|
41 editwin._build_char_in_string_func(startatindex)) |
|
42 if bod is not None or startat == 1: |
|
43 break |
|
44 parser.set_lo(bod or 0) |
|
45 else: |
|
46 r = text.tag_prevrange("console", index) |
|
47 if r: |
|
48 startatindex = r[1] |
|
49 else: |
|
50 startatindex = "1.0" |
|
51 stopatindex = "%d.end" % lno |
|
52 # We add the newline because PyParse requires a newline at end. |
|
53 # We add a space so that index won't be at end of line, so that |
|
54 # its status will be the same as the char before it, if should. |
|
55 parser.set_str(text.get(startatindex, stopatindex)+' \n') |
|
56 parser.set_lo(0) |
|
57 |
|
58 # We want what the parser has, except for the last newline and space. |
|
59 self.rawtext = parser.str[:-2] |
|
60 # As far as I can see, parser.str preserves the statement we are in, |
|
61 # so that stopatindex can be used to synchronize the string with the |
|
62 # text box indices. |
|
63 self.stopatindex = stopatindex |
|
64 self.bracketing = parser.get_last_stmt_bracketing() |
|
65 # find which pairs of bracketing are openers. These always correspond |
|
66 # to a character of rawtext. |
|
67 self.isopener = [i>0 and self.bracketing[i][1] > self.bracketing[i-1][1] |
|
68 for i in range(len(self.bracketing))] |
|
69 |
|
70 self.set_index(index) |
|
71 |
|
72 def set_index(self, index): |
|
73 """Set the index to which the functions relate. Note that it must be |
|
74 in the same statement. |
|
75 """ |
|
76 indexinrawtext = \ |
|
77 len(self.rawtext) - len(self.text.get(index, self.stopatindex)) |
|
78 if indexinrawtext < 0: |
|
79 raise ValueError("The index given is before the analyzed statement") |
|
80 self.indexinrawtext = indexinrawtext |
|
81 # find the rightmost bracket to which index belongs |
|
82 self.indexbracket = 0 |
|
83 while self.indexbracket < len(self.bracketing)-1 and \ |
|
84 self.bracketing[self.indexbracket+1][0] < self.indexinrawtext: |
|
85 self.indexbracket += 1 |
|
86 if self.indexbracket < len(self.bracketing)-1 and \ |
|
87 self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and \ |
|
88 not self.isopener[self.indexbracket+1]: |
|
89 self.indexbracket += 1 |
|
90 |
|
91 def is_in_string(self): |
|
92 """Is the index given to the HyperParser is in a string?""" |
|
93 # The bracket to which we belong should be an opener. |
|
94 # If it's an opener, it has to have a character. |
|
95 return self.isopener[self.indexbracket] and \ |
|
96 self.rawtext[self.bracketing[self.indexbracket][0]] in ('"', "'") |
|
97 |
|
98 def is_in_code(self): |
|
99 """Is the index given to the HyperParser is in a normal code?""" |
|
100 return not self.isopener[self.indexbracket] or \ |
|
101 self.rawtext[self.bracketing[self.indexbracket][0]] not in \ |
|
102 ('#', '"', "'") |
|
103 |
|
104 def get_surrounding_brackets(self, openers='([{', mustclose=False): |
|
105 """If the index given to the HyperParser is surrounded by a bracket |
|
106 defined in openers (or at least has one before it), return the |
|
107 indices of the opening bracket and the closing bracket (or the |
|
108 end of line, whichever comes first). |
|
109 If it is not surrounded by brackets, or the end of line comes before |
|
110 the closing bracket and mustclose is True, returns None. |
|
111 """ |
|
112 bracketinglevel = self.bracketing[self.indexbracket][1] |
|
113 before = self.indexbracket |
|
114 while not self.isopener[before] or \ |
|
115 self.rawtext[self.bracketing[before][0]] not in openers or \ |
|
116 self.bracketing[before][1] > bracketinglevel: |
|
117 before -= 1 |
|
118 if before < 0: |
|
119 return None |
|
120 bracketinglevel = min(bracketinglevel, self.bracketing[before][1]) |
|
121 after = self.indexbracket + 1 |
|
122 while after < len(self.bracketing) and \ |
|
123 self.bracketing[after][1] >= bracketinglevel: |
|
124 after += 1 |
|
125 |
|
126 beforeindex = self.text.index("%s-%dc" % |
|
127 (self.stopatindex, len(self.rawtext)-self.bracketing[before][0])) |
|
128 if after >= len(self.bracketing) or \ |
|
129 self.bracketing[after][0] > len(self.rawtext): |
|
130 if mustclose: |
|
131 return None |
|
132 afterindex = self.stopatindex |
|
133 else: |
|
134 # We are after a real char, so it is a ')' and we give the index |
|
135 # before it. |
|
136 afterindex = self.text.index("%s-%dc" % |
|
137 (self.stopatindex, |
|
138 len(self.rawtext)-(self.bracketing[after][0]-1))) |
|
139 |
|
140 return beforeindex, afterindex |
|
141 |
|
142 # This string includes all chars that may be in a white space |
|
143 _whitespace_chars = " \t\n\\" |
|
144 # This string includes all chars that may be in an identifier |
|
145 _id_chars = string.ascii_letters + string.digits + "_" |
|
146 # This string includes all chars that may be the first char of an identifier |
|
147 _id_first_chars = string.ascii_letters + "_" |
|
148 |
|
149 # Given a string and pos, return the number of chars in the identifier |
|
150 # which ends at pos, or 0 if there is no such one. Saved words are not |
|
151 # identifiers. |
|
152 def _eat_identifier(self, str, limit, pos): |
|
153 i = pos |
|
154 while i > limit and str[i-1] in self._id_chars: |
|
155 i -= 1 |
|
156 if i < pos and (str[i] not in self._id_first_chars or \ |
|
157 keyword.iskeyword(str[i:pos])): |
|
158 i = pos |
|
159 return pos - i |
|
160 |
|
161 def get_expression(self): |
|
162 """Return a string with the Python expression which ends at the given |
|
163 index, which is empty if there is no real one. |
|
164 """ |
|
165 if not self.is_in_code(): |
|
166 raise ValueError("get_expression should only be called if index "\ |
|
167 "is inside a code.") |
|
168 |
|
169 rawtext = self.rawtext |
|
170 bracketing = self.bracketing |
|
171 |
|
172 brck_index = self.indexbracket |
|
173 brck_limit = bracketing[brck_index][0] |
|
174 pos = self.indexinrawtext |
|
175 |
|
176 last_identifier_pos = pos |
|
177 postdot_phase = True |
|
178 |
|
179 while 1: |
|
180 # Eat whitespaces, comments, and if postdot_phase is False - one dot |
|
181 while 1: |
|
182 if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars: |
|
183 # Eat a whitespace |
|
184 pos -= 1 |
|
185 elif not postdot_phase and \ |
|
186 pos > brck_limit and rawtext[pos-1] == '.': |
|
187 # Eat a dot |
|
188 pos -= 1 |
|
189 postdot_phase = True |
|
190 # The next line will fail if we are *inside* a comment, but we |
|
191 # shouldn't be. |
|
192 elif pos == brck_limit and brck_index > 0 and \ |
|
193 rawtext[bracketing[brck_index-1][0]] == '#': |
|
194 # Eat a comment |
|
195 brck_index -= 2 |
|
196 brck_limit = bracketing[brck_index][0] |
|
197 pos = bracketing[brck_index+1][0] |
|
198 else: |
|
199 # If we didn't eat anything, quit. |
|
200 break |
|
201 |
|
202 if not postdot_phase: |
|
203 # We didn't find a dot, so the expression end at the last |
|
204 # identifier pos. |
|
205 break |
|
206 |
|
207 ret = self._eat_identifier(rawtext, brck_limit, pos) |
|
208 if ret: |
|
209 # There is an identifier to eat |
|
210 pos = pos - ret |
|
211 last_identifier_pos = pos |
|
212 # Now, in order to continue the search, we must find a dot. |
|
213 postdot_phase = False |
|
214 # (the loop continues now) |
|
215 |
|
216 elif pos == brck_limit: |
|
217 # We are at a bracketing limit. If it is a closing bracket, |
|
218 # eat the bracket, otherwise, stop the search. |
|
219 level = bracketing[brck_index][1] |
|
220 while brck_index > 0 and bracketing[brck_index-1][1] > level: |
|
221 brck_index -= 1 |
|
222 if bracketing[brck_index][0] == brck_limit: |
|
223 # We were not at the end of a closing bracket |
|
224 break |
|
225 pos = bracketing[brck_index][0] |
|
226 brck_index -= 1 |
|
227 brck_limit = bracketing[brck_index][0] |
|
228 last_identifier_pos = pos |
|
229 if rawtext[pos] in "([": |
|
230 # [] and () may be used after an identifier, so we |
|
231 # continue. postdot_phase is True, so we don't allow a dot. |
|
232 pass |
|
233 else: |
|
234 # We can't continue after other types of brackets |
|
235 break |
|
236 |
|
237 else: |
|
238 # We've found an operator or something. |
|
239 break |
|
240 |
|
241 return rawtext[last_identifier_pos:self.indexinrawtext] |