|
1 # |
|
2 # Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 # All rights reserved. |
|
4 # This component and the accompanying materials are made available |
|
5 # under the terms of "Eclipse Public License v1.0" |
|
6 # which accompanies this distribution, and is available |
|
7 # at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 # |
|
9 # Initial Contributors: |
|
10 # Nokia Corporation - initial contribution. |
|
11 # |
|
12 # Contributors: |
|
13 # |
|
14 # Description: File image handling functions for CDS script |
|
15 # |
|
16 |
|
17 import sys, pprint |
|
18 from array import array |
|
19 |
|
# Module-wide switch for debug tracing; set to True to make dbg() print.
_DBG = False


def dbg(str):
    """Print the debug message *str* when the module flag _DBG is set.

    The parameter keeps its historical name (it shadows the builtin only
    inside this function) so that existing keyword callers keep working.
    """
    if _DBG:
        # Single-argument parenthesised form: identical output under
        # Python 2 and also valid syntax under Python 3.
        print(str)
|
24 |
|
# One-character type marks stored in a text image's mask, one per data
# character.
TT_UNKNOWN = '?'   # not classified yet
TT_COMMENT = '#'   # part of a // or /* */ comment
TT_STRING = '"'    # part of a double-quoted string literal
TT_CODE = '.'      # anything else
|
def translate_mark_for_print(mark):
    """Return a printable form of the single character *mark*.

    Newline, carriage return and tab are rendered as the two-character
    escapes ``\\n``, ``\\r`` and ``\\t``; every other value (including
    None) is returned unchanged.
    """
    printable = (("\n", "\\n"), ("\r", "\\r"), ("\t", "\\t"))
    for raw, shown in printable:
        if mark == raw:
            return shown
    return mark
|
34 |
|
35 |
|
class CCLineImage(object):
    """One line of text together with its per-character type mask.

    *data* and *mask* are parallel character sequences: ``mask[i]`` is the
    type mark (one of the TT_* constants) of ``data[i]``.  ``lineno``,
    ``startPos`` and ``endPos`` are stored exactly as given by the creator;
    ``datalen`` is the length of *data* at construction time.
    """

    def __init__(self, data, mask, lineno=0, startPos=-1, endPos=-1):
        self.__data = data
        self.__mask = mask
        self.lineno = lineno
        self.startPos = startPos
        self.endPos = endPos
        self.datalen = len(data)

    def data(self):
        """Return a copy of the raw line characters."""
        return self.__data[:]

    def code(self, useBlanks=True):
        """Return only the characters marked as code.

        With *useBlanks* true, every other character is replaced by a
        space so column positions are preserved; otherwise it is dropped.
        """
        return self._get_line((TT_CODE,), useBlanks)

    def comments(self, useBlanks=True):
        """Return only the characters marked as comment (see code())."""
        return self._get_line((TT_COMMENT,), useBlanks)

    def strings(self, useBlanks=True):
        """Return only the characters marked as string (see code())."""
        return self._get_line((TT_STRING,), useBlanks)

    def code_and_strings(self, useBlanks=True):
        """Return the characters marked as code or string (see code())."""
        return self._get_line((TT_STRING, TT_CODE), useBlanks)

    def text(self):
        """Return all code, string and comment characters of the line.

        Characters carrying any other mark (e.g. TT_UNKNOWN) are dropped.
        """
        return self._get_line((TT_STRING, TT_CODE, TT_COMMENT), False)

    def mask(self):
        """Return the type mask as a plain string."""
        # join() gives the same result as array.tostring() for a character
        # array and additionally works for str/list masks.
        return ''.join(self.__mask)

    def _get_line(self, typez, useBlanks):
        """Build the line text restricted to the mark types in *typez*.

        Data and mask are walked in lockstep up to the first newline.
        Characters whose mark is in *typez* are kept; the others become a
        blank when *useBlanks* is true and are skipped otherwise.
        """
        picked = []
        for ch, kind in zip(self.__data, self.__mask):
            if ch == '\n':
                break
            if kind in typez:
                picked.append(ch)
            elif useBlanks:
                picked.append(' ')
        return ''.join(picked)
|
76 |
|
77 |
|
class CCTextBlockImage(object):
    """Character image of a text block plus a per-character type mask.

    ``data`` holds the characters and ``datamask`` one TT_* mark per
    character.  load() fills ``data`` and marks everything TT_UNKNOWN;
    scan() then classifies every character as code, ``//`` or ``/* */``
    comment, or double-quoted string, and records the offset of every
    newline in ``lineFeeds``.  The position helpers (get_line_startpos,
    get_colno, get_lineno, ...) rely on ``lineFeeds`` and are therefore
    only meaningful after scan().
    """

    # Opening/closing pairs understood by find_matching_delimiter().
    delimitPairs = (('(', ')'), ('{', '}'), ('[', ']'), ('<', '>'))
    delimitChars = [e for s, e in delimitPairs] + [s for s, e in delimitPairs]
    endDelimiters = [e for s, e in delimitPairs]
    startDelimiters = [s for s, e in delimitPairs]

    def __init__(self):
        self.data = array('c')
        self.datamask = array('c')

        # Scanner cursor: previous / current / next character and offset.
        self.chPrev = None
        self.ch = None
        self.chNext = None
        self.pos = 0
        self.continueScan = True
        self.dataLen = 0
        # Offsets of the newlines found by scan(), in ascending order.
        self.lineFeeds = []
        self.lineFeedLen = 0

    def load(self, textblock):
        """Replace the image contents with the string *textblock*.

        Every character is marked TT_UNKNOWN and the line-feed table of
        any previously scanned text is discarded; call scan() afterwards.
        """
        self.data = array('c', textblock)
        self.datamask = array('c', TT_UNKNOWN * len(self.data))
        self.dataLen = len(textblock)
        # Offsets recorded for the previous text do not apply any more.
        self.lineFeeds = []
        self.lineFeedLen = 0

    def _dbg_chars(self):
        """Trace the scanner cursor through dbg() when _DBG is set."""
        if _DBG:
            dbg("pos=%d, len=%d, prev=%s, cur=%s, next=%s" % (self.pos, self.dataLen,
                translate_mark_for_print(self.chPrev),
                translate_mark_for_print(self.ch),
                translate_mark_for_print(self.chNext)))

    def _seek(self, amount, prevType=None, curType=None, nextType=None):
        """Mark characters around the cursor, then advance it by *amount*.

        *prevType*, *curType* and *nextType*, when given, are written to
        the mask at pos-1, pos and pos+1 before the cursor moves.  After
        the move ``chPrev``/``ch``/``chNext`` describe the new position
        (None where there is no neighbour).  Moving past the last
        character clamps ``pos`` to the last index and clears
        ``continueScan``; the cursor characters are left untouched then.
        """
        if not self.dataLen > 0:
            return
        if prevType:
            self.datamask[self.pos - 1] = prevType
        if curType:
            self.datamask[self.pos] = curType
        if nextType:
            self.datamask[self.pos + 1] = nextType

        oldPos = self.pos
        self.pos += amount
        assert (oldPos <= self.pos) and (amount <= 2)

        lastPos = self.dataLen - 1
        if self.pos > lastPos:
            self.pos = lastPos
            dbg("FINISHING SCAN")
            self.continueScan = False
            return
        if self.pos < 0:
            self.pos = 0

        self.ch = self.data[self.pos]
        if self.pos > 0:
            self.chPrev = self.data[self.pos - 1]
        else:
            self.chPrev = None
        # Always refresh chNext: a stale value left over from an earlier
        # scan would make scan() look past the end of one-character data.
        if self.pos < lastPos:
            self.chNext = self.data[self.pos + 1]
        else:
            self.chNext = None
        self._dbg_chars()

    def scan(self, bytePrintFmtStr='', bytePrintTreshold=20000):
        """Classify every character of the loaded text.

        Fills ``datamask`` with TT_CODE / TT_COMMENT / TT_STRING marks and
        rebuilds ``lineFeeds`` / ``lineFeedLen``.  When *bytePrintFmtStr*
        is non-empty it is printed, formatted with (position, total
        length, percent done), whenever the cursor position is a multiple
        of *bytePrintTreshold*.

        Only ``//``, ``/* */`` and double-quoted strings are recognised;
        there is no handling of single-quoted character literals.
        """
        # Start from a clean table so a repeated scan does not append the
        # same offsets a second time.
        self.lineFeeds = []
        self.lineFeedLen = 0
        if self.dataLen == 0:
            return

        SINGLELINE = 1; MULTILINE = 2; STRING = 3; CODE = 4
        self.pos = 0
        self.continueScan = True

        state = CODE
        self._seek(0)
        while self.continueScan:
            if bytePrintFmtStr and (self.pos > 1) and (self.pos % bytePrintTreshold == 0):
                print(bytePrintFmtStr % (self.pos, self.dataLen, (self.pos * 100) // self.dataLen))
            if state == CODE:
                if (self.ch == '/') and (self.chNext == '/'):
                    dbg("FOUND // -> ENTER SINGLELINE STATE")
                    self._seek(2, curType=TT_COMMENT, nextType=TT_COMMENT)
                    state = SINGLELINE
                elif (self.ch == '/') and (self.chNext == '*'):
                    dbg("FOUND /* -> ENTER MULTILINE STATE")
                    self._seek(2, curType=TT_COMMENT, nextType=TT_COMMENT)
                    state = MULTILINE
                elif self.ch == '"':
                    dbg('FOUND " --> ENTER STRING STATE')
                    self._seek(1, curType=TT_STRING)
                    state = STRING
                else:
                    if self.ch == '\n':
                        self.lineFeeds.append(self.pos)
                    self._seek(1, curType=TT_CODE)
            elif state == STRING:
                if (self.ch == '\\') and (self.chNext is not None):
                    # Escape sequence: the escaped character (a quote, a
                    # backslash or a line continuation) never ends the
                    # string, so consume both characters at once.
                    if self.chNext == '\n':
                        self.lineFeeds.append(self.pos + 1)
                    self._seek(2, curType=TT_STRING, nextType=TT_STRING)
                elif self.ch == '\n':
                    # Unescaped line end: treat the string as finished.
                    dbg("FOUND line-end on string --> ENTER CODE STATE")
                    self.lineFeeds.append(self.pos)
                    self._seek(1, curType=TT_STRING)
                    state = CODE
                elif self.ch == '"':
                    dbg('FOUND " on string --> ENTER CODE STATE')
                    self._seek(1, curType=TT_STRING)
                    state = CODE
                else:
                    self._seek(1, curType=TT_STRING)
            elif state == MULTILINE:
                if (self.ch == '*') and (self.chNext == '/'):
                    dbg("FOUND multiline end --> ENTER CODE STATE")
                    self._seek(2, curType=TT_COMMENT, nextType=TT_COMMENT)
                    state = CODE
                else:
                    if self.ch == '\n':
                        self.lineFeeds.append(self.pos)
                    self._seek(1, curType=TT_COMMENT)
            elif state == SINGLELINE:
                if self.ch == '\n':
                    dbg("FOUND line-end on singleline comment --> ENTER CODE STATE")
                    self.lineFeeds.append(self.pos)
                    self._seek(1, curType=TT_COMMENT)
                    state = CODE
                else:
                    self._seek(1, curType=TT_COMMENT)

        self.lineFeedLen = len(self.lineFeeds)

    def __str__(self):
        """Return the text with newline, CR and tab shown as escapes."""
        # A character array cannot hold the two-character escapes, so the
        # result is assembled with join() instead.
        return ''.join([translate_mark_for_print(mark) for mark in self.data])

    def _line_index(self, pos):
        """Return the zero-based index of the line containing *pos*.

        This is the number of recorded line feeds strictly before *pos*,
        so a newline character belongs to the line it terminates.
        """
        from bisect import bisect_left
        return bisect_left(self.lineFeeds, pos)

    def _line_start(self, lineIndex):
        """Return the offset of the first character of line *lineIndex*."""
        if lineIndex > 0:
            return self.lineFeeds[lineIndex - 1] + 1
        return 0

    def iter_lineimages(self, startPos=-1, forward=True, lineCount=-1):
        """Yield a CCLineImage for each line, starting at *startPos*.

        *startPos* -1 means the beginning of the data.  *lineCount* -1
        means no limit; otherwise at most that many newline-terminated
        lines are yielded.  Going forward, each image carries its
        one-based line number, the offset where its line starts (the
        true line start even when iteration begins mid-line) and, as
        endPos, the offset of its terminating newline (or one past the
        last character for an unterminated last line).

        NOTE(review): backward iteration (forward=False) keeps its
        historical behaviour: characters are collected in reverse order
        and line numbers count up from 1 -- confirm whether any caller
        relies on it before changing it.
        """
        datalen = len(self.data)
        pos = 0
        if startPos != -1:
            pos = startPos
        elif datalen == 0:
            # Nothing loaded and no explicit position: nothing to yield.
            return
        assert (pos >= 0) and (pos < datalen)

        linesLeft = float('inf')
        if lineCount != -1:
            assert lineCount > 0
            # Going forward the loop stops at the end of the data anyway,
            # so the requested count is the only limit needed.
            linesLeft = lineCount
            if not forward:
                linesLeft = min(lineCount, self.get_lineno(pos))
            assert linesLeft >= 0

        if forward:
            step = 1
            lineIndex = self._line_index(pos)
            lineNo = lineIndex + 1
            lineStartPos = self._line_start(lineIndex)
        else:
            step = -1
            lineNo = 1
            lineStartPos = 0

        curline = array('c')
        curmask = array('c')
        while (0 <= pos < datalen) and (linesLeft > 0):
            mark = self.data[pos]
            if mark == '\n':
                yield CCLineImage(curline, curmask, lineNo, lineStartPos, pos)
                linesLeft -= 1
                curline = array('c')
                curmask = array('c')
                lineStartPos = pos + 1
                lineNo += 1
            else:
                curline.append(mark)
                curmask.append(self.datamask[pos])
            pos += step
        if len(curline) > 0:
            yield CCLineImage(curline, curmask, lineNo, lineStartPos, pos)

    def get_line_startpos(self, pos):
        """Return the offset of the first character of the line at *pos*.

        A newline character counts as part of the line it terminates.
        Returns -1 when *pos* lies beyond the last character.
        """
        if pos > (self.dataLen - 1):
            return -1
        return self._line_start(self._line_index(pos))

    def get_linecount(self):
        """Return the number of recorded line feeds plus one."""
        return len(self.lineFeeds) + 1

    def get_lineno(self, pos):
        """Return the number of recorded line feeds at or before *pos*.

        For an ordinary character this is the zero-based index of its
        line; for a newline character it is one more than that.  Returns
        1 when no line feeds are recorded and -1 when *pos* lies beyond
        the last character.

        NOTE(review): the values are not a consistent line numbering, but
        they are kept exactly as they were because callers may have
        adapted to them.
        """
        if pos > (self.dataLen - 1):
            return -1
        if not self.lineFeeds:
            return 1
        from bisect import bisect_right
        return bisect_right(self.lineFeeds, pos)

    def get_colno(self, pos):
        """Return the zero-based column of *pos* within its line.

        Returns -1 when *pos* lies beyond the last character.
        """
        if pos > (len(self.data) - 1):
            return -1
        return pos - self._line_start(self._line_index(pos))

    def get_lineimages(self, startPos=-1, endPos=-1):
        """Return the CCLineImage list for offsets *startPos*..*endPos*.

        Both bounds are inclusive and clamped to the data; a negative
        *startPos* means the first and a negative *endPos* the last
        character.  Line numbers are one-based; each image's startPos is
        the offset where its line starts and its endPos the offset of the
        terminating newline (one past *endPos* for a trailing part).
        """
        lineImgs = []
        lastInd = len(self.data) - 1
        if lastInd < 0:
            return lineImgs
        if startPos < 0:
            startPos = 0
        if startPos > lastInd:
            startPos = lastInd
        if (endPos < 0) or (endPos > lastInd):
            endPos = lastInd

        lineIndex = self._line_index(startPos)
        lineNo = lineIndex + 1
        lineStartPos = self._line_start(lineIndex)
        curline = array('c')
        curmask = array('c')
        pos = startPos
        while pos <= endPos:
            if self.data[pos] == '\n':
                lineImgs.append(CCLineImage(curline, curmask, lineNo, lineStartPos, pos))
                curline = array('c')
                curmask = array('c')
                lineNo += 1
                lineStartPos = pos + 1
            else:
                curline.append(self.data[pos])
                curmask.append(self.datamask[pos])
            pos += 1
        if len(curline) > 0:
            lineImgs.append(CCLineImage(curline, curmask, lineNo, lineStartPos, pos))

        return lineImgs

    def find_matching_delimiter(self, pos):
        """Return the offset of the delimiter matching the one at *pos*.

        An opening delimiter is matched forwards, a closing one
        backwards, honouring nesting of the same pair.  Returns -1 when
        *pos* is out of range, does not hold a delimiter, or no match
        exists.  The type mask is not consulted, so delimiters inside
        comments and strings take part in the matching.
        """
        data = self.data
        datalen = len(data)
        if (pos < 0) or (pos >= datalen):
            return -1

        startChar = data[pos]
        if startChar not in self.delimitChars:
            return -1

        step = 1
        if startChar in self.endDelimiters:
            step = -1

        endChar = ''
        for s, e in self.delimitPairs:
            if startChar == s:
                endChar = e
            elif startChar == e:
                endChar = s

        depth = 0
        # Bounded on both sides so a backward search cannot wrap around
        # through negative indices.
        while 0 <= pos < datalen:
            mark = data[pos]
            if mark == startChar:
                depth += 1
            elif mark == endChar:
                depth -= 1
                if depth <= 0:
                    return pos
            pos += step
        return -1

    def get_surrounding_lineimages(self, linesFw, linesBw, pos):
        """Return line images around the line containing *pos*.

        Up to *linesBw* whole lines before the current line are included,
        followed by *linesFw* lines counted from the current line.  With
        *linesBw* zero the first image starts at *pos* itself.  Returns
        an empty list for out-of-range or negative arguments.
        """
        datalen = len(self.data)
        if (datalen < pos) or (pos < 0) or (linesFw < 0) or (linesBw < 0):
            return []

        start = pos
        if linesBw > 0:
            # Walk back over the rest of the current line and over up to
            # linesBw earlier lines; stop right after the newline that
            # precedes the first wanted line (or at the start of data).
            found = 0
            i = pos - 1
            while i >= 0:
                if self.data[i] == '\n':
                    if found == linesBw:
                        break
                    found += 1
                i -= 1
            start = i + 1
            linesFw += found

        if (start >= datalen) or (linesFw == 0):
            return []
        return list(self.iter_lineimages(start, forward=True, lineCount=linesFw))
|
368 |
|
class CCFileImage(CCTextBlockImage):
    """Text block image whose contents are read from a file."""

    def __init__(self):
        super(CCFileImage, self).__init__()

    def load(self, filename):
        """Read the file *filename* (text mode) and load its contents."""
        infile = open(filename, 'r')
        try:
            rawdata = infile.read()
        finally:
            # Close the handle explicitly instead of leaving it to the
            # garbage collector.
            infile.close()
        self.load_from_rawdata(rawdata)

    def load_from_rawdata(self, rawdata):
        """Load the already-read string *rawdata* as the image contents."""
        super(CCFileImage, self).load(rawdata)
|
379 |
|
if __name__ == "__main__":
    # Command-line smoke test: scan the file named by the first argument
    # and print, for every line, its type mask followed by its text.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <sourcefile>\n" % sys.argv[0])
        sys.exit(1)
    f = CCFileImage()
    f.load(sys.argv[1])
    f.scan()
    for img in f.iter_lineimages():
        print(img.mask())
        print(img.text())
        print('')