|
1 /**************************************************************************** |
|
2 ** |
|
3 ** Copyright (C) 2001-2004 Roberto Raggi |
|
4 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
5 ** All rights reserved. |
|
6 ** Contact: Nokia Corporation (qt-info@nokia.com) |
|
7 ** |
|
8 ** This file is part of the qt3to4 porting application of the Qt Toolkit. |
|
9 ** |
|
10 ** $QT_BEGIN_LICENSE:LGPL$ |
|
11 ** No Commercial Usage |
|
12 ** This file contains pre-release code and may not be distributed. |
|
13 ** You may use this file in accordance with the terms and conditions |
|
14 ** contained in the Technology Preview License Agreement accompanying |
|
15 ** this package. |
|
16 ** |
|
17 ** GNU Lesser General Public License Usage |
|
18 ** Alternatively, this file may be used under the terms of the GNU Lesser |
|
19 ** General Public License version 2.1 as published by the Free Software |
|
20 ** Foundation and appearing in the file LICENSE.LGPL included in the |
|
21 ** packaging of this file. Please review the following information to |
|
22 ** ensure the GNU Lesser General Public License version 2.1 requirements |
|
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
24 ** |
|
25 ** In addition, as a special exception, Nokia gives you certain additional |
|
26 ** rights. These rights are described in the Nokia Qt LGPL Exception |
|
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
28 ** |
|
29 ** If you have questions regarding the use of this file, please contact |
|
30 ** Nokia at qt-info@nokia.com. |
|
31 ** |
|
32 ** |
|
33 ** |
|
34 ** |
|
35 ** |
|
36 ** |
|
37 ** |
|
38 ** |
|
39 ** $QT_END_LICENSE$ |
|
40 ** |
|
41 ****************************************************************************/ |
|
42 |
|
43 #include "rpplexer.h" |
|
44 #include <QChar> |
|
45 #include <ctype.h> |
|
46 |
|
47 QT_BEGIN_NAMESPACE |
|
48 |
|
49 using namespace TokenEngine; |
|
50 |
|
51 namespace Rpp { |
|
52 RppLexer::RppLexer() |
|
53 : m_buffer(0), m_ptr(0), m_len(0) |
|
54 { |
|
55 setupScanTable(); |
|
56 } |
|
57 |
|
58 void RppLexer::setupScanTable() |
|
59 { |
|
60 memset(s_attr_table, 0, 256); |
|
61 |
|
62 for (int i=0; i<128; ++i) { |
|
63 switch (i) { |
|
64 case ':': |
|
65 case '*': |
|
66 case '%': |
|
67 case '^': |
|
68 case '=': |
|
69 case '!': |
|
70 case '&': |
|
71 case '|': |
|
72 case '+': |
|
73 case '<': |
|
74 case '>': |
|
75 case '-': |
|
76 case '.': |
|
77 s_scan_table[i] = &RppLexer::scanOperator; |
|
78 break; |
|
79 |
|
80 case '\r': |
|
81 case '\n': |
|
82 s_scan_table[i] = &RppLexer::scanNewline; |
|
83 break; |
|
84 |
|
85 case '\'': |
|
86 s_scan_table[i] = &RppLexer::scanCharLiteral; |
|
87 break; |
|
88 |
|
89 case '"': |
|
90 s_scan_table[i] = &RppLexer::scanStringLiteral; |
|
91 break; |
|
92 case '#': |
|
93 s_scan_table[i] = &RppLexer::scanPreprocessor; |
|
94 break; |
|
95 |
|
96 case '/': |
|
97 s_scan_table[i] = &RppLexer::scanComment; |
|
98 break; |
|
99 |
|
100 default: |
|
101 if (isspace(i)) { |
|
102 s_scan_table[i] = &RppLexer::scanWhiteSpaces; |
|
103 s_attr_table[i] |= A_Whitespace; |
|
104 } else if (isalpha(i) || i == '_') { |
|
105 s_scan_table[i] = &RppLexer::scanKeyword; |
|
106 s_attr_table[i] |= A_Alpha; |
|
107 } else if (isdigit(i)) { |
|
108 s_scan_table[i] = &RppLexer::scanNumberLiteral; |
|
109 s_attr_table[i] |= A_Digit; |
|
110 } else |
|
111 s_scan_table[i] = &RppLexer::scanChar; |
|
112 } |
|
113 } |
|
114 |
|
115 s_scan_table[128] = &RppLexer::scanUnicodeChar; |
|
116 } |
|
117 |
|
118 QVector<Type> RppLexer::lex(const TokenContainer &tokenContainer) |
|
119 { |
|
120 QVector<Type> tokenTypes; |
|
121 const int numTokens = tokenContainer.count(); |
|
122 tokenTypes.reserve(numTokens); |
|
123 QByteArray text = tokenContainer.fullText(); |
|
124 m_buffer = text.constData(); |
|
125 for(int t=0; t<numTokens; ++t) { |
|
126 TokenEngine::Token token = tokenContainer.token(t); |
|
127 tokenTypes.append(indentify(token.start, token.length)); |
|
128 } |
|
129 return tokenTypes; |
|
130 } |
|
131 |
|
132 Type RppLexer::indentify(int pos, int length) |
|
133 { |
|
134 Q_ASSERT(length > 0); |
|
135 m_ptr = pos; |
|
136 m_len = length; |
|
137 int kind = 0; |
|
138 const unsigned char ch = m_buffer[pos]; |
|
139 (this->*s_scan_table[ch < 128 ? ch : 128])(&kind); |
|
140 return (Type)kind; |
|
141 } |
|
142 |
|
143 void RppLexer::scanChar(int *kind) |
|
144 { |
|
145 *kind = m_buffer[m_ptr]; |
|
146 } |
|
147 |
|
148 void RppLexer::scanWhiteSpaces(int *kind) |
|
149 { |
|
150 *kind = Token_whitespaces; |
|
151 |
|
152 while (unsigned char ch = m_buffer[m_ptr]) { |
|
153 if (s_attr_table[ch] & A_Whitespace) |
|
154 ++m_ptr; |
|
155 else |
|
156 break; |
|
157 } |
|
158 } |
|
159 |
|
160 void RppLexer::scanNewline(int *kind) |
|
161 { |
|
162 *kind = '\n'; |
|
163 } |
|
164 |
|
165 void RppLexer::scanUnicodeChar(int *kind) |
|
166 { |
|
167 *kind = m_buffer[m_ptr]; |
|
168 } |
|
169 |
|
170 void RppLexer::scanCharLiteral(int *kind) |
|
171 { |
|
172 *kind = Token_char_literal; |
|
173 } |
|
174 |
|
175 void RppLexer::scanStringLiteral(int *kind) |
|
176 { |
|
177 *kind = Token_string_literal; |
|
178 } |
|
179 |
|
180 void RppLexer::scanIdentifier(int *kind) |
|
181 { |
|
182 *kind = Token_identifier; |
|
183 } |
|
184 |
|
185 void RppLexer::scanNumberLiteral(int *kind) |
|
186 { |
|
187 *kind = Token_number_literal; |
|
188 } |
|
189 |
|
190 void RppLexer::scanPreprocessor(int *kind) |
|
191 { |
|
192 *kind = Token_preproc; |
|
193 } |
|
194 |
|
195 void RppLexer::scanComment(int *kind) |
|
196 { |
|
197 switch(m_buffer[m_ptr + 1]) { |
|
198 case '/': |
|
199 *kind = Token_line_comment; |
|
200 break; |
|
201 case '*': |
|
202 *kind = Token_multiline_comment; |
|
203 break; |
|
204 default: |
|
205 scanOperator(kind); |
|
206 } |
|
207 } |
|
208 |
|
209 void RppLexer::scanOperator(int *kind) |
|
210 { |
|
211 switch (m_buffer[m_ptr]) { |
|
212 case ':': |
|
213 if (m_buffer[m_ptr+1] == ':') { |
|
214 *kind = Token_scope; |
|
215 return; |
|
216 } |
|
217 break; |
|
218 |
|
219 case '*': |
|
220 case '/': |
|
221 case '%': |
|
222 case '^': |
|
223 if (m_buffer[m_ptr+1] == '=') { |
|
224 *kind = Token_assign; |
|
225 return; |
|
226 } |
|
227 break; |
|
228 |
|
229 case '=': |
|
230 if (m_buffer[m_ptr+1] == '=') { |
|
231 *kind = Token_eq; |
|
232 return; |
|
233 } |
|
234 break; |
|
235 case '!': |
|
236 if (m_buffer[m_ptr+1] == '=') { |
|
237 *kind = Token_not_eq; |
|
238 return; |
|
239 } |
|
240 break; |
|
241 |
|
242 case '&': |
|
243 if (m_buffer[m_ptr+1] == '&') { |
|
244 *kind = Token_and; |
|
245 return; |
|
246 } else if (m_buffer[m_ptr+1] == '=') { |
|
247 *kind = Token_assign; |
|
248 return; |
|
249 } |
|
250 break; |
|
251 |
|
252 case '|': |
|
253 if (m_buffer[m_ptr+1] == '|' ) { |
|
254 *kind = Token_or; |
|
255 return; |
|
256 } else if (m_buffer[m_ptr+1] == '=') { |
|
257 *kind = Token_assign; |
|
258 return; |
|
259 } |
|
260 break; |
|
261 |
|
262 case '+': |
|
263 if (m_buffer[m_ptr+1] == '+' ) { |
|
264 *kind = Token_incr; |
|
265 return; |
|
266 } else if (m_buffer[m_ptr+1] == '=') { |
|
267 *kind = Token_assign; |
|
268 return; |
|
269 } |
|
270 break; |
|
271 |
|
272 case '<': |
|
273 if (m_buffer[m_ptr+1] == '<') { |
|
274 if (m_buffer[m_ptr+2] == '=') { |
|
275 *kind = Token_assign; |
|
276 return; |
|
277 } |
|
278 *kind = Token_left_shift; |
|
279 return; |
|
280 } else if (m_buffer[m_ptr+1] == '=') { |
|
281 *kind = Token_leq; |
|
282 return; |
|
283 } |
|
284 break; |
|
285 |
|
286 case '>': |
|
287 if (m_buffer[m_ptr+1] == '>') { |
|
288 if (m_buffer[m_ptr+2] == '=') { |
|
289 *kind = Token_assign; |
|
290 return; |
|
291 } |
|
292 *kind = Token_right_shift; |
|
293 return; |
|
294 } else if (m_buffer[m_ptr+1] == '=') { |
|
295 *kind = Token_geq; |
|
296 return; |
|
297 } |
|
298 break; |
|
299 |
|
300 case '-': |
|
301 if (m_buffer[m_ptr+1] == '>') { |
|
302 if (m_buffer[m_ptr+2] == '*') { |
|
303 *kind = Token_ptrmem; |
|
304 return; |
|
305 } |
|
306 *kind = Token_arrow; |
|
307 return; |
|
308 } else if (m_buffer[m_ptr+1] == '-') { |
|
309 *kind = Token_decr; |
|
310 return; |
|
311 } else if (m_buffer[m_ptr+1] == '=') { |
|
312 *kind = Token_assign; |
|
313 return; |
|
314 } |
|
315 break; |
|
316 |
|
317 case '.': |
|
318 if (m_buffer[m_ptr+1] == '.' && m_buffer[m_ptr+2] == '.') { |
|
319 *kind = Token_ellipsis; |
|
320 return; |
|
321 } else if (m_buffer[m_ptr+1] == '*') { |
|
322 *kind = Token_ptrmem; |
|
323 return; |
|
324 } |
|
325 break; |
|
326 |
|
327 } |
|
328 |
|
329 *kind = m_buffer[m_ptr++]; |
|
330 } |
|
331 |
|
332 bool RppLexer::match(const char *buf, int len) |
|
333 { |
|
334 if (m_len != len) |
|
335 return false; |
|
336 for (int i = 0; i < len; ++i) { |
|
337 if(m_buffer[m_ptr + i] != buf[i]) |
|
338 return false; |
|
339 } |
|
340 return true; |
|
341 } |
|
342 |
|
343 void RppLexer::scanKeyword(int *kind) |
|
344 { |
|
345 if(match("if", 2)) |
|
346 *kind = Token_directive_if; |
|
347 else if(match("elif", 4)) |
|
348 *kind = Token_directive_elif; |
|
349 else if(match("else", 4)) |
|
350 *kind = Token_directive_else; |
|
351 else if(match("line", 4)) |
|
352 *kind = Token_directive_line; |
|
353 else if(match("else", 4)) |
|
354 *kind = Token_directive_else; |
|
355 else if(match("line", 4)) |
|
356 *kind = Token_directive_line; |
|
357 else if(match("endif", 5)) |
|
358 *kind = Token_directive_endif; |
|
359 else if(match("ifdef", 5)) |
|
360 *kind = Token_directive_ifdef; |
|
361 else if(match("error", 5)) |
|
362 *kind = Token_directive_error; |
|
363 else if(match("undef", 5)) |
|
364 *kind = Token_directive_undef; |
|
365 else if(match("pragma", 6)) |
|
366 *kind = Token_directive_pragma; |
|
367 else if(match("ifndef", 6)) |
|
368 *kind = Token_directive_ifndef; |
|
369 else if(match("define", 6)) |
|
370 *kind = Token_directive_define; |
|
371 else if(match("include", 7)) |
|
372 *kind = Token_directive_include; |
|
373 else if(match("defined", 7)) |
|
374 *kind = Token_defined; |
|
375 else |
|
376 *kind = Token_identifier; |
|
377 } |
|
378 |
|
379 } //namespace Rpp |
|
380 |
|
381 QT_END_NAMESPACE |