|
1 // Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // |
|
15 |
|
16 #include <ctype.h> |
|
17 #include <memory.h> |
|
18 #include <string.h> |
|
19 |
|
20 #include "messagedefparser\definitiontokenizer.h" |
|
21 |
|
22 namespace Tokens |
|
23 { |
|
24 |
|
25 struct TTokenTypeText |
|
26 { |
|
27 const char* iText; |
|
28 TTokenType iType; |
|
29 }; |
|
30 |
|
31 |
|
32 static const TTokenTypeText keywords[] = |
|
33 { |
|
34 { "enum", EEnum }, |
|
35 { "message", EMessage }, |
|
36 { "struct", EStruct }, |
|
37 { "signature", ESignature }, |
|
38 { "context", EContext }, |
|
39 { "end", EEnd }, |
|
40 { "const", EConst }, |
|
41 { "include", EInclude }, |
|
42 { "alias", EAlias }, |
|
43 |
|
44 { "decimal", EDisplayDec }, |
|
45 { "hex", EDisplayHex }, |
|
46 |
|
47 { "uint8", EIntType }, |
|
48 { "uint16", EIntType }, |
|
49 { "uint32", EIntType }, |
|
50 { "int8", EIntType }, |
|
51 { "int16", EIntType }, |
|
52 { "int32", EIntType }, |
|
53 { "tmessageid", EMessageIdType }, |
|
54 { "pad", EPadType }, |
|
55 |
|
56 { "typeid", ETypeId }, |
|
57 { "messageid", EMessageId }, |
|
58 |
|
59 { NULL, EUnknown } |
|
60 }; |
|
61 |
|
62 |
|
63 static const TTokenTypeText tokenTypes[] = |
|
64 { |
|
65 { "EColon", EColon }, |
|
66 { "EEquals", EEquals }, |
|
67 { "EInclude", EInclude }, |
|
68 { "EAlias", EAlias }, |
|
69 { "EConst", EConst }, |
|
70 { "EEnd", EEnd }, |
|
71 { "EEnum", EEnum }, |
|
72 { "EStruct", EStruct }, |
|
73 { "ESignature", ESignature }, |
|
74 { "EContext", EContext }, |
|
75 { "EMessage", EMessage }, |
|
76 { "EIdentifier", EIdentifier }, |
|
77 { "ENumberDec", ENumberDec }, |
|
78 { "ENumberHex", ENumberHex }, |
|
79 { "EIntType", EIntType }, |
|
80 { "EPadType", EPadType }, |
|
81 { "EString", EString }, |
|
82 { "ETypeId", ETypeId }, |
|
83 { "EMessageId", EMessageId }, |
|
84 { "EMessageIdType", EMessageIdType }, |
|
85 { "EUnknown", EUnknown } |
|
86 }; |
|
87 |
|
88 |
|
89 const char* TokenTypeToString(Tokens::TTokenType aType) |
|
90 { |
|
91 int i = 0; |
|
92 while (tokenTypes[i].iType != EUnknown) |
|
93 { |
|
94 if (aType == tokenTypes[i].iType) |
|
95 { |
|
96 break; |
|
97 } |
|
98 ++i; |
|
99 } |
|
100 return tokenTypes[i].iText; |
|
101 } |
|
102 |
|
103 |
|
104 CDefinitionTokenizer::CDefinitionTokenizer() |
|
105 { |
|
106 this->iToken = new char[KMaxTokenSize]; |
|
107 this->iInputBuffer = new char[KBufferSize]; |
|
108 this->iTokenOffset = 0; |
|
109 this->iLastBufferOffset = 0; |
|
110 this->iBufferValid = false; |
|
111 this->iBufferOffset = 0; |
|
112 this->iLine = 1; |
|
113 } |
|
114 |
|
115 |
|
116 CDefinitionTokenizer::~CDefinitionTokenizer() |
|
117 { |
|
118 iInputFile.close(); |
|
119 delete iToken; |
|
120 delete iInputBuffer; |
|
121 } |
|
122 |
|
123 |
|
124 Tokens::TResult CDefinitionTokenizer::LoadDefinitionFile(const std::string& aFilename) |
|
125 { |
|
126 iInputFile.open(aFilename.c_str(), std::ios::in); |
|
127 iLine = 1; |
|
128 if (iInputFile.is_open()) |
|
129 { |
|
130 RefillBuffer(); |
|
131 iError = ENoError; |
|
132 } |
|
133 else |
|
134 { |
|
135 iError = EFileNotFound; |
|
136 } |
|
137 |
|
138 return iError; |
|
139 } |
|
140 |
|
141 |
|
142 Tokens::TResult CDefinitionTokenizer::GetNextToken() |
|
143 { |
|
144 iTokenOffset = 0; |
|
145 iLastBufferOffset = iBufferOffset; |
|
146 iTokenType = EUnknown; |
|
147 // TTokenState state = EStateStartToken; |
|
148 iState = EStateStartToken; |
|
149 iError = EUnexpectedToken; |
|
150 |
|
151 while (iBufferValid) |
|
152 { |
|
153 // start to collect the token |
|
154 while (iBufferOffset < iBufferSize) |
|
155 { |
|
156 if (iTokenOffset == KMaxTokenSize) |
|
157 { |
|
158 iError = ETokenTooBig; |
|
159 return ETokenTooBig; |
|
160 } |
|
161 |
|
162 iToken[iTokenOffset] = iInputBuffer[iBufferOffset]; |
|
163 if (iToken[iTokenOffset] == '\r') |
|
164 { |
|
165 ++iBufferOffset; |
|
166 continue; |
|
167 } |
|
168 |
|
169 switch (iState) |
|
170 { |
|
171 case EStateStartToken: |
|
172 iState = ProcessStateStartToken(); |
|
173 break; |
|
174 |
|
175 case EStateMaybeComment: |
|
176 iState = ProcessStateMaybeComment(); |
|
177 break; |
|
178 |
|
179 case EStateMultiLineComment: |
|
180 case EStateMaybeEndMultiLineComment: |
|
181 iState = ProcessStateMultiLineComment(); |
|
182 break; |
|
183 |
|
184 case EStateComment: |
|
185 iState = ProcessStateComment(); |
|
186 break; |
|
187 |
|
188 case EStateNumber: |
|
189 iState = ProcessStateNumber(); |
|
190 break; |
|
191 |
|
192 case EStateDecimalNumber: |
|
193 iState = ProcessStateDecimalNumber(); |
|
194 break; |
|
195 |
|
196 case EStateMaybeHexNumber: |
|
197 iState = ProcessStateMaybeHexNumber(); |
|
198 break; |
|
199 |
|
200 case EStateHexNumber: |
|
201 iState = ProcessStateHexNumber(); |
|
202 break; |
|
203 |
|
204 case EStateIdentifier: |
|
205 iState = ProcessStateIdentifier(); |
|
206 break; |
|
207 |
|
208 case EStateMaybeString: |
|
209 iState = ProcessStateMaybeString(); |
|
210 break; |
|
211 |
|
212 case EStateMaybeNegativeNumber: |
|
213 iState = ProcessStateMaybeNegativeNumber(); |
|
214 break; |
|
215 |
|
216 default: |
|
217 iState = EStateError; |
|
218 iError = EUnknownState; |
|
219 break; |
|
220 } |
|
221 |
|
222 |
|
223 // New state processing |
|
224 switch (iState) |
|
225 { |
|
226 case EStateError: |
|
227 iTokenType = EUnknown; |
|
228 iToken[++iTokenOffset] = 0; // include the invalid character in the token |
|
229 iBufferOffset++; |
|
230 return iError; |
|
231 break; |
|
232 |
|
233 case EStateStartToken: |
|
234 iTokenOffset = 0; |
|
235 iLastBufferOffset = iBufferOffset; |
|
236 iBufferOffset++; |
|
237 break; |
|
238 |
|
239 case EStateComplete: |
|
240 // TODO: clear error |
|
241 iLastBufferOffset = iBufferOffset; |
|
242 iToken[iTokenOffset] = 0; |
|
243 if (iTokenType == EIdentifier) |
|
244 { |
|
245 ExamineIdentifierForKeyword(); |
|
246 } |
|
247 iError = ETokenFound; |
|
248 return ETokenFound; |
|
249 //break; |
|
250 |
|
251 case EStateComment: |
|
252 case EStateMultiLineComment: |
|
253 case EStateMaybeEndMultiLineComment: |
|
254 iTokenOffset = 0; |
|
255 iLastBufferOffset = iBufferOffset; |
|
256 iBufferOffset++; |
|
257 break; |
|
258 |
|
259 default: |
|
260 ++iBufferOffset; |
|
261 ++iTokenOffset; |
|
262 break; |
|
263 } |
|
264 } |
|
265 |
|
266 RefillBuffer(); |
|
267 } |
|
268 |
|
269 iToken[iTokenOffset] = 0; |
|
270 if (iTokenOffset == 0) |
|
271 { |
|
272 iError = EEndOfFile; |
|
273 } |
|
274 else |
|
275 { |
|
276 if (iTokenType == EIdentifier) |
|
277 { |
|
278 ExamineIdentifierForKeyword(); |
|
279 } |
|
280 iError = ETokenFound; |
|
281 } |
|
282 |
|
283 return iError; |
|
284 } |
|
285 |
|
286 |
|
287 void CDefinitionTokenizer::ExamineIdentifierForKeyword() |
|
288 { |
|
289 int i = 0; |
|
290 while (keywords[i].iText != NULL) |
|
291 { |
|
292 if (!_strcmpi(keywords[i].iText, iToken)) |
|
293 { |
|
294 iTokenType = keywords[i].iType; |
|
295 break; |
|
296 } |
|
297 ++i; |
|
298 } |
|
299 } |
|
300 |
|
301 |
|
302 void CDefinitionTokenizer::RefillBuffer() |
|
303 { |
|
304 int bufferSpace = KBufferSize; |
|
305 int bufferInUse = 0; |
|
306 |
|
307 if (iLastBufferOffset) |
|
308 { |
|
309 bufferInUse = iBufferSize - iLastBufferOffset; |
|
310 bufferSpace = KBufferSize - bufferInUse; |
|
311 memcpy(iInputBuffer, &iInputBuffer[iLastBufferOffset], bufferInUse); |
|
312 } |
|
313 |
|
314 memset(&iInputBuffer[bufferInUse], 0, bufferSpace); |
|
315 iInputFile.read(&iInputBuffer[bufferInUse], bufferSpace); |
|
316 iBufferSize = iInputFile.gcount(); |
|
317 iBufferValid = (iBufferSize > 0); |
|
318 iBufferSize += bufferInUse; |
|
319 |
|
320 iBufferOffset = bufferInUse; |
|
321 iLastBufferOffset = 0; |
|
322 } |
|
323 |
|
324 |
|
325 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateStartToken() |
|
326 { |
|
327 TTokenState nextState = EStateError; |
|
328 iTokenType = EUnknown; |
|
329 |
|
330 switch (iToken[0]) |
|
331 { |
|
332 // single char tokens |
|
333 case '=': |
|
334 iToken[++iTokenOffset] = 0; |
|
335 ++iBufferOffset; |
|
336 nextState = EStateComplete; |
|
337 iTokenType = EEquals; |
|
338 break; |
|
339 |
|
340 case ':': |
|
341 iToken[++iTokenOffset] = 0; |
|
342 ++iBufferOffset; |
|
343 nextState = EStateComplete; |
|
344 iTokenType = EColon; |
|
345 break; |
|
346 |
|
347 case ' ': |
|
348 case '\t': |
|
349 // consume leading whitespace |
|
350 nextState = EStateStartToken; |
|
351 break; |
|
352 |
|
353 case '\n': |
|
354 ++iLine; |
|
355 nextState = EStateStartToken; |
|
356 break; |
|
357 |
|
358 case '\"': |
|
359 nextState = EStateMaybeString; |
|
360 --iTokenOffset; // don't include the quotes in the token |
|
361 break; |
|
362 |
|
363 case '/': |
|
364 nextState = EStateMaybeComment; |
|
365 break; |
|
366 |
|
367 case '-': |
|
368 nextState = EStateMaybeNegativeNumber; |
|
369 break; |
|
370 |
|
371 case '0': |
|
372 nextState = EStateNumber; |
|
373 iTokenType = ENumberDec; |
|
374 break; |
|
375 |
|
376 default: |
|
377 if (iToken[iTokenOffset] >= '1' && iToken[iTokenOffset] <= '9') |
|
378 { |
|
379 nextState = EStateDecimalNumber; |
|
380 iTokenType = ENumberDec; |
|
381 } |
|
382 else if (tolower(iToken[iTokenOffset]) >= 'a'&& tolower(iToken[iTokenOffset]) <= 'z') |
|
383 { |
|
384 nextState = EStateIdentifier; |
|
385 iTokenType = EIdentifier; |
|
386 } |
|
387 break; |
|
388 } |
|
389 |
|
390 return nextState; |
|
391 } |
|
392 |
|
393 |
|
394 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeNegativeNumber() |
|
395 { |
|
396 TTokenState nextState = EStateError; |
|
397 |
|
398 switch (iToken[iTokenOffset]) |
|
399 { |
|
400 case '0': |
|
401 nextState = EStateNumber; |
|
402 iTokenType = ENumberDec; |
|
403 break; |
|
404 |
|
405 default: |
|
406 if (iToken[iTokenOffset] >= '1' && iToken[iTokenOffset] <= '9') |
|
407 { |
|
408 nextState = EStateDecimalNumber; |
|
409 iTokenType = ENumberDec; |
|
410 } |
|
411 break; |
|
412 } |
|
413 |
|
414 return nextState; |
|
415 } |
|
416 |
|
417 |
|
418 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeComment() |
|
419 { |
|
420 TTokenState nextState = EStateError; |
|
421 |
|
422 if (iToken[iTokenOffset] == '/') |
|
423 { |
|
424 nextState = EStateComment; |
|
425 } |
|
426 else if (iToken[iTokenOffset] == '*') |
|
427 { |
|
428 nextState = EStateMultiLineComment; |
|
429 } |
|
430 |
|
431 return nextState; |
|
432 } |
|
433 |
|
434 |
|
435 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateComment() |
|
436 { |
|
437 TTokenState nextState = EStateComment; |
|
438 |
|
439 if (iToken[iTokenOffset] == '\n') |
|
440 { |
|
441 --iBufferOffset; // because the behaviour of moving to EStateStartToken is |
|
442 // to progress to the next byte - we want to process the '\n' |
|
443 nextState = EStateStartToken; |
|
444 iTokenType = EUnknown; |
|
445 } |
|
446 |
|
447 return nextState; |
|
448 } |
|
449 |
|
450 |
|
451 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMultiLineComment() |
|
452 { |
|
453 TTokenState nextState = EStateMultiLineComment; |
|
454 |
|
455 if (iState == EStateMultiLineComment) |
|
456 { |
|
457 if (iToken[iTokenOffset] == '*') |
|
458 { |
|
459 // Started multi line comment |
|
460 nextState = EStateMaybeEndMultiLineComment; |
|
461 } |
|
462 } |
|
463 else if (iState == EStateMaybeEndMultiLineComment) |
|
464 { |
|
465 if (iToken[iTokenOffset] == '/') |
|
466 { |
|
467 nextState = EStateStartToken; |
|
468 iTokenType = EUnknown; |
|
469 } |
|
470 } |
|
471 |
|
472 return nextState; |
|
473 } |
|
474 |
|
475 |
|
476 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateNumber() |
|
477 { |
|
478 TTokenState nextState = EStateError; |
|
479 |
|
480 if (IsTerminalChar()) |
|
481 { |
|
482 nextState = EStateComplete; |
|
483 // iTokenType = ENumberDec; |
|
484 } |
|
485 else |
|
486 { |
|
487 if (tolower(iToken[iTokenOffset]) == 'x') |
|
488 { |
|
489 nextState = EStateMaybeHexNumber; |
|
490 iTokenType = EUnknown; |
|
491 } |
|
492 else if (iToken[iTokenOffset] >= '0' && iToken[iTokenOffset] <= '9' ) |
|
493 { |
|
494 nextState = EStateDecimalNumber; |
|
495 // iTokenType = ENumberDec; |
|
496 } |
|
497 } |
|
498 |
|
499 return nextState; |
|
500 } |
|
501 |
|
502 |
|
503 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateDecimalNumber() |
|
504 { |
|
505 TTokenState nextState = EStateError; |
|
506 |
|
507 if (IsTerminalChar()) |
|
508 { |
|
509 nextState = EStateComplete; |
|
510 // iTokenType = ENumberDec; |
|
511 } |
|
512 else |
|
513 { |
|
514 if (iToken[iTokenOffset] >= '0' && iToken[iTokenOffset] <= '9') |
|
515 { |
|
516 nextState = EStateDecimalNumber; |
|
517 // iTokenType = ENumberDec; |
|
518 } |
|
519 } |
|
520 return nextState; |
|
521 } |
|
522 |
|
523 |
|
524 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeHexNumber() |
|
525 { |
|
526 TTokenState nextState = EStateError; |
|
527 |
|
528 char c = tolower(iToken[iTokenOffset]); |
|
529 if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')) |
|
530 { |
|
531 nextState = EStateHexNumber; |
|
532 iTokenType = ENumberHex; |
|
533 } |
|
534 |
|
535 return nextState; |
|
536 } |
|
537 |
|
538 |
|
539 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateHexNumber() |
|
540 { |
|
541 TTokenState nextState = EStateError; |
|
542 |
|
543 if (IsTerminalChar()) |
|
544 { |
|
545 nextState = EStateComplete; |
|
546 // iTokenType = ENumberHex; |
|
547 } |
|
548 else |
|
549 { |
|
550 char c = tolower(iToken[iTokenOffset]); |
|
551 if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')) |
|
552 { |
|
553 nextState = EStateHexNumber; |
|
554 // iTokenType = ENumberHex; |
|
555 } |
|
556 } |
|
557 return nextState; |
|
558 } |
|
559 |
|
560 |
|
561 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateIdentifier() |
|
562 { |
|
563 TTokenState nextState = EStateError; |
|
564 |
|
565 if (IsTerminalChar()) |
|
566 { |
|
567 nextState = EStateComplete; |
|
568 // iTokenType = EIdentifier; |
|
569 } |
|
570 else |
|
571 { |
|
572 char c = tolower(iToken[iTokenOffset]); |
|
573 if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c == '_')) |
|
574 { |
|
575 nextState = EStateIdentifier; |
|
576 // iTokenType = EIdentifier; |
|
577 } |
|
578 } |
|
579 return nextState; |
|
580 } |
|
581 |
|
582 |
|
583 CDefinitionTokenizer::TTokenState CDefinitionTokenizer::ProcessStateMaybeString() |
|
584 { |
|
585 TTokenState nextState = EStateMaybeString; |
|
586 |
|
587 if (iToken[iTokenOffset] == '\"') |
|
588 { |
|
589 nextState = EStateComplete; |
|
590 ++iBufferOffset; // don't want to process the quote again |
|
591 iTokenType = EString; |
|
592 } |
|
593 else if (iToken[iTokenOffset] == '\n') |
|
594 { |
|
595 nextState = EStateError; |
|
596 --iTokenOffset; // don't include the \n in the bad token |
|
597 iError = EUnterminatedString; |
|
598 // iTokenType = EUnknown; |
|
599 } |
|
600 |
|
601 return nextState; |
|
602 } |
|
603 |
|
604 |
|
605 bool CDefinitionTokenizer::IsTerminalChar() |
|
606 { |
|
607 char c = iToken[iTokenOffset]; |
|
608 if (c == ' ' || c == '\t' || c == '\n' || c == ':' || c == '=' || c == '/') |
|
609 { |
|
610 return true; |
|
611 } |
|
612 return false; |
|
613 } |
|
614 |
|
615 } // namespace Tokens |
|
616 |