|
1 /* |
|
2 * Copyright (c) 2004-2008 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 using System; |
|
19 using System.Text; |
|
20 using System.Threading; |
|
21 using System.Collections; |
|
22 using SymBuildParsingLib.Token; |
|
23 using SymBuildParsingLib.Lexer; |
|
24 |
|
25 namespace SymBuildParsingLib.Grouper |
|
26 { |
|
27 public class SymGrouperMastermind |
|
28 { |
|
29 #region Enumerations |
|
/// <summary>
/// Notifications delivered to <see cref="MastermindObserver"/> callbacks.
/// </summary>
public enum TEvent
{
    /// <summary>
    /// Sent once for every token handed out when the grouping cache is flushed.
    /// </summary>
    EEventGroupTokenReady = 0
}
|
34 #endregion |
|
35 |
|
36 #region Observer interface |
|
/// <summary>
/// Signature of the callbacks that receive grouped tokens.
/// </summary>
/// <param name="aEvent">The kind of notification being delivered.</param>
/// <param name="aGroupedToken">The token the notification refers to.</param>
public delegate void MastermindObserver(TEvent aEvent, SymToken aGroupedToken);
|
38 #endregion |
|
39 |
|
40 #region Events |
|
/// <summary>
/// Subscribers that are called with each token when the grouping cache is flushed.
/// </summary>
public event MastermindObserver MastermindObservers;
|
42 #endregion |
|
43 |
|
44 #region Constructors & destructor |
|
/// <summary>
/// Creates a grouper in its default state. All members are set up by their
/// field initialisers, so there is nothing to do here.
/// </summary>
public SymGrouperMastermind()
{
}
|
48 #endregion |
|
49 |
|
50 #region Internal enumerations |
|
// What to do with an incoming token once it has been compared against the
// token at the tail of the cache (see ProcessTokenDuringNormalOperations).
private enum TGroupingAction
{
    // Drop the token without touching the cache
    ETokenIgnore = -1,
    // Combine the token with the token at the tail of the cache
    ETokenMerge = 0,
    // Append the token to the cache as a separate entry
    ETokenEnqueue = 1,
    // Flush everything cached so far (the token itself was already enqueued)
    ETokenFlushQueue = 2
}
|
58 |
|
// Bit flags describing which special parsing state the grouper is in.
// No flag set means "normal" operation.
[Flags]
private enum TStateFlag
{
    EStateFlagUnspecified = 0,
    EStateFlagInQuotation = 1 << 0,
    EStateFlagInComment = 1 << 1,
    EStateFlagInPreProcessorDirective = 1 << 2
}
|
67 #endregion |
|
68 |
|
69 #region API |
|
/// <summary>
/// Drains the queue of lexed tokens, feeding each one through the grouping
/// state machine. Returns as soon as the queue yields no token.
/// </summary>
public void PerformGrouping()
{
    for ( SymToken pending = NextInputToken(); pending != null; pending = NextInputToken() )
    {
        ProcessToken( pending );
    }
}
|
80 |
|
/// <summary>
/// Adds a lexed token to the input queue that <see cref="PerformGrouping"/> consumes.
/// The queue is locked for the duration of the insertion.
/// </summary>
/// <param name="aToken">The token to queue for grouping.</param>
public void EnqueueLexedToken( SymToken aToken )
{
    lock ( iLexedTokens )
    {
        iLexedTokens.Enqueue( aToken );
    }
}
|
88 #endregion |
|
89 |
|
90 #region Internal token processors |
|
/// <summary>
/// Refines the token's class and type, then dispatches it to the handler for
/// the current state. Quotation takes precedence over comment, which takes
/// precedence over preprocessor directive; otherwise normal handling applies.
/// </summary>
/// <param name="aToken">The token taken from the input queue.</param>
private void ProcessToken( SymToken aToken )
{
    aToken.RefineTokenClass();
    aToken.RefineTokenType();

    if ( InQuotation )
    {
        ProcessTokenDuringQuotation( aToken );
        return;
    }

    if ( InComment )
    {
        ProcessTokenDuringComment( aToken );
        return;
    }

    if ( InPreProcessorDirective )
    {
        ProcessTokenDuringPreProcessorDirective( aToken );
        return;
    }

    ProcessTokenDuringNormalOperations( aToken );
}
|
113 |
|
/// <summary>
/// Handles a token while the grouper is not inside a quotation, comment or
/// preprocessor directive. Decides whether the token is enqueued on its own,
/// merged into the token at the tail of the cache, ignored, or whether the
/// cache should be flushed, and then carries out that decision.
/// </summary>
/// <param name="aToken">The token to process.</param>
private void ProcessTokenDuringNormalOperations( SymToken aToken )
{
    // Unless something below says otherwise the token is simply appended
    // to the cache (i.e. no combining/grouping).
    TGroupingAction decision = TGroupingAction.ETokenEnqueue;

    if ( iCache.Count == 0 )
    {
        // Starting a new batch. Quotations and preprocessor markers are
        // recognised while enqueuing, so only the blank line case is special:
        // it is pushed and flushed out straight away.
        if ( aToken.Class == SymToken.TClass.EClassNewLine )
        {
            EnqueueNewOutputToken( aToken );
            decision = TGroupingAction.ETokenFlushQueue;
        }
    }
    else
    {
        SymToken tail = PreviousOutputToken;
        SymToken.TClass tailClass = tail.Class;

        if ( aToken.Class == SymToken.TClass.EClassNewLine )
        {
            if ( tailClass == SymToken.TClass.EClassSymbol && tail.Value == @"\" )
            {
                // Line continuation: the backslash becomes a continuation
                // marker, the new line is discarded and nothing is flushed.
                tail.Class = SymToken.TClass.EClassContinuation;
                decision = TGroupingAction.ETokenIgnore;
            }
            else
            {
                // New lines are never combined. They signal that everything
                // cached so far must be flushed - after the new line itself
                // has been added.
                EnqueueNewOutputToken( aToken );
                decision = TGroupingAction.ETokenFlushQueue;
            }
        }
        else if ( tailClass == aToken.Class )
        {
            // Same class: merge unless either side forbids combining
            // (brackets, for example).
            bool bothCombinable = tail.CombiningAllowed && aToken.CombiningAllowed;
            decision = bothCombinable ? TGroupingAction.ETokenMerge : TGroupingAction.ETokenEnqueue;
        }
        else if ( tailClass == SymToken.TClass.EClassSymbol && tail.Value == @"\" )
        {
            // The tail is a lone backslash. Anything other than an asterisk
            // or another backslash is merged with it; those two are not
            // expected here and leave the token to be enqueued separately.
            bool mergeWithEscape = !( aToken.Value == "*" || aToken.Value == @"\" );
            System.Diagnostics.Debug.Assert( mergeWithEscape );
            if ( mergeWithEscape )
            {
                decision = TGroupingAction.ETokenMerge;
            }
        }
    }

    // Carry out the decision. ETokenIgnore (and anything unknown) does nothing.
    if ( decision == TGroupingAction.ETokenEnqueue )
    {
        EnqueueNewOutputToken( aToken );
    }
    else if ( decision == TGroupingAction.ETokenMerge )
    {
        MergeWithPreviousToken( aToken );
    }
    else if ( decision == TGroupingAction.ETokenFlushQueue )
    {
        FlushCache();
    }
}
|
233 |
|
/// <summary>
/// Handles a token that arrives while the grouper is inside a quotation.
/// Detects escaped quotation marks, the closing quotation mark, line
/// continuations and escaped backslashes; everything else is re-classed as
/// quotation content and cached.
/// </summary>
/// <param name="aToken">The token to process.</param>
private void ProcessTokenDuringQuotation( SymToken aToken )
{
    System.Diagnostics.Debug.Assert( iCache.Count > 0 );

    // Quotation examples
    //  1) ""
    //  2) "\""
    //  3) "\"\""
    //  4) ''
    //  5) '\''
    //  6) '\'\''
    //  7) "\'\'\'\"\""
    //  8) "abc def ghi"
    //
    //  9) #define WIBBLE " this is a test string \
    //                      This too" " - and this!"
    //
    // 10) #define WIBBLE2 " this is a test string \\ abc \
    //                      This too" " - and this!"
    //
    // 11) #pragma message("Quotation with brackets (;') and other \'nasty\' things! inside it__\\");

    if ( aToken.Class == SymToken.TClass.EClassQuotation )
    {
        // A quotation mark inside a quotation: either escaped, the closing
        // mark, or the "other" kind of mark (examples 2,3,5,6,7,10,11).
        SymToken tail = PreviousOutputToken;
        if ( tail.Class == SymToken.TClass.EClassSymbol && tail.Value == @"\" )
        {
            // Escaped \' or \" - glue it onto the backslash.
            tail.ForceCombine( aToken );
            System.Diagnostics.Debug.Assert( iCache.Count > 0 );
            return;
        }

        // A new cache run is always started when a quotation opens, so the
        // head of the cache is the opening mark and tells us which character
        // closes the quotation.
        SymToken opener = iCache.PeekHead;
        System.Diagnostics.Debug.Assert( opener.Value.Length == 1 );
        System.Diagnostics.Debug.Assert( opener.Class == SymToken.TClass.EClassQuotation );
        System.Diagnostics.Debug.Assert( opener.Type == SymToken.TType.ETypeQuotationDouble || opener.Type == SymToken.TType.ETypeQuotationSingle );

        // The quotation is complete when this mark matches the opener and
        // the cache currently holds an odd count for the opener (so that
        // adding this token balances the set). The count is taken before
        // the token is added.
        bool closesQuotation = ( opener.Value == aToken.Value ) && ( iCache.CountByType( opener ) % 2 == 1 );

        EnqueueNewOutputToken( aToken );

        if ( closesQuotation )
        {
            // Try to group the text between the marks into one logical
            // string. For "marker.h" the cache holds 5 tokens:
            //
            //   0 ["]  1 [marker]  2 [.]  3 [h]  4 ["]
            //
            // and the tokens at indices 1, 2 and 3 are merged into one.
            // There is no point unless there are more than 3 tokens.
            int cachedTokens = iCache.Count;
            if ( cachedTokens > 3 )
            {
                iCache.MergeAllTokensWithinRange( 1, cachedTokens - 1, false, true );
            }

            FlushCache();
        }
        return;
    }

    if ( aToken.Class == SymToken.TClass.EClassNewLine )
    {
        SymToken tail = PreviousOutputToken;
        if ( tail.Class == SymToken.TClass.EClassSymbol && tail.Value == @"\" )
        {
            // Continuation (examples 9 & 10): mark the backslash and
            // discard the new line.
            tail.Class = SymToken.TClass.EClassContinuation;
        }
        else
        {
            // Ordinary end of line inside a quotation. The content is
            // actually invalid, but the token is added and the cache
            // flushed, which also resets the state.
            EnqueueNewOutputToken( aToken );
            FlushCache();
        }
        return;
    }

    if ( aToken.Class == SymToken.TClass.EClassSymbol && aToken.Value == @"\" )
    {
        SymToken tail = PreviousOutputToken;
        if ( tail.Class == SymToken.TClass.EClassSymbol && tail.Value == @"\" )
        {
            // Example 10 - an escaped backslash. Join the two and treat
            // the result as quotation content.
            MergeWithPreviousToken( aToken );
            PreviousOutputToken.Class = SymToken.TClass.EClassQuotation;
        }
        else
        {
            // Keep it separate until the next character shows what it
            // escapes.
            EnqueueNewOutputToken( aToken );
        }
        return;
    }

    // Whatever class the token had, inside a quotation it is quotation content.
    aToken.Class = SymToken.TClass.EClassQuotation;
    EnqueueNewOutputToken( aToken );
}
|
409 |
|
/// <summary>
/// Handles a token that arrives while the grouper is inside a comment.
/// Keeps asterisks separate so that a closing "*/" can be recognised, ends a
/// full-line comment at a new line (unless continued with a backslash), ends
/// a block comment at "*/", and treats everything else as comment text.
/// </summary>
/// <param name="aToken">The token to process.</param>
private void ProcessTokenDuringComment( SymToken aToken )
{
    // Comment examples
    //   // this is a comment
    //   /* this is also a comment */
    //   // "This is another comment"
    //   // This is a comment with a continuation \
    //      and here's the rest.

    System.Diagnostics.Debug.Assert( iCache.Count > 0 );

    if ( aToken.Class == SymToken.TClass.EClassSymbol && aToken.Value == "*" )
    {
        // The asterisk is kept apart from the rest of the comment so that
        // the end of a block comment can be detected.
        EnqueueNewOutputToken( aToken );
    }
    else if ( aToken.Class == SymToken.TClass.EClassNewLine )
    {
        SymToken previousToken = PreviousOutputToken;
        if ( previousToken.Value == @"\" )
        {
            // Continuation: mark the backslash and discard the new line.
            previousToken.Class = SymToken.TClass.EClassContinuation;
        }
        else
        {
            // Read the head before enqueuing; it identifies the comment kind.
            SymToken firstToken = iCache.PeekHead;
            EnqueueNewOutputToken( aToken );

            // A full-line comment ends here (flushing resets the flags).
            // A block comment carries on until its closing token is seen.
            if ( firstToken.Type == SymToken.TType.ETypeCommentFullLine )
            {
                FlushCache();
            }
        }
    }
    else if ( aToken.Class == SymToken.TClass.EClassSymbol && aToken.Value == "/" )
    {
        bool closedBlockComment = false;
        SymToken previousToken = PreviousOutputToken;

        // "*" followed by "/" closes the comment, but only if the comment
        // was opened with "/*".
        if ( previousToken.Class == SymToken.TClass.EClassSymbol && previousToken.Value == "*" )
        {
            SymToken firstToken = iCache.PeekHead;
            if ( firstToken.Type == SymToken.TType.ETypeCommentBlock && firstToken.Value == "/*" )
            {
                // Join the "/" onto the cached asterisk to form the closing
                // "*/" block token.
                previousToken.Combine( aToken );
                previousToken.Class = SymToken.TClass.EClassComment;
                previousToken.Type = SymToken.TType.ETypeCommentBlock;

                // No longer in a comment
                InComment = false;
                closedBlockComment = true;
            }
        }

        if ( !closedBlockComment )
        {
            // An ordinary slash inside the comment (e.g. "// http://x" or
            // "/* a/b */"). It used to be dropped on the floor here; keep it
            // as a separate piece of comment text instead.
            aToken.Class = SymToken.TClass.EClassComment;
            EnqueueNewOutputToken( aToken );
        }
    }
    else if ( aToken.Class == SymToken.TClass.EClassSymbol && aToken.Value == @"\" )
    {
        // Possibly a continuation. It is stored as comment text for now; if
        // a new line follows, the new line branch above re-classes it.
        aToken.Class = SymToken.TClass.EClassComment;
        EnqueueNewOutputToken( aToken );
    }
    else
    {
        aToken.Class = SymToken.TClass.EClassComment;

        if ( PreviousOutputToken.Class == SymToken.TClass.EClassContinuation )
        {
            // Do not merge into a continuation marker; enqueue separately.
            EnqueueNewOutputToken( aToken );
        }
        else if ( iCache.Count == 1 )
        {
            // Do not merge into the opening comment token either, otherwise
            // the closing block comment check (which inspects the head of
            // the cache) would stop working.
            EnqueueNewOutputToken( aToken );
        }
        else
        {
            System.Diagnostics.Debug.Assert( PreviousOutputToken.CombiningAllowed );
            ForceMergeWithPreviousToken( aToken );
        }
    }
}
|
523 |
|
/// <summary>
/// Handles a token that arrives after a leading "#" and before the directive
/// name has been identified. The state is left as soon as the first
/// alphanumeric word is seen, or as soon as the line turns out not to be a
/// valid preprocessor directive.
/// </summary>
/// <param name="aToken">The token to process.</param>
private void ProcessTokenDuringPreProcessorDirective( SymToken aToken )
{
    // PreProcessor examples
    // 1) #_ pragma "This is invalid"
    //
    // 2) #\
    //    pragma message("hello")
    //
    // 3) #\
    //    define TEST
    //
    // 4) #     \
    //    define TEST
    //
    // 5) #     \\
    //    define INVALID_DEFINE
    //
    // 6) # pragma "This is a valid \
    //    pragma which contains a quotation"
    //
    // 7) #define LOG_FUNC XLeaveDetector __instrument; \
    //    TCleanupItem __cleanupItem(XLeaveDetector::LeaveOccurred, &__instrument); \
    //    CleanupStack::PushL(__cleanupItem);

    int tokensInCache = iCache.Count;
    System.Diagnostics.Debug.Assert( tokensInCache > 0 );
    System.Diagnostics.Debug.Assert( iCache.PeekHead.Class == SymToken.TClass.EClassPreProcessor && iCache.PeekHead.Value == "#" );

    bool directiveIsBroken = false;
    SymToken tail = PreviousOutputToken;

    if ( tail.Class == SymToken.TClass.EClassSymbol && tail.Value == @"\" )
    {
        // Case 5 first: after a possible continuation only a new line is
        // acceptable. The new line itself is not cached.
        if ( aToken.Class == SymToken.TClass.EClassNewLine )
        {
            tail.Class = SymToken.TClass.EClassContinuation;
        }
        else
        {
            directiveIsBroken = true;
        }
    }
    else if ( aToken.Class == SymToken.TClass.EClassAlphaNumeric && aToken.Type == SymToken.TType.ETypeAlphaNumericNormal )
    {
        // This is the directive name. Cache it as a preprocessor token and
        // go back to normal mode.
        aToken.Class = SymToken.TClass.EClassPreProcessor;
        EnqueueNewOutputToken( aToken );
        InPreProcessorDirective = false;
    }
    else if ( aToken.Class != SymToken.TClass.EClassWhiteSpace )
    {
        if ( aToken.Class == SymToken.TClass.EClassSymbol && aToken.Value == @"\" )
        {
            // Possibly a continuation before the directive name. That is
            // only valid if nothing but whitespace follows the leading "#".
            if ( iCache.CheckTokensAreOfClass( SymToken.TClass.EClassWhiteSpace, 1 ) )
            {
                // Whether it really is a continuation depends on the next token.
                EnqueueNewOutputToken( aToken );
            }
            else
            {
                // Non-whitespace has been seen. Not expected to be reachable.
                System.Diagnostics.Debug.Assert( false );
                directiveIsBroken = true;
            }
        }
        else
        {
            // Neither a word, whitespace nor a backslash.
            directiveIsBroken = true;
        }
    }
    // Whitespace is acceptable and leaves the mode unchanged; an alphanumeric
    // word is still required. NOTE(review): the whitespace token itself is
    // not cached here - confirm that discarding it is intended.

    if ( directiveIsBroken )
    {
        // Not a preprocessor directive after all: leave the state, demote
        // the leading "#" to an ordinary symbol and cache the offending token.
        InPreProcessorDirective = false;
        iCache.PeekHead.Class = SymToken.TClass.EClassSymbol;
        EnqueueNewOutputToken( aToken );
    }
}
|
633 #endregion |
|
634 |
|
635 #region Internal cache manipulation methods |
|
/// <summary>
/// Reports every cached token to the subscribed observers with
/// <see cref="TEvent.EEventGroupTokenReady"/>, then empties the cache and
/// clears all state flags.
/// </summary>
private void FlushCache()
{
#if SHOW_FLUSHED_TOKENS
    StringBuilder debugListing = new StringBuilder();
    foreach( SymToken token in iCache )
    {
        if ( token.Class == SymToken.TClass.EClassNewLine )
        {
            debugListing.Append( "[NL] " );
        }
        else
        {
            debugListing.Append( "[" + token.Value + "] ");
        }
    }
    if ( debugListing.Length > 0 )
    {
        System.Diagnostics.Debug.WriteLine( debugListing.ToString() );
    }
#endif

    // Take a snapshot of the subscriber list once. Testing the event for
    // null and then invoking it are two separate reads, so a handler
    // removed on another thread in between would cause a
    // NullReferenceException.
    MastermindObserver observers = MastermindObservers;
    if ( observers != null )
    {
        foreach( SymToken token in iCache )
        {
            observers( TEvent.EEventGroupTokenReady, token );
        }
    }

    iCache.Reset();
    ResetState();
}
|
668 |
|
/// <summary>
/// Removes and returns the next token from the input queue, holding the
/// queue lock while doing so.
/// </summary>
/// <returns>The next lexed token, or null when the queue is empty.</returns>
private SymToken NextInputToken()
{
    lock ( iLexedTokens )
    {
        if ( iLexedTokens.Count > 0 )
        {
            return iLexedTokens.Dequeue();
        }
        return null;
    }
}
|
683 |
|
/// <summary>
/// The token at the tail of the cache, or the value of
/// <c>SymToken.NullToken()</c> when the cache is empty.
/// </summary>
private SymToken PreviousOutputToken
{
    get
    {
        if ( iCache.Count > 0 )
        {
            return (SymToken) iCache.PeekTail;
        }
        return SymToken.NullToken();
    }
}
|
697 |
|
/// <summary>
/// Appends a token to the cache, unless the state-change check reports that
/// it has already dealt with the token itself.
/// </summary>
/// <param name="aToken">The token to cache.</param>
private void EnqueueNewOutputToken( SymToken aToken )
{
    bool alreadyHandled = CheckIfStateChangeRequiredForEnqueuedToken( aToken );
    if ( alreadyHandled )
    {
        return;
    }

    iCache.Append( aToken );
}
|
706 |
|
/// <summary>
/// Removes the tail of the cache, combines the new token into it, gives the
/// result the supplied class and then merges that result with whatever is
/// now at the tail of the cache.
/// </summary>
/// <param name="aNewToken">The token to combine into the removed tail.</param>
/// <param name="aNewClassType">The class assigned to the combined token.</param>
private void MergeWithPreviousTwoTokens( SymToken aNewToken, SymToken.TClass aNewClassType )
{
    System.Diagnostics.Debug.Assert( iCache.Count > 0 );

    // Tail + new token become one token of the requested class...
    SymToken combined = iCache.PopTail();
    combined.Combine( aNewToken );
    combined.Class = aNewClassType;

    // ...which is then merged with the token that preceded the tail.
    MergeWithPreviousToken( combined );
}
|
720 |
|
/// <summary>
/// Combines a token into the token at the tail of the cache. With an empty
/// cache the token is enqueued instead. Nothing is combined when the
/// state-change check reports that it already handled the token.
/// </summary>
/// <param name="aNewToken">The token to merge.</param>
private void MergeWithPreviousToken( SymToken aNewToken )
{
    if ( iCache.Count <= 0 )
    {
        EnqueueNewOutputToken( aNewToken );
        return;
    }

    if ( !CheckIfStateChangeRequiredForEnqueuedToken( aNewToken ) )
    {
        PreviousOutputToken.Combine( aNewToken );
    }
}
|
736 |
|
/// <summary>
/// Same as <see cref="MergeWithPreviousToken"/>, but joins the tokens with
/// <c>ForceCombine</c> rather than <c>Combine</c>.
/// </summary>
/// <param name="aNewToken">The token to merge.</param>
private void ForceMergeWithPreviousToken( SymToken aNewToken )
{
    if ( iCache.Count <= 0 )
    {
        EnqueueNewOutputToken( aNewToken );
        return;
    }

    if ( !CheckIfStateChangeRequiredForEnqueuedToken( aNewToken ) )
    {
        PreviousOutputToken.ForceCombine( aNewToken );
    }
}
|
752 #endregion |
|
753 |
|
754 #region Internal state related methods |
|
/// <summary>
/// Clears every state flag, i.e. leaves any quotation, comment or
/// preprocessor state.
/// </summary>
private void ResetState()
{
    iFlags = TStateFlag.EStateFlagUnspecified;
}
|
759 |
|
/// <summary>
/// Checks whether a token that is about to be enqueued (or merged) starts a
/// quotation, a comment or a preprocessor directive and, if so, switches
/// state. Called before the token has been added to or combined into the
/// cache. Only acts while the grouper is in the normal state.
/// </summary>
/// <param name="aToken">The token about to be enqueued or merged.</param>
/// <returns>
/// True when the token has already been combined into a cached token here,
/// so the caller must not add it again; otherwise false.
/// </returns>
private bool CheckIfStateChangeRequiredForEnqueuedToken( SymToken aToken )
{
    // Transitions are only started from the normal state.
    if ( InQuotation || InComment || InPreProcessorDirective )
    {
        return false;
    }

    bool tokenProcessed = false;

    if ( aToken.Class == SymToken.TClass.EClassQuotation )
    {
        // If the previous token is a backslash this is an escaped " or '
        // character, which is joined onto the backslash without changing
        // state.
        bool escapedQuotation = false;
        if ( iCache.Count > 0 )
        {
            SymToken previousToken = PreviousOutputToken;
            if ( previousToken.Class == SymToken.TClass.EClassSymbol && previousToken.Value == @"\" )
            {
                previousToken.Combine( aToken );
                escapedQuotation = true;

                // Already handled the token
                tokenProcessed = true;
            }
        }

        if ( !escapedQuotation )
        {
            // Really are starting a quotation. This must also happen when
            // the cache is empty (quotation at the start of a line, or
            // immediately after a closed quotation as in "a""b"); previously
            // that case never entered the quotation state. Flushing an empty
            // cache just resets it.
            FlushCache();
            InQuotation = true;
        }
    }
    else if ( aToken.Class == SymToken.TClass.EClassSymbol )
    {
        if ( aToken.Value == "*" )
        {
            // Opening block comment: "/" followed by "*"
            if ( iCache.Count > 0 )
            {
                SymToken previousToken = PreviousOutputToken;
                if ( previousToken.Class == SymToken.TClass.EClassSymbol && previousToken.Value == "/" )
                {
                    BeginCommentFromPendingSlash( aToken, SymToken.TType.ETypeCommentBlock );
                    tokenProcessed = true;
                }
            }
        }
        else if ( aToken.Value == "/" )
        {
            // Full-line comment: "/" followed by "/"
            if ( iCache.Count > 0 )
            {
                SymToken previousToken = PreviousOutputToken;
                if ( previousToken.Value == aToken.Value )
                {
                    BeginCommentFromPendingSlash( aToken, SymToken.TType.ETypeCommentFullLine );
                    tokenProcessed = true;
                }
            }
        }
    }
    else if ( aToken.Class == SymToken.TClass.EClassPreProcessor )
    {
        // A preprocessor directive may only be preceded on its line by
        // whitespace; anything else in the cache makes the "#" illegal as a
        // directive start.
        bool tokensAreAllWhiteSpace = iCache.CheckTokensAreOfEitherClass( SymToken.TClass.EClassWhiteSpace, SymToken.TClass.EClassNewLine );
        if ( aToken.Value == "#" && tokensAreAllWhiteSpace )
        {
            FlushCache();
            InPreProcessorDirective = true;
        }
    }

    return tokenProcessed;
}

// Turns the "/" at the tail of the cache plus aToken into a single comment
// opening token of the given type and enters the comment state.
private void BeginCommentFromPendingSlash( SymToken aToken, SymToken.TType aCommentType )
{
    // The leading "/" must survive the flush, so it is taken off the cache
    // first, the rest is flushed, and the combined marker is put back.
    SymToken commentMarker = iCache.PopTail();
    FlushCache();

    commentMarker.Combine( aToken );
    commentMarker.Class = SymToken.TClass.EClassComment;
    commentMarker.Type = aCommentType;
    iCache.Append( commentMarker );

    InComment = true;
}
|
902 |
|
903 #endregion |
|
904 |
|
905 #region Internal state properties |
|
/// <summary>
/// Gets or sets whether the quotation flag is present in the state flags.
/// </summary>
private bool InQuotation
{
    get
    {
        return ( iFlags & TStateFlag.EStateFlagInQuotation ) != 0;
    }
    set
    {
        iFlags = value
            ? ( iFlags | TStateFlag.EStateFlagInQuotation )
            : ( iFlags & ~TStateFlag.EStateFlagInQuotation );
    }
}
|
925 |
|
/// <summary>
/// Gets or sets whether the comment flag is present in the state flags.
/// </summary>
private bool InComment
{
    get
    {
        return ( iFlags & TStateFlag.EStateFlagInComment ) != 0;
    }
    set
    {
        iFlags = value
            ? ( iFlags | TStateFlag.EStateFlagInComment )
            : ( iFlags & ~TStateFlag.EStateFlagInComment );
    }
}
|
945 |
|
/// <summary>
/// Gets or sets whether the preprocessor directive flag is present in the
/// state flags.
/// </summary>
private bool InPreProcessorDirective
{
    get
    {
        return ( iFlags & TStateFlag.EStateFlagInPreProcessorDirective ) != 0;
    }
    set
    {
        iFlags = value
            ? ( iFlags | TStateFlag.EStateFlagInPreProcessorDirective )
            : ( iFlags & ~TStateFlag.EStateFlagInPreProcessorDirective );
    }
}
|
965 #endregion |
|
966 |
|
967 #region Data members |
|
// Tokens received from the lexer that are waiting to be grouped. The queue
// object doubles as the lock guarding it, so it is readonly: a lock target
// must never be replaced while another thread may be holding it.
private readonly SymLexedTokens iLexedTokens = new SymLexedTokens();

// Tokens grouped so far but not yet reported to the observers.
private readonly SymGrouperMastermindCache iCache = new SymGrouperMastermindCache();

// The special parsing state(s) currently active; no flag means normal operation.
private TStateFlag iFlags = TStateFlag.EStateFlagUnspecified;
|
971 #endregion |
|
972 } |
|
973 } |