tzpcside/tzcompiler/Source/TZScanner.cpp
changeset 0 2e3d3ce01487
equal deleted inserted replaced
-1:000000000000 0:2e3d3ce01487
       
     1 // Copyright (c) 2004-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
    14 // DST TZ Database Compiler 
       
    15 // 
       
    16 //
       
    17 
       
    18 #include "TzGlobals.h"
       
    19 #include <string>
       
    20 #include <vector>
       
    21 #include "TZDocument.h"
       
    22 #include "TZScanner.h"
       
    23 #include "TZNode.h"
       
    24 
       
    25 #include <ctype.h>
       
    26 
       
    27 using namespace std;
       
    28 //============================================================================
       
    29 // CTzCpScanner::CTzCpScanner
       
    30 // Parser construction
       
    31 //============================================================================
       
    32 CTzCpScanner::CTzCpScanner()
       
    33 	{
       
    34 	//Add valid characters
       
    35 	iValidChars = "+-_<>=:/%";
       
    36 	}
       
    37 //============================================================================
       
    38 // CTzCpScanner::~CTzCpScanner
       
    39 // Parser destructor
       
    40 //============================================================================
       
    41 CTzCpScanner::~CTzCpScanner()
       
    42 	{
       
    43 	}
       
    44 //============================================================================
       
    45 // CTzCpScanner::Scan
       
    46 // From MScanner
       
    47 // Returns KErrNone if succesful or any other TzErrorCode
       
    48 //============================================================================
       
    49 int CTzCpScanner::Scan(const char* aFileName)
       
    50 	{
       
    51 	iFileName = aFileName;
       
    52 	dbFile.open(aFileName);
       
    53 
       
    54 	if (dbFile.is_open())
       
    55 		{
       
    56 		dbFile.clear();		//This resets the eof flag and character position
       
    57 		scanState = EWaitFirstChar;
       
    58 		iLineNo = 0;
       
    59 		iColumnNo = 0;
       
    60 		ControlScanning();
       
    61 		dbFile.close();
       
    62 		return TzGlobals::ETzNone;
       
    63 		}
       
    64 	else 
       
    65 	//File was not opened
       
    66 		{
       
    67 		cout << "Cannot open " << aFileName << endl;
       
    68 		throw TzGlobals::ETzAbortScannerFileIOError;
       
    69 		}
       
    70 	}
       
    71 //============================================================================
       
    72 // CTzCpScanner::ControlScanning
       
    73 // Reads the data file line by line.  Each line is converted to a number of 
       
    74 // CTzNodes and ownership of the node is passed to the document.
       
    75 // The general structure of a node is split into elements and attributes
       
    76 // An 'element' node encapsulates the entire line as read from the data file
       
    77 // by owning 'attribute' nodes, the actual fields on the line.
       
    78 // The decisions made here are purely based on the first character of each
       
    79 // line in the file to be parsed.  The parser is unaware of the content or
       
    80 // meaning of the file being parsed.
       
    81 // Current rules used are:
       
    82 //	1:	We are only interested in lines that start with 'R','Z','L' or '\t'
       
    83 //	2:	The end of a token is a newline, tab, space or '#'
       
    84 //	3:	If a line starts with a tab the element is created as a child of the 
       
    85 //		previous node
       
    86 //	4:	All other elements are created as chilren of the document root node
       
    87 //	5:	There is no limit on the number of attributes owned by an element
       
    88 //============================================================================
       
    89 void CTzCpScanner::ControlScanning()
       
    90 
       
    91 	{
       
    92 	char tmpChar;
       
    93 	char firstChar;
       
    94 	string tmpString;
       
    95 
       
    96 	do
       
    97 		{
       
    98 		if (dbFile.eof())
       
    99 			tmpChar = EOF;			// define something out of the range of normal characters
       
   100 		else
       
   101 			{
       
   102 			dbFile.get(tmpChar);	// get next char
       
   103 			++iColumnNo;
       
   104 			}
       
   105 
       
   106 		switch (scanState)
       
   107 			{
       
   108 			case EWaitFirstChar:
       
   109 				{
       
   110 				++iLineNo;			// we have a new line
       
   111 				iColumnNo = 0;		// reset the column count
       
   112 				firstChar = tmpChar;
       
   113 				tmpString.erase();	// is this the right call to empty the string ?
       
   114 				if ((tmpChar == 'R') || (tmpChar == 'Z') || (tmpChar == 'L'))
       
   115 					{
       
   116 					tmpString += tmpChar;
       
   117 					iDocument->CreateRootChildElement();
       
   118 					scanState = EWaitTokenEnd;
       
   119 					}
       
   120 				else if (tmpChar == '\t')
       
   121 					{
       
   122 					iDocument->CreateChildElement();
       
   123 					tmpString += tmpChar;
       
   124 					// this creates an element whose name is '/t'
       
   125 					// We add two '/t' to achieve the same number of fields in the Stdtimealignment and Zone lines
       
   126 					iDocument->AddAttribute(tmpString.c_str());	
       
   127 					iDocument->AddAttribute(tmpString.c_str());
       
   128 					tmpString.erase();	// clear the string for the next state
       
   129 					scanState = EWaitTokenStart;
       
   130 					}
       
   131 				else if (tmpChar == '\n')
       
   132 					{
       
   133 					// do nothing
       
   134 					}
       
   135 				else if ((tmpChar == KCharOlsonStartOfComment) || isspace(tmpChar))
       
   136 					{
       
   137 					// ignore the whole line
       
   138 					scanState = EWaitLineFeed;
       
   139 					}
       
   140 				else if (tmpChar != EOF)					
       
   141 					{
       
   142 					iDocument->HandleScanError(iFileName.c_str(),iLineNo,iColumnNo,tmpChar);
       
   143 					}
       
   144 				}
       
   145 			break;
       
   146 			
       
   147 			case EWaitTokenEnd:
       
   148 				{
       
   149 				// can we assume that there will always be a whitespace before a comment ('#')?
       
   150 				// how about 'CR'; the model assumes there will be a 'CRLF', perhaps that's not right
       
   151 				if ((tmpChar == ' ') || (tmpChar == '\t') || (tmpChar == '\n') || (tmpChar == EOF))
       
   152 					{
       
   153 					iDocument->AddAttribute(tmpString.c_str());
       
   154 					tmpString.erase();		// clear the string for the next state
       
   155 					if (tmpChar == '\n')
       
   156 						{
       
   157 						scanState = EWaitFirstChar;
       
   158 						if (firstChar != 'Z')
       
   159 							{
       
   160 							iDocument->CloseElement();
       
   161 							}
       
   162 						}
       
   163 					else
       
   164 						{
       
   165 						scanState = EWaitTokenStart;
       
   166 						}
       
   167 					}
       
   168 				else
       
   169 					{
       
   170 					std::string strValidChars = iValidChars;
       
   171 					int pos = strValidChars.find(tmpChar);
       
   172 					if ((!isalpha(tmpChar)) && (!isdigit(tmpChar)) && (pos == string::npos))
       
   173 						{
       
   174 						iDocument->HandleScanError(iFileName.c_str(),iLineNo,iColumnNo,tmpChar);
       
   175 						}
       
   176 					
       
   177 					tmpString += tmpChar;
       
   178 					}
       
   179 				}
       
   180 			break;
       
   181 			
       
   182 			case EWaitTokenStart:
       
   183 				{
       
   184 				if (tmpChar == '#')
       
   185 					{
       
   186 					// ignore the whole line
       
   187 					scanState = EWaitLineFeed;
       
   188 					//If a comment appears on a zone line we don't want to close the element
       
   189 					if (firstChar != 'Z')
       
   190 						{
       
   191 						iDocument->CloseElement();
       
   192 						}
       
   193 					}
       
   194 				else if ((tmpChar != ' ') && (tmpChar != '\t') && (tmpChar != '\n')) // valid char
       
   195 					{
       
   196 					tmpString += tmpChar;
       
   197 					scanState = EWaitTokenEnd;
       
   198 					}
       
   199 				}
       
   200 			break;
       
   201 			
       
   202 			case EWaitLineFeed:
       
   203 				{
       
   204 				// ignore every thing till the end of line
       
   205 				if (tmpChar == '\n')
       
   206 					{
       
   207 					scanState = EWaitFirstChar;
       
   208 					}
       
   209 				}
       
   210 			break;
       
   211 
       
   212 			default:
       
   213 				{
       
   214 				cout << "State Error!" << endl;
       
   215 				}
       
   216 			break;
       
   217 			}
       
   218 		}
       
   219 		while (tmpChar != EOF);
       
   220 		iDocument->CloseElement(); 
       
   221 	}
       
   222 
       
   223 //============================================================================
       
   224 // End of file
       
   225 //============================================================================