|
1 // Copyright (c) 2004-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // DST TZ Database Compiler |
|
15 // |
|
16 // |
|
17 |
|
18 #include "TzGlobals.h" |
|
19 #include <string> |
|
20 #include <vector> |
|
21 #include "TZDocument.h" |
|
22 #include "TZScanner.h" |
|
23 #include "TZNode.h" |
|
24 |
|
25 #include <ctype.h> |
|
26 |
|
27 using namespace std; |
|
28 //============================================================================ |
|
29 // CTzCpScanner::CTzCpScanner |
|
30 // Parser construction |
|
31 //============================================================================ |
|
32 CTzCpScanner::CTzCpScanner() |
|
33 { |
|
34 //Add valid characters |
|
35 iValidChars = "+-_<>=:/%"; |
|
36 } |
|
37 //============================================================================ |
|
38 // CTzCpScanner::~CTzCpScanner |
|
39 // Parser destructor |
|
40 //============================================================================ |
|
41 CTzCpScanner::~CTzCpScanner() |
|
42 { |
|
43 } |
|
44 //============================================================================ |
|
45 // CTzCpScanner::Scan |
|
46 // From MScanner |
|
47 // Returns KErrNone if succesful or any other TzErrorCode |
|
48 //============================================================================ |
|
49 int CTzCpScanner::Scan(const char* aFileName) |
|
50 { |
|
51 iFileName = aFileName; |
|
52 dbFile.open(aFileName); |
|
53 |
|
54 if (dbFile.is_open()) |
|
55 { |
|
56 dbFile.clear(); //This resets the eof flag and character position |
|
57 scanState = EWaitFirstChar; |
|
58 iLineNo = 0; |
|
59 iColumnNo = 0; |
|
60 ControlScanning(); |
|
61 dbFile.close(); |
|
62 return TzGlobals::ETzNone; |
|
63 } |
|
64 else |
|
65 //File was not opened |
|
66 { |
|
67 cout << "Cannot open " << aFileName << endl; |
|
68 throw TzGlobals::ETzAbortScannerFileIOError; |
|
69 } |
|
70 } |
|
71 //============================================================================ |
|
72 // CTzCpScanner::ControlScanning |
|
73 // Reads the data file line by line. Each line is converted to a number of |
|
74 // CTzNodes and ownership of the node is passed to the document. |
|
75 // The general structure of a node is split into elements and attributes |
|
76 // An 'element' node encapsulates the entire line as read from the data file |
|
77 // by owning 'attribute' nodes, the actual fields on the line. |
|
78 // The decisions made here are purely based on the first character of each |
|
79 // line in the file to be parsed. The parser is unaware of the content or |
|
80 // meaning of the file being parsed. |
|
81 // Current rules used are: |
|
82 // 1: We are only interested in lines that start with 'R','Z','L' or '\t' |
|
83 // 2: The end of a token is a newline, tab, space or '#' |
|
84 // 3: If a line starts with a tab the element is created as a child of the |
|
85 // previous node |
|
86 // 4: All other elements are created as chilren of the document root node |
|
87 // 5: There is no limit on the number of attributes owned by an element |
|
88 //============================================================================ |
|
89 void CTzCpScanner::ControlScanning() |
|
90 |
|
91 { |
|
92 char tmpChar; |
|
93 char firstChar; |
|
94 string tmpString; |
|
95 |
|
96 do |
|
97 { |
|
98 if (dbFile.eof()) |
|
99 tmpChar = EOF; // define something out of the range of normal characters |
|
100 else |
|
101 { |
|
102 dbFile.get(tmpChar); // get next char |
|
103 ++iColumnNo; |
|
104 } |
|
105 |
|
106 switch (scanState) |
|
107 { |
|
108 case EWaitFirstChar: |
|
109 { |
|
110 ++iLineNo; // we have a new line |
|
111 iColumnNo = 0; // reset the column count |
|
112 firstChar = tmpChar; |
|
113 tmpString.erase(); // is this the right call to empty the string ? |
|
114 if ((tmpChar == 'R') || (tmpChar == 'Z') || (tmpChar == 'L')) |
|
115 { |
|
116 tmpString += tmpChar; |
|
117 iDocument->CreateRootChildElement(); |
|
118 scanState = EWaitTokenEnd; |
|
119 } |
|
120 else if (tmpChar == '\t') |
|
121 { |
|
122 iDocument->CreateChildElement(); |
|
123 tmpString += tmpChar; |
|
124 // this creates an element whose name is '/t' |
|
125 // We add two '/t' to achieve the same number of fields in the Stdtimealignment and Zone lines |
|
126 iDocument->AddAttribute(tmpString.c_str()); |
|
127 iDocument->AddAttribute(tmpString.c_str()); |
|
128 tmpString.erase(); // clear the string for the next state |
|
129 scanState = EWaitTokenStart; |
|
130 } |
|
131 else if (tmpChar == '\n') |
|
132 { |
|
133 // do nothing |
|
134 } |
|
135 else if ((tmpChar == KCharOlsonStartOfComment) || isspace(tmpChar)) |
|
136 { |
|
137 // ignore the whole line |
|
138 scanState = EWaitLineFeed; |
|
139 } |
|
140 else if (tmpChar != EOF) |
|
141 { |
|
142 iDocument->HandleScanError(iFileName.c_str(),iLineNo,iColumnNo,tmpChar); |
|
143 } |
|
144 } |
|
145 break; |
|
146 |
|
147 case EWaitTokenEnd: |
|
148 { |
|
149 // can we assume that there will always be a whitespace before a comment ('#')? |
|
150 // how about 'CR'; the model assumes there will be a 'CRLF', perhaps that's not right |
|
151 if ((tmpChar == ' ') || (tmpChar == '\t') || (tmpChar == '\n') || (tmpChar == EOF)) |
|
152 { |
|
153 iDocument->AddAttribute(tmpString.c_str()); |
|
154 tmpString.erase(); // clear the string for the next state |
|
155 if (tmpChar == '\n') |
|
156 { |
|
157 scanState = EWaitFirstChar; |
|
158 if (firstChar != 'Z') |
|
159 { |
|
160 iDocument->CloseElement(); |
|
161 } |
|
162 } |
|
163 else |
|
164 { |
|
165 scanState = EWaitTokenStart; |
|
166 } |
|
167 } |
|
168 else |
|
169 { |
|
170 std::string strValidChars = iValidChars; |
|
171 int pos = strValidChars.find(tmpChar); |
|
172 if ((!isalpha(tmpChar)) && (!isdigit(tmpChar)) && (pos == string::npos)) |
|
173 { |
|
174 iDocument->HandleScanError(iFileName.c_str(),iLineNo,iColumnNo,tmpChar); |
|
175 } |
|
176 |
|
177 tmpString += tmpChar; |
|
178 } |
|
179 } |
|
180 break; |
|
181 |
|
182 case EWaitTokenStart: |
|
183 { |
|
184 if (tmpChar == '#') |
|
185 { |
|
186 // ignore the whole line |
|
187 scanState = EWaitLineFeed; |
|
188 //If a comment appears on a zone line we don't want to close the element |
|
189 if (firstChar != 'Z') |
|
190 { |
|
191 iDocument->CloseElement(); |
|
192 } |
|
193 } |
|
194 else if ((tmpChar != ' ') && (tmpChar != '\t') && (tmpChar != '\n')) // valid char |
|
195 { |
|
196 tmpString += tmpChar; |
|
197 scanState = EWaitTokenEnd; |
|
198 } |
|
199 } |
|
200 break; |
|
201 |
|
202 case EWaitLineFeed: |
|
203 { |
|
204 // ignore every thing till the end of line |
|
205 if (tmpChar == '\n') |
|
206 { |
|
207 scanState = EWaitFirstChar; |
|
208 } |
|
209 } |
|
210 break; |
|
211 |
|
212 default: |
|
213 { |
|
214 cout << "State Error!" << endl; |
|
215 } |
|
216 break; |
|
217 } |
|
218 } |
|
219 while (tmpChar != EOF); |
|
220 iDocument->CloseElement(); |
|
221 } |
|
222 |
|
223 //============================================================================ |
|
224 // End of file |
|
225 //============================================================================ |