/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
17 |
|
/*
 * $Id: IGXMLScanner.hpp 568078 2007-08-21 11:43:25Z amassari $
 */
|
21 |
|
22 |
|
23 #if !defined(IGXMLSCANNER_HPP) |
|
24 #define IGXMLSCANNER_HPP |
|
25 |
|
26 #include <xercesc/internal/XMLScanner.hpp> |
|
27 #include <xercesc/util/KVStringPair.hpp> |
|
28 #include <xercesc/util/NameIdPool.hpp> |
|
29 #include <xercesc/util/RefHash3KeysIdPool.hpp> |
|
30 #include <xercesc/validators/common/Grammar.hpp> |
|
31 #include <xercesc/validators/schema/SchemaElementDecl.hpp> |
|
32 |
|
33 XERCES_CPP_NAMESPACE_BEGIN |
|
34 |
|
// Forward declarations (kept in alphabetical order).
class ContentLeafNameTypeVector;
class DTDElementDecl;
class DTDGrammar;
class DTDValidator;
class IdentityConstraint;
class IdentityConstraintHandler;
class PSVIAttributeList;
class PSVIElement;
class SchemaAttDef;
class SchemaValidator;
class XMLContentModel;
class XSModel;
|
47 |
|
48 // This is an integrated scanner class, which does DTD/XML Schema grammar |
|
49 // processing. |
|
50 class XMLPARSER_EXPORT IGXMLScanner : public XMLScanner |
|
51 { |
|
52 public : |
|
53 // ----------------------------------------------------------------------- |
|
54 // Constructors and Destructor |
|
55 // ----------------------------------------------------------------------- |
|
56 IGXMLScanner |
|
57 ( |
|
58 XMLValidator* const valToAdopt |
|
59 , GrammarResolver* const grammarResolver |
|
60 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
|
61 ); |
|
62 IGXMLScanner |
|
63 ( |
|
64 XMLDocumentHandler* const docHandler |
|
65 , DocTypeHandler* const docTypeHandler |
|
66 , XMLEntityHandler* const entityHandler |
|
67 , XMLErrorReporter* const errReporter |
|
68 , XMLValidator* const valToAdopt |
|
69 , GrammarResolver* const grammarResolver |
|
70 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
|
71 ); |
|
72 virtual ~IGXMLScanner(); |
|
73 |
|
74 // ----------------------------------------------------------------------- |
|
75 // XMLScanner public virtual methods |
|
76 // ----------------------------------------------------------------------- |
|
77 virtual const XMLCh* getName() const; |
|
78 virtual NameIdPool<DTDEntityDecl>* getEntityDeclPool(); |
|
79 virtual const NameIdPool<DTDEntityDecl>* getEntityDeclPool() const; |
|
80 virtual unsigned int resolveQName |
|
81 ( |
|
82 const XMLCh* const qName |
|
83 , XMLBuffer& prefixBufToFill |
|
84 , const short mode |
|
85 , int& prefixColonPos |
|
86 ); |
|
87 virtual void scanDocument |
|
88 ( |
|
89 const InputSource& src |
|
90 ); |
|
91 virtual bool scanNext(XMLPScanToken& toFill); |
|
92 virtual Grammar* loadGrammar |
|
93 ( |
|
94 const InputSource& src |
|
95 , const short grammarType |
|
96 , const bool toCache = false |
|
97 ); |
|
98 |
|
99 virtual Grammar::GrammarType getCurrentGrammarType() const; |
|
100 |
|
101 private : |
|
102 // ----------------------------------------------------------------------- |
|
103 // Unimplemented constructors and operators |
|
104 // ----------------------------------------------------------------------- |
|
105 IGXMLScanner(); |
|
106 IGXMLScanner(const IGXMLScanner&); |
|
107 IGXMLScanner& operator=(const IGXMLScanner&); |
|
108 |
|
109 // ----------------------------------------------------------------------- |
|
110 // XMLScanner virtual methods |
|
111 // ----------------------------------------------------------------------- |
|
112 virtual void scanCDSection(); |
|
113 virtual void scanCharData(XMLBuffer& toToUse); |
|
114 virtual EntityExpRes scanEntityRef |
|
115 ( |
|
116 const bool inAttVal |
|
117 , XMLCh& firstCh |
|
118 , XMLCh& secondCh |
|
119 , bool& escaped |
|
120 ); |
|
121 virtual void scanDocTypeDecl(); |
|
122 virtual void scanReset(const InputSource& src); |
|
123 virtual void sendCharData(XMLBuffer& toSend); |
|
124 virtual InputSource* resolveSystemId(const XMLCh* const sysId |
|
125 ,const XMLCh* const pubId); |
|
126 |
|
127 // ----------------------------------------------------------------------- |
|
128 // Private helper methods |
|
129 // ----------------------------------------------------------------------- |
|
130 void commonInit(); |
|
131 void cleanUp(); |
|
132 |
|
133 unsigned int buildAttList |
|
134 ( |
|
135 const RefVectorOf<KVStringPair>& providedAttrs |
|
136 , const unsigned int attCount |
|
137 , XMLElementDecl* elemDecl |
|
138 , RefVectorOf<XMLAttr>& toFill |
|
139 ); |
|
140 bool normalizeAttValue |
|
141 ( |
|
142 const XMLAttDef* const attDef |
|
143 , const XMLCh* const name |
|
144 , const XMLCh* const value |
|
145 , XMLBuffer& toFill |
|
146 ); |
|
147 bool normalizeAttRawValue |
|
148 ( |
|
149 const XMLCh* const attrName |
|
150 , const XMLCh* const value |
|
151 , XMLBuffer& toFill |
|
152 ); |
|
153 unsigned int resolvePrefix |
|
154 ( |
|
155 const XMLCh* const prefix |
|
156 , const ElemStack::MapModes mode |
|
157 ); |
|
158 unsigned int resolvePrefix |
|
159 ( |
|
160 const XMLCh* const prefix |
|
161 , XMLBuffer& uriBufToFill |
|
162 , const ElemStack::MapModes mode |
|
163 ); |
|
164 void updateNSMap |
|
165 ( |
|
166 const XMLCh* const attrName |
|
167 , const XMLCh* const attrValue |
|
168 ); |
|
169 void updateNSMap |
|
170 ( |
|
171 const XMLCh* const attrName |
|
172 , const XMLCh* const attrValue |
|
173 , const int colonPosition |
|
174 ); |
|
175 void scanRawAttrListforNameSpaces(int attCount); |
|
176 void parseSchemaLocation(const XMLCh* const schemaLocationStr); |
|
177 void resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri); |
|
178 bool switchGrammar(const XMLCh* const newGrammarNameSpace); |
|
179 bool laxElementValidation(QName* element, ContentLeafNameTypeVector* cv, |
|
180 const XMLContentModel* const cm, |
|
181 const unsigned int parentElemDepth); |
|
182 bool anyAttributeValidation(SchemaAttDef* attWildCard, |
|
183 unsigned int uriId, |
|
184 bool& skipThisOne, |
|
185 bool& laxThisOne); |
|
186 void resizeElemState(); |
|
187 void processSchemaLocation(XMLCh* const schemaLoc); |
|
188 |
|
189 void resizeRawAttrColonList(); |
|
190 |
|
191 unsigned int resolveQNameWithColon |
|
192 ( |
|
193 const XMLCh* const qName |
|
194 , XMLBuffer& prefixBufToFill |
|
195 , const short mode |
|
196 , const int prefixColonPos |
|
197 ); |
|
198 // ----------------------------------------------------------------------- |
|
199 // Private scanning methods |
|
200 // ----------------------------------------------------------------------- |
|
201 bool basicAttrValueScan |
|
202 ( |
|
203 const XMLCh* const attrName |
|
204 , XMLBuffer& toFill |
|
205 ); |
|
206 unsigned int rawAttrScan |
|
207 ( |
|
208 const XMLCh* const elemName |
|
209 , RefVectorOf<KVStringPair>& toFill |
|
210 , bool& isEmpty |
|
211 ); |
|
212 bool scanAttValue |
|
213 ( |
|
214 const XMLAttDef* const attDef |
|
215 , const XMLCh* const attrName |
|
216 , XMLBuffer& toFill |
|
217 ); |
|
218 bool scanContent(); |
|
219 void scanEndTag(bool& gotData); |
|
220 bool scanStartTag(bool& gotData); |
|
221 bool scanStartTagNS(bool& gotData); |
|
222 |
|
223 // ----------------------------------------------------------------------- |
|
224 // IdentityConstraints Activation methods |
|
225 // ----------------------------------------------------------------------- |
|
226 inline bool toCheckIdentityConstraint() const; |
|
227 |
|
228 // ----------------------------------------------------------------------- |
|
229 // Grammar preparsing methods |
|
230 // ----------------------------------------------------------------------- |
|
231 Grammar* loadXMLSchemaGrammar(const InputSource& src, const bool toCache = false); |
|
232 Grammar* loadDTDGrammar(const InputSource& src, const bool toCache = false); |
|
233 |
|
234 // ----------------------------------------------------------------------- |
|
235 // PSVI handling methods |
|
236 // ----------------------------------------------------------------------- |
|
237 void endElementPSVI(SchemaElementDecl* const elemDecl, |
|
238 DatatypeValidator* const memberDV); |
|
239 void resetPSVIElemContext(); |
|
240 |
|
241 // ----------------------------------------------------------------------- |
|
242 // Data members |
|
243 // |
|
244 // fRawAttrList |
|
245 // During the initial scan of the attributes we can only do a raw |
|
246 // scan for key/value pairs. So this vector is used to store them |
|
247 // until they can be processed (and put into fAttrList.) |
|
248 // |
|
249 // fDTDValidator |
|
250 // The DTD validator instance. |
|
251 // |
|
252 // fSchemaValidator |
|
253 // The Schema validator instance. |
|
254 // |
|
255 // fSeeXsi |
|
256 // This flag indicates a schema has been seen. |
|
257 // |
|
258 // fElemState |
|
259 // fElemStateSize |
|
260 // Stores an element next state from DFA content model - used for |
|
261 // wildcard validation |
|
262 // |
|
263 // fDTDElemNonDeclPool |
|
264 // registry of "faulted-in" DTD element decls |
|
265 // fSchemaElemNonDeclPool |
|
266 // registry for elements without decls in the grammar |
|
267 // fElemCount |
|
268 // count of the number of start tags seen so far (starts at 1). |
|
269 // Used for duplicate attribute detection/processing of required/defaulted attributes |
|
270 // fAttDefRegistry |
|
271 // mapping from XMLAttDef instances to the count of the last |
|
272 // start tag where they were utilized. |
|
273 // fUndeclaredAttrRegistry |
|
274 // mapping of attr QNames to the count of the last start tag in which they occurred |
|
275 // fUndeclaredAttrRegistryNS |
|
276 // mapping of namespaceId/localName pairs to the count of the last |
|
277 // start tag in which they occurred. |
|
278 // fPSVIAttrList |
|
279 // PSVI attribute list implementation that needs to be |
|
280 // filled when a PSVIHandler is registered |
|
281 // |
|
282 // ----------------------------------------------------------------------- |
|
283 bool fSeeXsi; |
|
284 Grammar::GrammarType fGrammarType; |
|
285 unsigned int fElemStateSize; |
|
286 unsigned int* fElemState; |
|
287 XMLBuffer fContent; |
|
288 RefVectorOf<KVStringPair>* fRawAttrList; |
|
289 unsigned int fRawAttrColonListSize; |
|
290 int* fRawAttrColonList; |
|
291 DTDValidator* fDTDValidator; |
|
292 SchemaValidator* fSchemaValidator; |
|
293 DTDGrammar* fDTDGrammar; |
|
294 IdentityConstraintHandler* fICHandler; |
|
295 ValueVectorOf<XMLCh*>* fLocationPairs; |
|
296 NameIdPool<DTDElementDecl>* fDTDElemNonDeclPool; |
|
297 RefHash3KeysIdPool<SchemaElementDecl>* fSchemaElemNonDeclPool; |
|
298 unsigned int fElemCount; |
|
299 RefHashTableOf<unsigned int>* fAttDefRegistry; |
|
300 RefHashTableOf<unsigned int>* fUndeclaredAttrRegistry; |
|
301 RefHash2KeysTableOf<unsigned int>* fUndeclaredAttrRegistryNS; |
|
302 PSVIAttributeList * fPSVIAttrList; |
|
303 XSModel* fModel; |
|
304 PSVIElement* fPSVIElement; |
|
305 ValueStackOf<bool>* fErrorStack; |
|
306 PSVIElemContext fPSVIElemContext; |
|
307 }; |
|
308 |
|
309 inline const XMLCh* IGXMLScanner::getName() const |
|
310 { |
|
311 return XMLUni::fgIGXMLScanner; |
|
312 } |
|
313 |
|
314 inline bool IGXMLScanner::toCheckIdentityConstraint() const |
|
315 { |
|
316 return fValidate && fIdentityConstraintChecking && fICHandler; |
|
317 } |
|
318 |
|
319 inline Grammar::GrammarType IGXMLScanner::getCurrentGrammarType() const |
|
320 { |
|
321 return fGrammarType; |
|
322 } |
|
323 |
|
324 XERCES_CPP_NAMESPACE_END |
|
325 |
|
326 #endif |
|