3
|
1 |
#
|
|
2 |
# Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
|
|
3 |
# All rights reserved.
|
|
4 |
# This component and the accompanying materials are made available
|
|
5 |
# under the terms of "Eclipse Public License v1.0"
|
|
6 |
# which accompanies this distribution, and is available
|
|
7 |
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
|
|
8 |
#
|
|
9 |
# Initial Contributors:
|
|
10 |
# Nokia Corporation - initial contribution.
|
|
11 |
#
|
|
12 |
# Contributors:
|
|
13 |
#
|
|
14 |
# Description:
|
|
15 |
#
|
|
16 |
|
|
17 |
import sys, os
|
|
18 |
import logging
|
|
19 |
import StringIO
|
|
20 |
import pkg_resources
|
|
21 |
import jinja2
|
|
22 |
from cone.public import api, utils, exceptions
|
|
23 |
import cone.public.plugin
|
|
24 |
|
|
25 |
# Logger used by the schema validation code in this module
log = logging.getLogger('cone.schemavalidation')

# Absolute path of the directory containing this module
ROOT_PATH = os.path.abspath(os.path.dirname(__file__))

# Directory, relative to the 'cone.validation' package resources, from
# which the ConfML .xsd files are read
CONFML_SCHEMA_DIR = 'confml_xsd'

# Schema file name for each supported ConfML namespace
SCHEMA_FILES_BY_NAMESPACE = {
    'http://www.s60.com/xml/confml/1': 'confml.xsd',
    'http://www.s60.com/xml/confml/2': 'confml2.xsd',
}

# Parsed ConfML schema objects, keyed by namespace
_schema_cache = {}
|
|
35 |
|
|
36 |
# ============================================================================
|
|
37 |
|
|
38 |
def validate_confml_file(config, ref):
    """
    Run ConfML schema validation on one file of a configuration.
    @param config: The configuration containing the file.
    @param ref: The resource reference of the ConfML file to validate.
    @return: A list of api.Problem objects.
    """
    problems = _validate_file(config, ref, validate_confml_data)
    return problems
|
|
46 |
|
|
47 |
def validate_implml_file(config, ref):
    """
    Run ImplML schema validation on one file of a configuration.
    @param config: The configuration containing the file.
    @param ref: The resource reference of the ImplML file to validate.
    @return: A list of api.Problem objects.
    """
    problems = _validate_file(config, ref, validate_implml_data)
    return problems
|
|
55 |
|
|
56 |
def validate_confml_data(data):
    """
    Check raw ConfML data against the ConfML XML schema.

    @param data: The raw ConfML data to validate.
    @raise ParseError: Something is wrong with the data (invalid XML,
        unsupported ConfML namespace or not conforming to the schema)
    """
    _validate_data(data,
                   schema_resolver_func=_get_confml_schema_for_namespace,
                   xml_parse_problem_type='xml.confml')
|
|
64 |
|
|
65 |
def validate_implml_data(data):
    """
    Check raw ImplML data against the ImplML XML schema.

    @param data: The raw ImplML data to validate.
    @raise ParseError: Something is wrong with the data (invalid XML,
        unsupported ImplML namespace or not conforming to the schema)
    """
    _validate_data(data,
                   schema_resolver_func=_get_implml_schema_for_namespace,
                   xml_parse_problem_type='xml.implml')
|
|
73 |
|
|
74 |
# ============================================================================
|
|
75 |
|
|
76 |
def _validate_file(config, ref, data_validator_func):
|
|
77 |
res = config.get_resource(ref)
|
|
78 |
try: data = res.read()
|
|
79 |
finally: res.close()
|
|
80 |
|
|
81 |
problem = None
|
|
82 |
try:
|
|
83 |
data_validator_func(data)
|
|
84 |
except exceptions.ParseError, e:
|
|
85 |
problem = api.Problem.from_exception(e)
|
|
86 |
|
|
87 |
if problem:
|
|
88 |
problem.file = ref
|
|
89 |
return [problem]
|
|
90 |
else:
|
|
91 |
return []
|
|
92 |
|
|
93 |
def _parse_schema(filename, file_data_dict):
|
|
94 |
"""
|
|
95 |
Parse a schema using a filename-data dictionary as the source.
|
|
96 |
@param filename: Name of the schema file to parse.
|
|
97 |
@param file_data_dict: Dictionary mapping file names to file data.
|
|
98 |
@return: The parsed schema object.
|
|
99 |
"""
|
|
100 |
if filename not in file_data_dict:
|
|
101 |
raise RuntimeError("Could not parse XML schema file '%s', no such file" % filename)
|
|
102 |
|
|
103 |
schema_data = file_data_dict[filename]
|
|
104 |
|
|
105 |
import lxml.etree
|
|
106 |
|
|
107 |
parser = lxml.etree.XMLParser()
|
|
108 |
class Resolver(lxml.etree.Resolver):
|
|
109 |
def resolve(self, url, id, context):
|
|
110 |
if url not in file_data_dict:
|
|
111 |
log.error("Could not resolve schema file '%s', no such file" % url)
|
|
112 |
raise RuntimeError("No file named '%s'" % url)
|
|
113 |
data = file_data_dict[url]
|
|
114 |
return self.resolve_string(data, context)
|
|
115 |
parser.resolvers.add(Resolver())
|
|
116 |
|
|
117 |
try:
|
|
118 |
schema_doc = lxml.etree.fromstring(schema_data, parser=parser)
|
|
119 |
schema = lxml.etree.XMLSchema(schema_doc)
|
|
120 |
except lxml.etree.LxmlError, e:
|
|
121 |
raise RuntimeError(
|
|
122 |
"Error parsing schema file '%s': %s: %s" \
|
|
123 |
% (filename, e.__class__.__name__, str(e)))
|
|
124 |
return schema
|
|
125 |
|
|
126 |
def _validate_data(data, schema_resolver_func, xml_parse_problem_type):
|
|
127 |
"""
|
|
128 |
Validate the given XML data.
|
|
129 |
@param data: The raw binary data to validate.
|
|
130 |
@param schema_resolver_func: The function used to resolve the
|
|
131 |
schema used for validation. The function is given the namespace
|
|
132 |
of the root element and is supposed to return the schema object
|
|
133 |
and problem type to use, or raise a ParseError.
|
|
134 |
@param xml_parse_problem_type: Problem type to use if XML parsing
|
|
135 |
fails of the data fails.
|
|
136 |
|
|
137 |
@raise ParseError: Something is wrong with the data (invalid XML
|
|
138 |
or not conforming to the schema)
|
|
139 |
"""
|
|
140 |
# Find out the XML namespace in the root element
|
|
141 |
try:
|
|
142 |
namespace, _ = utils.xml.get_xml_root(StringIO.StringIO(data))
|
|
143 |
except exceptions.XmlParseError, e:
|
|
144 |
e.problem_type = xml_parse_problem_type
|
|
145 |
raise e
|
|
146 |
|
|
147 |
schema, problem_type = schema_resolver_func(namespace)
|
|
148 |
|
|
149 |
# Parse the XML document
|
|
150 |
import lxml.etree
|
|
151 |
try:
|
|
152 |
doc = lxml.etree.fromstring(data)
|
|
153 |
except lxml.etree.XMLSyntaxError, e:
|
|
154 |
raise exceptions.XmlParseError(
|
|
155 |
"XML parse error on line %d: %s" % (e.position[0], e),
|
|
156 |
problem_lineno = e.position[0],
|
|
157 |
problem_msg = str(e),
|
|
158 |
problem_type = xml_parse_problem_type)
|
|
159 |
|
|
160 |
# Validate the document against the schema
|
|
161 |
if not schema.validate(doc):
|
|
162 |
error = schema.error_log.last_error
|
|
163 |
raise exceptions.XmlSchemaValidationError(
|
|
164 |
"Line %d: %s" % (error.line, error.message),
|
|
165 |
problem_lineno = error.line,
|
|
166 |
problem_msg = error.message,
|
|
167 |
problem_type = problem_type)
|
|
168 |
|
|
169 |
|
|
170 |
class UnsupportedNamespaceError(exceptions.ParseError):
    """
    ParseError sub-class for an unsupported XML namespace.

    NOTE(review): nothing in the visible part of this module raises
    this exception; confirm whether it is still used elsewhere.
    """
|
|
172 |
|
|
173 |
# ConfML schema file data by file name; None until first requested
_confml_schema_file_cache = None

def get_confml_schema_files():
    """
    Return a dictionary of ConfML schema file data by file name.

    The files are loaded on the first call and the same dictionary is
    returned on subsequent calls.
    """
    global _confml_schema_file_cache
    if _confml_schema_file_cache is not None:
        return _confml_schema_file_cache
    _confml_schema_file_cache = _load_confml_schema_files()
    return _confml_schema_file_cache
|
|
179 |
|
|
180 |
def get_schema_file_data(file):
    """
    Read an XML schema file from the 'cone.validation' package resources.

    @param file: Name of the schema file, relative to the ConfML schema
        directory.
    @return: The data of the schema file.
    @raise ValueError: No such schema file exists.
    """
    package = 'cone.validation'
    resource_path = '/'.join([CONFML_SCHEMA_DIR, file])

    if not pkg_resources.resource_exists(package, resource_path):
        raise ValueError(
            "Could not get schema file '%s': Package resource '%s' does not exist" \
            % (file, resource_path))

    return pkg_resources.resource_string(package, resource_path)
|
|
194 |
|
|
195 |
def get_schema_file_for_namespace(namespace):
    """
    Look up the schema file name registered for a namespace.

    @param namespace: The namespace for which to get the schema file.
    @return: The name of the schema file (suitable for calling
        get_schema_file_data() with), or None if no schema is associated
        with the namespace.
    """
    schema_file = SCHEMA_FILES_BY_NAMESPACE.get(namespace)
    return schema_file
|
|
205 |
|
|
206 |
|
|
207 |
def _get_confml_schema_for_namespace(namespace):
    """
    Resolve the XML schema and problem type ID to use for a
    ConfML namespace.

    Parsed schemas are kept in _schema_cache, so each namespace's
    schema is parsed only once.
    @param namespace: The ConfML namespace of the document root element.
    @return: Tuple (schema, problem_type).
    @raise ConfmlParseError: The namespace has no associated schema file.
    """
    problem_type = 'schema.confml'

    if namespace not in _schema_cache:
        # Find the schema file and read its raw byte data
        schema_file = get_schema_file_for_namespace(namespace)
        if schema_file is None:
            raise exceptions.ConfmlParseError(
                "Unsupported ConfML namespace '%s'" % namespace)
        schema_bytes = get_schema_file_data(schema_file)

        import lxml.etree

        class _PackageDataResolver(lxml.etree.Resolver):
            # Serves files referenced by the schema from package resources
            def resolve(self, url, id, context):
                return self.resolve_string(get_schema_file_data(url), context)

        xml_parser = lxml.etree.XMLParser()
        xml_parser.resolvers.add(_PackageDataResolver())
        root = lxml.etree.fromstring(schema_bytes, parser=xml_parser)

        # Stored only after parsing succeeded
        _schema_cache[namespace] = lxml.etree.XMLSchema(root)

    return _schema_cache[namespace], problem_type
|
|
239 |
|
|
240 |
def _load_confml_schema_files():
    """
    Read every .xsd file in the ConfML schema resource directory.
    @return: Dictionary of schema file data by file name. The '.xsd'
        suffix is matched case-insensitively.
    """
    package = 'cone.validation'
    schema_files = {}
    for name in pkg_resources.resource_listdir(package, CONFML_SCHEMA_DIR):
        resource_path = CONFML_SCHEMA_DIR + '/' + name
        if not resource_path.lower().endswith('.xsd'):
            continue
        schema_files[name] = pkg_resources.resource_string(package, resource_path)
    return schema_files
|
|
247 |
|
|
248 |
# ============================================================================
|
|
249 |
#
|
|
250 |
#
|
|
251 |
# ============================================================================
|
|
252 |
|
|
253 |
# The reader class list seen on the previous check. It is kept so that a
# change in the reader class list can be detected, in which case the
# ImplML schema caches below are reset.
_implml_reader_class_list = None

# ImplML schema file data by file name; None until first requested
_implml_schema_file_cache = None

# Parsed ImplML schema objects by schema file name
_implml_schema_cache = {}
|
|
259 |
|
|
260 |
def _check_reader_class_list():
    """
    Reset the ImplML caches if the reader class list has changed.

    The list returned by ImplFactory.get_reader_classes() is compared
    by identity to the one seen on the previous call. If it is a
    different object, it is remembered and both the schema file cache
    and the parsed schema cache are cleared.
    """
    global _implml_reader_class_list
    global _implml_schema_file_cache
    global _implml_schema_cache

    current_list = cone.public.plugin.ImplFactory.get_reader_classes()
    if _implml_reader_class_list is current_list:
        return

    _implml_reader_class_list = current_list
    _implml_schema_file_cache = None
    _implml_schema_cache = {}
|
|
273 |
|
|
274 |
def dump_schema_files(dump_dir):
    """
    Write all ConfML and ImplML schema files to disk.

    ConfML schema files go to '<dump_dir>/confml' and ImplML schema
    files to '<dump_dir>/implml'. The directories are created if they
    do not exist.
    @param dump_dir: The directory under which the files are written.
    """
    confml_dir = os.path.join(dump_dir, 'confml')
    implml_dir = os.path.join(dump_dir, 'implml')
    for target_dir in (confml_dir, implml_dir):
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

    def write_all(files, target_dir):
        # Write each entry of a name -> data dictionary as a binary file
        for name, data in files.iteritems():
            out = open(os.path.join(target_dir, name), 'wb')
            try:
                out.write(data)
            finally:
                out.close()

    write_all(get_confml_schema_files(), confml_dir)
    write_all(get_implml_schema_files(), implml_dir)
|
|
291 |
|
|
292 |
class _ImplmlReaderEntry(object):
    """
    Plain record of the schema-related information of one ImplML reader.

    Besides the constructor arguments, the entry has a 'filename'
    attribute, which is the ID with '.xsd' appended.
    """
    def __init__(self, id, namespace, data, root_elem_name, schema_problem_sub_id):
        self.id, self.filename = id, id + '.xsd'
        self.namespace = namespace
        self.root_elem_name = root_elem_name
        self.schema_problem_sub_id = schema_problem_sub_id
        # Schema data of the reader; may be None
        self.data = data
|
|
300 |
|
|
301 |
def get_implml_schema_files():
    """
    Return a dictionary of ImplML schema file data by file name.

    The dictionary is cached. The reader class list is checked first,
    which may reset the cache and cause the files to be loaded again.
    """
    global _implml_schema_file_cache

    _check_reader_class_list()
    if _implml_schema_file_cache is not None:
        return _implml_schema_file_cache

    _implml_schema_file_cache = _load_implml_schema_files()
    return _implml_schema_file_cache
|
|
311 |
|
|
312 |
def _load_implml_schema_files():
    """
    Collect the data of all ImplML schema files.

    The result contains the generated 'implml.xsd', 'XInclude.xsd'
    read from the package resources, and one file per reader entry. A
    reader entry with no schema data of its own gets a schema generated
    from the default template.
    @return: Dictionary of schema file data by file name.
    """
    files = {}
    files['implml.xsd'] = _generate_implml_schema_data()
    files['XInclude.xsd'] = pkg_resources.resource_string(
        'cone.validation', CONFML_SCHEMA_DIR + '/XInclude.xsd')

    for entry in _get_implml_reader_entries():
        data = entry.data
        if data is None:
            data = _generate_default_schema_data(entry)
        files[entry.filename] = data
    return files
|
|
325 |
|
|
326 |
def _get_implml_reader_entries():
    """
    Build an _ImplmlReaderEntry for each registered ImplML reader class.

    ImplContainerReader is left out of the result.
    @return: List of _ImplmlReaderEntry objects, in the order of the
        reader class list.
    """
    plugin = cone.public.plugin
    return [
        _ImplmlReaderEntry(reader.NAMESPACE_ID,
                           reader.NAMESPACE,
                           reader.get_schema_data(),
                           reader.ROOT_ELEMENT_NAME,
                           reader.SCHEMA_PROBLEM_SUB_ID)
        for reader in plugin.ImplFactory.get_reader_classes()
        if reader is not plugin.ImplContainerReader]
|
|
340 |
|
|
341 |
def _generate_implml_schema_data():
    """
    Render the ImplML schema template with the current reader entries.
    @return: The rendered schema, encoded as UTF-8.
    """
    template_data = pkg_resources.resource_string(
        'cone.validation', 'implml_xsd/implml-template.xsd')
    template = jinja2.Template(template_data)
    rendered = template.render(data=_get_implml_reader_entries())
    return rendered.encode('utf-8')
|
|
346 |
|
|
347 |
def _generate_default_schema_data(entry):
    """
    Render the default implementation schema template for a reader entry.
    @param entry: The _ImplmlReaderEntry passed to the template as 'entry'.
    @return: The rendered schema, encoded as UTF-8.
    """
    template_data = pkg_resources.resource_string(
        'cone.validation', 'implml_xsd/default-impl-schema-template.xsd')
    template = jinja2.Template(template_data)
    rendered = template.render(entry=entry)
    return rendered.encode('utf-8')
|
|
352 |
|
|
353 |
def _get_implml_schema_for_namespace(namespace):
    """
    Return the correct XML schema and problem type ID for
    the given ImplML namespace.

    Parsed schemas are cached by schema file name. The cache is reset
    when the reader class list changes.
    @param namespace: The ImplML namespace of the document root element.
    @return: Tuple (schema, problem_type).
    @raise ImplmlParseError: The namespace is not supported, or its
        schema file does not exist.
    """
    global _implml_schema_cache

    # Find the schema file name and problem type sub-ID for the namespace
    problem_type_sub_id = None
    filename = None
    if namespace == 'http://www.symbianfoundation.org/xml/implml/1':
        filename = 'implml.xsd'
        problem_type_sub_id = 'implml'
    else:
        for entry in _get_implml_reader_entries():
            if entry.namespace == namespace:
                filename = entry.filename
                problem_type_sub_id = entry.schema_problem_sub_id
                break
    if filename is None:
        raise exceptions.ImplmlParseError(
            "Unsupported ImplML namespace: %s" % namespace)

    # The problem type is needed on both the cached and non-cached paths
    problem_type = 'schema.implml'
    if problem_type_sub_id:
        problem_type += '.' + problem_type_sub_id

    # Check reader classes before trying to use the schema cache
    _check_reader_class_list()

    # Get the schema from cache if possible. Callers unpack the result,
    # so the tuple must be returned here too, not just the schema.
    if filename in _implml_schema_cache:
        return _implml_schema_cache[filename], problem_type

    file_data_dict = get_implml_schema_files()
    if filename not in file_data_dict:
        raise exceptions.ImplmlParseError(
            "ImplML schema file '%s' does not exist!" % filename)

    schema = _parse_schema(filename, file_data_dict)

    # Remember the parsed schema so that it is not parsed again on
    # every call
    _implml_schema_cache[filename] = schema
    return schema, problem_type
|
|
393 |
|
|
394 |
# ============================================================================
|
|
395 |
#
|
|
396 |
#
|
|
397 |
# ============================================================================
|
|
398 |
|
|
399 |
class SchemaValidationTestMixin(object):
|
|
400 |
"""
|
|
401 |
Mix-in class for providing assertion methods for unittest.TestCase sub-classes
|
|
402 |
testing schema validation.
|
|
403 |
"""
|
|
404 |
|
|
405 |
def assert_schemavalidation_succeeds(self, type, dir, namespace=None):
|
|
406 |
"""
|
|
407 |
Assert that schema validation succeeds for all the files in the given directory.
|
|
408 |
@param type: Type of the schema validation to perform, can be 'confml' or 'implml'.
|
|
409 |
@param dir: The directory containing the files to validate
|
|
410 |
@param namespace: If not None, specifies the namespace that the root element
|
|
411 |
in all the must have. If any of the files has a different namespace, the
|
|
412 |
assertion fails.
|
|
413 |
"""
|
|
414 |
errors = []
|
|
415 |
for file in self._get_files(dir):
|
|
416 |
f = open(file, 'rb')
|
|
417 |
try: data = f.read()
|
|
418 |
finally: f.close()
|
|
419 |
|
|
420 |
if namespace is not None:
|
|
421 |
self._check_root_element_namespace(file, data, namespace)
|
|
422 |
|
|
423 |
validate_data = self._get_validator_function_for_type(type)
|
|
424 |
try:
|
|
425 |
validate_data(data)
|
|
426 |
except Exception, e:
|
|
427 |
errors.append(file)
|
|
428 |
errors.append("Raised: %r" % e)
|
|
429 |
|
|
430 |
if errors:
|
|
431 |
self.fail('\n'.join(errors))
|
|
432 |
|
|
433 |
def assert_schemavalidation_fails(self, type, dir, namespace=None, problem_type=None):
|
|
434 |
"""
|
|
435 |
Assert that schema validation fails for all the files in the given directory.
|
|
436 |
@param type: Type of the schema validation to perform, can be 'confml' or 'implml'.
|
|
437 |
@param dir: The directory containing the files to validate
|
|
438 |
@param namespace: If not None, specifies the namespace that the root element
|
|
439 |
in all the must have. If any of the files has a different namespace, the
|
|
440 |
assertion fails.
|
|
441 |
@param: problem_type: If not None, specifies the problem type that the
|
|
442 |
SchemaValidationError raised from validation must contain.
|
|
443 |
"""
|
|
444 |
errors = []
|
|
445 |
for file in self._get_files(dir):
|
|
446 |
f = open(file, 'rb')
|
|
447 |
try: data = f.read()
|
|
448 |
finally: f.close()
|
|
449 |
|
|
450 |
if namespace is not None:
|
|
451 |
self._check_root_element_namespace(file, data, namespace)
|
|
452 |
|
|
453 |
validate_data = self._get_validator_function_for_type(type)
|
|
454 |
try:
|
|
455 |
validate_data(data)
|
|
456 |
errors.append(file)
|
|
457 |
except exceptions.XmlSchemaValidationError, e:
|
|
458 |
if problem_type is not None:
|
|
459 |
if e.problem_type != problem_type:
|
|
460 |
errors.append(file)
|
|
461 |
errors.append("Problem type was '%s', expected '%s'" % (e.problem_type, problem_type))
|
|
462 |
|
|
463 |
if errors:
|
|
464 |
self.fail('The following files were reported as valid when they should not have been:\n%s' % '\n'.join(errors))
|
|
465 |
|
|
466 |
|
|
467 |
def _get_files(self, dir):
|
|
468 |
"""
|
|
469 |
Return a list of all files in the given directory.
|
|
470 |
@param dir: The directory.
|
|
471 |
@return: List of all files in the dir. Each entry has the
|
|
472 |
also the directory joined to it.
|
|
473 |
"""
|
|
474 |
files = []
|
|
475 |
for name in os.listdir(dir):
|
|
476 |
path = os.path.join(dir, name)
|
|
477 |
if os.path.isfile(path):
|
|
478 |
files.append(path)
|
|
479 |
return files
|
|
480 |
|
|
481 |
def _check_root_element_namespace(self, file_path, data, expected_namespace):
|
|
482 |
file_namespace, _ = utils.xml.get_xml_root(StringIO.StringIO(data))
|
|
483 |
if file_namespace != expected_namespace:
|
|
484 |
msg = "Error testing schema validation with file '%s': "\
|
|
485 |
"Root element namespace is not what was expected (expected '%s', got '%s')"\
|
|
486 |
% (file_path, expected_namespace, file_namespace)
|
|
487 |
self.fail(msg)
|
|
488 |
|
|
489 |
def _get_validator_function_for_type(self, type):
|
|
490 |
if type == 'implml':
|
|
491 |
return validate_implml_data
|
|
492 |
elif type == 'confml':
|
|
493 |
return validate_confml_data
|
|
494 |
else:
|
|
495 |
raise ValueError("Invalid schema validation type '%s', should be 'implml' or 'confml'" % type)
|