configurationengine/source/cone/validation/schemavalidation.py
changeset 3 e7e0ae78773e
equal deleted inserted replaced
2:87cfa131b535 3:e7e0ae78773e
       
     1 #
       
     2 # Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 # All rights reserved.
       
     4 # This component and the accompanying materials are made available
       
     5 # under the terms of "Eclipse Public License v1.0"
       
     6 # which accompanies this distribution, and is available
       
     7 # at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 #
       
     9 # Initial Contributors:
       
    10 # Nokia Corporation - initial contribution.
       
    11 #
       
    12 # Contributors:
       
    13 #
       
    14 # Description: 
       
    15 #
       
    16 
       
    17 import sys, os
       
    18 import logging
       
    19 import StringIO
       
    20 import pkg_resources
       
    21 import jinja2
       
    22 from cone.public import api, utils, exceptions
       
    23 import cone.public.plugin
       
    24 
       
    25 log = logging.getLogger('cone.schemavalidation')
       
    26 
       
    27 ROOT_PATH = os.path.abspath(os.path.dirname(__file__))
       
    28 
       
    29 CONFML_SCHEMA_DIR = 'confml_xsd'
       
    30 
       
    31 SCHEMA_FILES_BY_NAMESPACE = {'http://www.s60.com/xml/confml/1': 'confml.xsd',
       
    32                              'http://www.s60.com/xml/confml/2': 'confml2.xsd'}
       
    33 
       
    34 _schema_cache = {}
       
    35 
       
    36 # ============================================================================
       
    37 
       
    38 def validate_confml_file(config, ref):
       
    39     """
       
    40     Schema-validate the given ConfML file in a configuration.
       
    41     @param config: The configuration.
       
    42     @param ref: The resource reference of the file to validate.
       
    43     @return: A list of api.Problem objects.
       
    44     """
       
    45     return _validate_file(config, ref, validate_confml_data)
       
    46 
       
    47 def validate_implml_file(config, ref):
       
    48     """
       
    49     Schema-validate the given ImplML file in a configuration.
       
    50     @param config: The configuration.
       
    51     @param ref: The resource reference of the file to validate.
       
    52     @return: A list of api.Problem objects.
       
    53     """
       
    54     return _validate_file(config, ref, validate_implml_data)
       
    55 
       
    56 def validate_confml_data(data):
       
    57     """
       
    58     Schema-validate the given ConfML data.
       
    59     
       
    60     @raise ParseError: Something is wrong with the data (invalid XML,
       
    61         unsupported ConfML namespace or not conforming to the schema)
       
    62     """
       
    63     _validate_data(data, _get_confml_schema_for_namespace, 'xml.confml')
       
    64 
       
    65 def validate_implml_data(data):
       
    66     """
       
    67     Schema-validate the given ImplML data.
       
    68     
       
    69     @raise ParseError: Something is wrong with the data (invalid XML,
       
    70         unsupported ImplML namespace or not conforming to the schema)
       
    71     """
       
    72     _validate_data(data, _get_implml_schema_for_namespace, 'xml.implml')
       
    73 
       
    74 # ============================================================================
       
    75 
       
    76 def _validate_file(config, ref, data_validator_func):
       
    77     res = config.get_resource(ref)
       
    78     try:        data = res.read()
       
    79     finally:    res.close()
       
    80     
       
    81     problem = None
       
    82     try:
       
    83         data_validator_func(data)
       
    84     except exceptions.ParseError, e:
       
    85         problem = api.Problem.from_exception(e)
       
    86 
       
    87     if problem:
       
    88         problem.file = ref
       
    89         return [problem]
       
    90     else:
       
    91         return []
       
    92 
       
    93 def _parse_schema(filename, file_data_dict):
       
    94     """
       
    95     Parse a schema using a filename-data dictionary as the source.
       
    96     @param filename: Name of the schema file to parse.
       
    97     @param file_data_dict: Dictionary mapping file names to file data.
       
    98     @return: The parsed schema object.
       
    99     """
       
   100     if filename not in file_data_dict:
       
   101         raise RuntimeError("Could not parse XML schema file '%s', no such file" % filename)
       
   102     
       
   103     schema_data = file_data_dict[filename]
       
   104     
       
   105     import lxml.etree
       
   106     
       
   107     parser = lxml.etree.XMLParser()
       
   108     class Resolver(lxml.etree.Resolver):
       
   109         def resolve(self, url, id, context):
       
   110             if url not in file_data_dict:
       
   111                 log.error("Could not resolve schema file '%s', no such file" % url)
       
   112                 raise RuntimeError("No file named '%s'" % url)
       
   113             data = file_data_dict[url]
       
   114             return self.resolve_string(data, context)
       
   115     parser.resolvers.add(Resolver())
       
   116     
       
   117     try:
       
   118         schema_doc = lxml.etree.fromstring(schema_data, parser=parser)
       
   119         schema = lxml.etree.XMLSchema(schema_doc)
       
   120     except lxml.etree.LxmlError, e:
       
   121         raise RuntimeError(
       
   122             "Error parsing schema file '%s': %s: %s" \
       
   123             % (filename, e.__class__.__name__, str(e)))
       
   124     return schema
       
   125 
       
   126 def _validate_data(data, schema_resolver_func, xml_parse_problem_type):
       
   127     """
       
   128     Validate the given XML data.
       
   129     @param data: The raw binary data to validate.
       
   130     @param schema_resolver_func: The function used to resolve the
       
   131         schema used for validation. The function is given the namespace
       
   132         of the root element and is supposed to return the schema object
       
   133         and problem type to use, or raise a ParseError.
       
   134     @param xml_parse_problem_type: Problem type to use if XML parsing
       
   135         fails of the data fails.
       
   136     
       
   137     @raise ParseError: Something is wrong with the data (invalid XML
       
   138         or not conforming to the schema)
       
   139     """
       
   140     # Find out the XML namespace in the root element
       
   141     try:
       
   142         namespace, _ = utils.xml.get_xml_root(StringIO.StringIO(data))
       
   143     except exceptions.XmlParseError, e:
       
   144         e.problem_type = xml_parse_problem_type
       
   145         raise e
       
   146     
       
   147     schema, problem_type = schema_resolver_func(namespace)
       
   148     
       
   149     # Parse the XML document
       
   150     import lxml.etree
       
   151     try:
       
   152         doc = lxml.etree.fromstring(data)
       
   153     except lxml.etree.XMLSyntaxError, e:
       
   154         raise exceptions.XmlParseError(
       
   155             "XML parse error on line %d: %s" % (e.position[0], e),
       
   156             problem_lineno  = e.position[0],
       
   157             problem_msg     = str(e),
       
   158             problem_type    = xml_parse_problem_type)
       
   159     
       
   160     # Validate the document against the schema
       
   161     if not schema.validate(doc):
       
   162         error = schema.error_log.last_error
       
   163         raise exceptions.XmlSchemaValidationError(
       
   164             "Line %d: %s" % (error.line, error.message),
       
   165             problem_lineno  = error.line,
       
   166             problem_msg     = error.message,
       
   167             problem_type    = problem_type)
       
   168     
       
   169 
       
   170 class UnsupportedNamespaceError(exceptions.ParseError):
       
   171     pass
       
   172 
       
   173 _confml_schema_file_cache = None
       
   174 def get_confml_schema_files():
       
   175     global _confml_schema_file_cache
       
   176     if _confml_schema_file_cache is None:
       
   177         _confml_schema_file_cache = _load_confml_schema_files()
       
   178     return _confml_schema_file_cache
       
   179 
       
   180 def get_schema_file_data(file):
       
   181     """
       
   182     Return the data of the given XML schema file.
       
   183     
       
   184     @raise ValueError: No such schema file exists.
       
   185     """
       
   186     resource_path = CONFML_SCHEMA_DIR + '/' + file
       
   187     if pkg_resources.resource_exists('cone.validation', resource_path):
       
   188         data = pkg_resources.resource_string('cone.validation', resource_path)
       
   189         return data
       
   190     else:
       
   191         msg = "Could not get schema file '%s': Package resource '%s' does not exist" \
       
   192             % (file, resource_path)
       
   193         raise ValueError(msg)
       
   194 
       
   195 def get_schema_file_for_namespace(namespace):
       
   196     """
       
   197     Return the correct schema file name for the given namespace.
       
   198     
       
   199     @param namespace: The namespace for which to get the schema file.
       
   200     @return: The name of the schema file (suitable for calling
       
   201         get_schema_file_data() with), or None if no schema is associated
       
   202         with the namespace.
       
   203     """
       
   204     return SCHEMA_FILES_BY_NAMESPACE.get(namespace, None)
       
   205 
       
   206 
       
   207 def _get_confml_schema_for_namespace(namespace):
       
   208     """
       
   209     Return the correct XML schema and problem type ID for
       
   210     the given ConfML namespace.
       
   211     @return: Tuple (schema, problem_type).
       
   212     """
       
   213     PROBLEM_TYPE = 'schema.confml'
       
   214     
       
   215     # Return a cached schema if possible
       
   216     if namespace in _schema_cache:
       
   217         return _schema_cache[namespace], PROBLEM_TYPE
       
   218     
       
   219     # Get the schema file and its raw byte data
       
   220     schema_file = get_schema_file_for_namespace(namespace)
       
   221     if schema_file is None:
       
   222         raise exceptions.ConfmlParseError(
       
   223             "Unsupported ConfML namespace '%s'" % namespace)
       
   224     schema_data = get_schema_file_data(schema_file)
       
   225     
       
   226     # Parse the schema
       
   227     import lxml.etree
       
   228     parser = lxml.etree.XMLParser()
       
   229     class PackageDataResolver(lxml.etree.Resolver):
       
   230         def resolve(self, url, id, context):
       
   231             data = get_schema_file_data(url)
       
   232             return self.resolve_string(data, context)
       
   233     parser.resolvers.add(PackageDataResolver())
       
   234     schema_doc = lxml.etree.fromstring(schema_data, parser=parser)
       
   235     schema = lxml.etree.XMLSchema(schema_doc)
       
   236     
       
   237     _schema_cache[namespace] = schema
       
   238     return schema, PROBLEM_TYPE
       
   239 
       
   240 def _load_confml_schema_files():
       
   241     files = {}
       
   242     for name in pkg_resources.resource_listdir('cone.validation', CONFML_SCHEMA_DIR):
       
   243         path = CONFML_SCHEMA_DIR + '/' + name
       
   244         if path.lower().endswith('.xsd'):
       
   245             files[name] = pkg_resources.resource_string('cone.validation', path)
       
   246     return files
       
   247 
       
   248 # ============================================================================
       
   249 #
       
   250 #
       
   251 # ============================================================================
       
   252 
       
   253 # Reader class list stored here so that it can be used to check if the reader
       
   254 # class list changes, and reload the schema files in that case
       
   255 _implml_reader_class_list = None
       
   256 
       
   257 _implml_schema_file_cache = None
       
   258 _implml_schema_cache = {}
       
   259 
       
   260 def _check_reader_class_list():
       
   261     """
       
   262     Check if the reader class list has changed, and clear all caches if so.
       
   263     """
       
   264     global _implml_reader_class_list
       
   265     global _implml_schema_file_cache
       
   266     global _implml_schema_cache
       
   267     
       
   268     rc_list = cone.public.plugin.ImplFactory.get_reader_classes()
       
   269     if _implml_reader_class_list is not rc_list:
       
   270         _implml_reader_class_list = rc_list
       
   271         _implml_schema_file_cache = None
       
   272         _implml_schema_cache = {}
       
   273 
       
   274 def dump_schema_files(dump_dir):
       
   275     CONFML_SCHEMA_DIR = os.path.join(dump_dir, 'confml')
       
   276     IMPLML_SCHEMA_DIR = os.path.join(dump_dir, 'implml')
       
   277     if not os.path.exists(CONFML_SCHEMA_DIR):
       
   278         os.makedirs(CONFML_SCHEMA_DIR)
       
   279     if not os.path.exists(IMPLML_SCHEMA_DIR):
       
   280         os.makedirs(IMPLML_SCHEMA_DIR)
       
   281     
       
   282     def dump_files(files, dir):
       
   283         for name, data in files.iteritems():
       
   284             path = os.path.join(dir, name)
       
   285             f = open(path, 'wb')
       
   286             try:        f.write(data)
       
   287             finally:    f.close()
       
   288     
       
   289     dump_files(get_confml_schema_files(), CONFML_SCHEMA_DIR)
       
   290     dump_files(get_implml_schema_files(), IMPLML_SCHEMA_DIR)
       
   291 
       
   292 class _ImplmlReaderEntry(object):
       
   293     def __init__(self, id, namespace, data, root_elem_name, schema_problem_sub_id):
       
   294         self.id = id
       
   295         self.filename = id + '.xsd'
       
   296         self.namespace = namespace
       
   297         self.data = data
       
   298         self.root_elem_name = root_elem_name
       
   299         self.schema_problem_sub_id = schema_problem_sub_id
       
   300 
       
   301 def get_implml_schema_files():
       
   302     """
       
   303     Return a dictionary of ImplML schema file data by file name.
       
   304     """
       
   305     global _implml_schema_file_cache
       
   306     
       
   307     _check_reader_class_list()
       
   308     if _implml_schema_file_cache is None:
       
   309         _implml_schema_file_cache = _load_implml_schema_files()
       
   310     return _implml_schema_file_cache
       
   311 
       
   312 def _load_implml_schema_files():
       
   313     result = {}
       
   314     result['implml.xsd'] = _generate_implml_schema_data()
       
   315     
       
   316     result['XInclude.xsd'] = pkg_resources.resource_string(
       
   317         'cone.validation', CONFML_SCHEMA_DIR + '/XInclude.xsd')
       
   318     
       
   319     for entry in _get_implml_reader_entries():
       
   320         if entry.data is not None:
       
   321             result[entry.filename] = entry.data
       
   322         else:
       
   323             result[entry.filename] = _generate_default_schema_data(entry)
       
   324     return result
       
   325 
       
   326 def _get_implml_reader_entries():
       
   327     entries = []
       
   328     for rc in cone.public.plugin.ImplFactory.get_reader_classes():
       
   329         # Skip ImplContainerReader
       
   330         if rc is cone.public.plugin.ImplContainerReader:
       
   331             continue
       
   332         
       
   333         entry = _ImplmlReaderEntry(rc.NAMESPACE_ID,
       
   334                                    rc.NAMESPACE,
       
   335                                    rc.get_schema_data(),
       
   336                                    rc.ROOT_ELEMENT_NAME,
       
   337                                    rc.SCHEMA_PROBLEM_SUB_ID)
       
   338         entries.append(entry)
       
   339     return entries
       
   340 
       
   341 def _generate_implml_schema_data():
       
   342     template_data = pkg_resources.resource_string('cone.validation', 'implml_xsd/implml-template.xsd')
       
   343     template = jinja2.Template(template_data)
       
   344     data = template.render(data=_get_implml_reader_entries()).encode('utf-8')
       
   345     return data
       
   346 
       
   347 def _generate_default_schema_data(entry):
       
   348     template_data = pkg_resources.resource_string('cone.validation', 'implml_xsd/default-impl-schema-template.xsd')
       
   349     template = jinja2.Template(template_data)
       
   350     data = template.render(entry=entry).encode('utf-8')
       
   351     return data
       
   352 
       
   353 def _get_implml_schema_for_namespace(namespace):
       
   354     """
       
   355     Return the correct XML schema and problem type ID for
       
   356     the given ImplML namespace.
       
   357     @return: Tuple (schema, problem_type).
       
   358     """
       
   359     global _implml_schema_cache
       
   360     
       
   361     problem_type_sub_id = None
       
   362     filename = None
       
   363     if namespace == 'http://www.symbianfoundation.org/xml/implml/1':
       
   364         filename = 'implml.xsd'
       
   365         problem_type_sub_id = 'implml'
       
   366     else:
       
   367         for entry in _get_implml_reader_entries():
       
   368             if entry.namespace == namespace:
       
   369                 filename = entry.filename
       
   370                 problem_type_sub_id = entry.schema_problem_sub_id
       
   371                 break
       
   372     if filename is None:
       
   373         raise exceptions.ImplmlParseError(
       
   374             "Unsupported ImplML namespace: %s" % namespace)
       
   375     
       
   376     # Check reader classes before trying to use the schema cache
       
   377     _check_reader_class_list()
       
   378     
       
   379     # Get the schema from cache if possible
       
   380     if filename in _implml_schema_cache:
       
   381         return _implml_schema_cache[filename]
       
   382     
       
   383     file_data_dict = get_implml_schema_files()
       
   384     if filename not in file_data_dict:
       
   385         raise exceptions.ImplmlParseError(
       
   386             "ImplML schema file '%s' does not exist!" % filename)
       
   387     
       
   388     schema = _parse_schema(filename, file_data_dict)
       
   389     problem_type = 'schema.implml'
       
   390     if problem_type_sub_id:
       
   391         problem_type += '.' + problem_type_sub_id
       
   392     return schema, problem_type
       
   393 
       
   394 # ============================================================================
       
   395 #
       
   396 #
       
   397 # ============================================================================
       
   398 
       
   399 class SchemaValidationTestMixin(object):
       
   400     """
       
   401     Mix-in class for providing assertion methods for unittest.TestCase sub-classes
       
   402     testing schema validation.
       
   403     """
       
   404     
       
   405     def assert_schemavalidation_succeeds(self, type, dir, namespace=None):
       
   406         """
       
   407         Assert that schema validation succeeds for all the files in the given directory.
       
   408         @param type: Type of the schema validation to perform, can be 'confml' or 'implml'.
       
   409         @param dir: The directory containing the files to validate
       
   410         @param namespace: If not None, specifies the namespace that the root element
       
   411             in all the must have. If any of the files has a different namespace, the
       
   412             assertion fails.
       
   413         """
       
   414         errors = []
       
   415         for file in self._get_files(dir):
       
   416             f = open(file, 'rb')
       
   417             try:        data = f.read()
       
   418             finally:    f.close()
       
   419             
       
   420             if namespace is not None:
       
   421                 self._check_root_element_namespace(file, data, namespace)
       
   422             
       
   423             validate_data = self._get_validator_function_for_type(type)
       
   424             try:
       
   425                 validate_data(data)
       
   426             except Exception, e:
       
   427                 errors.append(file)
       
   428                 errors.append("Raised: %r" % e)
       
   429         
       
   430         if errors:
       
   431             self.fail('\n'.join(errors))
       
   432     
       
   433     def assert_schemavalidation_fails(self, type, dir, namespace=None, problem_type=None):
       
   434         """
       
   435         Assert that schema validation fails for all the files in the given directory.
       
   436         @param type: Type of the schema validation to perform, can be 'confml' or 'implml'.
       
   437         @param dir: The directory containing the files to validate
       
   438         @param namespace: If not None, specifies the namespace that the root element
       
   439             in all the must have. If any of the files has a different namespace, the
       
   440             assertion fails.
       
   441         @param: problem_type: If not None, specifies the problem type that the
       
   442             SchemaValidationError raised from validation must contain.
       
   443         """
       
   444         errors = []
       
   445         for file in self._get_files(dir):
       
   446             f = open(file, 'rb')
       
   447             try:        data = f.read()
       
   448             finally:    f.close()
       
   449             
       
   450             if namespace is not None:
       
   451                 self._check_root_element_namespace(file, data, namespace)
       
   452             
       
   453             validate_data = self._get_validator_function_for_type(type)
       
   454             try:
       
   455                 validate_data(data)
       
   456                 errors.append(file)
       
   457             except exceptions.XmlSchemaValidationError, e:
       
   458                 if problem_type is not None:
       
   459                     if e.problem_type != problem_type:
       
   460                         errors.append(file)
       
   461                         errors.append("Problem type was '%s', expected '%s'" % (e.problem_type, problem_type))
       
   462         
       
   463         if errors:
       
   464             self.fail('The following files were reported as valid when they should not have been:\n%s' % '\n'.join(errors))
       
   465     
       
   466     
       
   467     def _get_files(self, dir):
       
   468         """
       
   469         Return a list of all files in the given directory.
       
   470         @param dir: The directory.
       
   471         @return: List of all files in the dir. Each entry has the
       
   472             also the directory joined to it.
       
   473         """
       
   474         files = []
       
   475         for name in os.listdir(dir):
       
   476             path = os.path.join(dir, name)
       
   477             if os.path.isfile(path):
       
   478                 files.append(path)
       
   479         return files
       
   480     
       
   481     def _check_root_element_namespace(self, file_path, data, expected_namespace):
       
   482         file_namespace, _ = utils.xml.get_xml_root(StringIO.StringIO(data))
       
   483         if file_namespace != expected_namespace:
       
   484             msg = "Error testing schema validation with file '%s': "\
       
   485                   "Root element namespace is not what was expected (expected '%s', got '%s')"\
       
   486                   % (file_path, expected_namespace, file_namespace)
       
   487             self.fail(msg)
       
   488     
       
   489     def _get_validator_function_for_type(self, type):
       
   490         if type == 'implml':
       
   491             return validate_implml_data
       
   492         elif type == 'confml':
       
   493             return validate_confml_data
       
   494         else:
       
   495             raise ValueError("Invalid schema validation type '%s', should be 'implml' or 'confml'" % type)