configurationengine/source/testautomation/testautomation/compare_xml.py
changeset 0 2e8eeb919028
equal deleted inserted replaced
-1:000000000000 0:2e8eeb919028
       
     1 #
       
     2 # Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 # All rights reserved.
       
     4 # This component and the accompanying materials are made available
       
     5 # under the terms of "Eclipse Public License v1.0"
       
     6 # which accompanies this distribution, and is available
       
     7 # at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 #
       
     9 # Initial Contributors:
       
    10 # Nokia Corporation - initial contribution.
       
    11 #
       
    12 # Contributors:
       
    13 #
       
    14 # Description:
       
    15 #
       
    16 
       
    17 import sys
       
    18 import xml.etree.ElementTree as ElementTree
       
    19 import unittest
       
    20 import traceback
       
    21 
       
    22 def compare_xml_documents(data1, data2, **kwargs):
       
    23     """
       
    24     Compare two XML documents for equality.
       
    25     @param data1: The raw byte data of the first XML document as a string.
       
    26     @param data2: The raw byte data of the second XML document as a string.
       
    27     
       
    28     Keyword arguments:
       
    29     @param check_encoding: If True, the encoding of the documents is checked to be the same.
       
    30     @param ignore_namespace: If True, XML namespaces are ignored in the comparison.
       
    31     @param ignored_empty_tags: List of tags that will be ignored in the comparison if empty,
       
    32         i.e. if the tags is empty on one side and does not exist on the other.
       
    33     @param debug_stream: If not None, the stream where debug messages are printed.
       
    34     
       
    35     @return: True if the documents are equal, False if not.
       
    36     """
       
    37     
       
    38     check_encoding = kwargs.get('check_encoding', False)
       
    39     ignore_namespaces = kwargs.get('ignore_namespaces', False)
       
    40     debug_stream = kwargs.get('debug_stream', None)
       
    41     ignored_empty_tags = kwargs.get('ignored_empty_tags', [])
       
    42     
       
    43     if data1 == data2:
       
    44         if debug_stream: print >>debug_stream, "Raw byte data equal"
       
    45         return True
       
    46     
       
    47     if check_encoding:
       
    48         enc1 = _get_xml_encoding(data1)
       
    49         enc2 = _get_xml_encoding(data2)
       
    50         if enc1.lower() != enc2.lower():
       
    51             if debug_stream: print >>debug_stream, "XML encoding is not the same (%s vs. %s)" % (repr(enc1), repr(enc2))
       
    52             return False
       
    53     
       
    54     try:
       
    55         et1 = ElementTree.fromstring(data1)
       
    56     except Exception:
       
    57         if debug_stream: print >>debug_stream, "Failure when parsing data1: %s" % traceback.format_exc()
       
    58         return False
       
    59     
       
    60     try:
       
    61         et2 = ElementTree.fromstring(data2)
       
    62     except Exception:
       
    63         if debug_stream: print >>debug_stream, "Failure when parsing data2: %s" % traceback.format_exc()
       
    64         return False
       
    65     
       
    66     return _xml_elements_equal(et1, et2, ignore_namespaces, debug_stream, '', ignored_empty_tags)
       
    67 
       
    68 def _xml_elements_equal(elem1, elem2, ignore_namespaces, debug_stream, parent_path, ignored_empty_tags):
       
    69     ds = debug_stream
       
    70     
       
    71     elem1_tag = _get_tag(elem1, ignore_namespaces)
       
    72     elem2_tag = _get_tag(elem2, ignore_namespaces)
       
    73     
       
    74     full_path1 = parent_path + '/' + elem1_tag
       
    75     full_path2 = parent_path + '/' + elem2_tag
       
    76     if ds and parent_path == '':
       
    77         print >>ds, "Comparing '%s' vs. '%s'" % (full_path1, full_path2)
       
    78     
       
    79     if elem1_tag != elem2_tag:
       
    80         if ds and parent_path == '':
       
    81             print >>ds, "Tags don't match"
       
    82         return False
       
    83     
       
    84     def strip_string(data):
       
    85         if data == None:    return data
       
    86         else:               return data.strip(' \n\r\t')
       
    87     text1 = strip_string(elem1.text)
       
    88     text2 = strip_string(elem2.text)
       
    89     if text1 != text2:
       
    90         if ds and parent_path == '':
       
    91             print >>ds, "Element text %s does not match %s" % (repr(text1), repr(text2))
       
    92         return False
       
    93     
       
    94     def strip_namespace_attrs(attrib):
       
    95         if not ignore_namespaces:
       
    96             return attrib
       
    97         else:
       
    98             # Strip all attributes with a namespace if namespace are ignored
       
    99             result = {}
       
   100             for key, value in attrib.iteritems():
       
   101                 if '{' not in key:
       
   102                     result[key] = value
       
   103             return result
       
   104     attrs1 = strip_namespace_attrs(elem1.attrib)
       
   105     attrs2 = strip_namespace_attrs(elem2.attrib)
       
   106     if attrs1 != attrs2:
       
   107         if ds and parent_path == '':
       
   108             print >>ds, "Element attributes don't match (%s vs. %s)" % (repr(attrs1), repr(attrs2))
       
   109         return False
       
   110     
       
   111     # Remove ignored empty sub-elements before comparing the sub-elems
       
   112     subelems1 = elem1.getchildren()
       
   113     subelems2 = elem2.getchildren()
       
   114     _remove_ignored_empty_subelems(subelems1, elem2.getchildren(), full_path1, ignore_namespaces, ignored_empty_tags, ds)
       
   115     _remove_ignored_empty_subelems(subelems2, elem1.getchildren(), full_path1, ignore_namespaces, ignored_empty_tags, ds)
       
   116     
       
   117     # Compare sub-elements without caring about their document order
       
   118     # NOTE: This approach will not scale well for very large documents
       
   119     len1 = len(elem1.getchildren())
       
   120     len2 = len(elem2.getchildren())
       
   121     if len1 != len2:    return False
       
   122     if len1 == 0:       return True
       
   123     matched_subelems2 = []
       
   124     for subelem1 in subelems1:
       
   125         matched = False
       
   126         for subelem2 in subelems2:
       
   127             # Try to match the sub-element in elem2 only if it
       
   128             # has not been matched yet
       
   129             if id(subelem2) not in matched_subelems2:
       
   130                 if _xml_elements_equal(subelem1, subelem2, ignore_namespaces, ds, full_path1, ignored_empty_tags):
       
   131                     matched = True
       
   132                     matched_subelems2.append(id(subelem2))
       
   133                     break
       
   134         if not matched:
       
   135             if ds:
       
   136                 print >>ds, "No match found for element '%s' under '%s'." % (subelem1.tag, full_path1)
       
   137                 print >>ds, "Element data:"
       
   138                 print >>ds, ElementTree.tostring(subelem1)
       
   139             return False
       
   140     
       
   141     # Everything matched
       
   142     return True
       
   143 
       
   144 def _remove_ignored_empty_subelems(subelems1, subelems2, parent_path, ignore_namespaces, ignored_empty_tags, debug_stream):
       
   145     """Remove ignored empty sub-elements from list subelems1."""
       
   146     ds = debug_stream
       
   147     if ds: print >>ds, "parent_path: %s" % parent_path
       
   148     removed = []
       
   149     for i, subelem1 in enumerate(subelems1):
       
   150         if len(subelem1.getchildren()) > 0:
       
   151             continue
       
   152         
       
   153         # See if the tag should be ignored if it doesn't exist on
       
   154         # the other side
       
   155         is_ignored = False
       
   156         for ignored_tag in ignored_empty_tags:
       
   157             if ds: print >>ds, "ignored_tag = %s, tag = %s" % (ignored_tag, parent_path + "/" + _get_tag(subelem1, ignore_namespaces))
       
   158             if ignored_tag == parent_path + "/" + _get_tag(subelem1, ignore_namespaces):
       
   159                 is_ignored = True
       
   160                 break
       
   161         if not is_ignored:
       
   162             continue
       
   163         
       
   164         # See if the tag exists on the other side
       
   165         found = False
       
   166         for subelem2 in subelems2:
       
   167             if _get_tag(subelem1, ignore_namespaces) == _get_tag(subelem2, ignore_namespaces):
       
   168                 found = True
       
   169                 break
       
   170         if not found:
       
   171             removed.append(i)
       
   172     
       
   173     # Sort and reverse the removed list so that deleting starts from the
       
   174     # end and the indices are correct throughout the operation
       
   175     removed.sort()
       
   176     removed = removed[::-1]
       
   177     if len(removed) >= 2:
       
   178         if removed[0] < removed[-1]:
       
   179             raise RuntimeError("Internal error: list in wrong order: %s" % removed)
       
   180     
       
   181     for i in removed:
       
   182         del subelems1[i]
       
   183         
       
   184 def _get_tag(elem, ignore_namespaces):
       
   185     tag = elem.tag
       
   186     if ignore_namespaces:
       
   187         pos = tag.find('}')
       
   188         if pos >= 0:
       
   189             tag = tag[pos + 1:]
       
   190     return tag
       
   191 
       
   192 def _get_xml_encoding(xml_data):
       
   193     encoding = 'UTF-8'
       
   194     if xml_data.startswith('\xFE\xFF') or xml_data.startswith('\xFF\xFE'):
       
   195         encoding = 'UTF-16'
       
   196     
       
   197     # Decode only up to the first 200 bytes (should be enough for the header)
       
   198     decoded_data = xml_data[:200].decode(encoding, 'ignore')
       
   199     if decoded_data.startswith('<?xml'):
       
   200         header = decoded_data[:decoded_data.find('?>') + 2]
       
   201         # E.g header = '<?xml version="1.0" encoding = 'UTF-8'?>'
       
   202         
       
   203         def get_substr(string, sought_data):
       
   204             pos = string.find(sought_data)
       
   205             if pos >= 0:    return string[pos + len(sought_data):]
       
   206             else:           return None
       
   207         
       
   208         x = get_substr(header, "encoding")
       
   209         if not x: return ''
       
   210         # E.g x = ' = 'UTF-8'?>'
       
   211         x = x.replace(' ', '').replace('\t', '')
       
   212         # E.g x = '='UTF-8'?>'
       
   213         sgl_quoted = get_substr(x, "='")
       
   214         dbl_quoted = get_substr(x, '="')
       
   215         # E.g sgl_quoted = 'UTF-8'?>'
       
   216         if sgl_quoted:      return sgl_quoted[:sgl_quoted.find("'")]
       
   217         elif dbl_quoted:    return dbl_quoted[:dbl_quoted.find('"')]
       
   218         
       
   219     return ''