Python - 2.7.3 I want to compare two xml files (Size approx. - 100 MB) I updated code based on discussion on this thread. Comparing two xml files in python
I used element.iter() instead of GetChildren(). For testing purposes, I edited the xml file and made it really short i.e. 1MB in size.
Following is the code.
import xml.etree.ElementTree as ET
import logging
class XmlTree():
def __init__(self):
logging.basicConfig(filename='example.log',level=logging.DEBUG)
self.logger = logging.getLogger('xml_compare')
self.hdlr = logging.FileHandler('xml-comparison.log')
self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
self.logger.addHandler(self.hdlr)
@staticmethod
def convert_string_to_tree( xmlString):
return ET.fromstring(xmlString)
def xml_compare(self, x1, x2, excludes=[]):
"""
Compares two xml etrees
:param x1: the first tree
:param x2: the second tree
:param excludes: list of string of attributes to exclude from comparison
:return:
True if both files match
"""
if x1.tag != x2.tag:
self.logger.debug('Tags do not match: %s and %s' % (x1.tag, x2.tag))
return False
for name, value in x1.attrib.items():
if not name in excludes:
if x2.attrib.get(name) != value:
self.logger.debug('Attributes do not match: %s=%r, %s=%r' % (name, value, name, x2.attrib.get(name)))
return False
for name in x2.attrib.keys():
if not name in excludes:
if name not in x1.attrib:
self.logger.debug('x2 has an attribute x1 is missing: %s' % name)
return False
if not self.text_compare(x1.text, x2.text):
self.logger.debug('text: %r != %r' % (x1.text, x2.text))
return False
if not self.text_compare(x1.tail, x2.tail):
self.logger.debug('tail: %r != %r' % (x1.tail, x2.tail))
return False
cl1 = list(x1.iter())
cl2 = list(x2.iter())
if len(cl1) != len(cl2):
self.logger.debug('children length differs, %i != %i'
% (len(cl1), len(cl2)))
return False
i = 0
for c1, c2 in zip(cl1, cl2):
i += 1
if not c1.tag in excludes:
if not self.xml_compare(c1, c2, excludes):
self.logger.debug('children %i do not match: %s' % (i, c1.tag))
return False
return True
def text_compare(self, t1, t2):
"""
Compare two text strings
:param t1: text one
:param t2: text two
:return:
True if a match
"""
if not t1 and not t2:
return True
if t1 == '*' or t2 == '*':
return True
return (t1 or '').strip() == (t2 or '').strip()
I call it like following
tree1 = ET.parse('A.xml')
tree2 = ET.parse('B.xml')
root1 = tree1.getroot()
root2 = tree2.getroot()
xmlStr1 = ET.tostring(root1,encoding='utf8',method='xml')
xmlStr2 = ET.tostring(root2,encoding='utf8',method='xml')
basetree1 = XmlTree.convert_string_to_tree(xmlStr1)
basetree2 = XmlTree.convert_string_to_tree(xmlStr2)
comparator = XmlTree()
if comparator.xml_compare(basetree1, basetree2, [""]):
print "XMLs match"
else:
print "XMLs don't match"
But it is giving me following error
File "<pyshell#3>", line 56, in xml_compare
if not self.xml_compare(c1, c2, excludes):
File "<pyshell#3>", line 46, in xml_compare
cl1 = list(x1.iter())
File "C:\Python27\lib\xml\etree\ElementTree.py", line 477, in iter
for e in e.iter(tag):
File "C:\Python27\lib\xml\etree\ElementTree.py", line 477, in iter
for e in e.iter(tag):
File "C:\Python27\lib\xml\etree\ElementTree.py", line 477, in iter
for e in e.iter(tag):
File "C:\Python27\lib\xml\etree\ElementTree.py", line 477, in iter
for e in e.iter(tag):
File "C:\Python27\lib\xml\etree\ElementTree.py", line 477, in iter
for e in e.iter(tag):
File "C:\Python27\lib\xml\etree\ElementTree.py", line 472, in iter
if tag == "*":
RuntimeError: maximum recursion depth exceeded in cmp
what am I doing wrong? Is Zip function very heavy to loop through? Is there an alternative. While doing XML comparison, I want to compare a)Text B)tag