equivalent of lxml.objectify cleanup_namespaces in defusedxml

493 views Asked by At

I am getting the error below from Bandit: "Using lxml.etree.parse to parse untrusted XML data is known to be vulnerable to XML attacks. Replace lxml.etree.parse with its defusedxml equivalent function."

I want the defusedxml equivalent of the code below.

from lxml import etree, objectify
def fn_read_xml_root(xml_file):
    """
    Read an XML file, strip namespace prefixes from its tags and return
    both the raw text and the root node.

    xml_file : path of the xml file to be parsed

    Returns a tuple ``(xml_data, xroot)``: the file content decoded as
    UTF-8 text, and the root element of the parsed tree.

    NOTE: ``etree.parse`` is the call Bandit flags as unsafe for untrusted
    XML; it is kept here unchanged.
    """
    # The raw text is only returned to the caller; the tree itself is
    # parsed from the file in a second, separate read.
    with open(xml_file, "r", encoding="utf-8") as x_file:
        xml_data = x_file.read()

    parser = etree.XMLParser(remove_blank_text=True)
    xtree = etree.parse(xml_file, parser)
    xroot = xtree.getroot()
    # iter() is the supported replacement for the deprecated getiterator().
    for elem in xroot.iter():
        if not hasattr(elem.tag, "find"):
            # Skip nodes whose tag is not a string (presumably comments and
            # processing instructions in lxml).
            continue
        idx = elem.tag.find("}")
        if idx >= 0:
            # Keep only the local name that follows "{namespace}".
            elem.tag = elem.tag[idx + 1:]
    # Drop objectify annotations and, via cleanup_namespaces, the namespace
    # declarations that are no longer used after the renaming above.
    objectify.deannotate(xroot, cleanup_namespaces=True)
    # return xml data and root node of the file
    return xml_data, xroot
1

There is 1 answer

0
Ankit Gupta On
def remove_namespace(elem):
    """
    Strip a leading ``{namespace}`` prefix from a tag or attribute name.

    elem : qualified name such as ``"{uri}local"``; despite the parameter
           name this is the name string itself, not an element object

    Returns the local part of the name. Names without a ``{...}`` prefix,
    and values that are not strings, are returned unchanged.
    """
    # Non-string names (e.g. the callable tag some parsers put on comment
    # nodes) carry no namespace to strip.
    if not isinstance(elem, str) or not elem.startswith("{"):
        return elem
    _, closing, local_name = elem.partition("}")
    # An unterminated "{..." is left exactly as it was.
    return local_name if closing else elem


def remove_all_namespaces(doc):
    """
    Strip namespace prefixes from every tag and attribute name in a tree.

    doc : xml doc element; it and all of its descendants are rewritten
          in place

    Returns the same ``doc`` object that was passed in.
    """
    for elem in doc.iter():
        # Nodes whose tag is not a string have no name to rewrite; skipping
        # them keeps one such node from aborting the walk half-way through.
        if not isinstance(elem.tag, str):
            continue
        elem.tag = remove_namespace(elem.tag)
        # Attributes that differ only by namespace collapse onto one key;
        # the one that comes last in the element wins.
        elem.attrib = {remove_namespace(key): value for key, value in elem.attrib.items()}
    return doc


def fn_read_xml_root(xml_file):
    """
    Read an XML file and return its text plus a namespace-free root node.

    xml_file : path of the xml file to be parsed

    Returns a tuple ``(xml_data, xroot)``: the file content decoded as
    UTF-8 text, and the root element of the parsed tree.

    Namespace removal is best effort: if it fails, the failure is logged
    and the root is still returned, possibly with only some of its nodes
    rewritten.

    NOTE(review): ``ET`` is presumably ``defusedxml.ElementTree`` and
    ``logging`` the standard module; neither import is shown here, so
    confirm both are in scope.
    """
    # The raw text is only returned to the caller; the tree itself is
    # parsed from the file in a second, separate read.
    with open(xml_file, "r", encoding="utf-8") as x_file:
        xml_data = x_file.read()
    xroot = ET.parse(xml_file).getroot()
    try:
        xroot = remove_all_namespaces(xroot)
    except Exception as exp:
        # Logged with traceback at ERROR level so the failure is visible
        # under the default logging configuration; parsing itself succeeded,
        # so the caller still gets the tree.
        logging.exception("XML namespace remove error %s", exp)
    # return xml data and root node of the file
    return xml_data, xroot