How to use Libxml2 to parse data from XML? LINUX + C Code

2k views Asked by At

Read xml file using libxml2

I want to parse all the data from an XML file and store it in a structure so that I can use that data wherever I need it in my application. I am using the libxml2 library to parse the data, and this is the simplest code for reading the data from the XML file.

  • OS -> Ubuntu LINUX
  • Lang -> C / C++
1

There is 1 answer

1
Omkar On
  • XML File :- (Raw xml file for understanding)

tmp.xml

<?xml version="1.0" encoding="windows-1252"?>
<firewall-rules>
    <rule name="Telnet &amp; Secure Shell" active="0" action="1" protocol="3" interface="0" enable-process-icmp="0" enable-trusted-mac="0" enable-log="0">
        <ip-source-address type="4" address-1="0.0.0.0" address-2="0.0.0.0"/>
        <ip-source-port type="0" count="0"/>
        <ip-destination-address type="3" address-1="0.0.0.0" address-2="0.0.0.0"/>
        <ip-destination-port type="3" count="2">
            <port>22</port>
            <port>23</port>
        </ip-destination-port>
        <process-icmp>
            <destination-unreachableI>False</destination-unreachableI>
            <echo-replyI>False</echo-replyI>
            <echo-requestI>False</echo-requestI>
            <information-replyI>False</information-replyI>
            <information-requestI>False</information-requestI>
            <parameter-problemI>False</parameter-problemI>
            <redirectI>False</redirectI>
            <source-quenchI>False</source-quenchI>
            <ttl-expiredI>False</ttl-expiredI>
            <destination-unreachableO>False</destination-unreachableO>
            <echo-replyO>False</echo-replyO>
            <echo-requestO>False</echo-requestO>
            <information-replyO>False</information-replyO>
            <information-requestO>False</information-requestO>
            <parameter-problemO>False</parameter-problemO>
            <redirectO>False</redirectO>
            <source-quenchO>False</source-quenchO>
            <ttl-expiredO>False</ttl-expiredO>
        </process-icmp>
    </rule>
</firewall-rules> 
  1. You need to install the libxml2 development package on your system:

sudo apt install libxml2-dev

Code -> main.c

#include <stdio.h>
#include <unistd.h>
#include <libxml/parser.h>
#include <unistd.h>
#include <string.h>
/*
 * One firewall rule as read from the XML file. Every value is kept as the
 * NUL-terminated text found in the document. `obj` is the single file-scope
 * instance that the functions in this file fill, print and clear.
 */
struct xmlRuleData {
    /* Attributes of the <rule> element. */
    char rule_name[200];
    char rule_active[5];
    char rule_action[5];
    char rule_protocol[5];
    char rule_interface[5];
    char rule_enableProcessIcmp[5];
    char rule_enableTrustedMac[200];
    char rule_enableLog[200];

    /* Attributes of <ip-source-address>. */
    char ipSourceAddress_type[5];
    char ipSourceAddress_address1[200];
    char ipSourceAddress_address2[200];

    /* Attributes of <ip-source-port>. */
    char ipSourcePort_type[5];
    char ipSourcePort_count[5];

    /* Attributes of <ip-destination-address>. */
    char ipDestinationAddress_type[5];
    char ipDestinationAddress_address1[200];
    char ipDestinationAddress_address2[200];

    /* Attributes of <ip-destination-port>; _port accumulates the text of
     * every <port> child as "<value>, <value>, ". */
    char ipDestinationPort_type[5];
    char ipDestinationPort_count[5];
    char ipDestinationPort_port[1024];

    /* Text of the <process-icmp> children whose tag ends in "I". */
    char processIcmp_destinationUnreachableI[10];
    char processIcmp_echoReplyI[10];
    char processIcmp_echoRequestI[10];
    char processIcmp_informationReplyI[10];
    char processIcmp_informationRequestI[10];
    char processIcmp_parameterProblemI[10];
    char processIcmp_redirectI[10];
    char processIcmp_sourceQuenchI[10];
    char processIcmp_ttlExpiredI[10];

    /* Text of the <process-icmp> children whose tag ends in "O". */
    char processIcmp_destinationUnreachableO[10];
    char processIcmp_echoReplyO[10];
    char processIcmp_echoRequestO[10];
    char processIcmp_informationReplyO[10];
    char processIcmp_informationRequestO[10];
    char processIcmp_parameterProblemO[10];
    char processIcmp_redirectO[10];
    char processIcmp_sourceQuenchO[10];
    char processIcmp_ttlExpiredO[10];
} obj;

/* Path of the XML file to read. */
char *fileName = "/home/tmp.xml";

/* Tags of the leaf elements whose text content is stored in the rule record:
 * the eighteen <process-icmp> children followed by <port>. */
char subAtt[19][1024] = {
    "destination-unreachableI",
    "echo-replyI",
    "echo-requestI",
    "information-replyI",
    "information-requestI",
    "parameter-problemI",
    "redirectI",
    "source-quenchI",
    "ttl-expiredI",
    "destination-unreachableO",
    "echo-replyO",
    "echo-requestO",
    "information-replyO",
    "information-requestO",
    "parameter-problemO",
    "redirectO",
    "source-quenchO",
    "ttl-expiredO",
    "port"
};

/* Attribute names read from every <rule> element. */
char ruleAttribute[8][1024] = {
    "name",
    "active",
    "action",
    "protocol",
    "interface",
    "enable-process-icmp",
    "enable-trusted-mac",
    "enable-log"
};

/* Attribute names looked up on the ip-* address and port elements. */
char ipLeafAttributes[4][1024] = {
    "type",
    "address-1",
    "address-2",
    "count"
};

/*
 * Resets the global rule record `obj` to all-zero bytes, i.e. every field
 * becomes the empty string.
 *
 * Despite the name, nothing is deallocated: `obj` is a file-scope object made
 * only of fixed-size char arrays. Clearing matters in particular for
 * ipDestinationPort_port, which is built by appending and would otherwise
 * carry one rule's ports into the next.
 *
 * A single memset over the whole object replaces the previous per-field
 * bzero() calls; it touches exactly the same bytes and only needs <string.h>.
 */
void freeStructData(void){
    memset(&obj, 0, sizeof obj);
}


/*
 * Tells whether `node` has no element children.
 *
 * Returns 0 as soon as one direct child of type XML_ELEMENT_NODE is found,
 * and 1 otherwise (no children at all, or only non-element children such as
 * text). `node` must not be NULL.
 */
int is_leaf(xmlNode * node){
    for (xmlNode *kid = node->children; kid != NULL; kid = kid->next) {
        if (kid->type == XML_ELEMENT_NODE) {
            return 0;
        }
    }
    return 1;
}

/* Copies src into dst[cap], truncating if necessary; dst is always NUL-terminated. */
static void storeField(char *dst, size_t cap, const xmlChar *src)
{
    snprintf(dst, cap, "%s", (const char *)src);
}

/* Appends "<src>, " to the string already held in dst[cap] without writing past cap. */
static void appendField(char *dst, size_t cap, const xmlChar *src)
{
    size_t used = strlen(dst);

    if (used < cap) {
        snprintf(dst + used, cap - used, "%s, ", (const char *)src);
    }
}

/* Expands to the "destination, capacity" pair for one member of `obj`. */
#define RULE_FIELD(member) obj.member, sizeof obj.member

/*
 * Stores one value read from the XML tree into the matching field of the
 * global rule record `obj`.
 *
 *   str     - value to store (attribute value or element text). NULL is
 *             ignored so a missing attribute leaves the field untouched.
 *   node    - element the value came from; its tag selects the field group.
 *   attName - attribute name the value belongs to. Only consulted for the
 *             <rule> and ip-* elements; for <port> and the <process-icmp>
 *             children the value is stored whatever attName is.
 *
 * <port> values are appended to ipDestinationPort_port as "<value>, "; every
 * other field is overwritten. Values longer than the destination field are
 * truncated instead of overflowing it. Unknown tag/attribute combinations are
 * silently ignored.
 */
void insertIntoStruct(xmlChar * str, xmlNode * node, char *attName){
    /* attribute == NULL means "store regardless of attName". */
    static const struct {
        const char *element;
        const char *attribute;
        char *dest;
        size_t cap;
    } fields[] = {
        { "rule", "name",                RULE_FIELD(rule_name) },
        { "rule", "active",              RULE_FIELD(rule_active) },
        { "rule", "action",              RULE_FIELD(rule_action) },
        { "rule", "protocol",            RULE_FIELD(rule_protocol) },
        { "rule", "interface",           RULE_FIELD(rule_interface) },
        { "rule", "enable-process-icmp", RULE_FIELD(rule_enableProcessIcmp) },
        { "rule", "enable-trusted-mac",  RULE_FIELD(rule_enableTrustedMac) },
        { "rule", "enable-log",          RULE_FIELD(rule_enableLog) },

        { "ip-source-address", "type",      RULE_FIELD(ipSourceAddress_type) },
        { "ip-source-address", "address-1", RULE_FIELD(ipSourceAddress_address1) },
        { "ip-source-address", "address-2", RULE_FIELD(ipSourceAddress_address2) },

        { "ip-source-port", "type",  RULE_FIELD(ipSourcePort_type) },
        { "ip-source-port", "count", RULE_FIELD(ipSourcePort_count) },

        { "ip-destination-address", "type",      RULE_FIELD(ipDestinationAddress_type) },
        { "ip-destination-address", "address-1", RULE_FIELD(ipDestinationAddress_address1) },
        { "ip-destination-address", "address-2", RULE_FIELD(ipDestinationAddress_address2) },

        { "ip-destination-port", "type",  RULE_FIELD(ipDestinationPort_type) },
        { "ip-destination-port", "count", RULE_FIELD(ipDestinationPort_count) },

        { "destination-unreachableI", NULL, RULE_FIELD(processIcmp_destinationUnreachableI) },
        { "echo-replyI",              NULL, RULE_FIELD(processIcmp_echoReplyI) },
        { "echo-requestI",            NULL, RULE_FIELD(processIcmp_echoRequestI) },
        { "information-replyI",       NULL, RULE_FIELD(processIcmp_informationReplyI) },
        { "information-requestI",     NULL, RULE_FIELD(processIcmp_informationRequestI) },
        { "parameter-problemI",       NULL, RULE_FIELD(processIcmp_parameterProblemI) },
        { "redirectI",                NULL, RULE_FIELD(processIcmp_redirectI) },
        { "source-quenchI",           NULL, RULE_FIELD(processIcmp_sourceQuenchI) },
        { "ttl-expiredI",             NULL, RULE_FIELD(processIcmp_ttlExpiredI) },
        { "destination-unreachableO", NULL, RULE_FIELD(processIcmp_destinationUnreachableO) },
        { "echo-replyO",              NULL, RULE_FIELD(processIcmp_echoReplyO) },
        { "echo-requestO",            NULL, RULE_FIELD(processIcmp_echoRequestO) },
        { "information-replyO",       NULL, RULE_FIELD(processIcmp_informationReplyO) },
        { "information-requestO",     NULL, RULE_FIELD(processIcmp_informationRequestO) },
        { "parameter-problemO",       NULL, RULE_FIELD(processIcmp_parameterProblemO) },
        { "redirectO",                NULL, RULE_FIELD(processIcmp_redirectO) },
        { "source-quenchO",           NULL, RULE_FIELD(processIcmp_sourceQuenchO) },
        { "ttl-expiredO",             NULL, RULE_FIELD(processIcmp_ttlExpiredO) },
    };

    if (str == NULL || node == NULL) {
        return;
    }

    /* <port> is the only field that accumulates instead of being replaced. */
    if (xmlStrcmp(node->name, (const xmlChar *)"port") == 0) {
        appendField(obj.ipDestinationPort_port, sizeof obj.ipDestinationPort_port, str);
        return;
    }

    for (size_t i = 0; i < sizeof fields / sizeof fields[0]; i++) {
        if (xmlStrcmp(node->name, (const xmlChar *)fields[i].element) != 0) {
            continue;
        }
        if (fields[i].attribute != NULL &&
            (attName == NULL || strcmp(attName, fields[i].attribute) != 0)) {
            continue;
        }
        storeField(fields[i].dest, fields[i].cap, str);
        return;
    }
}

#undef RULE_FIELD

void printStruct(){
    printf("\nrule_name->> %s", obj.rule_name);
    printf("\nrule_active -->> %s",obj.rule_active);
    printf("\nrule_action -->> %s",obj.rule_action);
    printf("\nrule_action -->> %s",obj.rule_action);
    printf("\nrule_protocol -->> %s",obj.rule_protocol);
    printf("\nrule_interface -->> %s",obj.rule_interface);
    printf("\nrule_enableProcessIcmp -->> %s",obj.rule_enableProcessIcmp);
    printf("\nrule_enableTrustedMac -->> %s",obj.rule_enableTrustedMac);
    printf("\nrule_enableLog -->> %s",obj.rule_enableLog);
    printf("\nipSourceAddress_type -->> %s",obj.ipSourceAddress_type);
    printf("\nipSourceAddress_address1 -->> %s",obj.ipSourceAddress_address1);
    printf("\nipSourceAddress_address2 -->> %s",obj.ipSourceAddress_address2);
    printf("\nipSourcePort_type-->> %s",obj.ipSourcePort_type);
    printf("\nipSourcePort_count-->> %s",obj.ipSourcePort_count);
    printf("\nipDestinationAddress_type-->> %s",obj.ipDestinationAddress_type);
    printf("\nipDestinationAddress_address1-->> %s",obj. ipDestinationAddress_address1);
    printf("\nipDestinationAddress_address2-->> %s",obj.ipDestinationAddress_address2);
    printf("\nipDestinationPort_type -->> %s",obj.ipDestinationPort_type);
    printf("\nipDestinationPort_count -->> %s",obj.ipDestinationPort_count);
    printf("\nipDestinationPort_port -->> %s",obj.ipDestinationPort_port);
    printf("\nprocessIcmp_destinationUnreachableI -->> %s",obj.processIcmp_destinationUnreachableI);
    printf("\nprocessIcmp_echoReplyI -->> %s",obj.processIcmp_echoReplyI);
    printf("\nprocessIcmp_echoRequestI -->> %s",obj.processIcmp_echoRequestI);
    printf("\nprocessIcmp_informationReplyI -->> %s",obj.processIcmp_informationReplyI);
    printf("\nprocessIcmp_informationRequestI-->> %s",obj.processIcmp_informationRequestI);
    printf("\nprocessIcmp_parameterProblemI -->> %s",obj.processIcmp_parameterProblemI);
    printf("\nprocessIcmp_redirectI -->> %s",obj.processIcmp_redirectI);
    printf("\nprocessIcmp_sourceQuenchI -->> %s",obj.processIcmp_sourceQuenchI);
    printf("\nprocessIcmp_ttlExpiredI -->> %s",obj.processIcmp_ttlExpiredI);
    printf("\nprocessIcmp_destinationUnreachableO -->> %s",obj.processIcmp_destinationUnreachableO);
    printf("\nprocessIcmp_echoReplyO -->> %s",obj.processIcmp_echoReplyO);
    printf("\nprocessIcmp_echoRequestO -->> %s",obj.processIcmp_echoRequestO);
    printf("\nprocessIcmp_informationReplyO-->> %s",obj.processIcmp_informationReplyO);
    printf("\nprocessIcmp_informationRequestO -->> %s",obj.processIcmp_informationRequestO);
    printf("\nprocessIcmp_parameterProblemO -->> %s",obj.processIcmp_parameterProblemO);
    printf("\nprocessIcmp_redirectO -->> %s",obj.processIcmp_redirectO);
    printf("\nprocessIcmp_sourceQuenchO -->> %s",obj.processIcmp_sourceQuenchO);
    printf("\nprocessIcmp_ttlExpiredO -->> %s",obj.processIcmp_ttlExpiredO);

}
/*
 * Stores every attribute of `node` that is listed in names[0..count-1] via
 * insertIntoStruct(). Attributes the element does not carry are skipped, and
 * each value returned by xmlGetProp() is released with xmlFree().
 */
static void storeAttributes(xmlNode *node, char names[][1024], size_t count)
{
    for (size_t i = 0; i < count; i++) {
        xmlChar *value = xmlGetProp(node, (const xmlChar *)names[i]);

        if (value != NULL) {
            insertIntoStruct(value, node, names[i]);
            xmlFree(value);
        }
    }
}

/*
 * If the tag of `node` is listed in subAtt, stores the element's text content
 * via insertIntoStruct(). Elements without text are skipped.
 */
static void storeLeafText(xmlNode *node)
{
    for (size_t i = 0; i < sizeof subAtt / sizeof subAtt[0]; i++) {
        if (xmlStrcmp(node->name, (const xmlChar *)subAtt[i]) != 0) {
            continue;
        }
        /* node->doc is the document the node belongs to; no re-parse needed. */
        xmlChar *text = xmlNodeListGetString(node->doc, node->children, 1);
        if (text != NULL) {
            insertIntoStruct(text, node, subAtt[i]);
            xmlFree(text);
        }
        return;
    }
}

/*
 * Walks `node` and all of its following siblings depth-first, filling the
 * global rule record and printing it once per <rule> element.
 *
 * For each element:
 *   - <rule>: prints the start marker and stores the ruleAttribute values;
 *   - a leaf element, or <ip-destination-port>: stores the ipLeafAttributes
 *     values that are present; a leaf additionally has its text stored when
 *     its tag is listed in subAtt.
 * After the children of a <rule> have been visited the record is printed,
 * cleared with freeStructData() and the end marker is written. Tying this to
 * the end of <rule> rather than to one particular child element means a rule
 * is printed exactly once regardless of which children it has.
 *
 * A NULL `node` is a no-op.
 */
void print_xml(xmlNode * node){
    for (; node != NULL; node = node->next) {
        int isRule = 0;

        if (node->type == XML_ELEMENT_NODE) {
            int leaf = is_leaf(node);

            isRule = (xmlStrcmp(node->name, (const xmlChar *)"rule") == 0);
            if (isRule) {
                printf("\n <Start of the rule> \n");
                storeAttributes(node, ruleAttribute,
                                sizeof ruleAttribute / sizeof ruleAttribute[0]);
            } else if (leaf ||
                       xmlStrcmp(node->name, (const xmlChar *)"ip-destination-port") == 0) {
                storeAttributes(node, ipLeafAttributes,
                                sizeof ipLeafAttributes / sizeof ipLeafAttributes[0]);
                if (leaf) {
                    storeLeafText(node);
                }
            }
        }

        print_xml(node->children);

        if (isRule) {
            printStruct();
            freeStructData();
            printf("\n <End of the rule> \n");
        }
    }
}

/*
 * Parses the XML file named by fileName and prints every rule it contains.
 *
 * Returns 0 on success and 1 when the file cannot be parsed or has no root
 * element; in both failure cases a message is written to stderr.
 */
int main(void){
    int status = 0;
    xmlDoc *doc = xmlReadFile(fileName, NULL, 0);

    if (doc == NULL) {
        fprintf(stderr, "Could not parse the XML file %s\n", fileName);
        xmlCleanupParser();
        return 1;
    }

    xmlNode *root_element = xmlDocGetRootElement(doc);
    if (root_element == NULL) {
        fprintf(stderr, "The XML file %s has no root element\n", fileName);
        status = 1;
    } else {
        print_xml(root_element);
    }

    /* Release the tree and the parser's global state on every path. */
    xmlFreeDoc(doc);
    xmlCleanupParser();
    return status;
}

  • Command

gcc main.c -I/usr/include/libxml2 -lxml2 -o tmp

  • Output
<Start of the rule> 
rule_name->> Telnet & Secure Shell
rule_active -->> 0
rule_action -->> 1
rule_action -->> 1
rule_protocol -->> 3
rule_interface -->> 0
rule_enableProcessIcmp -->> 0
rule_enableTrustedMac -->> 0
rule_enableLog -->> 0
ipSourceAddress_type -->> 4
ipSourceAddress_address1 -->> 0.0.0.0
ipSourceAddress_address2 -->> 0.0.0.0
ipSourcePort_type-->> 0
ipSourcePort_count-->> 0
ipDestinationAddress_type-->> 3
ipDestinationAddress_address1-->> 0.0.0.0
ipDestinationAddress_address2-->> 0.0.0.0
ipDestinationPort_type -->> 3
ipDestinationPort_count -->> 2
ipDestinationPort_port -->> 22, 23, 
processIcmp_destinationUnreachableI -->> False
processIcmp_echoReplyI -->> False
processIcmp_echoRequestI -->> False
processIcmp_informationReplyI -->> False
processIcmp_informationRequestI-->> False
processIcmp_parameterProblemI -->> False
processIcmp_redirectI -->> False
processIcmp_sourceQuenchI -->> False
processIcmp_ttlExpiredI -->> False
processIcmp_destinationUnreachableO -->> False
processIcmp_echoReplyO -->> False
processIcmp_echoRequestO -->> False
processIcmp_informationReplyO-->> False
processIcmp_informationRequestO -->> False
processIcmp_parameterProblemO -->> False
processIcmp_redirectO -->> False
processIcmp_sourceQuenchO -->> False
processIcmp_ttlExpiredO -->> False
<End of the rule>

This is how you can read data from an XML file using the libxml2 parser and store it in a structure. To use the data elsewhere, pass the address of the structure object to any function that needs it.