Parse child elements with the same name in XML

489 views Asked by At

I'm getting the following XML document from a RSS feed.

<?xml version="1.0" encoding="ISO-8859-1"?>
<?xml-stylesheet type="text/css" href="../css/rssfeed.css"?>
<rss version="2.0">
    <channel>
        <title>Agriculture Dairy Environment</title>
        <link>http://topjobs.lk/applicant/vacancybyfunctionalarea.jsp?FA=AGD</link>
        <description>Open Vacancies Under Agriculture Dairy Environment</description>
        <copyright>Copyright 2006-2014 topjobs</copyright>
        <language>en</language>
        <image>
            <title>Agriculture Dairy Environment</title>
            <url>http://topjobs.lk/images/home/skllisdb.gif</url>
            <link>http://topjobs.lk</link>
            <width>208</width>
            <height>49</height>
        </image>
        <lastBuildDate>Wed, 28 Dec 2016 19:15:15 IST</lastBuildDate>
        <item>
            <title>Study, Work and Live in Australia - 0000398608 - Morgan HR</title>
            <description>Join us to learn more about enrolling for the Feb. 2017 intake at Deakin University, Australia. See Flier for details</description>
            <link>http://topjobs.lk/applicant/vacancybyfunctionalarea.jsp?FA=AGD</link>
            <pubDate>2016-12-15</pubDate>
            <closingDate>2017-01-15</closingDate>
            <ac>0000000368</ac>
            <js>0000398608</js>
            <ec>0000000483</ec>
        </item>
        <item>
            <title>Vacancies in Call Center   Colombo   09 - 0000398679 - BLUESTEPS</title>
            <description>Call Center Associate (Male/Female)  &amp; Call Center Executive (Male / Female)</description>
            <link>http://topjobs.lk/applicant/vacancybyfunctionalarea.jsp?FA=AGD</link>
            <pubDate>2016-12-15</pubDate>
            <closingDate>2016-12-29</closingDate>
            <ac>0000000417</ac>
            <js>0000398679</js>
            <ec>0000000550</ec>
        </item>
    </channel>
</rss>

I need to parse it and extract information inside each <item></item> blocks.

There is a small issue. There are a couple of tags that have the same names outside the <item> blocks. See under the <channel> tag. Both have <title>, <link> and <description> tags.

When I try to parse it, it detects the <title> tag within the < channel > block and ignores all others! How do I get only the info inside <item></item> blocks?

I tried specifying tag names like item.title but that doesn't seem to work either. Here's what I have so far.

import Foundation

class RSSParser: NSObject, XMLParserDelegate {

    fileprivate var titleEntry: String = ""
    fileprivate var descriptionEntry: String = ""
    fileprivate var linkEntry: String = ""
    fileprivate var pubDateEntry: String = ""
    fileprivate var closingDateEntry: String = ""
    fileprivate var acEntry: String = ""
    fileprivate var jsEntry: String = ""
    fileprivate var ecEntry: String = ""
    fileprivate var currentElement: String = ""
    fileprivate var entryDictionary: [String: String] = [:]
    fileprivate var entryArray: [[String: String]] = []


    func parseURL(URL: URL) {
        let parser = XMLParser(contentsOf: URL)
        parser?.delegate = self
        parser?.parse()
    }

    func parser(_ parser: XMLParser, didStartElement elementName: String, namespaceURI: String?, qualifiedName qName: String?, attributes attributeDict: [String : String] = [:]) {
        if elementName == "item.title" {
            currentElement = "title"
        }

        if elementName == "item.description" {
            currentElement = "description"
        }

        if elementName == "item.link" {
            currentElement = "link"
        }

        if elementName == "item.pubDate" {
            currentElement = "pubDate"
        }

        if elementName == "item.closingDate" {
            currentElement = "closingDate"
        }

        if elementName == "item.ac" {
            currentElement = "ac"
        }

        if elementName == "item.js" {
            currentElement = "js"
        }

        if elementName == "item.ec" {
            currentElement = "ec"
        }
    }

    func parser(_ parser: XMLParser, foundCharacters string: String) {
        if currentElement == "item.title" {
            titleEntry = titleEntry + string
        }

        if currentElement == "item.description" {
            descriptionEntry = descriptionEntry + string
        }

        if currentElement == "item.link" {
            linkEntry = linkEntry + string
        }

        if currentElement == "item.pubDate" {
            pubDateEntry = pubDateEntry + string
        }

        if currentElement == "item.closingDate" {
            closingDateEntry = closingDateEntry + string
        }

        if currentElement == "item.ac" {
            acEntry = acEntry + string
        }

        if currentElement == "item.js" {
            jsEntry = jsEntry + string
        }

        if currentElement == "item.ec" {
            ecEntry = ecEntry + string
        }
    }

    func parser(_ parser: XMLParser, didEndElement elementName: String, namespaceURI: String?, qualifiedName qName: String?) {
        if elementName == "item.title" {
            entryDictionary["title"] = titleEntry
        }

        if elementName == "item.description" {
            entryDictionary["description"] = descriptionEntry
        }

        if elementName == "item.link" {
            entryDictionary["link"] = linkEntry
        }

        if elementName == "item.pubDate" {
            entryDictionary["pubDate"] = pubDateEntry
        }

        if elementName == "item.closingDate" {
            entryDictionary["closingDate"] = closingDateEntry
        }

        if elementName == "item.ac" {
            entryDictionary["ac"] = acEntry
        }

        if elementName == "item.js" {
            entryDictionary["js"] = jsEntry
        }

        if elementName == "item.ec" {
            entryDictionary["ec"] = ecEntry
        }

        entryArray.append(entryDictionary)
    }

    func parserDidEndDocument(_ parser: XMLParser) {
        print(#function)
        print(entryArray.count)
        print(entryArray.first)
    }
}
0

There are 0 answers