Parse special characters (&lt;, &gt;, etc.) in SAX

1.9k views Asked by At

I am trying to parse an XML document that includes special characters, such as "//gpa[.<2.0]", in some of its nodes. However, the parser reads only part of the node's content, such as "//gpa[.", stopping where the special character is located. I have tried different approaches found online but still get the same result. How can I read the whole content of the node?

Here is the relevant part of my code; thank you in advance:

package temp;

import java.util.*;
import java.lang.Object.*;
import org.xml.sax.*;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.SAXParser;
import org.apache.commons.lang.StringEscapeUtils;

/**
 * SAX-style callbacks written as static methods, plus a {@code main} that
 * parses "authorization.xml".
 *
 * NOTE(review): {@code main} hands the parser a {@code LoginHandlerBase}, which is
 * not shown in this snippet; presumably that handler forwards events to the
 * static methods below -- confirm against its source.
 * NOTE(review): {@code factory}, {@code LoginInterface} and {@code IOException} are
 * not declared or imported in the visible snippet.
 */
public class Login implements LoginInterface{

// Name of the most recently started element (set in startElement).
public static String elementName="";    
// Text accumulated since the most recent startElement call.
public static StringBuilder chars = new StringBuilder();

/** Start-of-document callback; intentionally does nothing. */
public static void startDocument ()
throws SAXException, IOException
{
}

/** End-of-document callback; intentionally does nothing. */
public static void endDocument ()
throws SAXException
{
}

/**
 * Appends the reported chunk of character data to {@code chars} and, while the
 * current element is "object", prints everything buffered so far.
 *
 * A SAX parser may deliver the text of one element through several calls to
 * this method, so the line printed here can be only a leading part of the
 * element's full text.
 *
 * @param buf    array holding the character data
 * @param offset index of the first character of this chunk in {@code buf}
 * @param len    number of characters in this chunk
 */
public static void characters (char buf [], int offset, int len)
throws SAXException
{
    chars.append(new String(buf , offset, len));                
        if(elementName.equals("object"))
            {                    
            //tempObjectStr += org.apache.commons.lang.StringEscapeUtils.escapeXml( new String(buf, offset, len));
            //characters(s.toCharArray(),0,s.length());                                            
            // Runs once per chunk, i.e. possibly before all of the element's text has arrived.
            System.out.println( "objectNodeContent: " + chars.toString());            
            }
}

/**
 * Clears the text buffer and records {@code name} as the current element.
 *
 * @param uri       namespace URI reported by the parser (unused)
 * @param localname local name reported by the parser (unused)
 * @param name      element name; stored in {@code elementName}
 * @param attrs     attributes of the element (unused)
 */
public static void startElement (String uri, String localname, String name, Attributes attrs)
throws SAXException
 {
    chars.setLength(0);
    //nodeContent = "";
    elementName=name;       
 }

/**
 * End-of-element callback; intentionally does nothing, so {@code elementName}
 * keeps its value after the element has closed.
 */
public static void endElement (String uri, String localName, String name)
throws SAXException
{        
}    

/**
 * Parses "authorization.xml" with a new {@code LoginHandlerBase} and prints any
 * exception as "Error:" followed by the exception.
 */
public static void main(String args[]) {    
    try
    {            
         // NOTE(review): `factory` is not defined in the visible code -- presumably a SAXParserFactory; confirm.
         SAXParser saxParser2 = factory.newSAXParser();
         saxParser2.parse( "authorization.xml",new LoginHandlerBase());             
    }
    catch(Exception e)
    {
        System.out.println("Error:"+e);
    }
}
}

And here is part of Authorization.xml:

<rules>
    <rule>
        <role>staff</role>
        <object>/department/gradstudent/address</object>        
        <action>Read</action>
        <type>R</type>
    </rule>
    <rule>
        <role>staff</role>
        <object>//gpa[.&lt;2.0]</object>        
        <action>Read</action>
        <type>L</type>
    </rule>
</rules>

The output looks like:

objectNodeContent: /department/gradstudent/address
objectNodeContent: //gpa[.
1

There is 1 answer

0
Santhosh Kumar Tekuri On

see javadoc

As Michael Kay said, the character data might arrive in several chunks, so your ContentHandler should collect them. The code below shows how to do this:

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import javax.xml.parsers.SAXParserFactory;
import java.io.File;

/**
 * SAX handler that prints the complete text of every {@code object} element.
 *
 * <p>Character data is buffered across {@link #characters} calls and only
 * printed once the element's end tag is reported, so text that the parser
 * delivers in several chunks is still emitted as a single line.
 */
public class MySAXHandler extends DefaultHandler{
    /** Text collected since the last element start or end event. */
    private final StringBuilder text = new StringBuilder();

    /** Discards any text gathered so far. */
    private void resetBuffer() {
        text.setLength(0);
    }

    /** Starts a fresh buffer for the element that is opening. */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException{
        resetBuffer();
    }

    /** Appends the reported chunk of character data to the buffer. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException{
        text.append(ch, start, length);
    }

    /**
     * Prints the buffered text when the closing element's local name is
     * {@code object}, then empties the buffer.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException{
        boolean isObjectElement = localName.equals("object");
        if (isObjectElement) {
            System.out.println("objectNodeContent: " + text);
        }
        resetBuffer();
    }

    /**
     * Parses {@code Authorization.xml} with a namespace-aware SAX parser,
     * using a new instance of this handler.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the parser cannot be created or parsing fails
     */
    public static void main(String[] args) throws Exception{
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        // Namespace awareness makes the parser supply localName, which endElement tests.
        parserFactory.setNamespaceAware(true);
        File rulesFile = new File("Authorization.xml");
        MySAXHandler handler = new MySAXHandler();
        parserFactory.newSAXParser().parse(rulesFile, handler);
    }
}

The output of the above code is:

objectNodeContent: /department/gradstudent/address
objectNodeContent: //gpa[.<2.0]