Stax transform seems not working when nodes are juxtaposed

59 views Asked by At

I'd like to split a big XML file into many parts using StAX technology, based on a split-basis node.
The problem is when split-basis nodes are juxtaposed (no whitespace, no tab and no breakline between them). Those seems not be read by the parser while transform instruction. However when I put in comment transform instruction, those nodes are correctly read and outputted into the console.

Bellow XML sample. Split-basis is AB node.

< ?xml version="1.0" encoding="UTF-8"?>
< root>
    < AB Id="1">< BC attB="valB1">b1< /BC>< CD attC="valC1">< EF attE="valD1">c1< /EF>< /CD>< /AB>< AB Id="2">< BC attB="valB2">b2< /BC>< CD attC="valC2">< EF attE="valD2">c2< /EF>< /CD>< /AB>
    < AB Id="3">
        < BC attB="valB3">b3< /BC>
        < CD attC="valC3">
            < EF attE="valD3">c3< /EF>
        < /CD>
    < /AB>
< /root>

The expected output should be 3 files named Part_1.xml, Part_2.xml and Part_3.xml. Each file should respectively contain < AB Id="1"> and its sub-tags, < AB Id="2"> and its sub-tags and < AB Id="3"> and its sub-tags. All of them should also have < root > node parent.

Unfortunately, I only obtain Part_1.xml and Part_2.xml files. Inside Part_1.xml I get < AB id = "1"> and its sub-tags. But inside Part_2.xml, I get < AB id = "3"> and its sub-tags instead of < AB id = "2"> and its sub-tags. < AB id = "2"> and its sub-tags are not written.

When I put in comment transformer.transform(staxs, staxr); line. StreamReader reads correclty < AB id = "2">.

code:

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.stream.events.XMLEvent;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stax.StAXResult;
import javax.xml.transform.stax.StAXSource;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;

@Component
final class Split {
    private static final Logger LOG = LoggerFactory.getLogger(Split.class);
    private static final String inputXmlFile = "Big xml file to split. Above a dummy sample";
    private static final String pathToOutputFolder = System.getProperty("java.io.tmpdir");
    private static final String[] parentTags = new String[] {"<root>"};
    private static final String splitTag = "<AB>";
    private static final String chunkNumber = "1";
    public void run() {
        final Transformer transformer;
        XMLStreamReader xsr = null;
        XMLStreamWriter xsw = null;
        try {
            transformer = TransformerFactory.newInstance().newTransformer();
            final XMLInputFactory xif = XMLInputFactory.newInstance();
            xsr = xif.createXMLStreamReader(new FileInputStream(inputXmlFile));
            Short fileNumber = 0;
            Short dataRepetitions = 0;
            xsw = write(pathToOutputFolder, ++fileNumber, parentTags);
            int tagCount = 0;
            while (xsr.hasNext()) {
                xsr.next();
                if (xsr.getEventType() == XMLEvent.START_ELEMENT) {
                    tagCount++;
                    System.out.println("Tag _" + tagCount + ": " + xsr.getLocalName());
                    if (xsr.getLocalName().equals(splitTag)) {
                        System.out.println(xsr.getLocalName() + ": [" + xsr.getAttributeLocalName(0) + ", " + xsr.getAttributeValue(0) + "]");
                        if (dataRepetitions.equals(1)) {
                            xsw.flush();
                            xsw.writeEndDocument();
                            xsw.close();
                            xsw = write(pathToOutputFolder, ++fileNumber, parentTags);
                            dataRepetitions = 0;
                        }
                        final StAXSource staxs = new StAXSource(xsr);
                        final StAXResult staxr = new StAXResult(xsw);
                        transformer.transform(staxs, staxr);
                        dataRepetitions++;
                    }
                }
            }
        } catch (final TransformerException | FileNotFoundException | XMLStreamException e) {
            throw new SplitXmlRuntimeException(e.getMessage());
        } finally {
            try {
                xsr.close();
                if (xsw != null) {
                    xsw.flush();
                    xsw.writeEndDocument();
                    xsw.close();
                }
            } catch (final XMLStreamException e) {
                LOG.error(e.getMessage());
            }
        }
    }
    
    private XMLStreamWriter write(final String pathToOutputFolder, final Short fileNumber, final String[] rootTags) throws XMLStreamException, FileNotFoundException  {
        XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance();
        XMLStreamWriter writer = xmlOutputFactory.createXMLStreamWriter(new FileOutputStream(new File(pathToOutputFolder, "Part_" + fileNumber), true));
        writer.writeStartDocument();
        for (final String s : rootTags) {
            writer.writeStartElement(s);
        }
        return writer;
    }
}
0

There are 0 answers