Word to XSL-FO conversion using Apache POI HWPF

2.3k views Asked by At

How do I convert a .doc file to XSL-FO using the hwpf.converter.WordToFoConverter class? I have tried searching, but I could only find examples of Word-to-HTML conversion. I have also read the WordToFoConverter documentation on the Apache POI site, but could not work out how to use it.

Convert Word to HTML with Apache POI

I have tried to convert .doc to .fo using the following code, but after using Apache FOP to render the .fo file to .png, the images present in the Word file are missing from the output.

package word2fo;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;

import javax.swing.text.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.WordToFoConverter;
import org.apache.poi.hwpf.converter.WordToFoUtils;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.w3c.dom.Node;

/**
 * Converts a legacy binary Word ({@code .doc}) file to XSL-FO and prints the
 * resulting XML on standard output.
 *
 * <p>Usage: {@code java word2fo.Doc2Fo [path-to-doc]}. When no path is given,
 * {@value #DEFAULT_INPUT} is used.
 *
 * <p>NOTE(review): no pictures manager is registered on the converter, which
 * is presumably why embedded images are missing from the generated FO. See
 * the Apache POI documentation for {@code setPicturesManager} on the word
 * converters -- confirm the exact callback signature for the POI version in
 * use before wiring one in.
 */
public class Doc2Fo {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT = "D:\\Magna.doc";

    /** Encoding used both to serialize the FO document and to decode it back. */
    private static final String OUTPUT_ENCODING = "UTF-8";

    /**
     * Loads the Word document, converts it to XSL-FO and prints the XML.
     *
     * @param args optional; {@code args[0]} is the path of the {@code .doc}
     *             file to convert
     * @throws Exception if the file cannot be read, the conversion fails, or
     *                   the FO document cannot be serialized
     */
    public static void main(String[] args) throws Exception {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;

        // Progress markers go to stderr so that stdout carries only the FO XML.
        System.err.println("reached 1");
        HWPFDocumentCore wordDocument;
        // Close the file handle as soon as loading is done.
        // NOTE(review): assumes loadDoc has fully read the stream when it
        // returns -- confirm for the POI version in use.
        try (FileInputStream in = new FileInputStream(inputPath)) {
            wordDocument = WordToFoUtils.loadDoc(in);
        }

        System.err.println("reached 2");
        WordToFoConverter wordToFoConverter = new WordToFoConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

        System.err.println("reached 3");
        wordToFoConverter.processDocument(wordDocument);
        org.w3c.dom.Document foDocument = wordToFoConverter.getDocument();

        System.err.println("reached 4");
        System.out.println(serialize(foDocument));
    }

    /**
     * Serializes a DOM document to an indented XML string.
     *
     * @param document the DOM tree to serialize
     * @return the XML text of {@code document}
     * @throws Exception if the transformer cannot be created or the
     *                   transformation fails; failures are propagated rather
     *                   than printed and ignored
     */
    private static String serialize(org.w3c.dom.Document document) throws Exception {
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, OUTPUT_ENCODING);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        serializer.transform(new DOMSource(document), new StreamResult(out));

        // Decode with the same charset the serializer wrote, not the platform default.
        return out.toString(OUTPUT_ENCODING);
    }
}
1

There is 1 answer

0
Diogo On
    // Fragment of a method body: the enclosing method header and the
    // definitions of sampleFileName, POIDataSamples and XMLHelper are not shown.
    // Open the named sample resource and load it as a binary Word document.
    HWPFDocument hwpfDocument = new HWPFDocument(POIDataSamples.getDocumentInstance().openResourceAsStream(sampleFileName));

    // Create a converter backed by a new, empty DOM document, then run the conversion.
    WordToFoConverter wordToFoConverter = new WordToFoConverter(XMLHelper.getDocumentBuilderFactory().newDocumentBuilder().newDocument());
    wordToFoConverter.processDocument(hwpfDocument);

    // Collect the serialized XML in memory as characters.
    StringWriter stringWriter = new StringWriter();

    // A transformer created without a stylesheet; used here to write the DOM out as indented text.
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    transformer.setOutputProperty(OutputKeys.INDENT, "yes");
    transformer.transform(new DOMSource(wordToFoConverter.getDocument()), new StreamResult(stringWriter));

    // Return the converter's document as a string.
    String result = stringWriter.toString();
    return result;