solr import files from multiple dataSource entity

525 views Asked by At

I am trying to import files from multiple folders.

My solrconfig.xml invokes the following file to use it with org.apache.solr.handler.dataimport.DataImportHandler.

<dataConfig>  
    <dataSource type="BinFileDataSource" />
        <document>
            <entity name="files1"
                    dataSource="null"
                    rootEntity="false"
                    processor="FileListEntityProcessor"
                    baseDir="/w/PDF/"
                    fileName=".*\.(pdf)|(doc)|(docx)|(ppt)|(pptx)|(xls)|(xlsx)|(odf)|(txt)|(rtf)|(html)|(htm)|(jpg)"
                    onError="skip"
                    recursive="true">

                <field column="fileAbsolutePath" name="id" />
                <field column="fileSize" name="size" />
                <field column="fileLastModified" name="lastModified" />
                <field column="file" name="fileName"/>

                <entity
                    name="documentImport1"
                    processor="TikaEntityProcessor"
                    url="${files.fileAbsolutePath}"
                    format="text">
                    <field column="file" name="fileName"/>
                    <field column="Author" name="author" meta="true"/>
                    <field column="title" name="title" meta="true"/>
                    <field column="text" name="text"/>
                    <copyField source="content" dest="text"/>

                </entity>
            </entity>

            <entity name="files2"
                    dataSource="null"
                    rootEntity="false"
                    processor="FileListEntityProcessor"
                    baseDir="/w/KNOW-HOW/"
                    fileName=".*\.(pdf)|(doc)|(docx)|(ppt)|(pptx)|(xls)|(xlsx)|(odf)|(txt)|(rtf)|(html)|(htm)|(jpg)"
                    onError="skip"
                    recursive="true">

                <field column="fileAbsolutePath" name="id" />
                <field column="fileSize" name="size" />
                <field column="fileLastModified" name="lastModified" />
                <field column="file" name="fileName"/>

                <entity
                    name="documentImport2"
                    processor="TikaEntityProcessor"
                    url="${files.fileAbsolutePath}"
                    format="text">
                    <field column="file" name="fileName"/>
                    <field column="Author" name="author" meta="true"/>
                    <field column="title" name="title" meta="true"/>
                    <field column="text" name="text"/>
                    <copyField source="content" dest="text"/>

                </entity>
            </entity>
        </document> 
</dataConfig>  

During import I get a FileNotFoundException.

What am I missing?

1

There are 1 answers

0
javaLover On

Just change the code into this in the second entity: ${files.fileAbsolutePath} to ${files2.fileAbsolutePath} .