XQuery in eXist-DB: not using index

60 views Asked by At

I'm trying to transform multiple METS-XML files by selecting "issues" via this XQuery:

(:  1:) xquery version "3.1";
(:  2:) 
(:  3:) import module namespace safename="http://my-xquery-module.org/safeName" at "xmldb:exist:///db/apps/digiZeitung/safeName.xqm";
(:  4:) 
(:  5:) declare namespace mets="http://www.loc.gov/METS/";
(:  6:) declare namespace mods="http://www.loc.gov/mods/v3";
(:  7:) declare namespace xlink="http://www.w3.org/1999/xlink";
(:  8:) declare namespace oai-pmh="http://www.openarchives.org/OAI/2.0/";
(:  9:) 
(: 10:) declare function local:url($issue as element()) {
(: 11:)     (: Returns the mods:url[@access="object in context"] element(s) from the MODS record(s) referenced by the first non-mptr logical div(s) of the document containing $issue. :)
(: 12:)     let $doc := root($issue)
(: 13:)     let $entry := $doc/mets:mets/mets:structMap[@TYPE="LOGICAL"]//mets:div[not(mets:mptr)][1]
(: 14:)     let $ids := $entry ! data(@DMDID) ! tokenize(., '\s+')
(: 15:)     return $doc/mets:mets/mets:dmdSec[@ID=$ids]/mets:mdWrap/mets:xmlData/mods:mods/mods:location/mods:url[@access="object in context"]
(: 16:) };
(: 17:) declare function local:projectname($issue as element()) {
(: 18:)     (: Fifth "/"-separated token of local:url($issue); for a URL shaped like scheme://host/a/b/... that is the segment "b". :)
(: 19:)     tokenize(local:url($issue), "/")[5]
(: 20:) };
(: 21:) 
(: 22:) declare function local:issue-date($issue as element()) {
(: 23:)     (: Atomized publication date(s) of $issue: per publication originInfo, the first dateIssued flagged keyDate="yes" and encoding="iso8601" in the MODS record(s) named by @DMDID. :)
(: 24:)     let $doc := root($issue)
(: 25:)     let $ids := $issue ! data(@DMDID) ! tokenize(., '\s+')
(: 26:)     let $records := $doc/mets:mets/mets:dmdSec[@ID=$ids]/mets:mdWrap/mets:xmlData/mods:mods
(: 27:)     return data($records/mods:originInfo[@eventType='publication']/mods:dateIssued[@keyDate="yes" and @encoding='iso8601'][1])
(: 28:) };
(: 29:) (: Main query: for every issue div (with a publication date) in every document referenced by an mptr of the overall document, build one OAI-PMH GetRecord wrapper around a reduced METS record. NOTE: the "(: N:)" prefixes are line labels for this post only; inside the direct element constructors (lines 85-144) they would be literal text, so strip them before running. :)
(: 30:) let $docname := "heidelberger_tageblatt"
(: 31:) let $ga := doc("/db/resources/digiZeitung/" || $docname || ".xml")
(: 32:) let $ga_div := $ga/mets:mets/mets:structMap[@TYPE="LOGICAL"]/mets:div[1]
(: 33:)     (:  let $gaDiv2 := $gaDiv/@* -- does not work: https://stackoverflow.com/questions/3026038/how-to-get-node-without-children-in-xquery :)
(: 34:) let $ga_dmdids := tokenize( data($ga_div/@DMDID), '\s+')
(: 35:) let $ga_dmdsec := $ga/mets:mets/mets:dmdSec[@ID=$ga_dmdids]/mets:mdWrap/mets:xmlData/mods:mods/../../..
(: 36:) let $ga_mods := $ga_dmdsec/mets:mdWrap/mets:xmlData/mods:mods
(: 37:) let $ga_lang := $ga_mods/mods:language
(: 38:) let $ga_recordIdentifier_urn := data($ga_mods/mods:recordInfo/mods:recordIdentifier[@source="urn"])
(: 39:) let $ga_url := local:url($ga_div)
(: 40:) let $ga_projectname := local:projectname($ga_div)
(: 41:) (: Outer loop: each mets:mptr of the overall document names a per-project document ($band) via the 5th "/"-token of its xlink:href. :)
(: 42:)     (: ==================================================================================== :)
(: 43:) for $mptr in $ga/mets:mets/mets:structMap[@TYPE="LOGICAL"]//mets:div/mets:mptr
(: 44:) let $href := data($mptr/@xlink:href) (: do not put [] around @xlink:href! :)
(: 45:) let $projectname := tokenize($href, '/')[5]
(: 46:) let $band := doc('/db/resources/digiZeitung/' || $projectname || '.xml')
(: 47:) let $top := $band/mets:mets/mets:structMap[@TYPE="LOGICAL"]//mets:div[not(mets:mptr)][1]
(: 48:) let $admids := $top ! data(@ADMID) ! tokenize(., '\s+') (: atomized and split exactly like @DMDID: ADMID may list several IDs, and comparing against values rather than attribute nodes follows the note on line 75 :)
(: 49:) let $amdsec := $band/mets:mets/mets:amdSec[@ID=$admids]
(: 50:) let $ga_href := data($band/mets:mets/mets:structMap[@TYPE="LOGICAL"]/mets:div/mets:mptr/@xlink:href)
(: 51:)     (: ==================================================================================== :)
(: 52:) for $issue in $band/mets:mets/mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="issue"]
(: 53:) where local:issue-date($issue)
(: 54:) let $dmdids := $issue ! data(@DMDID) ! tokenize(., '\s+')
(: 55:) let $dmdsec := $band/mets:mets/mets:dmdSec[@ID=$dmdids]/mets:mdWrap/mets:xmlData/mods:mods/mods:originInfo/mods:dateIssued[@encoding="iso8601"]/../../../../..
(: 56:) let $subdmdids := $issue//mets:div[@DMDID] ! data(@DMDID) ! tokenize(., '\s+')
(: 57:)     (: TODO »//@DMDID« does NOT work with multiple @DMDIDs :)
(: 58:)     (:  let $dmdid_query := string-join(tokenize($dmdids, '\s+'), " or ") ... map ...
(: 59:)         XQuery / Walmsley / 2nd Ed.: XQuery does not provide any build-in
(: 60:)         support for evaluating dynamic paths
(: 61:)     :)
(: 62:) let $mods := $band/mets:mets/mets:dmdSec[@ID=$dmdids]/mets:mdWrap/mets:xmlData/mods:mods
(: 63:)     (: let $dmd := $mods/../../.. :)
(: 64:) let $subdmdsecs := $band/mets:mets/mets:dmdSec[@ID=$subdmdids]/mets:mdWrap/mets:xmlData/mods:mods/../../..
(: 65:) let $date := data($mods/mods:originInfo[@eventType='publication']/mods:dateIssued[@keyDate="yes" and @encoding='iso8601'][1])
(: 66:) let $y := fn:year-from-date($date)
(: 67:) let $ym := $y || "-" || fn:format-number(fn:month-from-date($date), "00")
(: 68:) let $ymd := $ym || "-" || fn:format-number(fn:day-from-date($date), "00")
(: 69:) 
(: 70:)     (: Example <mets:div ID="log00004" TYPE="issue" DMDID="dmd00004" LABEL="04.01.1900"> :)
(: 71:) let $id := data($issue/@ID)
(: 72:) let $smLi := $band/mets:mets/mets:structLink/mets:smLink[@xlink:from=$id]
(: 73:) let $phy := data($smLi/@xlink:to) (: atomized so that »[@ID=$phy]« on the next line compares against values, not attribute nodes (cf. line 75) :)
(: 74:) let $smp := $band/mets:mets/mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div[@ID=$phy]
(: 75:) let $fid := data($smp/mets:fptr/@FILEID) (: otherwise $fid is »@FILEID=...« and the subsequent »[@ID=$fid]« does not use an index :)
(: 76:) (:  let $fs := $band/mets:mets/mets:fileSec/mets:fileGrp/mets:file[@ID=$fid]  :)
(: 77:) let $fs := 
(: 78:)         for $fg in $band/mets:mets/mets:fileSec/mets:fileGrp[mets:file[@ID=$fid]]
(: 79:)         return <mets:fileGrp USE="{$fg/@USE}">
(: 80:)         {
(: 81:)             $fg/mets:file[@ID=$fid]
(: 82:)         }
(: 83:)         </mets:fileGrp>
(: 84:) return 
(: 85:)     <oai-pmh:OAI-PMH xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
(: 86:)     <oai-pmh:GetRecord> 
(: 87:)     <oai-pmh:record>
(: 88:)     <oai-pmh:header>
(: 89:)         <oai-pmh:identifier>{$projectname || "--" || $issue/@ID}</oai-pmh:identifier>
(: 90:)     </oai-pmh:header>
(: 91:)     <oai-pmh:metadata>
(: 92:)     <mets:mets>
(: 93:)         {$amdsec}
(: 94:)         {$ga_dmdsec}
(: 95:)         <mets:dmdSec ID="{$dmdsec/@ID}">
(: 96:)             <mets:mdWrap MDTYPE="MODS">
(: 97:)                 <mets:xmlData>
(: 98:)                     <mods:mods>
(: 99:)                         {insert-before(
(:100:)                             insert-before((:language_01:)$dmdsec/mets:mdWrap/mets:xmlData/mods:mods/*,
(:101:)                                 1, $ga_lang),
(:102:)                             1,
(:103:)                             <mods:recordInfo>
(:104:)                                 <mods:recordIdentifier source="urn">
(:105:)                                     {$ga_recordIdentifier_urn || "--" || $ymd || "--" || $issue/@ID}
(:106:)                                 </mods:recordIdentifier>
(:107:)                             </mods:recordInfo>
(:108:)                         )}
(:109:)                     </mods:mods>
(:110:)                 </mets:xmlData>
(:111:)             </mets:mdWrap>
(:112:)         </mets:dmdSec>
(:113:)         {$subdmdsecs}
(:114:)                         
(:115:)         <mets:structMap TYPE="LOGICAL">
(:116:)             <mets:div ID="{$ga_projectname}" TYPE="newspaper" DMDID="{$ga_dmdids}"><!-- possible ID collision with issue -->
(:117:)                 <mets:mptr LOCTYPE="URL" xlink:href="{$ga_href}"/>
(:118:)                 <mets:div ID="{$ga_projectname || "--" || $y}" TYPE="year" ORDERLABEL="{$y}">
(:119:)                     <mets:mptr LOCTYPE="URL" xlink:href="{$ga_href || "/" || $y}"/>
(:120:)                     <mets:div ID="{$ga_projectname || "--" || $ym}" TYPE="month" ORDERLABEL="{$ym}">
(:121:)                         <mets:div ID="{$ga_projectname || "--" || $ymd}" TYPE="day" ORDERLABEL="{$ymd}">
(:122:)                             <mets:div ID="{(: $projectname || "--" || → NO, because otherwise smLink would not match :) $issue/@ID}" DMDID="{$issue/@DMDID}" TYPE="issue" />
(:123:)                         </mets:div>
(:124:)                     </mets:div>
(:125:)                 </mets:div>
(:126:)             </mets:div>
(:127:)         </mets:structMap>
(:128:)         <mets:structMap TYPE="PHYSICAL">
(:129:)         <mets:div ID="phys0" TYPE="physSequence">
(:130:)             {$smp}
(:131:)         </mets:div>
(:132:)         </mets:structMap>
(:133:)         <mets:structLink>
(:134:)             <mets:smLink xlink:from="{$id}" xlink:to="phys0" />
(:135:)             {$smLi}
(:136:)         </mets:structLink>
(:137:)         <mets:fileSec>
(:138:)         {$fs}
(:139:)         </mets:fileSec>
(:140:)     </mets:mets>
(:141:)     </oai-pmh:metadata>
(:142:)     </oai-pmh:record>
(:143:)     </oai-pmh:GetRecord>
(:144:)     </oai-pmh:OAI-PMH>

The query works, but is very slow.

Monex reports "No index". Yet, for example, the predicate in line 72 compares @xlink:from, for which the corresponding collection.xconf defines a "new range" index, and the files in eXist-db's data directory (data/range) do contain values of this attribute.

collection.xconf:

<collection xmlns="http://exist-db.org/collection-config/1.0" xmlns:xs="http://www.w3.org/2001/XMLSchema">
    <!-- Index configuration for the collection holding the METS files; every attribute listed below is one the query above filters on or reads. -->
    <!-- The xlink prefix declared on <index> is the one used by the @xlink:* qnames; the mets prefix is declared but not referenced by any qname here. -->
    <index xmlns:mets="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink">
        <!-- Range indexes -->
        <!-- All entries are attribute indexes typed xs:string. -->
        <range>
            <create qname="@ADMID" type="xs:string"/>
            <create qname="@DMDID" type="xs:string"/>
            <create qname="@encoding" type="xs:string"/>
            <create qname="@eventType" type="xs:string"/>
            <create qname="@FILEID" type="xs:string"/>
            <create qname="@ID" type="xs:string"/>
            <create qname="@keyDate" type="xs:string"/>
            <create qname="@TYPE" type="xs:string"/>
            <create qname="@USE" type="xs:string"/>
            <create qname="@xlink:from" type="xs:string"/>
            <create qname="@xlink:href" type="xs:string"/>
            <create qname="@xlink:to" type="xs:string"/>
        </range>
    </index>
</collection>

Merged screenshot excerpts of Monex / XQuery / collection.xconf

So in short I expected eXist-db to use the "new range" index.

(Perhaps someone could point me to a good debugging/logging facility for finding out why eXist-db is not using the index.)

0

There are 0 answers