Can't sort data alphabetically according to the Spanish alphabet before selecting it in a query

128 views Asked by At

I have a set of assets which have a property "name". I want to get a dynamic number of those assets, and I should get them sorted alphabetically by that "name" property. I retrieve them with this query:

type=dam:Asset
path=/content/dam/en/foobar/contacts/
orderby=@jcr:content/data/master/@name
orderby.sort=asc
p.limit=3

and this is working, so in a set of names:

[Paloma, Abel, José, Eduardo]

it retrieves:

Abel, Eduardo, José.

The problem is with the Spanish alphabet, in which Á is the same letter as A. So, for a set of:

[Paloma, Abel, José, Álvaro, Eduardo]

it retrieves:

Abel, Eduardo, José.

Álvaro is excluded because it is not among the first 3 elements after ordering, when it should be the second. The query should retrieve:

Abel, Álvaro, Eduardo.

So, to fix that, I've created a custom Oak Lucene index like the one below:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Serialized node that holds the repository's index definitions.
    The empty child elements only name sibling index nodes; the single
    definition spelled out in this file is foobarCFIndexFilter.
    NOTE(review): presumably this is the .content.xml of /oak:index;
    confirm against the package filter.
-->
<jcr:root xmlns:oak="http://jackrabbit.apache.org/oak/ns/1.0" xmlns:jcr="http://www.jcp.org/jcr/1.0" xmlns:nt="http://www.jcp.org/jcr/nt/1.0" xmlns:rep="internal"
    jcr:mixinTypes="[rep:AccessControllable]"
    jcr:primaryType="nt:unstructured">
    <socialLucene/>
    <workflowDataLucene/>
    <slingeventJob/>
    <jcrLanguage/>
    <versionStoreIndex/>
    <repMembers/>
    <cqReportsLucene/>
    <commerceLucene/>
    <counter/>
    <authorizables/>
    <enablementResourceName/>
    <externalPrincipalNames/>
    <cmLucene/>
    <!--
        Custom Lucene index definition (type="lucene") restricted to the two
        foobar DAM trees through includedPaths and queryPaths, with
        evaluatePathRestrictions enabled.
        NOTE(review): reindex is false, and reindexCount / seed look like
        values written by the repository at runtime. Confirm they are meant
        to be part of the packaged definition, and that a reindex really runs
        after every change to the analyzers or index rules.
    -->
    <foobarCFIndexFilter
        jcr:primaryType="oak:QueryIndexDefinition"
        async="[async,nrt]"
        evaluatePathRestrictions="{Boolean}true"
        includedPaths="[/content/dam/es/foobar,/content/dam/en/foobar]"
        queryPaths="[/content/dam/es/foobar,/content/dam/en/foobar]"
        reindex="{Boolean}false"
        reindexCount="{Long}24"
        seed="{Long}3850652403740003290"
        type="lucene">
        <!--
            Default analyzer: Classic tokenizer plus a Synonym filter that
            reads synonyms.txt in solr format.
            NOTE(review): analyzers presumably apply only to analyzed
            (full-text) field content, not to the values used for sorting
            ordered properties. If so, mapping accented letters to plain
            ones here cannot change the orderby result. Verify against the
            Oak Lucene documentation.
        -->
        <analyzers jcr:primaryType="nt:unstructured">
            <default jcr:primaryType="nt:unstructured">
                <filters jcr:primaryType="nt:unstructured">
                    <Synonym
                        jcr:primaryType="nt:unstructured"
                        format="solr"
                        synonyms="synonyms.txt">
                        <!-- Placeholder for the synonyms file node; its content is not part of this file. -->
                        <synonyms.txt/>
                    </Synonym>
                </filters>
                <tokenizer
                    jcr:primaryType="nt:unstructured"
                    name="Classic"/>
            </default>
        </analyzers>
        <!--
            Index rules declared for nt:base. Each child of "properties"
            names one relative property path to index.
        -->
        <indexRules jcr:primaryType="nt:unstructured">
            <nt:base jcr:primaryType="nt:unstructured">
                <properties jcr:primaryType="nt:unstructured">
                    <!-- title: flagged analyzed, nodeScopeIndex, propertyIndex and ordered. -->
                    <title
                        jcr:primaryType="nt:unstructured"
                        analyzed="{Boolean}true"
                        isRegexp="{Boolean}false"
                        name="jcr:content/data/master/title"
                        nodeScopeIndex="{Boolean}true"
                        ordered="{Boolean}true"
                        propertyIndex="{Boolean}true"
                        type="String"/>
                    <!-- date: property index with ordering enabled; no explicit type is declared. -->
                    <date
                        jcr:primaryType="nt:unstructured"
                        name="jcr:content/data/master/date"
                        ordered="{Boolean}true"
                        propertyIndex="{Boolean}true"/>
                    <!-- sectors, contentFragment, model: plain property indexes without ordering. -->
                    <sectors
                        jcr:primaryType="nt:unstructured"
                        name="jcr:content/data/master/sectors"
                        propertyIndex="{Boolean}true"/>
                    <contentFragment
                        jcr:primaryType="nt:unstructured"
                        name="jcr:content/contentFragment"
                        propertyIndex="{Boolean}true"/>
                    <model
                        jcr:primaryType="nt:unstructured"
                        name="cq:model"
                        propertyIndex="{Boolean}true"/>
                    <!--
                        name: the property the query orders by; same flags as title.
                        NOTE(review): ordering presumably compares the stored
                        string value as is, which would place accented capitals
                        after every unaccented one. A separate accent-free sort
                        key property, indexed with ordered=true and used in the
                        orderby predicate, may be needed. TODO confirm.
                    -->
                    <name
                        jcr:primaryType="nt:unstructured"
                        analyzed="{Boolean}true"
                        isRegexp="{Boolean}false"
                        name="jcr:content/data/master/name"
                        nodeScopeIndex="{Boolean}true"
                        ordered="{Boolean}true"
                        propertyIndex="{Boolean}true"
                        type="String"/>
                </properties>
            </nt:base>
        </indexRules>
    </foobarCFIndexFilter>
    <cqProjectLucene/>
    <ntFolderDamLucene/>
    <acPrincipalName/>
    <uuid/>
    <damAssetLucene/>
    <rep:policy/>
    <cqPayloadPath/>
    <nodetypeLucene/>
    <nodetype/>
    <ntBaseLucene/>
    <reference/>
    <principalName/>
    <cqTagLucene/>
    <lucene/>
    <repTokenIndex/>
    <externalId/>
    <authorizableId/>
    <cqPageLucene/>
</jcr:root>

Where in the synonyms.txt I had:

á, a

Á, A

and so on. I also tried a charFilter of type Mapping with the equivalent character mappings. Using the Query Performance Diagnosis tool, I have made sure that my custom Oak index is the one my query is using. But nothing works: after a reindex, the query results are the same. How can I solve this?

0

There are 0 answers