Group and split XmlDocument on nth-level descendant

55 views Asked by At

I want to split an XmlDocument into an array of XmlDocuments, where each resulting XmlDocument contains the records of a certain period (year/month combination). The complicating factor, in my opinion, is that the grouping has to occur on deeply nested elements.

Example input:

<?xml version="1.0" encoding="utf-8"?>
<Example>
  <RecordA>
    <RecordA1>
      <RecordA11>
        <ElementA11></ElementA11>
      </RecordA11>
    </RecordA1>
    <RecordA2>
      <ElementA2></ElementA2>
    </RecordA2>
  </RecordA>
  <RecordB>
    <RecordB1>
      <ElementB1></ElementB1>
      <RecordB11>
        <ElementB11></ElementB11>
        <RecordB111>
          <RecordB1111>
            <RecordB11111>
              <ElementB11111></ElementB11111>
            </RecordB11111>
            <ElementB1111></ElementB1111>
            <RecordB11112>
              <Dates>
                <StartDate>2014-05-29</StartDate>
                <EndDate>2014-05-29</EndDate>
              </Dates>
            </RecordB11112>
            <RecordB11112>
              <Dates>
                <StartDate>2014-06-02</StartDate>
                <EndDate>2014-06-02</EndDate>
              </Dates>
            </RecordB11112>
            <RecordB11112>
              <Dates>
                <StartDate>2014-05-21</StartDate>
                <EndDate>2014-05-21</EndDate>
              </Dates>
            </RecordB11112>
            <RecordB11112>
              <Dates>
                <StartDate>2014-04-09</StartDate>
                <EndDate>2014-04-09</EndDate>
              </Dates>
            </RecordB11112>
            <RecordB11112>
              <Dates>
                <StartDate>2014-06-05</StartDate>
                <EndDate>2014-06-05</EndDate>
              </Dates>
            </RecordB11112>
          </RecordB1111>
        </RecordB111>
      </RecordB11>
    </RecordB1>
  </RecordB>
</Example>

Wanted output:

<?xml version="1.0" encoding="utf-8"?>
<Examples>
  <Example>
    <RecordA>
      <RecordA1>
        <RecordA11>
          <ElementA11></ElementA11>
        </RecordA11>
      </RecordA1>
      <RecordA2>
        <ElementA2></ElementA2>
      </RecordA2>
    </RecordA>
    <RecordB>
      <RecordB1>
        <ElementB1></ElementB1>
        <RecordB11>
          <ElementB11></ElementB11>
          <RecordB111>
            <RecordB1111>
              <RecordB11111>
                <ElementB11111></ElementB11111>
              </RecordB11111>
              <ElementB1111></ElementB1111>
              <RecordB11112>
                <Dates>
                  <StartDate>2014-05-29</StartDate>
                  <EndDate>2014-05-29</EndDate>
                </Dates>
              </RecordB11112>
              <RecordB11112>
                <Dates>
                  <StartDate>2014-05-21</StartDate>
                  <EndDate>2014-05-21</EndDate>
                </Dates>
              </RecordB11112>
            </RecordB1111>
          </RecordB111>
        </RecordB11>
      </RecordB1>
    </RecordB>
  </Example>
  <Example>
    <RecordA>
      <RecordA1>
        <RecordA11>
          <ElementA11></ElementA11>
        </RecordA11>
      </RecordA1>
      <RecordA2>
        <ElementA2></ElementA2>
      </RecordA2>
    </RecordA>
    <RecordB>
      <RecordB1>
        <ElementB1></ElementB1>
        <RecordB11>
          <ElementB11></ElementB11>
          <RecordB111>
            <RecordB1111>
              <RecordB11111>
                <ElementB11111></ElementB11111>
              </RecordB11111>
              <ElementB1111></ElementB1111>
              <RecordB11112>
                <Dates>
                  <StartDate>2014-04-09</StartDate>
                  <EndDate>2014-04-09</EndDate>
                </Dates>
              </RecordB11112>
            </RecordB1111>
          </RecordB111>
        </RecordB11>
      </RecordB1>
    </RecordB>
  </Example>
  <Example>
    <RecordA>
      <RecordA1>
        <RecordA11>
          <ElementA11></ElementA11>
        </RecordA11>
      </RecordA1>
      <RecordA2>
        <ElementA2></ElementA2>
      </RecordA2>
    </RecordA>
    <RecordB>
      <RecordB1>
        <ElementB1></ElementB1>
        <RecordB11>
          <ElementB11></ElementB11>
          <RecordB111>
            <RecordB1111>
              <RecordB11111>
                <ElementB11111></ElementB11111>
              </RecordB11111>
              <ElementB1111></ElementB1111>
              <RecordB11112>
                <Dates>
                  <StartDate>2014-06-02</StartDate>
                  <EndDate>2014-06-02</EndDate>
                </Dates>
              </RecordB11112>
              <RecordB11112>
                <Dates>
                  <StartDate>2014-06-05</StartDate>
                  <EndDate>2014-06-05</EndDate>
                </Dates>
              </RecordB11112>
            </RecordB1111>
          </RecordB111>
        </RecordB11>
      </RecordB1>
    </RecordB>
  </Example>
</Examples>
1

There is 1 answer

1
Martin Honnen On BEST ANSWER

I think you can use Muenchian grouping to identify the first item in each group, then you need to recreate the tree for each group:

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

<!--
  Splits the input into one copy of the document element per period, where a
  period is the first 7 characters of a RecordB11112's Dates/StartDate
  (the "YYYY-MM" part of a value such as 2014-05-29).

  Every copy reproduces the complete source tree, except that only the
  RecordB11112 elements belonging to that period are kept. All copies are
  emitted as children of a single <Examples> root element.
-->

<xsl:output method="xml" indent="yes"/>
<!-- Drop whitespace-only text nodes so that indent="yes" controls the layout. -->
<xsl:strip-space elements="*"/>

<!-- Index every RecordB11112 by its period (first 7 characters of the start date). -->
<xsl:key name="group" match="RecordB11112" use="substring(Dates/StartDate, 1, 7)"/>

<!--
  Entry point. Muenchian grouping: select only those RecordB11112 elements
  that are the first node returned by the key for their own period, i.e. one
  representative per distinct period. No sort is applied, so periods are
  emitted in document order of their first record.
-->
<xsl:template match="/">
  <Examples>
    <xsl:apply-templates select="//RecordB11112[generate-id() = generate-id(key('group', substring(Dates/StartDate, 1, 7))[1])]"/>
  </Examples>
</xsl:template>

<!--
  Invoked once per period (on the representative record). Re-walks the whole
  document from the document element in "recreate" mode, handing down the
  period string that decides which records survive in this copy.
-->
<xsl:template match="RecordB11112">
  <xsl:apply-templates select="/*" mode="recreate">
    <xsl:with-param name="period" select="substring(Dates/StartDate, 1, 7)"/>
  </xsl:apply-templates>
</xsl:template>

<!--
  Identity copy for "recreate" mode. The period parameter has to be forwarded
  explicitly on the way down because XSLT 1.0 does not tunnel parameters.
  Attributes are copied as-is and need no parameter.
-->
<xsl:template match="@* | node()" mode="recreate">
  <xsl:param name="period"/>
  <xsl:copy>
    <xsl:apply-templates select="@*" mode="recreate"/>
    <xsl:apply-templates mode="recreate">
      <xsl:with-param name="period" select="$period"/>
    </xsl:apply-templates>
  </xsl:copy>
</xsl:template>

<!--
  Filter for the grouped records: keep a RecordB11112 (with its whole subtree)
  only when its period equals the one being recreated. Comparing the key value
  directly selects exactly the nodes key('group', $period) would return, but
  avoids scanning the group's node-set for every record visited.
-->
<xsl:template match="RecordB11112" mode="recreate">
  <xsl:param name="period"/>
  <xsl:if test="substring(Dates/StartDate, 1, 7) = $period">
    <xsl:copy-of select="."/>
  </xsl:if>
</xsl:template>

</xsl:stylesheet>