How to convert an array of text segments into DOM tree object?

97 views Asked by At

Let's edit a text block in Figma as shown in the image:

enter image description here

Figma Plugin API gives the following segments for this text block:

// Styled text segments for the example text block, in document order.
// Each entry carries the run's text (`characters`, with "\n" marking line ends),
// its `fontWeight` (400 or 700 here), its list type (`listOptions.type`:
// "NONE", "UNORDERED" or "ORDERED"), its list nesting level (`indentation`,
// 0 for plain text) and an optional `hyperlink` ({ type, value } or null).
const segments = [
  { "characters": "Lorem ", "fontWeight": 400, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": null },
  { "characters": "Ipsum", "fontWeight": 700, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": null },
  { "characters": " is \nsimply dummy text of \n", "fontWeight": 400, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": null },
  { "characters": "the printing and \n", "fontWeight": 400, "listOptions": { "type": "UNORDERED" }, "indentation": 1, "hyperlink": null },
  { "characters": "typesetting \n", "fontWeight": 400, "listOptions": { "type": "UNORDERED" }, "indentation": 2, "hyperlink": null },
  { "characters": "industry. \n", "fontWeight": 400, "listOptions": { "type": "UNORDERED" }, "indentation": 1, "hyperlink": null },
  { "characters": "Lorem Ipsum has been the ", "fontWeight": 400, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": null },
  { "characters": "industry's standard", "fontWeight": 400, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": { "type": "URL", "value": "http://example.com" } },
  { "characters": " dummy text ever since the 1500s, \n", "fontWeight": 400, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": null },
  { "characters": "when an unknown \n", "fontWeight": 400, "listOptions": { "type": "ORDERED" }, "indentation": 1, "hyperlink": null },
  { "characters": "printer took \na galley of \ntype and \n", "fontWeight": 400, "listOptions": { "type": "UNORDERED" }, "indentation": 2, "hyperlink": null },
  { "characters": "scrambled it\n", "fontWeight": 400, "listOptions": { "type": "ORDERED" }, "indentation": 1, "hyperlink": null },
  { "characters": "\nto make a type\n\n", "fontWeight": 400, "listOptions": { "type": "NONE" }, "indentation": 0, "hyperlink": null },
  { "characters": "specimen book.\n", "fontWeight": 400, "listOptions": { "type": "UNORDERED" }, "indentation": 1, "hyperlink": null },
  { "characters": "It has survived\n", "fontWeight": 400, "listOptions": { "type": "ORDERED" }, "indentation": 3, "hyperlink": null },
  { "characters": "not only\nfive centuries,", "fontWeight": 400, "listOptions": { "type": "ORDERED" }, "indentation": 2, "hyperlink": null }
]

Since the list is long, let's simplify it a bit:

// Simplified form of the segment list:
//   ind   - list nesting level (0 for plain text)
//   list  - "UL" / "OL" for list segments, null otherwise
//   chars - the run's text, "\n" marks line ends
//   bold  - true for the bold run
//   link  - target URL of a hyperlink run, null otherwise
const segments = [
  { ind: 0, list: null, chars: "Lorem ", bold: false, link: null },
  { ind: 0, list: null, chars: "Ipsum", bold: true, link: null },
  { ind: 0, list: null, chars: " is \nsimply dummy text of \n", bold: false, link: null },
  { ind: 1, list: "UL", chars: "the printing and \n", bold: false, link: null },
  { ind: 2, list: "UL", chars: "typesetting \n", bold: false, link: null },
  { ind: 1, list: "UL", chars: "industry. \n", bold: false, link: null },
  { ind: 0, list: null, chars: "Lorem Ipsum has been the ", bold: false, link: null },
  { ind: 0, list: null, chars: "industry's standard", bold: false, link: "http://example.com" },
  { ind: 0, list: null, chars: " dummy text ever since the 1500s, \n", bold: false, link: null },
  { ind: 1, list: "OL", chars: "when an unknown \n", bold: false, link: null },
  { ind: 2, list: "UL", chars: "printer took \na galley of \ntype and \n", bold: false, link: null },
  { ind: 1, list: "OL", chars: "scrambled it\n", bold: false, link: null },
  { ind: 0, list: null, chars: "\nto make a type\n\n", bold: false, link: null },
  { ind: 1, list: "UL", chars: "specimen book.\n", bold: false, link: null },
  { ind: 3, list: "OL", chars: "It has survived\n", bold: false, link: null },
  // No leading space here: the text must match the corresponding raw segment.
  { ind: 2, list: "OL", chars: "not only\nfive centuries,", bold: false, link: null }
]

I'm trying to take this segment data and convert it into an HTML tree with JavaScript. The output should be as follows:

<span>Lorem </span>
<strong>Ipsum</strong> 
<span> is <br>simply dummy text of </span>
<ul>
  <li>
    <span>the printing and </span>
  </li>
  <ul>
    <li><span>typesetting </span></li>
  </ul>
  <li><span>industry. </span></li>
</ul>
<span>Lorem Ipsum has been the </span>
<a href="http://example.com">industry's standard</a>
<span> dummy text ever since the 1500s, </span>
<ol>
  <li><span>when an unknown </span></li>
  <ul>
    <li><span>printer took </span></li>
    <li><span>a galley of </span></li>
    <li><span>type and </span></li>
  </ul>
  <li><span>scrambled it</span></li>
</ol>
<span>to make a type</span>
<ul>
  <li>
    <span>specimen book.</span>
  </li>
  <ol>
    <ol>
      <li><span>It has survived</span></li>
    </ol>
    <li><span>not only</span></li>
    <li><span>five centuries,</span></li>
  </ol>
</ul>

I tried:

/**
 * Wraps a segment's text into `<span>` markup tokens.
 *
 * A single trailing line break is dropped; every remaining line break is
 * rendered as `<br>`. The text itself is NOT HTML-escaped.
 *
 * @param chars - raw text of one segment
 * @returns the three tokens `["<span>", text, "</span>"]`
 */
function getPureSegment(chars: string): string[] {
  const text = chars.endsWith("\n") ? chars.slice(0, -1) : chars
  // `replace` with a global regex is equivalent to `replaceAll` here, but does
  // not require the ES2021 library/runtime.
  return ["<span>", text.replace(/\n/g, "<br>"), "</span>"]
}

/**
 * Maps a segment's list type to the opening tag of the matching HTML list.
 *
 * @param segment - segment whose `listOptions.type` is inspected
 * @returns `"<ol>"` for "ORDERED", `"<ul>"` for "UNORDERED", otherwise `undefined`
 */
function getOpeningListTag(segment) {
  switch (segment.listOptions.type) {
    case "ORDERED":
      return "<ol>"
    case "UNORDERED":
      return "<ul>"
    default:
      // any other list type (e.g. "NONE") has no list tag
      return undefined
  }
}

/**
 * Maps a segment's list type to the closing tag of the matching HTML list.
 *
 * @param segment - segment whose `listOptions.type` is inspected
 * @returns `"</ol>"` for "ORDERED", `"</ul>"` for "UNORDERED", otherwise `undefined`
 */
function getClosingListTag(segment) {
  switch (segment.listOptions.type) {
    case "ORDERED":
      return "</ol>"
    case "UNORDERED":
      return "</ul>"
    default:
      // any other list type (e.g. "NONE") has no list tag
      return undefined
  }
}

/**
 * Converts text segments into HTML markup, one token per output line.
 *
 * Plain segments (indentation 0, or no ordered/unordered list type) are
 * emitted as a `<span>` via `getPureSegment`. List segments are emitted as
 * `<li>` items inside `<ol>`/`<ul>` lists nested according to `indentation`:
 *  - every line of a list segment becomes its own `<li>`;
 *  - a list line that is spread over several segments stays one `<li>`
 *    (the item is only closed once a segment line ends with "\n");
 *  - jumping more than one level deeper opens one list per skipped level,
 *    all of the segment's own list type;
 *  - nested lists are emitted as siblings of the preceding `<li>`, which is
 *    the structure the expected output asks for;
 *  - lists are tracked on a stack, so each one is closed with its own
 *    closing tag, innermost first, including those still open at the end.
 *
 * Bold and hyperlink information is not rendered by this function.
 *
 * @param segments - array of segments with `characters`, `indentation` and,
 *                   for indented segments, `listOptions.type`
 * @returns the markup tokens joined by "\n" ("" for an empty array)
 */
function getHtml(segments) {
  type OpenList = { open: string; close: string }

  // Lists that are currently open, outermost first.
  const openLists: OpenList[] = []
  const out: string[] = []
  // True while an <li> has been opened but not yet closed.
  let itemOpen = false

  const closeItem = () => {
    if (itemOpen) {
      out.push("</li>")
      itemOpen = false
    }
  }
  // Closes every open list nested deeper than `depth`, innermost first.
  const closeListsDownTo = (depth: number) => {
    for (const closed of openLists.splice(depth).reverse()) out.push(closed.close)
  }

  for (const segment of segments) {
    // Only indented segments are asked for their list tags, so plain
    // segments never need a `listOptions` property.
    let list: OpenList | null = null
    if (segment.indentation > 0) {
      const open = getOpeningListTag(segment)
      const close = getClosingListTag(segment)
      if (open && close) list = { open, close }
    }

    if (list === null) {
      // Plain text ends whatever list structure is still open.
      closeItem()
      closeListsDownTo(0)
      out.push(...getPureSegment(segment.characters))
      continue
    }

    const depth: number = segment.indentation
    const current = openLists[depth - 1]
    if (openLists.length !== depth || current.open !== list.open) {
      closeItem()
      // A list of another type at this depth has to be closed as well.
      closeListsDownTo(current && current.open !== list.open ? depth - 1 : depth)
      while (openLists.length < depth) {
        out.push(list.open)
        openLists.push(list)
      }
    }

    const parts: string[] = segment.characters.split("\n")
    parts.forEach((part, index) => {
      if (part !== "") {
        if (!itemOpen) {
          out.push("<li>")
          itemOpen = true
        }
        out.push(...getPureSegment(part))
      }
      // Every part except the last one was terminated by a line break.
      if (index < parts.length - 1) closeItem()
    })
  }

  closeItem()
  closeListsDownTo(0)
  return out.join("\n")
}
1

There is 1 answer

3
Peter Seliger On BEST ANSWER

A reliable solution can be achieved with a single `reduce`-based iteration over the OP's computed segments array.

The markup is aggregated while `reduce` consumes the array, invoking the reducer function once for each segment item. The reducer function therefore has to be implemented in a way that keeps track of the nested list tags opened and closed in the aggregated markup. One way of achieving this is to provide a collector object as the `reduce` method's initial value; in addition to its `result` property, this object carries all the necessary tracking data.

// Simplified segment list used as input for `aggregateMarkup`:
// `ind` is the list nesting level (0 for plain text), `list` is "UL"/"OL" or
// null, `chars` is the run's text, `bold` flags a bold run and `link` holds a
// hyperlink target or null.
// The last entry is a single level-2 ordered-list segment with three lines.
const segments = [
  { ind: 0, list: null, chars: "Lorem ", bold: false, link: null },
  { ind: 0, list: null, chars: "Ipsum", bold: true, link: null },
  { ind: 0, list: null, chars: " is \nsimply dummy text of \n", bold: false, link: null },
  { ind: 1, list: "UL", chars: "the printing and \n", bold: false, link: null },
  { ind: 2, list: "UL", chars: "typesetting \n", bold: false, link: null },
  { ind: 1, list: "UL", chars: "industry. \n", bold: false, link: null },
  { ind: 0, list: null, chars: "Lorem Ipsum has been the ", bold: false, link: null },
  { ind: 0, list: null, chars: "industry's standard", bold: false, link: "http://example.com" },
  { ind: 0, list: null, chars: " dummy text ever since the 1500s, \n", bold: false, link: null },
  { ind: 1, list: "OL", chars: "when an unknown \n", bold: false, link: null },
  { ind: 2, list: "UL", chars: "printer took \na galley of \ntype and \n", bold: false, link: null },
  { ind: 1, list: "OL", chars: "scrambled it\n", bold: false, link: null },
  { ind: 0, list: null, chars: "\nto make a type\n\n", bold: false, link: null },
  { ind: 1, list: "UL", chars: "specimen book.\n", bold: false, link: null },
  { ind: 2, list: "OL", chars: "It has survived\nnot only\nfive centuries,", bold: false, link: null }
];
// Fold all segments into one markup string; the collector starts out with an
// empty `result`.
const { result: markup } = segments.reduce(aggregateMarkup, { result: '' });

// Render the markup into the `#test` element ...
const testContainer = document.querySelector('#test');
testContainer.innerHTML = markup;

// ... and log the raw string as well.
console.log('markup ...', markup);
/* No page margin around the demo. */
body { margin: 0; }
/* The rendered markup occupies half of the page width. */
#test { width: 50%; }
/* Console output area: half width, full height, not anchored to the left edge
   (presumably the snippet runner's console overlay; verify against the host page). */
.as-console-wrapper { left: auto!important; width: 50%; min-height: 100%;  }
<script>
function aggregateMarkup(collector, segment, idx, segmentArray) {
  let { openTagNames = [], result = '' } = collector;
  const {
    ind: indentation, list,
    bold: isBold, link, chars = '',
  } = segment;

  const isLink = !!link;
  const isListItem = !!list;

  const isOpeningListItem =
    isListItem && ((segmentArray[idx - 1]?.ind ?? 0) < indentation);

  const upcomingIndentation = (segmentArray[idx + 1]?.ind ?? 0);

  const isTerminatingListItem =
    isListItem && (upcomingIndentation < indentation);

  const isListItemTerminator =
    (indentation > 1) && (upcomingIndentation < indentation);

  let markup = chars
    .trim()
    .replace(/(?:^[\n\s]+)|(?:[\n\s]+$)/g, '')
    .replace(/\n/g, isListItem && '<\/li><li>' || '<br\/>');

  if (!isBold && !isLink && !isListItem) {

    result = `${ result } <span>${ markup }<\/span> `;

  } else {
    const listTagName = String(list ?? '').toLowerCase();

    if (isOpeningListItem) {
      // - keep track of opening list tag-names
      //   by pushing the currently opened tag-name
      //   into the tracking-list for every opened list-tag.
      openTagNames.push(listTagName);

      const regXTerminatingLastLiTag = /<\/li>\s*$/;
      if (regXTerminatingLastLiTag.test(result)) {

        result = result.replace(regXTerminatingLastLiTag, '');
      }
      result = `${ result }<${ listTagName }>`;
    }

    if (isBold) {
      markup = `<strong>${ markup }<\/strong>`;
    }
    if (isLink) {
      markup = `<a href="${ link }">${ markup }<\/a>`;
    }
    if (isListItem) {
      markup = `<li>${ markup }<\/li>`;
    }
    result = [result, markup].join(' ');

    if (isTerminatingListItem) {
      // - keep track of opening list tag-names
      //   by removing the last tag-name from the
      //   tracking-list for every closed list-tag.
      openTagNames.pop();

      result = `${ result }<\/${ listTagName }>`;
    }
    const regXTerminatingLastListTag = /<\/[ou]l>\s*$/;
    if (isListItemTerminator) {

      result = `${ result }<\/li>`;
    }
  }

  if ((idx >= segmentArray.length - 1) && !!openTagNames.length) {
    // - at the time, the entire segments array has been iterated
    //   make sure to terminate every tracked, still unclosed tag.
    result = openTagNames
      .reduce((markup, tagName) => `${ markup }<\/${ tagName }>`, result);
  }
  return { openTagNames, result };
}
</script>

<div id="test"></div>