How to convert an HTML string into WhatsApp message format using JavaScript

1.2k views Asked by At

I want to convert the HTML string below into the recommended WhatsApp message format using JavaScript:

// Sample HTML input: mixes <b>, nested <i><b>, a styled <strike>, and &nbsp; entities.
const htmlText = '<b>TEST </b> BODY <i><b>WITH </b></i>SAMPLE <strike style="font-weight: bold; font-style: italic;">FORMAT&nbsp;</strike>&nbsp; &nbsp; HERE"';

into the format below:

*TEST*  BODY _*WITH *_ SAMPLE *_~FORMAT~_* HERE
2

There are 2 answers

0
trincot On BEST ANSWER

I would advise using a DOM Parser to parse the HTML, and then iterate over the DOM you get from it. This way the resulting text will also have all HTML entities resolved to text, any HTML comments will have been removed, and it will not break when spacing in HTML tags or their attributes is different from expected.

I would also make sure the formatting characters are placed directly adjacent to the text they apply to, leaving any surrounding white space outside of the markers:

/**
 * Converts an HTML fragment into text that uses single-character formatting
 * markers: `*` for <b>, `_` for <i> and `~` for <strike>.
 *
 * The HTML is parsed with DOMParser, so entities are resolved by the parser.
 * Markers are placed directly next to the first and last non-white-space
 * characters of an element's text; leading and trailing white space stays
 * outside the markers.
 *
 * @param {string} html - HTML fragment to convert.
 * @returns {string} The text content with formatting markers inserted.
 */
function htmlToFormat(html) {
    // Tag names are looked up exactly as reported by `node.tagName`.
    const codes = { B: "*", I: "_", STRIKE: "~" };
    // Parse the argument itself. The previous version read an outer
    // `htmlText` variable here, so the `html` parameter was silently ignored.
    const { body } = new DOMParser().parseFromString(html, "text/html");
    const dfs = ({ childNodes }) => Array.from(childNodes, (node) => {
        // nodeType 1 = element: convert its children, then wrap the result.
        if (node.nodeType === 1) {
            const s = dfs(node);
            const code = codes[node.tagName];
            // First alternative inserts the marker after leading white space,
            // second inserts it before trailing white space. Strings without
            // any non-white-space character match neither and stay unmarked.
            return code ? s.replace(/^(\s*)(?=\S)|(?<=\S)(\s*)$/g, `$1${code}$2`) : s;
        }
        // nodeType 3 = text. Other node kinds (e.g. comments, whose
        // textContent is the comment text) must not leak into the output.
        return node.nodeType === 3 ? node.textContent : "";
    }).join("");

    return dfs(body);
}

// Demo: convert the sample HTML from the question and print the result.
const htmlText = '<b>TEST </b> BODY <i><b>WITH </b></i>SAMPLE <strike style="font-weight: bold; font-style: italic;">FORMAT&nbsp;</strike>&nbsp; &nbsp; HERE"';
const formatted = htmlToFormat(htmlText);
console.log(formatted);

0
Gopalakrishnan M On

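Based on @trincot's solution, with two additional preprocessing steps applied to the HTML string before it is parsed: `<i><b>…</b></i>` is reordered to `<b><i>…</i></b>`, and bold/italic declared in a `<strike>` element's `style` attribute is converted into wrapping `<b>`/`<i>` tags.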

/**
 * Converts an HTML fragment into text that uses single-character formatting
 * markers: `*` for <b>, `_` for <i> and `~` for <strike>.
 *
 * Before parsing, the HTML string is normalised in two steps:
 *   1. `<i><b>…</b></i>` is reordered to `<b><i>…</i></b>`.
 *   2. A `<strike …>` element whose `style="…"` attribute mentions "bold" and/or
 *      "italic" has its attributes removed and is wrapped in `<b>` / `<i>`.
 * The normalised string is then parsed with DOMParser and walked depth-first.
 *
 * @param {string} htmlText - HTML fragment to convert.
 * @returns {string} The text content with formatting markers inserted.
 */
function htmlToFormat(htmlText) {
  // Step 1: swap italic-around-bold nesting. A replacer function is used so
  // that `$&`, `$1`, ... occurring in the content are inserted literally.
  // Pairs with empty content are left untouched.
  const reordered = htmlText.replace(
    /<i><b>(.*?)<\/b><\/i>/g,
    (whole, inner) => (inner ? `<b><i>${inner}</i></b>` : whole)
  );

  // Step 2: turn bold/italic styling on <strike> into explicit tags.
  // Inputs without <strike>, or with a <strike> that has no style attribute,
  // pass through unchanged (previously these cases threw on a null match).
  const normalized = reordered.replace(/<strike(.*?)<\/strike>/g, (element) => {
    const styleAttr = element.match(/style="(.*?)">/)?.[0];
    if (styleAttr === undefined) {
      return element;
    }
    // Drop every `name="value"` sequence so a bare <strike> remains.
    let rewritten = element.replace(/\s*\S*="[^"]+"\s*/g, "");
    if (styleAttr.includes("italic")) {
      rewritten = `<i>${rewritten}</i>`;
    }
    if (styleAttr.includes("bold")) {
      rewritten = `<b>${rewritten}</b>`;
    }
    return rewritten;
  });

  // Tag names are looked up exactly as reported by `node.tagName`.
  const codes = { B: "*", I: "_", STRIKE: "~" };
  const { body } = new DOMParser().parseFromString(normalized, "text/html");
  const dfs = ({ childNodes }) => Array.from(childNodes, (node) => {
    // nodeType 1 = element: convert its children, then wrap the result.
    if (node.nodeType === 1) {
      const s = dfs(node);
      const code = codes[node.tagName];
      // Markers go right after leading and right before trailing white space.
      return code ? s.replace(/^(\s*)(?=\S)|(?<=\S)(\s*)$/g, `$1${code}$2`) : s;
    }
    // nodeType 3 = text. Other node kinds (e.g. comments, whose textContent
    // is the comment text) must not leak into the output.
    return node.nodeType === 3 ? node.textContent : "";
  }).join("");

  return dfs(body);
}

// Demo: convert the sample HTML from the question and print the result.
const htmlText = '<b>TEST </b> BODY <i><b>WITH </b></i>SAMPLE <strike style="font-weight: bold; font-style: italic;">FORMAT&nbsp;</strike>&nbsp; &nbsp; HERE"';
const formatted = htmlToFormat(htmlText);
console.log(formatted);