import {
  removeSpecialChars,
  removeWords,
  stripHtmlTags,
  transformConnectionWords,
} from "./convertSpecialChars";

// Words handed to `removeWords` when `parseContent` cleans the combined heading text.
// The list is empty for now: the `transformConnectionWords()` call that would
// populate it is currently disabled.
const connectionWords = [];

/**
 * Parses an HTML string and extracts the text used for keyword matching.
 *
 * The input is lower-cased, passed through `removeSpecialChars`, and loaded
 * into a `<div>` that is never attached to the page, so the DOM can be queried
 * for headings, paragraphs, list items, table cells and images.
 *
 * @param {string} data - HTML markup to analyse. Falsy values (`null`,
 *   `undefined`, `""`) are treated as an empty document.
 * @returns {{
 *   headings1: string,
 *   headings: string,
 *   paragraphs: string,
 *   headingsArr: string[],
 *   totalImages: number
 * }} `headings1` is the tag-stripped text of all `<h1>` elements; `headings`
 *   is the tag-stripped, trimmed text of all `<h2>`–`<h5>` elements after
 *   `removeWords`; `paragraphs` is the tag-stripped, trimmed text of all
 *   `<p>`, `<li>` and `<td>` elements; `headingsArr` holds the raw `innerHTML`
 *   of every `<h2>`–`<h5>` grouped by level (all h2 first, then h3, h4, h5);
 *   `totalImages` is the number of `<img>` elements.
 */
export const parseContent = (data) => {
  // Always hand a string to the rest of the pipeline, even for null/undefined input.
  const lowerCaseData = data ? data.toLowerCase() : "";
  const noSpecialChars = removeSpecialChars(lowerCaseData);

  // NOTE(review): this div is created by the live `document`, so the browser may
  // start fetching <img> sources found in `data` while parsing. If `data` can be
  // untrusted, consider parsing inside an inert document instead — confirm.
  const div = document.createElement("div");
  div.innerHTML = noSpecialChars;

  // innerHTML of every element matching `selector`, in querySelectorAll order.
  const collectInnerHtml = (selector) =>
    Array.from(div.querySelectorAll(selector), (element) => element.innerHTML);

  // Prefix every fragment with a space so neighbouring fragments never run together.
  const joinSpaced = (fragments) =>
    fragments.map((fragment) => ` ${fragment}`).join("");

  const mainHeadings = collectInnerHtml("h1");

  // Sub-headings are kept per level; <h1> is reported separately.
  const subHeadingsByLevel = ["h2", "h3", "h4", "h5"].map(collectInnerHtml);
  const allHeadingsArr = [].concat(...subHeadingsByLevel);

  // Each level is separated by one extra space on top of the per-fragment prefix.
  const headings = subHeadingsByLevel.map(joinSpaced).join(" ");

  // Merge normal paragraphs with list items and table cells so keywords are
  // matched in the article even when they sit inside a <ul>/<ol> or a table.
  const paragraphsWithLists = ["p", "li", "td"]
    .map((selector) => joinSpaced(collectInnerHtml(selector)))
    .join(" ");

  const trimmedParagraphs = paragraphsWithLists.trim();
  const trimmedHeadings = removeWords(headings, connectionWords).trim();

  return {
    headings1: stripHtmlTags(joinSpaced(mainHeadings)),
    headings: stripHtmlTags(trimmedHeadings),
    paragraphs: stripHtmlTags(trimmedParagraphs),
    headingsArr: allHeadingsArr,

    totalImages: div.querySelectorAll("img").length,
  };
};
