import * as stringSimilarity from 'string-similarity';

import { getArrayWithUniqueEntries } from '@stsm/shared/util/array-util';

/**
 * Result of a word-by-word comparison of two texts, as produced by `compareTextsWordWise`.
 */
export interface WordWiseComparisonResult {
  /** Words of the first text that have an identical counterpart in the second text (passed through `getArrayWithUniqueEntries`). */
  matchedWords: string[];
  /**
   * Words that are similar but not identical. For every such pair both the word from the first text
   * and its best match from the second text are included (passed through `getArrayWithUniqueEntries`).
   */
  partiallyMatchedWords: string[];
  /** Arithmetic mean of the per-word similarity scores of the first text's words (an exact match scores 1). */
  overallSimilarity: number;
}

/**
 * Compares two texts word by word.
 *
 * Both texts are cleaned with `cleanText` and split on spaces. Each word of `text1` is then looked up
 * in `text2`:
 * - if the identical word occurs anywhere in `text2`, it counts as a full match (similarity 1);
 * - otherwise the most similar word of `text2` is determined on a lower-cased, diacritic-free basis.
 *   Words that only differ in capitalisation or diacritics are rated 0.9 instead of 1.
 *
 * @param text1 - The text whose words are rated.
 * @param text2 - The text in which matches for the words of `text1` are searched.
 * @param similarityThreshold - A non-identical pair is reported as a partial match only if its
 *   similarity is strictly greater than this value.
 * @returns The exactly matched words, the partially matched words (word and its best match) and the
 *   mean similarity over all words of `text1`. If one text contains no words the similarity is 0;
 *   if neither text contains words it is 1.
 */
export function compareTextsWordWise(text1: string, text2: string, similarityThreshold: number = 0.6): WordWiseComparisonResult {
  // cleanText turns punctuation into spaces, so splitting can yield empty tokens (e.g. for "a, b" or
  // a trailing "."). They are not words and must not take part in the scoring.
  const splitIntoWords = (text: string): string[] =>
    cleanText(text)
      .split(' ')
      .filter((token: string) => token.length > 0);
  const normalize = (word: string): string => removeDiacritics(word.toLowerCase());

  const text1Words: string[] = splitIntoWords(text1);
  const text2Words: string[] = splitIntoWords(text2);

  // Nothing to compare: avoids a division by zero below and avoids calling findBestMatch with an
  // empty candidate list.
  if (text1Words.length === 0 || text2Words.length === 0) {
    const bothEmpty: boolean = text1Words.length === 0 && text2Words.length === 0;

    return { matchedWords: [], partiallyMatchedWords: [], overallSimilarity: bothEmpty ? 1 : 0 };
  }

  const matchedWords: string[] = [];
  const partiallyMatchedWords: string[] = [];

  // Loop-invariant lookups for text2, computed once instead of once per word of text1.
  const text2WordSet: Set<string> = new Set<string>(text2Words);
  const text2WordsNormalized: string[] = text2Words.map(normalize);

  // used to calculate the overall similarity in the end
  let similaritySum = 0;

  for (const word of text1Words) {
    // An identical word anywhere in text2 is a full match, even if an earlier word of text2 has the
    // same normalized form (e.g. "Hello" before "hello").
    if (text2WordSet.has(word)) {
      matchedWords.push(word);
      similaritySum += 1; // similarity of 100%
      continue;
    }

    const normalizedWord: string = normalize(word);
    const bestMatchIndex: number = stringSimilarity.findBestMatch(normalizedWord, text2WordsNormalized).bestMatchIndex;
    const match = text2Words[bestMatchIndex] as string;
    const rawSimilarity: number = stringSimilarity.compareTwoStrings(normalizedWord, text2WordsNormalized[bestMatchIndex] as string);

    // The words are known to differ here; if they are only equal after normalisation (diacritics or
    // capitalisation), rate them as a partial match (0.9), not 1.
    const similarity: number = rawSimilarity === 1 ? 0.9 : rawSimilarity;
    similaritySum += similarity;

    // partial match using the given similarityThreshold
    if (similarity > similarityThreshold) {
      partiallyMatchedWords.push(word, match);
    }
  }

  return {
    matchedWords: getArrayWithUniqueEntries(matchedWords),
    partiallyMatchedWords: getArrayWithUniqueEntries(partiallyMatchedWords),
    overallSimilarity: similaritySum / text1Words.length,
  };
}

/**
 * Puts a text on a single line: every line break (`\r\n`, `\n` or `\r`) becomes a space and the
 * result is passed through `replaceDuplicateSpacesWithSingleSpace`.
 *
 * @param text - The text that may contain line breaks.
 * @returns The text without line breaks.
 */
export function replaceNewLinesWithSpaces(text: string): string {
  const singleLineText: string = text.replace(/(\r\n|\n|\r)/gm, ' ');

  return replaceDuplicateSpacesWithSingleSpace(singleLineText);
}

/**
 * Collapses every run of consecutive space characters into a single space.
 * Only the plain space character is affected; tabs and other whitespace are left untouched.
 *
 * @param text - The text to simplify.
 * @returns The text in which no two spaces follow each other.
 */
export function replaceDuplicateSpacesWithSingleSpace(text: string): string {
  // Matches all spaces of a run except the last one (a space must still follow the match).
  const surplusSpaces = / +(?= )/g;

  return text.replace(surplusSpaces, '');
}

/**
 * Strips diacritics from a text, e.g. "é" becomes "e".
 *
 * @param text - The text to strip.
 * @returns The NFD-normalised text without characters from the range U+0300 to U+036F.
 */
export function removeDiacritics(text: string): string {
  // NFD splits an accented letter into its base letter followed by combining marks ...
  const decomposed: string = text.normalize('NFD');

  // ... so dropping the combining marks leaves only the base letters.
  return decomposed.replace(/[\u0300-\u036f]/g, '');
}

/**
 * Prepares a text for being split into words on single spaces: the punctuation marks `. , : ! ?`
 * and line breaks are turned into spaces, runs of spaces are collapsed and surrounding whitespace
 * is removed.
 *
 * TODO: check whether this set of punctuation marks is sufficient (e.g. `;`, quotes and brackets
 * are kept as part of the words).
 */
function cleanText(text: string): string {
  // Punctuation has to be replaced BEFORE the spaces are collapsed. Otherwise inputs like "a, b"
  // end up with two consecutive spaces again.
  const withoutPunctuation: string = text.replace(/[.,:!?]/g, ' ');

  // Trimming removes the space left behind by leading/trailing punctuation such as a final ".".
  return replaceNewLinesWithSpaces(withoutPunctuation).trim();
}

/**
 * Replaces every HTML tag (anything from `<` up to the next `>`) with the given separator, trims
 * the result and passes it through `replaceDuplicateSpacesWithSingleSpace`.
 *
 * @param html - The markup to strip the tags from.
 * @param separator - The text put in place of each tag; a single space by default.
 * @returns The text without tags.
 */
export const replaceHtmlTagsWithSeparator = (html: string, separator: string = ' '): string => {
  const withoutTags: string = html.replace(/<[^>]*>/g, separator);

  return replaceDuplicateSpacesWithSingleSpace(withoutTags.trim());
};

/**
 * Replaces every LaTeX formula with the given separator, trims the result and passes it through
 * `replaceDuplicateSpacesWithSingleSpace`.
 *
 * Recognised formula delimiters are `$$ ... $$`, `\( ... \)` and `\[ ... \]`. Formulas may span
 * several lines.
 *
 * @param html - The text that may contain LaTeX formulas.
 * @param separator - The text put in place of each formula; a single space by default.
 * @returns The text without formulas.
 */
export const replaceLatexFormulaWithSeparator = (html: string, separator: string = ' '): string => {
  // `[^]` matches any character including line breaks. The quantifier has to be lazy (`*?`) so that
  // each match ends at the NEXT closing delimiter. A greedy match would run from the first opening
  // to the last closing delimiter and swallow all regular text between two formulas.
  const withoutFormulas: string = html
    .replace(/\$\$[^]*?\$\$/g, separator)
    .replace(/\\\([^]*?\\\)/g, separator)
    .replace(/\\\[[^]*?\\\]/g, separator)
    .trim();

  return replaceDuplicateSpacesWithSingleSpace(withoutFormulas);
};
