// CustomProcessor.js

/**
 * Breaks a string into whitespace-delimited tokens.
 *
 * @param {string} text - Input text.
 * @returns {string[]} The non-empty pieces found between runs of whitespace.
 */
export const splitTextIntoWords = (text) => {
  const tokens = [];
  for (const piece of text.split(/\s+/)) {
    // Leading or trailing whitespace produces empty pieces; skip them.
    if (piece.length > 0) {
      tokens.push(piece);
    }
  }
  return tokens;
};

// Characters stripped from both ends of a token. Every entry must be a single
// character because the trimming loops compare one character at a time.
// The closing curly quote (’) is listed alongside the opening one (‘) so that
// quoted tokens such as ‘word’ are cleaned symmetrically.
const EDGE_PUNCTUATION = new Set([
  ';', ',', '.', '?', ' ', '"', "'", '(', ')', ':', '[', ']',
  '“', '”', '‘', '’',
]);

/**
 * Strips punctuation from both ends of a token and drops a trailing
 * possessive suffix ('s or ’s, case-insensitive).
 *
 * Characters in the interior of the token are left untouched.
 *
 * @param {string} word - Raw token.
 * @returns {string} The trimmed token; empty when the token consists solely
 *   of strippable characters.
 */
function removeSpecialChars(word) {
  let start = 0;
  let end = word.length - 1;

  // Skip strippable characters at the beginning.
  while (start <= end && EDGE_PUNCTUATION.has(word[start])) {
    start++;
  }

  // Skip strippable characters at the end.
  while (end >= start && EDGE_PUNCTUATION.has(word[end])) {
    end--;
  }

  const trimmed = word.substring(start, end + 1);

  // Remove a possessive suffix left at the end of the trimmed token.
  const lowered = trimmed.toLowerCase();
  if (lowered.endsWith('’s') || lowered.endsWith("'s")) {
    return trimmed.substring(0, trimmed.length - 2);
  }

  return trimmed;
}

/**
 * Sorts raw tokens into usable words and tokens that are set aside.
 *
 * Each token is passed through `removeSpecialChars`. A token is set aside when
 * it contains a digit (recorded in its raw form) or when its stripped form is
 * non-empty and has no ASCII letter (recorded in its stripped form). Tokens
 * whose stripped form is empty are dropped without being recorded.
 *
 * @param {string[]} words - Raw tokens.
 * @returns {{cleaned: string[], filtered: string[]}} Kept words and set-aside
 *   tokens, each in input order.
 */
export const cleanWords = (words) => {
  const cleaned = [];
  const filtered = [];

  words.forEach((rawToken) => {
    const stripped = removeSpecialChars(rawToken);

    // Tokens with a digit are set aside in their original, unstripped form.
    if (/\d/.test(rawToken)) {
      filtered.push(rawToken);
      return;
    }

    // Non-empty tokens with no ASCII letter are set aside in stripped form.
    if (/^[^a-zA-Z]+$/.test(stripped)) {
      filtered.push(stripped);
      return;
    }

    // Tokens that ended up empty are dropped.
    if (stripped.length > 0) {
      cleaned.push(stripped);
    }
  });

  return { cleaned, filtered };
};

/**
 * Counts how often each word occurs, ignoring case.
 *
 * Counting is done in a Map so that words which collide with properties
 * inherited from Object.prototype (e.g. "constructor", "__proto__") are
 * counted like any other word instead of producing NaN or being lost.
 *
 * @param {string[]} words - Words to count.
 * @returns {Object<string, number>} Plain object mapping each lower-cased word
 *   to its number of occurrences.
 */
export const countWordOccurrences = (words) => {
  const tally = new Map();
  words.forEach((word) => {
    const key = word.toLowerCase();
    tally.set(key, (tally.get(key) ?? 0) + 1);
  });
  // Object.fromEntries defines own data properties, so even "__proto__"
  // becomes a regular key rather than triggering the prototype setter.
  return Object.fromEntries(tally);
};
