import { KeywordDensity } from '../../types/TextAnalysis';

/** Minimum number of characters a token needs in order to be counted as a keyword. */
const MIN_WORD_LENGTH = 3;

/**
 * Stop words that are never reported as keywords.
 *
 * One line per language; a few words occur in more than one list, which is
 * harmless because the Set collapses duplicates.
 */
const EXCLUDED_WORDS = new Set<string>([
  // Common English words
  'the', 'be', 'to', 'of', 'and', 'a', 'in', 'that', 'have', 'i',
  // Common German words
  'der', 'die', 'das', 'und', 'in', 'zu', 'den', 'ist', 'sie', 'mit',
  // Common Spanish words
  'el', 'la', 'de', 'que', 'y', 'en', 'un', 'ser', 'se', 'no',
  // Common French words
  'le', 'la', 'de', 'et', 'est', 'en', 'un', 'une', 'du', 'dans',
]);

/**
 * Returns the ten most frequent keywords in `text` together with their share
 * of all counted words.
 *
 * The text is lower-cased, punctuation and symbols are removed (not replaced by
 * a space, so "well-known" is counted as "wellknown") and the remainder is
 * split on whitespace. Tokens shorter than `MIN_WORD_LENGTH` characters or
 * contained in `EXCLUDED_WORDS` are ignored.
 *
 * @param text - Raw text to analyse.
 * @returns Up to ten `{ word, count, percentage }` entries sorted by descending
 *   count (ties keep the order of first appearance). `percentage` is relative
 *   to the number of counted words, i.e. after short and excluded words have
 *   been dropped. Empty when no word qualifies.
 */
export const analyzeKeywordDensity = (text: string): Array<KeywordDensity> => {
  if (!text.trim()) return [];

  const words = text
    .toLowerCase()
    // Strip every Unicode punctuation mark and symbol, not just a hand-picked
    // ASCII subset, so "hello?", "«hello»" and "¿hello" all count as "hello".
    // Apostrophes are spared here so contractions such as "don't" stay intact.
    .replace(/(?!['\u2019])[\p{P}\p{S}]/gu, '')
    .split(/\s+/)
    // Apostrophes at the edge of a token are quotation marks, not part of the word.
    .map(token => token.replace(/^['\u2019]+|['\u2019]+$/g, ''))
    .filter(token => token.length >= MIN_WORD_LENGTH && !EXCLUDED_WORDS.has(token));

  if (words.length === 0) return [];

  // A Map instead of a plain object: words like "constructor" would otherwise
  // read inherited Object.prototype members and corrupt the count, and
  // integer-like tokens would be reordered ahead of other keys.
  const frequency = new Map<string, number>();
  for (const word of words) {
    frequency.set(word, (frequency.get(word) ?? 0) + 1);
  }

  const wordCount = words.length;

  return Array.from(frequency, ([word, count]) => ({
    word,
    count,
    percentage: (count / wordCount) * 100
  }))
    .sort((a, b) => b.count - a.count)
    .slice(0, 10);
};