/**
 * Builds a finder that extracts "Article ..." headings, and the numbered
 * sections listed beneath them, from the table-of-contents part of a document.
 *
 * @param {object} [opts] - Not read by this implementation; kept so existing
 *   call sites that pass options continue to work.
 * @returns {{ getArticles: function(*): Array<object> }} Finder exposing `getArticles(dom)`.
 */
const articlesFinder = function (opts) {
  // Short random base-36 identifier. Not guaranteed unique and not
  // cryptographically secure.
  const generateId = () => Math.random().toString(36).slice(2, 11);

  // A numbered entry such as "Section 1.2 Title" or "1.2 Title"; the match
  // stops before the first "." that follows the number, or at end of text.
  const contentRegex = /^(?:(?:Section\s+)?(\d+\.\d{1,2})\s+(.*?))(?=\.|$)/i;
  // An article heading such as "Article IV", "Article 4. Title" or "Article IV Title".
  const articleRegex = /^Article\s+(?:[IVX]+|[0-9]+)(?:\s*\.\s*[A-Z][A-Za-z\s]*|\s+[A-Z][A-Za-z\s]*)?/i;

  const tocStartRegex = /^table\s+of\s+contents/i;
  const tocEndRegex = /^(Exhibit|Annex|Schedule|Index of Defined Terms)\s*(?:[A-Z0-9]+)?/i;

  const paragraphSelector = "p, tr, h1, h2, h3, h4, h5, h6";
  const paragraphTags = new Set(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']);

  const htmlEscapes = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };

  // Escapes the characters that are significant inside HTML element content.
  const escapeHtml = (value) => value.replace(/[&<>]/g, (char) => htmlEscapes[char]);

  // Lookup key for an article heading: trimmed, lower-cased, whitespace removed.
  const toArticleKey = (heading) => heading.trim().toLowerCase().replace(/\s+/g, '');

  /**
   * Collects the articles found in TOC paragraphs and attaches to each one the
   * section entries that follow its heading.
   *
   * @param {Array<{text: string, isToc: boolean, isParagraph: boolean}>} paragraphs
   * @returns {Array<object>} Article records ordered by the index of the
   *   paragraph in which each heading first appeared.
   */
  const findSections = (paragraphs) => {
    const articles = new Map();

    // Pass 1: register every distinct article heading found inside the TOC.
    paragraphs.forEach((para, index) => {
      if (!para.isToc) {
        return;
      }

      const articleMatch = para.text.match(articleRegex);
      if (!articleMatch) {
        return;
      }

      const title = articleMatch[0].trim();
      const key = toArticleKey(title);
      if (articles.has(key)) {
        // The first occurrence of a heading wins.
        return;
      }

      const words = title.split(' ');
      articles.set(key, {
        id: generateId(),
        match: words,
        word: words.slice(0, 2).join(' '),
        definition: null,
        definedDefinition: [],
        content: [],
        definedTableDetected: false,
        title,
        alternativeTerms: [],
        type: "article",
        number: index,
        sections: [],
      });
    });

    // Pass 2: assign each section entry to the most recent article heading.
    let currentArticle = null;
    for (const para of paragraphs) {
      const articleMatch = para.text.match(articleRegex);
      if (articleMatch) {
        currentArticle = articles.get(toArticleKey(articleMatch[0]));
        continue;
      }

      if (!currentArticle) {
        continue;
      }

      const contentMatch = para.text.match(contentRegex);
      if (!contentMatch) {
        continue;
      }

      // Drop a trailing run of digits (e.g. a page number) from the displayed
      // text, then escape it: the text came from `textContent`, so it may
      // contain raw "&", "<" or ">" that must not be emitted as markup.
      const displayText = escapeHtml(para.text.replace(/\s+\d+$/, ''));

      currentArticle.sections.push(contentMatch[0]);
      currentArticle.definedDefinition.push({
        html: `<p style="margin-top: 0; margin-bottom: 8px;">${displayText}</p>`,
        text: para.text,
        isParagraph: para.isParagraph,
      });
    }

    return [...articles.values()].sort((a, b) => a.number - b.number);
  };

  return {
    /**
     * Extracts articles and their sections from the table of contents of `dom`.
     *
     * The TOC starts at the first element whose text begins with
     * "table of contents" and ends at the next element whose text begins with
     * "Exhibit", "Annex", "Schedule" or "Index of Defined Terms".
     *
     * @param {*} dom - Object exposing `querySelectorAll` (e.g. a Document or Element).
     * @returns {Array<object>} Article records; each carries `title`, `word`,
     *   `match`, `sections` and `definedDefinition` among other fields.
     */
    getArticles(dom) {
      let isToc = false;
      const paragraphs = [];

      for (const item of Array.from(dom.querySelectorAll(paragraphSelector))) {
        const text = item.textContent?.replace(/\s+/g, ' ').trim() || '';
        if (!text) {
          continue;
        }

        if (tocStartRegex.test(text)) {
          isToc = true;
        } else if (tocEndRegex.test(text)) {
          isToc = false;
        }

        // Only TOC content is analysed.
        if (!isToc) {
          continue;
        }

        paragraphs.push({
          text,
          isParagraph: paragraphTags.has(item.localName),
          isToc,
        });
      }

      return findSections(paragraphs);
    },
  };
};

export default articlesFinder;
