/**
 * Builds a finder that extracts numbered sections (e.g. "Section 1.1 Definitions")
 * from a parsed HTML document.
 *
 * Sections are discovered from the entries of the document's table of contents
 * (TOC). Body paragraphs that follow a heading whose number is listed in the
 * TOC are then attached to that section, until a "[Signature page]" marker is
 * reached.
 *
 * @param {object} [opts] - Currently unused; accepted for interface compatibility.
 * @returns {{ getSections: function(*): object[] }} Object exposing `getSections`.
 */
const sectionsFinder = function (opts) {
  // Short random identifier. Not guaranteed unique and not cryptographically secure.
  const generateId = () => Math.random().toString(36).slice(2, 11);

  // Matches "[Section ]<n>.<m> <heading>" at the start of a line. The negative
  // lookahead rejects numbers followed by "(", "Article" or "%" so percentages
  // such as "12.5%" are not mistaken for section numbers. The heading capture
  // stops at the first "." or at the end of the text.
  const contentRegex = /^(?:(?:Section\s+)?(\d+\.\d{1,2})(?!\s*\(|\s*,?\s*Article|\s*%|\d*%)\s*(.*?))(?=\.|$)/i;
  const articleRegex = /^Article\s+[A-Z0-9]+/i;
  const sectionPrefixRegex = /^Section\s/i;
  const trailingNumberRegex = /\s+\d+$/;
  const signaturePageRegex = /\[Signature page(?: follows)?\]/i;
  const tocStartRegex = /^table\s+of\s+contents/i;
  const tocEndRegex = /^(Exhibit|Annex|Schedule|Index of Defined Terms)\s*(?:[A-Z0-9]+)?/i;

  /**
   * First pass: creates one section record per distinct section number found
   * in TOC paragraphs. The first TOC entry for a number wins.
   *
   * @param {object[]} paragraphs - Paragraph records produced by `getSections`.
   * @returns {Map<string, object>} Section records keyed by section number.
   */
  const collectTocSections = (paragraphs) => {
    const sections = new Map();

    for (const para of paragraphs) {
      if (!para.isToc) {
        continue;
      }

      const tocMatch = para.text.match(contentRegex);
      if (!tocMatch) {
        continue;
      }

      const sectionNumber = tocMatch[1];
      if (sections.has(sectionNumber)) {
        continue;
      }

      // Drop a trailing number from the heading (in a TOC entry this is
      // presumably the page number).
      const heading = (tocMatch[2] ?? '').trim().replace(trailingNumberRegex, '');
      // Trim so an entry without heading text yields "Section 1.1", not "Section 1.1 ".
      const title = `Section ${sectionNumber} ${heading}`.trim();
      // Only treat the entry as prefixed when the matched text itself STARTS with
      // "Section" (any case); never emit a leading space for unprefixed entries.
      const word = sectionPrefixRegex.test(tocMatch[0])
        ? `Section ${sectionNumber}`
        : sectionNumber;

      sections.set(sectionNumber, {
        id: generateId(),
        match: [title, word, sectionNumber],
        word,
        definition: null,
        definedDefinition: [],
        content: [],
        definedTableDetected: false,
        title,
        alternativeTerms: [],
        type: "section",
        sectionNumber,
      });
    }

    return sections;
  };

  /**
   * Second pass: walks the non-TOC paragraphs and appends each one to the
   * section whose heading was seen most recently. Mutates the records in
   * `sections` in place.
   *
   * @param {object[]} paragraphs - Paragraph records produced by `getSections`.
   * @param {Map<string, object>} sections - Records from `collectTocSections`.
   */
  const attachBodyContent = (paragraphs, sections) => {
    let activeSection = null;

    for (const para of paragraphs) {
      // Nothing at or after the signature page marker is collected.
      if (signaturePageRegex.test(para.text)) {
        break;
      }

      if (para.isToc) {
        continue;
      }

      const headingMatch = para.text.match(contentRegex);
      if (headingMatch) {
        // A heading whose number is not in the TOC clears the active section,
        // so the paragraphs under it are not attributed to the previous one.
        activeSection = sections.get(headingMatch[1]);
      }

      if (!activeSection) {
        continue;
      }

      const cleanText = para.text.trim().replace(trailingNumberRegex, '');
      // A paragraph that is nothing but the section's own heading is not content.
      const isHeadingOnly =
        cleanText === activeSection.word || cleanText === activeSection.title;

      if (para.isParagraph && para.isSection && !isHeadingOnly) {
        activeSection.definedDefinition.push({
          html: para.html,
          text: para.text,
          isParagraph: para.isParagraph,
        });
      }
    }
  };

  /**
   * Runs both passes over the paragraph records.
   *
   * @param {object[]} paragraphs - Paragraph records produced by `getSections`.
   * @returns {object[]} Section records in TOC order.
   */
  const findSections = (paragraphs) => {
    const sections = collectTocSections(paragraphs);
    attachBodyContent(paragraphs, sections);
    return Array.from(sections.values());
  };

  return {
    /**
     * Extracts sections from a DOM tree.
     *
     * @param {*} dom - Any object exposing `querySelectorAll` (e.g. a Document
     *   or Element). Paragraphs, table rows and h1-h6 headings are scanned in
     *   document order.
     * @returns {object[]} One record per section listed in the TOC. Each has
     *   `id`, `match`, `word`, `title`, `sectionNumber`, `type` and
     *   `definedDefinition` (the collected body paragraphs), plus the
     *   placeholder fields `definition`, `content`, `definedTableDetected`
     *   and `alternativeTerms`.
     */
    getSections(dom) {
      let isToc = false;
      let isSection = false;
      let isInArticle = false;

      const paragraphs = [];
      const elements = Array.from(dom.querySelectorAll("p, tr, h1, h2, h3, h4, h5, h6"));

      for (const item of elements) {
        if (!item.textContent?.trim()) {
          continue;
        }

        const text = item.textContent?.replace(/\s+/g, ' ').trim() || '';

        // Section/article state is only tracked outside the TOC. This uses the
        // TOC state as of the PREVIOUS element, so the line that opens or closes
        // the TOC is classified before the flag flips.
        if (!isToc) {
          if (contentRegex.test(text)) {
            // A section heading ends any preceding article heading's scope.
            isInArticle = false;
            isSection = true;
          } else if (articleRegex.test(text)) {
            isInArticle = true;
            isSection = false;
          } else {
            // Plain content counts as section content unless it directly
            // follows an article heading.
            isSection = !isInArticle;
          }
        }

        // The TOC starts at a "Table of Contents" line and is considered
        // finished at the first Exhibit/Annex/Schedule/Index line after it.
        if (tocStartRegex.test(text)) {
          isToc = true;
        } else if (tocEndRegex.test(text)) {
          isToc = false;
        }

        paragraphs.push({
          html: item.outerHTML?.replace(/&nbsp;/g, ' ').trim() || '',
          text,
          isParagraph: ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(item.localName),
          tableNestingLevel: (item.closest('table') ? 1 : 0),
          isToc,
          isSection,
        });
      }

      return findSections(paragraphs);
    },
  };
};

export default sectionsFinder;
