import { querySelectorArray } from './utils'

/**
 * Cleans up a pasted document in place.
 *
 * The passes are written with Google Docs markup in mind, but they are applied
 * to whatever document is passed in. They run in the order listed, and each
 * selector is evaluated right before its own pass runs.
 *
 * @param dom - The document to rewrite.
 */
export const transformGoogleDocDOM = (dom: Document) => {
  const passes: ReadonlyArray<readonly [string, (elt: HTMLElement) => void]> = [
    ['p.title', replaceTitleElement],
    ['br', removeExtraLineBreaks],
    ['a[href]', removeGoogleDocsTableOfContents],
    ['a[href][id]', removeGoogleDocsComment],
    ['span', trimWhitespace],
    ['hr', convertPageBreaks],
    // Footnotes need the document itself to look up the note they point at
    ['a[href][id]', (elt) => convertFootnote(elt, dom)],
  ]

  for (const [selector, applyTo] of passes) {
    querySelectorArray(dom, selector).forEach(applyTo)
  }
}

// A BR that directly follows a paragraph is redundant in our schema and makes
// pasted content far too spaced out, so it is dropped. The same goes for a BR
// carrying the "Apple-interchange-newline" class.
const removeExtraLineBreaks = (elt: Element) => {
  const followsParagraph = elt.previousElementSibling?.tagName === 'P'
  if (
    !followsParagraph &&
    !elt.classList.contains('Apple-interchange-newline')
  ) {
    // Any other line break is kept
    return
  }
  elt.remove()
}

// Swaps a title element for an <h1 class="title"> holding the same inner markup.
// The heading is created from the element's own document rather than the global
// `document`, so this also works on a document that was parsed separately from
// the page, or in an environment that has no global `document` at all.
const replaceTitleElement = (elt: HTMLElement) => {
  const title = elt.ownerDocument.createElement('h1')
  title.className = 'title'
  // Only the contents are carried over; the original element's own attributes are not
  title.innerHTML = elt.innerHTML
  elt.replaceWith(title)
}

// Matches an absolute Google Docs link that points at a heading, e.g.
//   https://docs.google.com/document/d/<id>/edit#heading=h.abc123
// A query string may sit between "/edit" and the fragment (such as
// "?usp=sharing" or "?tab=t.0"), so that is allowed too.
const GOOGLE_DOCS_TOC_REGEX =
  /^https:\/\/docs\.google\.com\/document\/d\/[^/]+\/edit(?:\?[^#]*)?#heading=(h\.[^/]+)/

// Neutralises links that point at a Google Docs heading, either as a bare
// "#h.…" fragment or as an absolute Google Docs URL with a heading fragment.
const removeGoogleDocsTableOfContents = (elt: HTMLElement) => {
  const href = elt.getAttribute('href')
  if (!href) return

  const pointsAtHeading =
    href.startsWith('#h.') || GOOGLE_DOCS_TOC_REGEX.test(href)
  if (!pointsAtHeading) return

  // This is a link to a heading, so it's not gonna work (until we turn these into mentions someday)
  // If it's the only thing on a line (eg TOC), just remove it outright
  // Otherwise, just remove the link
  const parent = elt.closest('p')
  if (parent && parent.textContent === elt.textContent) {
    parent.remove()
  } else {
    elt.removeAttribute('href')
  }
}

// Strips Google Docs comments, based on the anchor's id:
//  - "cmnt_ref…" is the pointer to a comment; its enclosing <sup> is removed
//  - any other "cmnt…" id is the comment itself; its enclosing <div> is removed
// Anchors with any other id (or with no matching wrapper) are left untouched.
const removeGoogleDocsComment = (elt: HTMLElement) => {
  const anchorId = elt.getAttribute('id') ?? ''

  // "cmnt_ref" has to be tested first because it also starts with "cmnt"
  const wrapperTag = anchorId.startsWith('cmnt_ref')
    ? 'sup'
    : anchorId.startsWith('cmnt')
    ? 'div'
    : null
  if (wrapperTag === null) return

  elt.closest(wrapperTag)?.remove()
}

// Google Docs indents are just a ton of nbsps
// We want to remove these, but keep individual spaces which are key
// for separating links from text
const trimWhitespace = (elt: HTMLElement) => {
  const markup = elt.innerHTML

  // Assigning innerHTML throws away and re-creates every descendant node, so
  // leave the element alone when there is nothing to rewrite
  if (!markup.includes('&nbsp;')) return

  elt.innerHTML = markup
    // Runs of two or more nbsps are indentation: drop them entirely
    .replace(/(&nbsp;){2,}/g, '')
    // A lone nbsp becomes a regular space
    .replace(/&nbsp;/g, ' ')
}

// Replaces the element with a paragraph containing "---".
// These --- will be picked up by transformPasted > splitCards
// The paragraph is created from the element's own document rather than the
// global `document`, so this also works on a document that was parsed
// separately from the page, or where no global `document` exists.
const convertPageBreaks = (elt: HTMLElement) => {
  const dash = elt.ownerDocument.createElement('p')
  dash.innerHTML = '---'
  dash.className = 'hr-import' // For tests
  elt.replaceWith(dash)
}

// Matches "ftnt_ref1" to 1
const GOOGLE_FOOTNOTE_REGEX = /ftnt_ref(\d+)/

// Inlines a footnote at the place it is referenced from.
//
// `elt` is expected to be the reference anchor (id "ftnt_ref<N>"); anything else
// is ignored. The matching note anchor ("#ftnt<N>") is looked up in `dom`. The
// note anchor itself is removed, and its enclosing <div> is moved into a new
// <div class="imported-footnote"> that takes the place of the reference's <sup>.
//
// Nothing is changed when the number, the <sup>, the note, or the note's <div>
// cannot be found.
const convertFootnote = (elt: HTMLElement, dom: Document) => {
  const id = elt.getAttribute('id')
  if (!id || !id.startsWith('ftnt_ref')) return

  const footnoteId = parseInt(id.match(GOOGLE_FOOTNOTE_REGEX)?.[1] ?? '', 10)
  const parent = elt.closest('sup')
  const note = dom.querySelector(`#ftnt${footnoteId}`)
  const noteContent = note?.closest('div')
  if (!footnoteId || !parent || !note || !noteContent) return

  // If the note's closest <div> also wraps the reference, moving that div and
  // then putting it where the reference was would insert a node into its own
  // descendant, which throws. Leave the document untouched in that case.
  if (noteContent.contains(parent)) return

  note.remove()
  // Create the wrapper in the document being transformed, not the global one
  const replacementElt = dom.createElement('div')
  replacementElt.className = 'imported-footnote'
  replacementElt.appendChild(noteContent)
  parent.replaceWith(replacementElt)
}
