<?php

namespace Drupal\asciidoc_display;

use Drupal\Core\Cache\CacheBackendInterface;
use Drupal\Core\File\FileSystemInterface;

/**
 * Parses bare HTML generated from AsciiDoc source into its component parts.
 *
 * The result of each parse is stored in the injected cache backend, keyed on
 * the HTML string and the (defaulted) parsing options.
 */
class AsciidocParser implements AsciidocParserInterface {

  /**
   * Cache service.
   *
   * @var \Drupal\Core\Cache\CacheBackendInterface
   */
  protected $cache;

  /**
   * The file system service.
   *
   * @var \Drupal\Core\File\FileSystemInterface
   */
  protected $fileSystem;

  /**
   * DOM document created from HTML representing asciidoc page.
   *
   * @var \DOMDocument
   */
  protected $dom;

  /**
   * Constructs an AsciidocParser object.
   *
   * @param \Drupal\Core\Cache\CacheBackendInterface $cache
   *   The cache.asciidoc caching service.
   * @param \Drupal\Core\File\FileSystemInterface $file_system
   *   The file system service.
   */
  public function __construct(CacheBackendInterface $cache, FileSystemInterface $file_system) {
    $this->cache = $cache;
    $this->fileSystem = $file_system;
    $this->dom = new \DOMDocument();
  }

  /**
   * {@inheritDoc}
   */
  public function parseBareHtml(string $html, array $options = []) {
    $options += [
      'keep_toc' => FALSE,
      'toc_active' => '',
      'highlight_php' => FALSE,
      'link_prefix' => '',
      'image_prefix' => '',
      'parse_parents' => FALSE,
      'remove_h1_titles' => FALSE,
      'remove_h2_titles' => FALSE,
      'remove_source_line' => FALSE,
    ];

    // Check for previously cached results of parsing this HTML string with
    // the same set of options. The cache ID is built from the defaulted,
    // key-sorted options so that omitting an option, passing its default
    // explicitly, or passing options in a different order all share one entry.
    $key_options = $options;
    ksort($key_options);
    $cid = 'parse_' . md5(serialize([$html, $key_options]));
    $cached = $this->cache->get($cid);
    if ($cached) {
      return $cached->data;
    }

    // Parsed output was not found in the cache, so we need to parse it. Start
    // from a fresh document so nothing from an earlier parse can leak into
    // this one, and skip loading entirely for an empty string (PHP 8 throws on
    // an empty source).
    // NOTE(review): the 'HTML-ENTITIES' target of mb_convert_encoding() is
    // deprecated as of PHP 8.2; it is kept here to preserve existing output.
    $this->dom = new \DOMDocument();
    if ($html !== '') {
      $this->dom->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
    }

    $info = [];

    // Read off the book and section titles. A missing element yields ''.
    $info['book_title'] = $this->elementText('asciidoc-display-book-title');
    $info['section_title'] = $this->elementText('asciidoc-display-section-title');

    // Read out and optionally remove the source files. These look like:
    // <span class="remark">Source file: config.txt</span>
    // Also, read out and remove the copyright notice. The old style for this
    // looks like: <span class="remark">Copyright notice: ...</span>.
    // For the new style, see below.
    $info['files'] = [];
    $info['copyright'] = '';
    $to_remove = [];
    foreach (iterator_to_array($this->dom->getElementsByTagName('span')) as $span) {
      if ($span->getAttribute('class') !== 'remark') {
        continue;
      }
      $matches = [];
      if (preg_match('/^Source file: ([a-zA-Z_.0-9\-]+)$/', $span->nodeValue, $matches)) {
        $info['files'][] = $matches[1];
        if ($options['remove_source_line']) {
          $to_remove[] = $span;
        }
        continue;
      }
      // This is the old style of copyright notice, and is only OK for
      // English.
      if (preg_match('/Copyright notice: /i', $this->dom->saveHTML($span))) {
        $span->removeAttribute('class');
        $info['copyright'] = $this->dom->saveHTML($span);
        $to_remove[] = $span;
      }
    }

    // Remove after the first loop, so that every span is inspected while it
    // is still in the document.
    foreach ($to_remove as $span) {
      // The span is enclosed in an EM in a P tag. Remove the P. The guards
      // cover a wrapper that was already removed (two spans sharing it) or a
      // span that is not nested as expected.
      $wrapper = $span->parentNode ? $span->parentNode->parentNode : NULL;
      if ($wrapper && $wrapper->parentNode) {
        $wrapper->parentNode->removeChild($wrapper);
      }
    }

    // Read out the new-style copyright notice and summary. These are
    // paragraph tags with a class attribute of "summary" or "copyright".
    // Iterate over a snapshot: removing nodes while walking the live
    // DOMNodeList would skip the paragraph following each removed one.
    $info['summary'] = '';
    foreach (iterator_to_array($this->dom->getElementsByTagName('p')) as $paragraph) {
      $role = $paragraph->getAttribute('class');
      if ($role !== 'summary' && $role !== 'copyright') {
        continue;
      }
      // Remove the class. Save the paragraph for return. Remove it.
      $paragraph->removeAttribute('class');
      $info[$role] = ($role === 'copyright') ? $this->dom->saveHTML($paragraph) : $paragraph->nodeValue;
      if ($paragraph->parentNode) {
        $paragraph->parentNode->removeChild($paragraph);
      }
    }

    // Locate the image tags and record their source locations (before any
    // prefix is applied below).
    $info['images'] = [];
    foreach ($this->dom->getElementsByTagName('img') as $image) {
      $info['images'][] = $image->getAttribute('src');
    }

    // Modify the links and image sources.
    if ($options['link_prefix']) {
      $this->prefixUrls('a', 'href', $options['link_prefix']);
    }
    if ($options['image_prefix']) {
      $this->prefixUrls('img', 'src', $options['image_prefix']);
    }

    // Remove h1 and h2 titles, if requested.
    if ($options['remove_h1_titles']) {
      $this->removeTitleHeadings('h1');
    }
    if ($options['remove_h2_titles']) {
      $this->removeTitleHeadings('h2');
    }

    // Read off and/or remove the table of contents div, if there is one; it has
    // class "toc". Also locate the main content div, which has class
    // "asciidoc-display-main-content". And try to figure out what type of
    // page it is, based on the presence of divs of certain classes.
    $toc = NULL;
    $body = NULL;
    $has_chapter = FALSE;
    $has_topic = FALSE;
    $chapter_types = ['chapter', 'preface', 'appendix', 'index', 'glossary'];
    // Iterate over a snapshot so that removing the table of contents does not
    // cause the div that follows it to be skipped.
    foreach (iterator_to_array($this->dom->getElementsByTagName('div')) as $div) {
      // Ignore divs that left the document when the table of contents was
      // removed earlier in this loop.
      if (!$this->isAttached($div)) {
        continue;
      }
      $classes = $this->getClasses($div);
      if (!$classes) {
        continue;
      }

      // See if this is a chapter or topic page.
      if (array_intersect($chapter_types, $classes)) {
        $has_chapter = TRUE;
      }
      if (in_array('section', $classes, TRUE)) {
        $has_topic = TRUE;
      }

      // Pick out or remove the table of contents.
      if (in_array('toc', $classes, TRUE)) {
        if ($options['keep_toc']) {
          $toc = $div;
          // Add a class denoting that this is a table of contents that is
          // present on the page.
          $this->addClass($toc, 'asciidoc-home-toc');
        }
        else {
          $toc = $div->parentNode->removeChild($div);
        }
      }

      // Pick out the main body div.
      if (in_array('asciidoc-display-main-content', $classes, TRUE)) {
        $body = $div;
      }
    }

    // Figure out the page type, based on the div classes found in the loop
    // above. Chapter-like classes win over "section".
    if ($has_chapter) {
      $info['type'] = 'chapter';
    }
    elseif ($has_topic) {
      $info['type'] = 'topic';
    }
    else {
      $info['type'] = 'book';
    }

    if ($toc && $options['parse_parents']) {
      // Parse the first UL in the table of contents into a parents array.
      // Child nodes may be text nodes, which have no tag name.
      foreach ($toc->childNodes as $child) {
        if ($child instanceof \DOMElement && $child->tagName === 'ul') {
          $info['parents'] = $this->calculateParents($child, 'index.html', $options['link_prefix']);
          break;
        }
      }
    }

    if ($toc && $options['toc_active']) {
      // Set any links in the table of contents to this URL to have class
      // "active", and their UL/LI parents to "active-trail".
      foreach ($toc->getElementsByTagName('a') as $link) {
        if (strpos($link->getAttribute('href'), $options['toc_active']) !== FALSE) {
          $this->setLinkActive($link);
        }
      }
    }

    // Highlighting handles every listing in one pass; there is nothing to
    // highlight if no main content div was found.
    if ($options['highlight_php'] && $body) {
      $this->highlightXmlOnce($body);
    }

    $info['body'] = $this->domElementToHtml($body);
    $info['toc'] = $this->domElementToHtml($toc);

    $item = new AsciidocItem($info, $options);

    $this->cache->set($cid, $item);
    return $item;
  }

  /**
   * Returns the text content of the element with the given ID.
   *
   * @param string $id
   *   The ID attribute value to look up in the current document.
   *
   * @return string
   *   The element's node value, or an empty string if there is no such
   *   element.
   */
  protected function elementText(string $id) {
    $element = $this->dom->getElementById($id);
    return $element ? $element->nodeValue : '';
  }

  /**
   * Removes headings of the given tag whose class attribute is "title".
   *
   * @param string $tag
   *   Heading tag name, such as 'h1' or 'h2'.
   */
  protected function removeTitleHeadings(string $tag) {
    // Snapshot the live node list so removals do not cause skipped headings.
    foreach (iterator_to_array($this->dom->getElementsByTagName($tag)) as $heading) {
      if ($heading->getAttribute('class') === 'title' && $heading->parentNode) {
        $heading->parentNode->removeChild($heading);
      }
    }
  }

  /**
   * Checks whether a node is still part of the current document tree.
   *
   * @param \DOMNode $node
   *   The node to check.
   *
   * @return bool
   *   TRUE if walking up the parent chain reaches the current document.
   */
  protected function isAttached(\DOMNode $node) {
    for ($current = $node; $current !== NULL; $current = $current->parentNode) {
      if ($current->isSameNode($this->dom)) {
        return TRUE;
      }
    }
    return FALSE;
  }

  /**
   * Splits the class attribute of an element into individual class names.
   *
   * @param \DOMElement $element
   *   The element to read.
   *
   * @return string[]
   *   The non-empty class names, in attribute order.
   */
  protected function getClasses(\DOMElement $element) {
    return array_filter(explode(' ', $element->getAttribute('class')));
  }

  /**
   * Adds a class name to an element unless it is already present.
   *
   * @param \DOMElement $element
   *   The element to modify.
   * @param string $class
   *   The class name to add.
   */
  protected function addClass(\DOMElement $element, string $class) {
    $classes = $this->getClasses($element);
    if (!in_array($class, $classes, TRUE)) {
      $classes[] = $class;
    }
    $element->setAttribute('class', implode(' ', $classes));
  }

  /**
   * Adds a prefix to internal URLs.
   *
   * @param string $tag
   *   Tag name to look for, normally either 'a' or 'img'.
   * @param string $attribute
   *   Attribute containing the URL, 'href' or 'src'.
   * @param string $prefix
   *   Prefix to put on internal links.
   */
  protected function prefixUrls(string $tag, string $attribute, string $prefix) {
    foreach ($this->dom->getElementsByTagName($tag) as $element) {
      $url = $element->getAttribute($attribute);
      // Leave alone: empty values, anything containing a colon (treated as a
      // full URL), and values that already start with the prefix.
      $skip = !$url || strpos($url, ':') !== FALSE || strpos($url, $prefix) === 0;
      if (!$skip) {
        $element->setAttribute($attribute, $prefix . $url);
      }
    }
  }

  /**
   * Recursively calculates the parents array from a table of contents div.
   *
   * @param \DOMElement $toc
   *   Table of contents to look through, which should be a UL element.
   * @param string $parent
   *   Parent file name for this part of the table of contents.
   * @param string $link_prefix
   *   Link prefix that may be prepended on href attributes of links, to strip
   *   off.
   * @param array $parents
   *   In-progress array of parents, if this is a recursive call.
   *
   * @return array
   *   Associative array whose keys are file names, and whose values are arrays
   *   with the file name of the parent in the table of contents (key: file), and
   *   the weight at this level of the table of contents (key: weight).
   */
  protected function calculateParents(\DOMElement $toc, string $parent, string $link_prefix, array $parents = []) {
    if (!isset($parents[$parent])) {
      $parents[$parent] = ['file' => '', 'weight' => 0];
    }

    $weight = -1;
    foreach ($toc->childNodes as $li) {
      // The weight counts every child node (including text nodes), which
      // keeps the weight values the same as they have always been.
      $weight++;
      if (!$li instanceof \DOMElement) {
        continue;
      }

      // Each LI should have an A link, and optionally a UL for another
      // level of table of contents.
      $ul_to_do = NULL;
      $this_file = '';
      foreach ($li->childNodes as $elem) {
        if (!$elem instanceof \DOMElement) {
          continue;
        }
        if ($elem->tagName === 'a') {
          // Add this file to the parents array, stripping the link prefix
          // only where it actually leads the href.
          $this_file = $elem->getAttribute('href');
          if ($link_prefix !== '' && strpos($this_file, $link_prefix) === 0) {
            $this_file = substr($this_file, strlen($link_prefix));
          }
          $parents[$this_file] = ['file' => $parent, 'weight' => $weight];
        }
        elseif ($elem->tagName === 'ul') {
          $ul_to_do = $elem;
        }

        // Parse the next level deep in the table of contents, and add it to
        // the parents array. Reset afterwards so the same sub-list is not
        // parsed again for every remaining sibling.
        if ($ul_to_do && $this_file) {
          $parents = $this->calculateParents($ul_to_do, $this_file, $link_prefix, $parents);
          $ul_to_do = NULL;
        }
      }
    }

    return $parents;
  }

  /**
   * Sets a DOM node to have class "active", and parents to "active-trail".
   *
   * @param \DOMNode $link
   *   A link that is deemed to be active.
   */
  protected function setLinkActive(\DOMNode $link) {
    // Set the link itself to have class "active".
    if ($link instanceof \DOMElement) {
      $this->addClass($link, 'active');
    }

    // Set all LI and UL elements that are ancestors of this link to have
    // class "active-trail".
    for ($node = $link->parentNode; $node; $node = $node->parentNode) {
      if ($node instanceof \DOMElement && ($node->tagName === 'li' || $node->tagName === 'ul')) {
        $this->addClass($node, 'active-trail');
      }
    }
  }

  /**
   * Finds and highlights PHP code.
   *
   * Every PRE element with class "programlisting-php" under the given node is
   * replaced by a highlighted CODE element. The PRE elements are collected
   * into an array first, so a single call handles all of them; calling this
   * more than once is harmless.
   *
   * @param \DOMNode $node
   *   DOM node object to highlight.
   */
  protected function highlightXmlOnce(\DOMNode $node) {
    if (!($node instanceof \DOMElement || $node instanceof \DOMDocument)) {
      return;
    }

    // Replacing nodes while walking the live DOMNodeList makes it skip
    // entries, so work from a snapshot instead.
    foreach (iterator_to_array($node->getElementsByTagName('pre')) as $pre) {
      if (!$pre->parentNode || !in_array('programlisting-php', $this->getClasses($pre), TRUE)) {
        continue;
      }

      // Parse the highlighted markup in a scratch document. Convert non-ASCII
      // characters to entities first, as is done for the main document, so
      // UTF-8 text in the code is not misread.
      $markup = $this->highlightCode($pre->nodeValue);
      $scratch = new \DOMDocument();
      $scratch->loadHTML(mb_convert_encoding($markup, 'HTML-ENTITIES', 'UTF-8'));
      $code = $scratch->getElementsByTagName('code')->item(0);
      if (!$code) {
        continue;
      }

      $replacement = $pre->ownerDocument->importNode($code, TRUE);
      $pre->parentNode->replaceChild($replacement, $pre);
    }
  }

  /**
   * Highlights PHP code.
   *
   * @param string $code
   *   Code to highlight.
   *
   * @return string
   *   Highlighted code.
   */
  protected function highlightCode(string $code) {
    // Get rid of the &gt; and &lt; entities.
    $code = str_replace(['&gt;', '&lt;'], ['>', '<'], trim($code));

    // Add a <?php tag to the front, unless the code already starts with one.
    $strip = FALSE;
    if (strpos($code, '<?php') !== 0) {
      $code = '<?php ' . $code;
      $strip = TRUE;
    }

    // Highlight using native PHP code highlighter.
    $code = highlight_string($code, TRUE);

    // Strip off the <?php tag that was added above. Older PHP versions follow
    // it with &nbsp;, while PHP 8.3 and later emit a plain space.
    if ($strip) {
      $code = preg_replace('/&lt;\?php(?:&nbsp;| )/', '', $code, 1);
    }

    return $code;
  }

  /**
   * Render a DOMElement into an HTML string.
   *
   * @param \DOMElement|null $element
   *   The DOM element to render.
   *
   * @return string
   *   HTML created from the provided DOM element, or an empty string if there
   *   is no element or it could not be rendered.
   */
  protected function domElementToHtml($element) {
    if (!$element) {
      return '';
    }

    // A deep import already copies the element, so the source node is left
    // untouched without cloning it first.
    $doc = new \DOMDocument();
    $doc->appendChild($doc->importNode($element, TRUE));
    $html = $doc->saveHTML();

    return is_string($html) ? $html : '';
  }

}
