<?php

namespace Drupal\ai_story_teller\Plugin\AiReporterScraper;

use Drupal\ai_story_teller\Plugin\AiRScraperPluginBase;
use Symfony\Component\DomCrawler\Crawler;

/**
 * Provides the Tabula scraper.
 *
 * @AiRScraperPlugin(
 *   id = "tabula_scraper",
 *   label = @Translation("Tabula Scraper")
 * )
 */
class TabulaScraper extends AiRScraperPluginBase {

  /**
   * Scrapes a Tabula listing page and every article it links to.
   *
   * The listing page is fetched first and the first link of each news item in
   * the grid is collected. Every collected link is then fetched and parsed
   * for a title, a body and a publication time. Articles that cannot be
   * fetched, or that lack any of those three elements, are skipped.
   *
   * Exceptions raised while fetching the listing page itself are not caught
   * here and propagate to the caller.
   *
   * @param string $url
   *   URL of the listing page to scrape.
   *
   * @return array
   *   A list of items, each an array with the keys:
   *   - label: Text of the article title element.
   *   - body: Inner HTML of the article body element.
   *   - published: Value of the "datetime" attribute of the first <time>
   *     element (may be empty if the attribute is missing).
   *   - url: The article link exactly as found on the listing page.
   */
  public function scrape($url): array {
    $listing = $this->httpClient->get($url);
    if (!$listing) {
      return [];
    }

    // Reset the shared crawler first so nodes left over from a previous call
    // are not mixed into the listing document.
    $this->crawler->clear();
    $this->crawler->addContent($listing->getBody()->getContents());

    // filter() always returns a Crawler (possibly empty), so the chain is safe
    // even when the grid wrapper is absent; each() then yields an empty array.
    $hrefs = $this->crawler
      ->filter('.PagedNewsItemList_listWrap__Jr25w.PagedNewsItemList_type-grid__3ougZ')
      ->filter('.news-item-list-item')
      ->each(static function (Crawler $item) {
        $links = $item->filter('a');
        // attr() reads from the first matched node, i.e. the item's first link.
        return $links->count() > 0 ? $links->attr('href') : '';
      });

    // Drop items that had no link or a link without a usable href.
    $hrefs = array_filter($hrefs, static function ($href) {
      return is_string($href) && $href !== '';
    });

    $data = [];
    foreach ($hrefs as $href) {
      try {
        $article = $this->httpClient->get($href);
        if (!$article) {
          continue;
        }

        $this->crawler->clear();
        $this->crawler->addContent($article->getBody()->getContents());

        $title = $this->crawler->filter('h1.ArticleHeaderDefault_title__2lrsJ');
        // NOTE(review): the previous selector ended in " content-text", which
        // matches a <content-text> descendant element rather than a class. It
        // is assumed here that "content-text" is a second class on the wrapper
        // div - confirm against the live markup.
        $body = $this->crawler->filter('div.ArticleContent_contentTextWrapper__n-T_q.content-text');
        $published = $this->crawler->filter('time');

        // A Crawler object is always truthy, so emptiness has to be checked
        // through count() before reading from it.
        if ($title->count() === 0 || $body->count() === 0 || $published->count() === 0) {
          continue;
        }

        $data[] = [
          'label' => $title->text(),
          'body' => $body->html(),
          'published' => $published->attr('datetime'),
          'url' => $href,
        ];
      }
      catch (\Exception $exception) {
        // Deliberate best effort: one article that fails to download or parse
        // must not abort the whole run, so it is skipped.
        continue;
      }
    }

    return $data;
  }

}
