<?php

namespace Drupal\ai_interpolator_scraping_bot\Plugin\AiInterPolatorFieldRules;

use Drupal\ai_interpolator\Annotation\AiInterpolatorFieldRule;
use Drupal\ai_interpolator\PluginInterfaces\AiInterpolatorFieldRuleInterface;
use Drupal\ai_interpolator_scraping_bot\ScrapingBot;
use Drupal\Core\Entity\ContentEntityInterface;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Field\FieldDefinitionInterface;
use Drupal\Core\File\FileSystemInterface;
use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
use Drupal\Core\Session\AccountProxyInterface;
use Drupal\Core\Utility\Token;
use Drupal\file\FileRepositoryInterface;
use ivan_boring\Readability\Configuration;
use ivan_boring\Readability\Readability;
use Symfony\Component\DependencyInjection\ContainerInterface;

/**
 * The rules for an image field.
 *
 * Scrapes the page referenced by a link field through ScrapingBot, extracts
 * the lead image with Readability and stores it as a file on the image field.
 *
 * @AiInterpolatorFieldRule(
 *   id = "ai_interpolator_scraping_bot_image",
 *   title = @Translation("ScrapingBot Image Crawler"),
 *   field_rule = "image",
 *   target = "file"
 * )
 */
class ImageCrawler extends AiInterpolatorFieldRule implements AiInterpolatorFieldRuleInterface, ContainerFactoryPluginInterface {

  /**
   * ScrapingBot API Caller.
   */
  private ScrapingBot $scrapingBot;

  /**
   * The entity type manager.
   */
  public EntityTypeManagerInterface $entityManager;

  /**
   * The current user.
   */
  public AccountProxyInterface $currentUser;

  /**
   * The File System interface.
   */
  public FileSystemInterface $fileSystem;

  /**
   * The File Repo.
   */
  public FileRepositoryInterface $fileRepo;

  /**
   * The token system to replace and generate paths.
   */
  public Token $token;

  /**
   * Construct an image field.
   *
   * @param array $configuration
   *   Inherited configuration.
   * @param string $plugin_id
   *   Inherited plugin id.
   * @param mixed $plugin_definition
   *   Inherited plugin definition.
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityManager
   *   The entity type manager.
   * @param \Drupal\Core\File\FileSystemInterface $fileSystem
   *   The File system interface.
   * @param \Drupal\Core\Utility\Token $token
   *   The token system.
   * @param \Drupal\file\FileRepositoryInterface $fileRepo
   *   The File repo.
   * @param \Drupal\Core\Session\AccountProxyInterface $currentUser
   *   The current user.
   * @param \Drupal\ai_interpolator_scraping_bot\ScrapingBot $scrapingBot
   *   The ScrapingBot requester.
   */
  public function __construct(
    array $configuration,
    $plugin_id,
    $plugin_definition,
    EntityTypeManagerInterface $entityManager,
    FileSystemInterface $fileSystem,
    Token $token,
    FileRepositoryInterface $fileRepo,
    AccountProxyInterface $currentUser,
    ScrapingBot $scrapingBot,
  ) {
    parent::__construct($configuration, $plugin_id, $plugin_definition);
    $this->entityManager = $entityManager;
    $this->fileSystem = $fileSystem;
    $this->fileRepo = $fileRepo;
    $this->currentUser = $currentUser;
    $this->token = $token;
    $this->scrapingBot = $scrapingBot;
  }

  /**
   * {@inheritDoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    // The service order must match the constructor signature.
    return new static(
      $configuration,
      $plugin_id,
      $plugin_definition,
      $container->get('entity_type.manager'),
      $container->get('file_system'),
      $container->get('token'),
      $container->get('file.repository'),
      $container->get('current_user'),
      $container->get('ai_interpolator_scraping_bot.api'),
    );
  }

  /**
   * {@inheritDoc}
   */
  public $title = 'ScrapingBot Image Crawler';

  /**
   * {@inheritDoc}
   */
  public function needsPrompt() {
    return FALSE;
  }

  /**
   * {@inheritDoc}
   */
  public function advancedMode() {
    return FALSE;
  }

  /**
   * {@inheritDoc}
   */
  public function placeholderText() {
    return "";
  }

  /**
   * {@inheritDoc}
   */
  public function allowedInputs() {
    return ['link'];
  }

  /**
   * {@inheritDoc}
   */
  public function extraAdvancedFormFields(ContentEntityInterface $entity, FieldDefinitionInterface $fieldDefinition) {
    // All defaults are read from the same bundle-level field config.
    $fieldConfig = $fieldDefinition->getConfig($entity->bundle());
    $form = [];

    $form['interpolator_use_chrome'] = [
      '#type' => 'checkbox',
      '#title' => $this->t('Use Chrome'),
      '#description' => $this->t("Use Chrome when scraping"),
      '#default_value' => $fieldConfig->getThirdPartySetting('ai_interpolator', 'interpolator_use_chrome', TRUE),
      '#weight' => -15,
    ];

    $form['interpolator_wait_for_network'] = [
      '#type' => 'checkbox',
      '#title' => $this->t('Wait for network'),
      '#description' => $this->t("Check if you want to wait for most ajax requests to finish until returning the Html content. This can slowdown or fail your scraping if some requests are never ending only use if really needed to get some price loaded asynchronously for example."),
      '#default_value' => $fieldConfig->getThirdPartySetting('ai_interpolator', 'interpolator_wait_for_network', FALSE),
      '#weight' => -14,
    ];

    $form['interpolator_proxy_country'] = [
      '#type' => 'select',
      '#options' => ScrapingBot::$proxyCountries,
      '#title' => $this->t('Proxy Country'),
      '#description' => $this->t("Where should the scraping take place."),
      '#default_value' => $fieldConfig->getThirdPartySetting('ai_interpolator', 'interpolator_proxy_country', 'US'),
      '#weight' => -13,
    ];

    $form['interpolator_use_premium_proxy'] = [
      '#type' => 'checkbox',
      '#title' => $this->t('Use Premium Proxy'),
      '#description' => $this->t("Use Premium Proxy when scraping. This is VERY expensive."),
      '#default_value' => $fieldConfig->getThirdPartySetting('ai_interpolator', 'interpolator_use_premium_proxy', FALSE),
      '#weight' => -12,
    ];

    return $form;
  }

  /**
   * {@inheritDoc}
   */
  public function generate(ContentEntityInterface $entity, FieldDefinitionInterface $fieldDefinition, array $interpolatorConfig) {
    $options = [
      'useChrome' => $interpolatorConfig['use_chrome'],
      'waitForNetworkRequests' => $interpolatorConfig['wait_for_network'],
      'proxyCountry' => $interpolatorConfig['proxy_country'],
      'premiumProxy' => $interpolatorConfig['use_premium_proxy'],
    ];

    // Only the first item of the base link field is scraped.
    $url = $entity->{$interpolatorConfig['base_field']}->uri;
    // Without a link there is nothing to scrape, so skip the API call.
    if (!is_string($url) || $url === '') {
      return ['value' => ''];
    }

    $rawHtml = $this->scrapingBot->scrapeRaw($url, $options);
    // NOTE(review): scrapeRaw()'s failure value is not visible from this
    // file; anything that is not non-empty markup is treated as "no image".
    if (!is_string($rawHtml) || $rawHtml === '') {
      return ['value' => ''];
    }

    $readability = new Readability(new Configuration());
    $done = $readability->parse($rawHtml);
    return ['value' => $done ? $readability->getImage() : ''];
  }

  /**
   * {@inheritDoc}
   */
  public function verifyValue(ContentEntityInterface $entity, $value, FieldDefinitionInterface $fieldDefinition) {
    // Should be an url.
    if (!is_string($value) || !filter_var($value, FILTER_VALIDATE_URL)) {
      return FALSE;
    }
    // The value comes from scraped markup and is later fetched with
    // file_get_contents(), so only remote web schemes are accepted; this
    // rejects e.g. file:// URLs that FILTER_VALIDATE_URL lets through.
    $scheme = strtolower((string) parse_url($value, PHP_URL_SCHEME));
    return in_array($scheme, ['http', 'https'], TRUE);
  }

  /**
   * {@inheritDoc}
   */
  public function storeValues(ContentEntityInterface $entity, array $values, FieldDefinitionInterface $fieldDefinition) {
    $config = $fieldDefinition->getConfig($entity->bundle())->getSettings();
    // Both are loop invariant, so resolve them once.
    $cardinality = $fieldDefinition->getFieldStorageDefinition()->getCardinality();
    $directory = $this->token->replace($config['uri_scheme'] . '://' . rtrim($config['file_directory'], '/'));
    $fileEntities = [];

    foreach ($values as $value) {
      if (!is_string($value) || $value === '') {
        continue;
      }

      // A failed or empty download must not end up as an empty file entity.
      $binary = file_get_contents($value);
      if ($binary === FALSE || $binary === '') {
        continue;
      }

      // Create file entity from string.
      $file = $this->generateFileFromString($binary, $directory . '/' . $this->fileNameFromUrl($value));
      if (!$file) {
        continue;
      }

      // getimagesize() returns FALSE when it cannot read the dimensions; the
      // item is still attached, with the dimensions left empty.
      $resolution = getimagesize($file->getFileUri());
      $fileEntities[] = [
        'target_id' => $file->id(),
        'alt' => $config['default_image']['alt'] ?? '',
        'title' => $config['default_image']['title'] ?? '',
        'width' => is_array($resolution) ? $resolution[0] : NULL,
        'height' => is_array($resolution) ? $resolution[1] : NULL,
      ];

      // If we have enough images, give up. A non-positive cardinality never
      // stops the loop.
      if ($cardinality > 0 && count($fileEntities) >= $cardinality) {
        break;
      }
    }

    // Then set the value.
    $entity->set($fieldDefinition->getName(), $fileEntities);
  }

  /**
   * Derives the file name to store a downloaded image under.
   *
   * Only the path component of the URL is used, so query strings and
   * fragments never leak into the name.
   *
   * @param string $url
   *   The image URL.
   *
   * @return string
   *   The last path segment, or "image" when the path has none, with ".jpg"
   *   appended when the name contains no dot.
   */
  private function fileNameFromUrl(string $url): string {
    $path = parse_url($url, PHP_URL_PATH);
    $fileName = is_string($path) ? basename($path) : '';
    if ($fileName === '') {
      $fileName = 'image';
    }
    // If no ending exists.
    if (!str_contains($fileName, '.')) {
      $fileName .= '.jpg';
    }
    return $fileName;
  }

  /**
   * Generate a file entity.
   *
   * @param string $binary
   *   The source binary.
   * @param string $dest
   *   The destination.
   *
   * @return \Drupal\file\FileInterface|false
   *   The file or false on failure. NOTE(review): exceptions raised by the
   *   file repository are not caught here and propagate to the caller.
   */
  private function generateFileFromString(string $binary, string $dest) {
    // Calculate path: the destination without its trailing "/<file name>".
    $fileName = basename($dest);
    $path = substr($dest, 0, -(strlen($fileName) + 1));
    // Create directory if not existing; bail out when it is not usable.
    if (!$this->fileSystem->prepareDirectory($path, FileSystemInterface::CREATE_DIRECTORY)) {
      return FALSE;
    }
    // An existing file of the same name is kept; the new one is renamed.
    $file = $this->fileRepo->writeData($binary, $dest, FileSystemInterface::EXISTS_RENAME);
    return $file ?: FALSE;
  }

}
