<?php

namespace Drupal\ai_automator_extractor\Plugin\AiAutomatorType;

use Drupal\ai_automators\Attribute\AiAutomatorType;
use Drupal\ai_automators\PluginBaseClasses\ExternalBase;
use Drupal\ai_automators\PluginInterfaces\AiAutomatorTypeInterface;
use Drupal\Core\Entity\ContentEntityInterface;
use Drupal\Core\Field\FieldDefinitionInterface;
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\StringTranslation\TranslatableMarkup;

/**
 * The rules for a Link field.
 *
 * Scans the text values of the configured base field for http(s)/ftp URLs,
 * drops the ones that end in a disallowed file extension and stores the
 * remaining URLs as link field items.
 */
#[AiAutomatorType(
  id: 'ai_automator_extractor_link',
  label: new TranslatableMarkup('Extractor: Link'),
  field_rule: 'link',
  target: '',
)]
class LinkExtractor extends ExternalBase implements AiAutomatorTypeInterface {

  /**
   * {@inheritDoc}
   */
  public $title = 'Extractor: Link';

  /**
   * {@inheritDoc}
   *
   * Extraction is purely pattern based, so no prompt is requested.
   */
  public function needsPrompt() {
    return FALSE;
  }

  /**
   * {@inheritDoc}
   */
  public function advancedMode() {
    return FALSE;
  }

  /**
   * {@inheritDoc}
   */
  public function placeholderText() {
    return "";
  }

  /**
   * {@inheritDoc}
   *
   * Adds a text field holding the space separated list of file extensions
   * whose links should be skipped by generate().
   */
  public function extraAdvancedFormFields(ContentEntityInterface $entity, FieldDefinitionInterface $fieldDefinition, FormStateInterface $formState, array $defaultValues = []) {
    $form = [];
    $form['automator_extractor_disallow_extensions'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Disallowed Extensions'),
      '#description' => $this->t('A space separated list of all extensions to skip.'),
      '#default_value' => $defaultValues['automator_extractor_disallow_extensions'] ?? "css js jpg jpeg gif tiff png pdf txt mp3 mp4 mov svg",
      '#weight' => 24,
    ];

    return $form;
  }

  /**
   * {@inheritDoc}
   *
   * Collects the unique URLs found in every item of the base field, skipping
   * URLs that end in one of the disallowed extensions.
   *
   * @return string[]
   *   The list of extracted URLs, de-duplicated, in order of first appearance.
   */
  public function generate(ContentEntityInterface $entity, FieldDefinitionInterface $fieldDefinition, array $automatorConfig) {
    // Normalise the configured list: lower-case it, tolerate repeated
    // whitespace and optional leading dots, and drop empty tokens so that an
    // empty setting never produces a bogus "." suffix.
    $rawExtensions = strtolower((string) ($automatorConfig['extractor_disallow_extensions'] ?? ''));
    $suffixes = [];
    foreach (preg_split('/\s+/', $rawExtensions, -1, PREG_SPLIT_NO_EMPTY) ?: [] as $ext) {
      $ext = ltrim($ext, '.');
      if ($ext !== '') {
        $suffixes[] = '.' . $ext;
      }
    }

    // Keyed by URL so duplicates collapse. The list is accumulated across all
    // items of the base field; it must not be reset per item.
    $values = [];
    foreach ($entity->{$automatorConfig['base_field']} as $wrapperEntity) {
      // Cast so that an empty (NULL) field value is not passed to PCRE.
      $text = (string) $wrapperEntity->value;
      if (!preg_match_all('/(http|ftp|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-])/i', $text, $matches)) {
        continue;
      }
      foreach ($matches[0] as $uri) {
        // Compare case-insensitively against both the full URL and its path,
        // so "file.JPG" and "style.css?v=3" are skipped as well.
        $fullUri = strtolower($uri);
        $path = strtolower((string) parse_url($uri, PHP_URL_PATH));
        $allow = TRUE;
        foreach ($suffixes as $suffix) {
          if (str_ends_with($fullUri, $suffix) || str_ends_with($path, $suffix)) {
            $allow = FALSE;
            break;
          }
        }
        if ($allow) {
          $values[$uri] = $uri;
        }
      }
    }
    return array_values($values);
  }

  /**
   * {@inheritDoc}
   *
   * A value is accepted only when it is non-empty and passes PHP's URL
   * validation filter.
   */
  public function verifyValue(ContentEntityInterface $entity, $value, FieldDefinitionInterface $fieldDefinition, array $automatorConfig) {
    // Has to have a link and be valid.
    if (empty($value) || !filter_var($value, FILTER_VALIDATE_URL)) {
      return FALSE;
    }

    // Otherwise it is ok.
    return TRUE;
  }

  /**
   * {@inheritDoc}
   *
   * Converts every URL into a link item array and sets the result on the
   * entity field.
   */
  public function storeValues(ContentEntityInterface $entity, array $values, FieldDefinitionInterface $fieldDefinition, array $automatorConfig) {
    $settings = $fieldDefinition->getConfig($entity->bundle())->getSettings();
    // A title setting of 0 gets an explicit empty title on every item.
    $titleDisabled = (int) ($settings['title'] ?? 0) === 0;

    $items = [];
    foreach ($values as $key => $value) {
      // Build each item from scratch so nothing leaks between iterations.
      $item = ['uri' => $value];
      if ($titleDisabled) {
        $item['title'] = '';
      }
      $items[$key] = $item;
    }
    $entity->set($fieldDefinition->getName(), $items);
  }

}
