<?php

namespace Drupal\ai_agents\Plugin\AiFunctionCall;

use Drupal\Core\Plugin\Context\ContextDefinition;
use Drupal\Core\StringTranslation\TranslatableMarkup;
use Drupal\ai\Attribute\FunctionCall;
use Drupal\ai\Base\FunctionCallBase;
use Drupal\ai\Service\FunctionCalling\ExecutableFunctionCallInterface;
use Drupal\ai_agents\PluginInterfaces\AiAgentContextInterface;
use Drupal\ai_agents\Traits\WebScraperTrait;

/**
 * Plugin implementation of the web scraper function.
 *
 * Exposes a function call that scrapes a single URL with one of the allowed
 * scrape tools and returns the result through getReadableOutput().
 */
#[FunctionCall(
  id: 'ai_agent:web_scraper',
  function_name: 'ai_agents_web_scraper',
  name: 'Web Scraper',
  description: 'This method can take an url and look at it and scrape that webpage.',
  group: 'information_tools',
  context_definitions: [
    'url' => new ContextDefinition(
      data_type: 'string',
      label: new TranslatableMarkup("URL"),
      description: new TranslatableMarkup("The full url to scrape including the protocol."),
      required: TRUE,
    ),
    'scrape_tool' => new ContextDefinition(
      data_type: 'string',
      label: new TranslatableMarkup("Scrape Tool"),
      description: new TranslatableMarkup("The scrape tool to use."),
      required: FALSE,
      default_value: 'simple_crawler',
      constraints: [
        'AllowedValues' => [
          'simple_crawler',
          'scrapingbot',
        ],
      ],
    ),
    'use_readable' => new ContextDefinition(
      data_type: 'boolean',
      label: new TranslatableMarkup("Use Readable"),
      description: new TranslatableMarkup("Use a readable version of the html. Meaning this will extract the text from the html."),
      required: FALSE,
      default_value: TRUE,
    ),
    // The remaining options are only forwarded to the scraper when
    // scrape_tool is 'scrapingbot'; see execute().
    'use_chrome' => new ContextDefinition(
      data_type: 'boolean',
      label: new TranslatableMarkup("Use Chrome"),
      description: new TranslatableMarkup("Use a chrome browser to scrape the page. (scrapingbot only)"),
      required: FALSE,
      default_value: TRUE,
    ),
    'premium_proxy' => new ContextDefinition(
      data_type: 'boolean',
      label: new TranslatableMarkup("Premium Proxy"),
      description: new TranslatableMarkup("Use a premium proxy to scrape the page. (scrapingbot only)"),
      required: FALSE,
      default_value: FALSE,
    ),
    'proxy_country' => new ContextDefinition(
      data_type: 'string',
      label: new TranslatableMarkup("Proxy Country"),
      description: new TranslatableMarkup("The two character country code to use for the proxy. (scrapingbot only)"),
      required: FALSE,
      default_value: 'US',
    ),
    'wait_for_network_requests' => new ContextDefinition(
      data_type: 'boolean',
      label: new TranslatableMarkup("Wait For Network Requests"),
      description: new TranslatableMarkup("Wait for network requests to finish before returning the html. (scrapingbot only)"),
      required: FALSE,
      default_value: TRUE,
    ),
  ],
)]
class WebScraper extends FunctionCallBase implements ExecutableFunctionCallInterface, AiAgentContextInterface {

  use WebScraperTrait;

  /**
   * The scrape result, or a failure message if no scraper could be loaded.
   *
   * Stays an empty string until execute() has run.
   *
   * @var string
   */
  protected string $html = "";

  /**
   * {@inheritdoc}
   */
  public function execute() {
    // Collect every context value up front, in declaration order.
    $url = $this->getContextValue('url');
    $scrape_tool = $this->getContextValue('scrape_tool');
    $use_readable = $this->getContextValue('use_readable');
    $use_chrome = $this->getContextValue('use_chrome');
    $premium_proxy = $this->getContextValue('premium_proxy');
    $proxy_country = $this->getContextValue('proxy_country');
    $wait_for_network_requests = $this->getContextValue('wait_for_network_requests');

    // loadTool() and scrape() are presumably provided by WebScraperTrait.
    // Without a usable tool, the failure message becomes the readable output
    // so the caller still receives an explanation instead of an empty string.
    if (!$this->loadTool($scrape_tool)) {
      $this->html = "Failed to load any scraper tools, please install one.";
      return;
    }

    // Only scrapingbot receives extra options; every other tool gets an
    // empty config. Strict comparison so that nothing but the exact tool
    // name can enable the scrapingbot settings.
    $config = [];
    if ($scrape_tool === 'scrapingbot') {
      $config = [
        'useChrome' => $use_chrome,
        'premiumProxy' => $premium_proxy,
        'proxyCountry' => $proxy_country,
        'waitForNetworkRequests' => $wait_for_network_requests,
      ];
    }

    $this->html = $this->scrape($url, $use_readable, $config);
  }

  /**
   * {@inheritdoc}
   */
  public function getReadableOutput(): string {
    // Either the scrape result or the failure message set by execute().
    return $this->html;
  }

}
