<?php

namespace Drupal\ai_tmgmt\Plugin\tmgmt\Translator;

use Drupal\Core\Language\LanguageManagerInterface;
use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
use Drupal\ai\OperationType\Chat\ChatInput;
use Drupal\ai\OperationType\Chat\ChatMessage;
use Drupal\ai\Utility\TextChunkerInterface;
use Drupal\ai\Utility\TokenizerInterface;
use Drupal\tmgmt\ContinuousTranslatorInterface;
use Drupal\tmgmt\Data;
use Drupal\tmgmt\JobInterface;
use Drupal\tmgmt\JobItemInterface;
use Drupal\tmgmt\TMGMTException;
use Drupal\tmgmt\Translator\AvailableResult;
use Drupal\tmgmt\TranslatorInterface;
use Drupal\tmgmt\TranslatorPluginBase;
use Symfony\Component\DependencyInjection\ContainerInterface;

/**
 * AI translator plugin.
 *
 * @TranslatorPlugin(
 *   id = "ai",
 *   label = @Translation("AI"),
 *   description = @Translation("AI Translator service."),
 *   ui = "Drupal\ai_tmgmt\AiTranslatorUi",
 *   logo = "icons/ai-module-logo.jpg",
 * )
 */
class AiTranslator extends TranslatorPluginBase implements ContainerFactoryPluginInterface, ContinuousTranslatorInterface {

  /**
   * Chunk size, in tokens, used when the translator has no usable setting.
   */
  protected const DEFAULT_MAX_CHUNK_TOKENS = 1024;

  /**
   * System prompt used when the translator does not configure one.
   */
  protected const DEFAULT_PROMPT = 'Translate from %source% into %target% language';

  /**
   * Constructs the AI Translator.
   *
   * @param array $configuration
   *   A configuration array containing information about the plugin instance.
   * @param string $plugin_id
   *   The plugin_id for the plugin instance.
   * @param array $plugin_definition
   *   The plugin implementation definition.
   * @param \Drupal\Core\Language\LanguageManagerInterface $languageManager
   *   The language manager.
   * @param \Drupal\tmgmt\Data $dataHelper
   *   Data helper service.
   * @param \Drupal\ai\Utility\TokenizerInterface $tokenizer
   *   The tokenizer.
   * @param \Drupal\ai\Utility\TextChunkerInterface $textChunker
   *   The text chunker.
   */
  public function __construct(
    array $configuration,
    string $plugin_id,
    array $plugin_definition,
    protected LanguageManagerInterface $languageManager,
    protected Data $dataHelper,
    protected TokenizerInterface $tokenizer,
    protected TextChunkerInterface $textChunker,
  ) {
    parent::__construct($configuration, $plugin_id, $plugin_definition);
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    return new static(
      $configuration,
      $plugin_id,
      $plugin_definition,
      $container->get('language_manager'),
      $container->get('tmgmt.data'),
      $container->get('ai.tokenizer'),
      $container->get('ai.text_chunker'),
    );
  }

  /**
   * {@inheritdoc}
   */
  public function checkAvailable(TranslatorInterface $translator): AvailableResult {
    if ($translator->getSetting('chat_model')) {
      return AvailableResult::yes();
    }

    // The ":configured" placeholder must sit inside a quoted attribute value.
    return AvailableResult::no($this->t('@translator is not available. Make sure it is properly <a href=":configured">configured</a>.', [
      '@translator' => $translator->label(),
      ':configured' => $translator->toUrl()->toString(),
    ]));
  }

  /**
   * {@inheritdoc}
   */
  public function requestTranslation(JobInterface $job): void {
    $this->requestJobItemsTranslation($job->getItems());
    if (!$job->isRejected()) {
      $job->submitted('The translation job has been submitted.');
    }
  }

  /**
   * Split HTML into smaller chunks.
   *
   * The markup is parsed and the direct children of <body> are grouped, in
   * document order, into segments. A new segment is started whenever adding
   * the next child would push the running token count over the limit. A single
   * child that is larger than the limit is never split; it becomes a segment
   * of its own.
   *
   * @param array|string $text
   *   The HTML markup. An array of fragments is concatenated before parsing.
   * @param int $maxChunkTokens
   *   The maximum number of tokens per chunk.
   *
   * @return array
   *   The chunks of html. Empty when there is nothing to split.
   */
  public function htmlSplitter(array|string $text, int $maxChunkTokens): array {
    $html = \is_array($text) ? \implode('', $text) : $text;
    // DOMDocument::loadHTML() throws on an empty string.
    if (\trim($html) === '') {
      return [];
    }

    // Turn non-ASCII characters into numeric entities so the parser does not
    // have to guess the encoding. This replaces the 'HTML-ENTITIES' mbstring
    // encoding, which is deprecated as of PHP 8.2.
    $encoded = \mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

    $doc = new \DOMDocument();
    // Collect parser warnings internally instead of silencing them with "@".
    $previousErrorMode = \libxml_use_internal_errors(TRUE);
    try {
      $doc->loadHTML($encoded);
    }
    finally {
      \libxml_clear_errors();
      \libxml_use_internal_errors($previousErrorMode);
    }

    $body = $doc->getElementsByTagName('body')->item(0);
    if ($body === NULL) {
      return [];
    }

    $currentSegment = '';
    $currentTokens = 0;
    $segments = [];

    foreach ($body->childNodes as $node) {
      $nodeHtml = $doc->saveHTML($node);
      if ($nodeHtml === FALSE) {
        continue;
      }
      $tokens = $this->tokenizer->countTokens($nodeHtml);

      // Flush only a non-empty segment, so that a first node which is already
      // over the limit does not produce an empty leading chunk.
      if ($currentSegment !== '' && $currentTokens + $tokens > $maxChunkTokens) {
        $segments[] = $currentSegment;
        $currentSegment = '';
        $currentTokens = 0;
      }

      $currentTokens += $tokens;
      $currentSegment .= $nodeHtml;
    }

    // Compare against '' rather than using empty(), which would drop "0".
    if ($currentSegment !== '') {
      $segments[] = $currentSegment;
    }
    return $segments;
  }

  /**
   * Get all text nodes.
   *
   * Walks the given node depth-first and collects every text node found in
   * it, in document order.
   *
   * @param mixed $node
   *   The DOM Node.
   *
   * @return array
   *   The text nodes.
   */
  protected function getAllTextNodes(mixed $node): array {
    if ($node->nodeType === \XML_TEXT_NODE) {
      return [$node];
    }

    $textNodes = [];
    if ($node->nodeType === \XML_ELEMENT_NODE) {
      foreach ($node->childNodes as $child) {
        \array_push($textNodes, ...$this->getAllTextNodes($child));
      }
    }
    return $textNodes;
  }

  /**
   * Check if a string contains HTML.
   *
   * @param string $string
   *   The string to check.
   *
   * @return bool
   *   TRUE if the string contains something that looks like a tag, i.e. "<"
   *   followed by at least one non-"<" character and a ">".
   */
  protected function isHtml(string $string): bool {
    return \preg_match('/<[^<]+>/', $string) === 1;
  }

  /**
   * {@inheritdoc}
   */
  public function getSupportedRemoteLanguages(TranslatorInterface $translator): array {
    // Every language installed on the site is offered, keyed by langcode.
    $languages = [];
    foreach ($this->languageManager->getLanguages() as $langcode => $language) {
      $languages[$langcode] = $language->getName();
    }
    return $languages;
  }

  /**
   * {@inheritdoc}
   */
  public function getSupportedTargetLanguages(TranslatorInterface $translator, $source_language): array {
    $languages = $this->getSupportedRemoteLanguages($translator);
    // There are no language pairs, any supported language can be translated
    // into the others. If the source language is part of the languages,
    // then return them all, just remove the source language.
    if (\array_key_exists($source_language, $languages)) {
      unset($languages[$source_language]);
      return $languages;
    }
    return [];
  }

  /**
   * {@inheritdoc}
   */
  public function hasCheckoutSettings(JobInterface $job): bool {
    return FALSE;
  }

  /**
   * Local method to do request to AI service.
   *
   * Builds a system prompt from the translator's 'prompt' setting (replacing
   * the %source% and %target% tokens with the language names) and sends it,
   * together with the text, to the chat model named by the 'chat_model'
   * setting.
   *
   * @param \Drupal\tmgmt\TranslatorInterface $translator
   *   The translator entity to get the settings from.
   * @param string $action
   *   Action to be performed [translate, languages].
   * @param array $request_query
   *   Request parameters. Must contain 'text' (the text to send), 'source'
   *   and 'target' (langcodes of languages installed on the site).
   * @param array $options
   *   (Optional) Additional options. Currently not used.
   *
   * @return string
   *   Translated string.
   *
   * @throws \Drupal\tmgmt\TMGMTException
   *   If the action is not supported, a request parameter is missing, a
   *   language is not installed on the site or no chat model is configured.
   */
  protected static function doRequest(
    TranslatorInterface $translator,
    string $action,
    array $request_query = [],
    array $options = [],
  ): string {
    if (!\in_array($action, ['translate', 'languages'], TRUE)) {
      throw new TMGMTException('Invalid action requested: @action', ['@action' => $action]);
    }

    foreach (['text', 'source', 'target'] as $required) {
      if (!isset($request_query[$required])) {
        throw new TMGMTException('Missing request parameter: @name', ['@name' => $required]);
      }
    }

    $settings = $translator->getSettings();
    if (empty($settings['chat_model'])) {
      throw new TMGMTException('No chat model is configured for the translator.');
    }

    // Resolve both langcodes to language names, failing loudly instead of
    // calling a method on NULL when a language is not installed.
    $site_languages = \Drupal::languageManager()->getLanguages();
    $language_names = [];
    foreach (['source', 'target'] as $role) {
      $langcode = $request_query[$role];
      if (!isset($site_languages[$langcode])) {
        throw new TMGMTException('Unknown @role language: @langcode', [
          '@role' => $role,
          '@langcode' => $langcode,
        ]);
      }
      $language_names[$role] = $site_languages[$langcode]->getName();
    }

    // Replace the source and target language in the prompt.
    $system_prompt = \str_replace(
      ['%source%', '%target%'],
      [$language_names['source'], $language_names['target']],
      $settings['prompt'] ?? self::DEFAULT_PROMPT,
    );

    /** @var \Drupal\ai\AiProviderPluginManager $provider_manager */
    $provider_manager = \Drupal::service('ai.provider');
    /** @var \Drupal\ai\OperationType\Chat\ChatInterface $provider */
    $provider = $provider_manager->loadProviderFromSimpleOption($settings['chat_model']);
    $model_id = $provider_manager->getModelNameFromSimpleOption($settings['chat_model']);
    $messages = new ChatInput([
      new ChatMessage('system', $system_prompt),
      new ChatMessage('user', $request_query['text']),
    ]);
    return $provider->chat($messages, $model_id)->getNormalized()->getText();
  }

  /**
   * {@inheritdoc}
   */
  public function requestJobItemsTranslation(array $job_items) {
    $first_item = \reset($job_items);
    if ($first_item === FALSE) {
      // Nothing to translate.
      return;
    }

    /** @var \Drupal\tmgmt\Entity\Job $job */
    $job = $first_item->getJob();
    $settings = $job->getTranslator()->getSettings();
    $this->tokenizer->setModel($settings['tokenizer_model']);
    $this->textChunker->setModel($settings['tokenizer_model']);

    // The cast must wrap the whole "??" expression: a bare "(int) $x ?? 1024"
    // casts first, so the default would never be used.
    $maxChunkTokens = (int) ($settings['advanced']['max_tokens'] ?? self::DEFAULT_MAX_CHUNK_TOKENS);
    if ($maxChunkTokens <= 0) {
      // A blank or non-positive setting cannot be used as a chunk size.
      $maxChunkTokens = self::DEFAULT_MAX_CHUNK_TOKENS;
    }

    $operations = [];

    foreach ($job_items as $job_item) {
      if ($job->isContinuous()) {
        $job_item->active();
      }
      // Pull the source data array through the job and flatten it.
      $data = $this->dataHelper->filterTranslatable($job_item->getData());

      $texts = [];
      $keys_sequence = [];

      // Build AI query param and preserve initial array keys.
      foreach ($data as $key => $value) {
        // Split the long text into chunks.
        $texts[$key] = $this->isHtml($value['#text'])
          ? $this->htmlSplitter($value['#text'], $maxChunkTokens)
          : $this->textChunker->chunkText($value['#text'], $maxChunkTokens, 0);
        $keys_sequence[] = $key;
      }

      // One operation per non-blank chunk.
      foreach ($texts as $key => $chunks) {
        foreach ($chunks as $chunk) {
          if (\trim($chunk) === '') {
            continue;
          }
          $operations[] = [
            [self::class, 'batchRequestTranslation'],
            [$job, $key, $chunk, $keys_sequence],
          ];
        }
      }

      // Close the job item: this operation runs right after the item's chunk
      // operations and files their results under this job item.
      $operations[] = [
        [self::class, 'beforeBatchFinished'],
        [$job_item],
      ];
    }

    // Set batch operations.
    \batch_set([
      'title' => $this->t('Translating job items'),
      'operations' => $operations,
      'finished' => [self::class, 'batchFinished'],
    ]);
  }

  /**
   * Batch 'operation' callback for requesting translation.
   *
   * Translates one chunk and appends the result to the pending translation of
   * its data key in $context['results']['translation']. Chunks of the same
   * data key are joined with a newline.
   *
   * @param \Drupal\tmgmt\JobInterface $job
   *   The tmgmt job entity.
   * @param string $data_key
   *   The data key.
   * @param string $text
   *   The text to be translated.
   * @param array $keys_sequence
   *   Array of field name keys.
   * @param array $context
   *   The sandbox context.
   */
  public static function batchRequestTranslation(
    JobInterface $job,
    string $data_key,
    string $text,
    array $keys_sequence,
    array &$context,
  ): void {
    $result = self::doRequest($job->getTranslator(), 'translate', [
      'source' => $job->getSourceLangcode(),
      'target' => $job->getTargetLangcode(),
      'text' => $text,
    ]);

    $existing = $context['results']['translation'][$data_key]['#text'] ?? NULL;
    $context['results']['translation'][$data_key]['#text'] = ($existing === NULL || $existing === '')
      ? $result
      : $existing . "\n" . $result;
  }

  /**
   * Batch 'operation' callback that closes a job item.
   *
   * Scheduled directly after the chunk operations of a job item. It moves the
   * translations collected so far into an entry of $context['results']['items']
   * together with the job item, then clears the pending translations so the
   * next job item starts empty. Without this, items sharing the same data keys
   * would overwrite or concatenate each other's translations.
   *
   * @param \Drupal\tmgmt\JobItemInterface $job_item
   *   The job item.
   * @param array $context
   *   The sandbox context.
   */
  public static function beforeBatchFinished(JobItemInterface $job_item, &$context): void {
    $context['results']['items'][] = [
      'job_item' => $job_item,
      'translation' => $context['results']['translation'] ?? [],
    ];
    unset($context['results']['translation']);
  }

  /**
   * Batch 'finished' callback.
   *
   * Adds the collected translations to each job item and writes the request
   * messages of every job involved.
   *
   * @param bool $success
   *   Batch success.
   * @param array $results
   *   Results.
   * @param array $operations
   *   Operations.
   */
  public static function batchFinished(bool $success, array $results, array $operations): void {
    $items = $results['items'] ?? [];
    // Also accept the single-item result shape ('job_item' + 'translation').
    if (!$items && isset($results['job_item'])) {
      $items[] = [
        'job_item' => $results['job_item'],
        'translation' => $results['translation'] ?? [],
      ];
    }
    if (!$items) {
      return;
    }

    $tmgmtData = \Drupal::service('tmgmt.data');
    $jobs = [];

    foreach ($items as $item) {
      $job_item = $item['job_item'] ?? NULL;
      if (!$job_item instanceof JobItemInterface) {
        continue;
      }
      $job_item->addTranslatedData($tmgmtData->unflatten($item['translation'] ?? []));
      $job = $job_item->getJob();
      // Remember each job once, however many of its items were processed.
      $jobs[$job->id()] = $job;
    }

    foreach ($jobs as $job) {
      \tmgmt_write_request_messages($job);
    }
  }

}
