<?php

namespace Drupal\ai_vdb_provider_opensearch;

use Drupal\Component\Serialization\Json;
use GuzzleHttp\Client;
use Drupal\Component\Utility\UrlHelper;

/**
 * OpenSearch API service for vector database operations.
 */
class Opensearch {

  /**
   * The HTTP client used for every request sent to OpenSearch.
   *
   * @var \GuzzleHttp\Client
   */
  protected Client $client;

  /**
   * API credentials.
   *
   * When the value contains a colon it is split into username and password
   * and sent as HTTP basic auth; any other non-empty value is sent as a
   * bearer token. An empty string means no authentication is added.
   *
   * @var string
   */
  private string $apiKey = '';

  /**
   * The base URL of the OpenSearch server.
   *
   * Requests throw an exception while this is empty. The configured port is
   * added when the request URL is built.
   *
   * @var string
   */
  private string $baseUrl = '';

  /**
   * The port used when building request URLs (defaults to 9200).
   *
   * @var int
   */
  private int $port = 9200;

  /**
   * Constructs the OpenSearch API service.
   *
   * @param \GuzzleHttp\Client $client
   *   The HTTP client that performs the requests against OpenSearch.
   */
  public function __construct(Client $client) {
    $this->client = $client;
  }

  /**
   * Stores the credentials used to authenticate requests.
   *
   * @param string $apiKey
   *   The credentials, normally in "username:password" format.
   */
  public function setApiKey(string $apiKey) {
    $this->apiKey = $apiKey;
  }

  /**
   * Stores the base URL of the OpenSearch server.
   *
   * @param string $baseUrl
   *   The base URL that request paths are appended to.
   */
  public function setBaseUrl(string $baseUrl) {
    $this->baseUrl = $baseUrl;
  }

  /**
   * Stores the port of the OpenSearch server.
   *
   * @param int $port
   *   The port number used when building request URLs.
   */
  public function setPort(int $port) {
    $this->port = $port;
  }

  /**
   * Creates a k-NN enabled index holding a single "vector" field.
   *
   * When OpenSearch reports that the index already exists the call does not
   * fail: it makes a best-effort attempt to raise max_result_window on the
   * existing index and reports the index as existing either way.
   *
   * @param string $collection_name
   *   The collection name; combined with the database name to build the
   *   index name.
   * @param string $database_name
   *   The database name, used as the prefix of the index name.
   * @param int $dimension
   *   Vector dimension size.
   * @param string $metric_type
   *   The similarity metric type (cosine, l2, ip).
   * @param array $options
   *   Additional options. Recognised keys are 'shards' and 'replicas', which
   *   both default to 1.
   *
   * @return array
   *   The decoded OpenSearch response, or an array with 'status' => 'exists'
   *   when the index was already present.
   *
   * @throws \Exception
   *   When the request fails for any reason other than the index existing.
   */
  public function createCollection(string $collection_name, string $database_name, int $dimension, string $metric_type, array $options = []) {
    $index_name = $this->formatIndexName($database_name, $collection_name);

    // Raised above the OpenSearch default of 10000.
    $result_window = 20000;

    $vector_field = [
      'type' => 'knn_vector',
      'dimension' => $dimension,
      'method' => [
        'name' => 'hnsw',
        'space_type' => $this->mapMetricType($metric_type),
        'engine' => 'nmslib',
        'parameters' => [
          'ef_construction' => 128,
          'm' => 16,
        ],
      ],
    ];

    $definition = [
      'settings' => [
        'index' => [
          'number_of_shards' => $options['shards'] ?? 1,
          'number_of_replicas' => $options['replicas'] ?? 1,
          'knn' => TRUE,
          'max_result_window' => $result_window,
        ],
      ],
      'mappings' => [
        'properties' => [
          'vector' => $vector_field,
        ],
      ],
    ];

    try {
      return Json::decode($this->makeRequest($index_name, [], 'PUT', $definition));
    }
    catch (\Exception $e) {
      // Anything other than "index already exists" is a real failure.
      if (!str_contains($e->getMessage(), 'resource_already_exists_exception')) {
        throw $e;
      }
    }

    // The index is already there: describe it, and try to bring its result
    // window in line with what a freshly created index would have.
    $existing = [
      'acknowledged' => TRUE,
      'index' => $index_name,
      'status' => 'exists',
      'message' => 'Index already exists',
    ];

    try {
      $window_settings = [
        'index' => [
          'max_result_window' => $result_window,
        ],
      ];
      $raw_update = $this->makeRequest($index_name . '/_settings', [], 'PUT', $window_settings);
      $decoded_update = Json::decode($raw_update);

      return array_merge($existing, [
        'message' => 'Index already exists, updated settings',
        'update_response' => $decoded_update,
      ]);
    }
    catch (\Exception $ignored) {
      // Updating the settings is optional; the index exists regardless.
      return $existing;
    }
  }

  /**
   * Maps metric types to OpenSearch k-NN space types.
   *
   * OpenSearch names cosine similarity "cosinesimil"; "cosine" is not an
   * accepted space_type value for knn_vector fields.
   *
   * @param string $metric_type
   *   The metric type (COSINE, L2, IP), matched case-insensitively.
   *
   * @return string
   *   The OpenSearch space type. Unknown metric types fall back to cosine
   *   similarity.
   */
  private function mapMetricType(string $metric_type) {
    return match (strtoupper($metric_type)) {
      'L2' => 'l2',
      'IP', 'INNERPRODUCT' => 'innerproduct',
      'COSINE', 'COSINESIMIL' => 'cosinesimil',
      default => 'cosinesimil',
    };
  }

  /**
   * Deletes the index that backs a collection.
   *
   * @param string $collection_name
   *   The collection name; combined with the database name to build the
   *   index name.
   * @param string $database_name
   *   The database name, used as the prefix of the index name.
   *
   * @return array
   *   The decoded OpenSearch response.
   */
  public function dropCollection(string $collection_name, string $database_name = ''): array {
    $target = $this->formatIndexName($database_name, $collection_name);

    return Json::decode($this->makeRequest($target, [], 'DELETE'));
  }

  /**
   * Lists all indices in OpenSearch.
   *
   * @param string $database_name
   *   Unused; present for API compatibility. Indices of every database prefix
   *   are returned.
   *
   * @return array
   *   An array with 'code' => 0 and a 'data' list holding the 'name' and
   *   'status' of each index. 'data' is an empty list when there are no
   *   indices.
   */
  public function listCollections(string $database_name = ''): array {
    $response = $this->makeRequest('_cat/indices?format=json', [], 'GET');
    $indices = Json::decode($response);

    // Always expose the 'data' key, even when the cluster has no indices or
    // the response could not be decoded into a list.
    $collections = [];
    foreach (is_array($indices) ? $indices : [] as $index) {
      $collections[] = [
        'name' => $index['index'],
        'status' => $index['status'],
      ];
    }

    return [
      'data' => $collections,
      // Success code.
      'code' => 0,
    ];
  }

  /**
   * Gets information about an index in the collection description format.
   *
   * @param string $database_name
   *   The database name, used as the prefix of the index name.
   * @param string $collection_name
   *   The collection name; combined with the database name to build the
   *   index name.
   *
   * @return array
   *   On success an array with 'code' => 0 and a 'data' array containing
   *   'collectionName', 'autoID', 'fields' and, when available, 'shardsNum',
   *   'collectionID' and 'indexes'. When the request throws, an array with
   *   'code' => 100 and the exception 'message' is returned instead.
   */
  public function describeCollection(
    string $database_name = '',
    string $collection_name = '',
  ): array {
    try {
      $index_name = $this->formatIndexName($database_name, $collection_name);

      $index_data = Json::decode($this->makeRequest($index_name, [], 'GET'));
      if (!is_array($index_data)) {
        $index_data = [];
      }

      // The response is normally keyed by index name. Fall back to the bare
      // collection name, then to a response whose data sits at the root.
      if (empty($index_data[$index_name])) {
        if (!empty($index_data[$collection_name])) {
          $index_name = $collection_name;
        }
        elseif (!empty($index_data['mappings'])) {
          $index_data = [$index_name => $index_data];
        }
      }
      $index_info = $index_data[$index_name] ?? [];
      if (!is_array($index_info)) {
        $index_info = [];
      }

      $result = [
        // Success code.
        'code' => 0,
        'data' => [
          'collectionName' => $collection_name,
          'autoID' => TRUE,
          'fields' => [],
        ],
      ];

      // Translate the field mappings.
      foreach ($index_info['mappings']['properties'] ?? [] as $field_name => $field_info) {
        $is_vector = $field_name === 'vector';
        $field_data = [
          'name' => $field_name,
          // Object mappings carry sub-properties but no 'type' key, so a
          // missing type is reported as 'object' instead of raising a warning.
          'type' => $is_vector ? 'FloatVector' : ($field_info['type'] ?? 'object'),
        ];

        if ($is_vector) {
          // The dimension can live under several keys depending on how the
          // mapping was written; take the first one that is present.
          $dimension = $field_info['dimension']
            ?? $field_info['dims']
            ?? $field_info['method']['parameters']['dimensions']
            ?? NULL;

          if ($dimension) {
            $field_data['params'] = [
              [
                'key' => 'dim',
                'value' => (string) $dimension,
              ],
            ];
          }
        }

        $result['data']['fields'][] = $field_data;
      }

      // Add index settings.
      if (!empty($index_info['settings']['index'])) {
        $settings = $index_info['settings']['index'];
        $result['data']['shardsNum'] = (int) ($settings['number_of_shards'] ?? 1);
        $result['data']['collectionID'] = $index_name;
      }

      // Add vector index information.
      if (!empty($index_info['mappings']['properties']['vector']['method'])) {
        $vector_config = $index_info['mappings']['properties']['vector']['method'];
        $result['data']['indexes'] = [
          [
            'fieldName' => 'vector',
            'metricType' => $vector_config['space_type'] ?? 'cosine',
          ],
        ];
      }

      return $result;
    }
    catch (\Exception $e) {
      // Any failure, including a missing index, is reported with code 100.
      return [
        'code' => 100,
        'message' => $e->getMessage(),
      ];
    }
  }

  /**
   * Inserts (or replaces) a document in an index.
   *
   * @param string $collection_name
   *   The collection name; combined with the database name to build the
   *   index name.
   * @param array $data
   *   The document data including the vector. An 'id' entry, when present
   *   and not empty, becomes the document ID and is removed from the stored
   *   body; otherwise an ID is generated.
   * @param string $database_name
   *   The database name, used as the prefix of the index name.
   *
   * @return array
   *   An array with 'code' => 0 and 'data' => ['id' => ...] on success. On
   *   failure 'code' is 1100 when the request was rejected as too large and
   *   500 otherwise, with a 'message'.
   */
  public function insertIntoCollection(string $collection_name, array $data, string $database_name = ''): array {
    $index_name = $this->formatIndexName($database_name, $collection_name);

    // Use the supplied ID, or generate one. More entropy is requested from
    // uniqid() so IDs generated within the same microsecond do not collide.
    $has_id = isset($data['id']) && $data['id'] !== '';
    $id = $has_id ? (string) $data['id'] : uniqid('', TRUE);

    // The ID travels in the URL path, so it has to be encoded: IDs containing
    // '/', '?', '#' or spaces would otherwise address a different endpoint.
    $path = $index_name . '/_doc/' . rawurlencode($id);

    // The ID is part of the URL, not of the document body.
    $doc_data = $data;
    unset($doc_data['id']);

    // The vector must be sent as a JSON list of floats: cast string values
    // and re-index so that sparse keys are not encoded as a JSON object.
    if (isset($doc_data['vector']) && is_array($doc_data['vector'])) {
      $doc_data['vector'] = array_values(array_map('floatval', $doc_data['vector']));
    }

    try {
      $result = Json::decode($this->makeRequest($path, [], 'PUT', $doc_data));

      return [
        // Success code.
        'code' => 0,
        'data' => [
          'id' => $result['_id'] ?? $id,
        ],
      ];
    }
    catch (\Exception $e) {
      if (str_contains($e->getMessage(), 'Request Entity Too Large')) {
        return [
          // Size error code.
          'code' => 1100,
          'message' => 'Document too large',
        ];
      }
      return [
        // Generic error code.
        'code' => 500,
        'message' => $e->getMessage(),
      ];
    }
  }

  /**
   * Deletes documents from an index by document ID.
   *
   * @param string $collection_name
   *   The collection name; combined with the database name to build the
   *   index name.
   * @param array $ids
   *   The document IDs to delete. An empty list deletes nothing and sends no
   *   request.
   * @param string $database_name
   *   The database name, used as the prefix of the index name.
   *
   * @return array
   *   An array with 'code' => 0 and 'data' => ['delete_count' => int].
   */
  public function deleteFromCollection(string $collection_name, array $ids, string $database_name = ''): array {
    // Nothing to delete: skip the round trip entirely.
    if ($ids === []) {
      return [
        'code' => 0,
        'data' => [
          'delete_count' => 0,
        ],
      ];
    }

    $index_name = $this->formatIndexName($database_name, $collection_name);

    // Delete By Query with a terms query on the document ID. The IDs are
    // re-indexed so an array with gaps or string keys is still encoded as a
    // JSON list rather than a JSON object.
    $query = [
      'query' => [
        'terms' => [
          '_id' => array_values($ids),
        ],
      ],
    ];

    $response = $this->makeRequest($index_name . '/_delete_by_query', [], 'POST', $query);
    $result = Json::decode($response);

    return [
      // Success code.
      'code' => 0,
      'data' => [
        'delete_count' => $result['deleted'] ?? 0,
      ],
    ];
  }

  /**
   * Fetches documents that match a filter expression.
   *
   * @param string $collection_name
   *   The collection name; combined with the database name to build the
   *   index name.
   * @param array $output_fields
   *   Fields to include in results. An empty list, or a list containing '*',
   *   returns every field.
   * @param string $filters
   *   Filter expression in Drupal filter format.
   * @param int $limit
   *   Maximum number of results.
   * @param int $offset
   *   Result offset for pagination.
   * @param string $database_name
   *   The database name, used as the prefix of the index name.
   *
   * @return array
   *   The matching documents in the formatted search result structure.
   */
  public function query(string $collection_name, array $output_fields, string $filters = 'id not in [0]', int $limit = 10, int $offset = 0, string $database_name = ''): array {
    $index_name = $this->formatIndexName($database_name, $collection_name);

    $search_params = [
      'query' => $this->convertFilterToQuery($filters),
      'size' => $limit,
      'from' => $offset,
    ];

    // Only restrict the returned source when specific fields were requested.
    $wants_every_field = empty($output_fields) || in_array('*', $output_fields);
    if (!$wants_every_field) {
      $search_params['_source'] = $output_fields;
    }

    $raw_response = $this->makeRequest($index_name . '/_search', [], 'POST', $search_params);

    return $this->formatSearchResults(Json::decode($raw_response));
  }

  /**
   * Performs a vector similarity (k-NN) search.
   *
   * @param string $collection_name
   *   The collection name; combined with the database name to build the
   *   index name.
   * @param array $vector_input
   *   The query vector.
   * @param array $output_fields
   *   Fields to include in results. An empty list, or a list containing '*',
   *   returns every field.
   * @param string $filters
   *   Filter expression in Drupal filter format.
   * @param int $limit
   *   Maximum number of results.
   * @param int $offset
   *   Result offset for pagination.
   * @param string $database_name
   *   The database name, used as the prefix of the index name.
   *
   * @return array
   *   The search results in the formatted search result structure.
   */
  public function search(string $collection_name, array $vector_input, array $output_fields, string $filters = '', int $limit = 10, int $offset = 0, string $database_name = '') {
    $index_name = $this->formatIndexName($database_name, $collection_name);

    // 'from' skips hits out of the k neighbours that were retrieved, so k has
    // to cover the skipped page(s) as well; with k equal to the limit every
    // page after the first would come back short or empty.
    $neighbours = max(1, $limit + $offset);

    $search_params = [
      'size' => $limit,
      'from' => $offset,
      'query' => [
        'knn' => [
          'vector' => [
            // Sent as a JSON list of floats, whatever the input keys/types.
            'vector' => array_values(array_map('floatval', $vector_input)),
            'k' => $neighbours,
          ],
        ],
      ],
    ];

    // Same source filtering rule as query(): only restrict the returned
    // source when specific fields were requested.
    if (!empty($output_fields) && !in_array('*', $output_fields, TRUE)) {
      $search_params['_source'] = $output_fields;
    }

    // NOTE(review): post_filter is applied after the k nearest neighbours
    // are selected, so a restrictive filter can yield fewer than $limit hits.
    if (!empty($filters)) {
      $search_params['post_filter'] = $this->convertFilterToQuery($filters);
    }

    $response = $this->makeRequest($index_name . '/_search', [], 'POST', $search_params);
    $results = Json::decode($response);

    return $this->formatSearchResults($results);
  }

  /**
   * Builds the OpenSearch index name for a database/collection pair.
   *
   * The two names are joined with an underscore and lowercased, and every
   * character other than a-z, 0-9 and underscore is replaced by an
   * underscore. A result starting with an underscore is prefixed with "idx".
   *
   * @param string $database_name
   *   The database name prefix.
   * @param string $collection_name
   *   The collection name.
   *
   * @return string
   *   The formatted OpenSearch index name.
   */
  protected function formatIndexName(string $database_name, string $collection_name): string {
    $joined = strtolower($database_name . '_' . $collection_name);
    $sanitized = preg_replace('/[^a-z0-9_]/', '_', $joined);

    // Index names must not start with an underscore (OpenSearch restriction).
    return str_starts_with($sanitized, '_') ? 'idx' . $sanitized : $sanitized;
  }
  
  /**
   * Formats OpenSearch search results to match the expected structure.
   *
   * @param array $results
   *   The raw, decoded OpenSearch search response.
   *
   * @return array
   *   An array with 'code' => 0 and a 'data' list. Each entry is the hit's
   *   source document with 'id' set to the hit's document ID (overwriting any
   *   'id' in the source) and 'score' set when the hit carries a score.
   */
  protected function formatSearchResults(array $results): array {
    $documents = [];

    foreach ($results['hits']['hits'] ?? [] as $hit) {
      // A hit has no '_source' when source fetching is disabled; start from
      // an empty document instead of reading an undefined key.
      $doc = $hit['_source'] ?? [];
      if (!is_array($doc)) {
        $doc = [];
      }

      // The document ID always comes from the hit metadata.
      $doc['id'] = $hit['_id'];
      if (isset($hit['_score'])) {
        $doc['score'] = $hit['_score'];
      }

      $documents[] = $doc;
    }

    return [
      // Success code.
      'code' => 0,
      'data' => $documents,
    ];
  }

  /**
   * Converts Drupal filter syntax to OpenSearch query DSL.
   *
   * Supported forms:
   * - "(field == value)" / "(field = value)": a match query for the text
   *   fields "content" and "title", a term query otherwise.
   * - "field in [a, b]": a terms query.
   * - "field not in [a, b]": a negated terms query.
   * - "expr1 && expr2": a bool query requiring every sub-expression.
   *
   * Conjunctions are split first, so each condition of an "&&" expression is
   * converted rather than only the first one. The split is textual: a quoted
   * value that itself contains "&&" is not supported.
   *
   * The field name "id" is translated to "_id", because documents are stored
   * with their ID as the OpenSearch document ID rather than as a body field.
   *
   * @param string $filter
   *   The filter string in Drupal format.
   *
   * @return array
   *   OpenSearch query DSL. An empty, default ("id not in [0]") or
   *   unparseable filter yields a match_all query.
   */
  protected function convertFilterToQuery(string $filter): array {
    $filter = trim($filter);

    // Empty filter, or the default filter that is meant to match everything.
    if ($filter === '' || $filter === 'id not in [0]') {
      return ['match_all' => new \stdClass()];
    }

    // Conjunctions must be handled before the single-condition patterns,
    // which would otherwise match the first condition and drop the rest.
    if (str_contains($filter, '&&')) {
      $must_queries = [];
      foreach (explode('&&', $filter) as $condition) {
        $must_queries[] = $this->convertFilterToQuery($condition);
      }

      return [
        'bool' => [
          'must' => $must_queries,
        ],
      ];
    }

    $resolve_field = static function (string $name): string {
      $name = trim($name, " \t\n\r(");
      return $name === 'id' ? '_id' : $name;
    };

    // Simple equals condition: (field == value).
    if (preg_match('/\(([^)]+)\s+(==|=)\s+([^)]+)\)/', $filter, $matches)) {
      $field = $resolve_field($matches[1]);
      $value = trim($matches[3], ' "\'');

      // Text fields are analysed, so they need a match query; everything
      // else is compared exactly with a term query.
      $query_type = in_array($field, ['content', 'title'], TRUE) ? 'match' : 'term';

      return [
        $query_type => [
          $field => $value,
        ],
      ];
    }

    // List condition: field in [...] or field not in [...].
    if (preg_match('/([^\s(]+)\s+(not\s+in|in)\s+\[(.*?)\]/', $filter, $matches)) {
      $field = $resolve_field($matches[1]);

      // Strip quotes and whitespace from each value; a blank list has no
      // values at all rather than a single empty string.
      $clean_values = [];
      if (trim($matches[3]) !== '') {
        foreach (explode(',', $matches[3]) as $value) {
          $clean_values[] = trim($value, ' "\'');
        }
      }

      $terms = [
        'terms' => [
          $field => $clean_values,
        ],
      ];

      if ($matches[2] === 'in') {
        return $terms;
      }

      return [
        'bool' => [
          'must_not' => [$terms],
        ],
      ];
    }

    // Default to match_all if the filter cannot be parsed.
    return ['match_all' => new \stdClass()];
  }

  /**
   * Makes an HTTP request to the OpenSearch API.
   *
   * @param string $path
   *   The API path, relative to the server root.
   * @param array $query_string
   *   Query parameters appended to the URL.
   * @param string $method
   *   HTTP method.
   * @param mixed $body
   *   Request body data; JSON-encoded when not empty.
   * @param array $options
   *   Additional Guzzle request options. Timeouts given here take precedence
   *   over the 120 second defaults.
   *
   * @return string
   *   The response body.
   *
   * @throws \Exception
   *   When no base URL is set, or when the request fails. Client errors that
   *   carry an OpenSearch error payload are rethrown with the error type and
   *   reason in the message and the original exception attached as previous.
   */
  protected function makeRequest($path, array $query_string = [], $method = 'GET', $body = '', array $options = []) {
    if (!$this->baseUrl) {
      throw new \Exception('No base url set.');
    }

    // Default timeouts; values supplied by the caller are kept.
    $options += [
      'connect_timeout' => 120,
      'read_timeout' => 120,
      'timeout' => 120,
    ];

    // Set content type headers.
    $options['headers']['Content-Type'] = 'application/json';
    $options['headers']['accept'] = 'application/json';

    // Add authentication if available.
    if (!empty($this->apiKey)) {
      if (str_contains($this->apiKey, ':')) {
        // "username:password" is sent as HTTP basic auth.
        $options['auth'] = explode(':', $this->apiKey, 2);
      }
      else {
        // Anything else is tried as a bearer token.
        $options['headers']['Authorization'] = 'Bearer ' . $this->apiKey;
      }
    }

    // Add request body if provided.
    if ($body) {
      $options['body'] = Json::encode($body);
    }

    // Build the full URL. The configured port is only appended when the base
    // URL does not already name one, to avoid "host:9200:9200".
    $url = rtrim($this->baseUrl, '/');
    if (!is_int(parse_url($url, PHP_URL_PORT))) {
      $url .= ':' . $this->port;
    }
    $full_url = $url . '/' . ltrim((string) $path, '/');
    if (!empty($query_string)) {
      $full_url .= '?' . UrlHelper::buildQuery($query_string);
    }

    // Special handling for HEAD requests (just checking if index exists).
    if ($method === 'HEAD') {
      try {
        $res = $this->client->request($method, $full_url, $options);
        return (string) $res->getBody();
      }
      catch (\Exception $e) {
        // A 404 means the resource doesn't exist.
        if ($e->getCode() === 404) {
          throw new \Exception('Resource not found', 404, $e);
        }
        throw $e;
      }
    }

    // For all other requests.
    try {
      $res = $this->client->request($method, $full_url, $options);
      return (string) $res->getBody();
    }
    catch (\GuzzleHttp\Exception\ClientException $e) {
      // Read the error response body for more information. The body may be
      // missing or not JSON, in which case the original exception is kept.
      $response_body = (string) $e->getResponse()?->getBody();
      $error_data = $response_body !== '' ? Json::decode($response_body) : NULL;

      if (is_array($error_data) && !empty($error_data['error'])) {
        $error = $error_data['error'];

        if (is_array($error)) {
          $error_type = $error['type'] ?? 'unknown_error';
          $error_reason = $error['reason'] ?? $e->getMessage();
          $failed_shards = $error['failed_shards'] ?? [];
        }
        else {
          // Some responses carry the error as a plain string.
          $error_type = 'unknown_error';
          $error_reason = is_string($error) ? $error : $e->getMessage();
          $failed_shards = [];
        }

        // Grab the first failed shard for more details.
        $shard_reason = '';
        if (!empty($failed_shards) && !empty($failed_shards[0]['reason'])) {
          $shard = $failed_shards[0];
          $shard_reason = " - Shard error: " . ($shard['reason']['type'] ?? '') . " - " . ($shard['reason']['reason'] ?? '');
        }

        // Include the request URL and a truncated body in the error.
        $request_detail = " - Request: " . $method . " " . $full_url;
        if (!empty($options['body'])) {
          $request_detail .= " - Body: " . substr($options['body'], 0, 500) . (strlen($options['body']) > 500 ? '...' : '');
        }

        // Keep the Guzzle exception as previous so the response and stack
        // trace stay available to callers and logs.
        throw new \Exception("OpenSearch error: {$error_type} - {$error_reason}{$shard_reason}{$request_detail}", $e->getCode(), $e);
      }

      throw $e;
    }
  }


}
