<?php

namespace Drupal\seo_linkcheck_ai\Service;

use GuzzleHttp\Client;
use GuzzleHttp\Exception\RequestException;
use GuzzleHttp\Exception\ConnectException;

/**
 * Finds broken links on a web page.
 *
 * The page is fetched with the injected Guzzle client, every quoted
 * <a href="..."> value is extracted and resolved against the page URL, and
 * each resulting HTTP(S) URL is probed with a GET request.
 */
class BrokenLinkCheckerService {

  /**
   * HTTP client used both to fetch the page and to probe its links.
   *
   * @var \GuzzleHttp\Client
   */
  protected $httpClient;

  /**
   * Constructs the service.
   *
   * @param \GuzzleHttp\Client $httpClient
   *   Client used for all outgoing requests.
   */
  public function __construct(Client $httpClient) {
    $this->httpClient = $httpClient;
  }

  /**
   * Returns the links on a page that could not be fetched successfully.
   *
   * @param string $url
   *   Absolute URL of the page to scan.
   *
   * @return string[]
   *   Absolute URLs of the links considered broken. Empty when the page has
   *   no broken links or when the page itself could not be fetched.
   */
  public function checkBrokenLinks($url) {
    $brokenLinks = [];

    try {
      $response = $this->httpClient->get($url, ['http_errors' => false]);
      $html = (string) $response->getBody();
    }
    catch (ConnectException $e) {
      // Caught separately: depending on the Guzzle major version this class
      // is not a subclass of RequestException. Nothing can be scanned.
      return $brokenLinks;
    }
    catch (RequestException $e) {
      // The page itself could not be retrieved, so there is nothing to scan.
      return $brokenLinks;
    }

    foreach ($this->extractLinks($html, $url) as $link) {
      if (!$this->isValidLink($link)) {
        $brokenLinks[] = $link;
      }
    }

    return $brokenLinks;
  }

  /**
   * Extracts the unique, absolute HTTP(S) link targets from an HTML document.
   *
   * @param string $html
   *   Markup to scan.
   * @param string $pageUrl
   *   URL the markup was loaded from; relative links are resolved against it.
   *
   * @return string[]
   *   Absolute URLs in document order, without duplicates.
   */
  private function extractLinks($html, $pageUrl) {
    // Case-insensitive, and href may appear after other attributes. The
    // whitespace required before "href" keeps attributes such as "data-href"
    // from matching. Only quoted attribute values are recognised.
    $pattern = '/<a\s(?:[^>]*?\s)?href\s*=\s*(["\'])(.*?)\1/is';
    if (!preg_match_all($pattern, $html, $matches)) {
      return [];
    }

    // Deduplicate after resolution so that different spellings of the same
    // target are only probed once.
    $links = [];
    foreach ($matches[2] as $rawLink) {
      $resolved = $this->resolveLink($rawLink, $pageUrl);
      if ($resolved !== null) {
        $links[$resolved] = $resolved;
      }
    }

    return array_values($links);
  }

  /**
   * Resolves an href value to an absolute HTTP(S) URL.
   *
   * @param string $link
   *   Raw href attribute value as found in the markup.
   * @param string $pageUrl
   *   URL of the page containing the link.
   *
   * @return string|null
   *   The absolute URL, or null when the link should not be checked (empty,
   *   fragment-only, non-HTTP scheme such as mailto:/tel:/javascript:, or
   *   relative to a page URL that has no scheme or host).
   */
  private function resolveLink($link, $pageUrl) {
    // Attribute values are HTML-encoded (e.g. "&amp;" inside query strings).
    $link = trim(html_entity_decode($link, ENT_QUOTES | ENT_HTML5, 'UTF-8'));
    if ($link === '' || $link[0] === '#') {
      return null;
    }

    // A link carrying its own scheme is either already absolute HTTP(S) or
    // something that cannot be probed over HTTP.
    if (preg_match('/^([a-z][a-z0-9+.\-]*):/i', $link, $schemeMatch)) {
      $scheme = strtolower($schemeMatch[1]);
      return ($scheme === 'http' || $scheme === 'https') ? $link : null;
    }

    $base = parse_url($pageUrl);
    if (!is_array($base) || empty($base['scheme']) || empty($base['host'])) {
      return null;
    }

    // Protocol-relative link: inherits only the scheme of the page.
    if (strpos($link, '//') === 0) {
      return $base['scheme'] . ':' . $link;
    }

    $origin = $base['scheme'] . '://' . $base['host'];
    if (isset($base['port'])) {
      $origin .= ':' . $base['port'];
    }
    $basePath = (isset($base['path']) && $base['path'] !== '') ? $base['path'] : '/';

    // Query-only link: same document path with a different query string.
    if ($link[0] === '?') {
      return $origin . $basePath . $link;
    }

    // Separate the path from any query string / fragment so that only the
    // path is merged and normalised.
    preg_match('/^([^?#]*)(.*)$/s', $link, $parts);
    $path = $parts[1];
    $suffix = $parts[2];

    if ($path[0] !== '/') {
      // Document-relative: resolve against the directory of the page.
      $path = substr($basePath, 0, strrpos($basePath, '/') + 1) . $path;
    }

    return $origin . $this->removeDotSegments($path) . $suffix;
  }

  /**
   * Collapses "." and ".." segments of an absolute URL path.
   *
   * @param string $path
   *   Path beginning with "/".
   *
   * @return string
   *   The path with dot segments removed; ".." never climbs above the root.
   */
  private function removeDotSegments($path) {
    $segments = explode('/', $path);
    $lastSegment = end($segments);
    // The first element is the empty string in front of the leading slash.
    $output = [];

    foreach ($segments as $segment) {
      if ($segment === '..') {
        if (count($output) > 1) {
          array_pop($output);
        }
      }
      elseif ($segment !== '.') {
        $output[] = $segment;
      }
    }

    // A path ending in a dot segment refers to a directory: keep the
    // trailing slash.
    if ($lastSegment === '.' || $lastSegment === '..' || count($output) < 2) {
      $output[] = '';
    }

    return implode('/', $output);
  }

  /**
   * Checks whether a link can be fetched successfully.
   *
   * @param string $link
   *   Absolute URL to probe.
   *
   * @return bool
   *   True when a response with a status below 400 is received. False for a
   *   client error, or when every attempt ended in a server error, a
   *   transient status (408/429) or a transport failure.
   */
  private function isValidLink($link) {
    $attemptsLeft = 3;

    while ($attemptsLeft > 0) {
      try {
        $response = $this->httpClient->get($link, [
          'http_errors' => false,
          'allow_redirects' => true,
          'timeout' => 5,
        ]);

        $statusCode = $response->getStatusCode();
        if ($statusCode < 400) {
          return true;
        }

        // Only server errors, request timeouts and rate limiting are worth
        // another attempt; any other 4xx answer is definitive.
        $transient = $statusCode >= 500 || $statusCode === 408 || $statusCode === 429;
        if (!$transient) {
          return false;
        }
      }
      catch (ConnectException $e) {
        // Connection failure or timeout: try again.
      }
      catch (RequestException $e) {
        // Other transport-level failure: try again.
      }

      $attemptsLeft--;
    }

    return false;
  }
}
