<?php
/**
 * ===========================================
 * FLOWBOT DCI - URL PROCESSOR SERVICE
 * ===========================================
 * Main service that orchestrates URL processing
 */

declare(strict_types=1);

namespace FlowbotDCI\Services;

use FlowbotDCI\Core\Database;
use FlowbotDCI\Models\PinFeed;
use FlowbotDCI\Models\FeedData;
use FlowbotDCI\Models\User;
use FlowbotDCI\Utils\AuthorGenerator;
use FlowbotDCI\Utils\TagExtractor;
use FlowbotDCI\Utils\UrlPreProcessor;

/**
 * Orchestrates the processing of one batch of URLs: pre-validation,
 * duplicate detection, parallel fetching, metadata extraction and the
 * final batch insert, while reporting progress to the ProgressTracker.
 */
class UrlProcessor
{
    /**
     * Phase index from which a fetch failure is final. Failures in a lower
     * phase are reported as "retry" so the URL moves on to the next phase.
     */
    private const FINAL_PHASE_INDEX = 3;

    /** Number of URLs handled between two cancellation checks. */
    private const CANCEL_CHECK_INTERVAL = 5;

    private array $config;
    private Database $database;
    private ProgressTracker $tracker;
    private WebScraper $scraper;
    private MetadataExtractor $extractor;
    private EmbedGenerator $embedGenerator;
    private AuthorGenerator $authorGenerator;
    private TagExtractor $tagExtractor;
    private UrlPreProcessor $preProcessor;

    // v2.3: Smart domain rate limiter
    private DomainRateLimiter $rateLimiter;

    private PinFeed $pinFeedModel;
    private FeedData $feedDataModel;
    private User $userModel;

    /** Per-URL log entries of the batch currently being processed. */
    private array $processLogs = [];

    /**
     * Wires up the scraper, extractors and models used during processing.
     *
     * @param array           $config   Reads 'processing' => 'ssl_verify' (defaults to true)
     *                                  and 'defaults' (passed to MetadataExtractor and User).
     * @param Database        $database Connection handed to every model.
     * @param ProgressTracker $tracker  Source of the current batch/phase and sink for statistics.
     */
    public function __construct(array $config, Database $database, ProgressTracker $tracker)
    {
        $this->config = $config;
        $this->database = $database;
        $this->tracker = $tracker;

        // v2.3: Initialize smart domain rate limiter
        $this->rateLimiter = new DomainRateLimiter();

        // Initialize services
        $sslVerify = $config['processing']['ssl_verify'] ?? true;
        $this->scraper = new WebScraper($sslVerify);

        // v2.3: Inject rate limiter into scraper for 429 handling
        $this->scraper->setRateLimiter($this->rateLimiter);

        // The per-domain limit is tuned dynamically in processBatch() from the
        // domain distribution of each batch.
        $this->extractor = new MetadataExtractor($config['defaults']);
        $this->embedGenerator = new EmbedGenerator();
        $this->authorGenerator = new AuthorGenerator();
        $this->tagExtractor = new TagExtractor();
        $this->preProcessor = new UrlPreProcessor();

        // Initialize models
        $this->pinFeedModel = new PinFeed($database);
        $this->feedDataModel = new FeedData($database);
        $this->userModel = new User($database, $config['defaults']);
    }

    /**
     * Extract the distinct http/https URLs found in a text.
     *
     * @return array List of unique URLs in order of first appearance,
     *               indexed 0..n-1.
     */
    public function extractUrls(string $text): array
    {
        preg_match_all(
            '#\bhttps?://[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/))#',
            $text,
            $matches
        );

        // array_unique() keeps the original keys; re-index so callers always
        // receive a real list (and JSON encodes it as an array, not an object).
        return array_values(array_unique($matches[0] ?? []));
    }

    /**
     * Process the tracker's current batch of URLs.
     *
     * Steps: pre-validate and reorder the URLs, tune the per-domain
     * concurrency, look up already-stored URLs in one query, fetch the
     * remaining URLs in parallel, turn each result into a record and insert
     * all records in one batch.
     *
     * @return array Same shape as finalizeBatch(): 'complete', 'cancelled',
     *               'data' and 'logs'.
     */
    public function processBatch(): array
    {
        $batch = $this->tracker->getCurrentBatch();
        $phaseConfig = $this->tracker->getCurrentPhaseConfig();
        $phaseIndex = $this->tracker->getCurrentPhaseIndex();

        $batchStartTime = microtime(true);
        $this->processLogs = [];

        // Pre-processing: validate the URLs and optimise their order.
        $preProcessed = $this->preProcessor->process($batch);
        $validUrls = $preProcessed['optimized'];
        $invalidUrls = $preProcessed['invalid'];

        $this->configurePerDomainConcurrency(
            (int) ($preProcessed['stats']['domains'] ?? 1),
            count($validUrls)
        );

        $this->recordInvalidUrls($invalidUrls, $phaseIndex);

        // Built before the early return so both paths report the same shape,
        // including the per-reason breakdown of the invalid URLs.
        $results = $this->createResults(count($batch), $invalidUrls);

        if (empty($validUrls)) {
            return $this->finalizeBatch($batchStartTime, $results);
        }

        // One query for the whole batch instead of one per URL.
        $existingUrls = $this->pinFeedModel->existsBatch($validUrls);

        // Known duplicates are reported as ignored without looking at their
        // fetch result, so downloading them would only waste requests.
        $urlsToFetch = [];
        foreach ($validUrls as $candidate) {
            if (!isset($existingUrls[trim($candidate)])) {
                $urlsToFetch[] = $candidate;
            }
        }

        $fetchResults = [];
        if ($urlsToFetch !== []) {
            // PERF-006: warm the DNS cache before fetching.
            // NOTE(review): meaning of the second argument (3) is defined by
            // WebScraper::prefetchDns() - confirm there.
            $this->scraper->prefetchDns($urlsToFetch, 3);

            // Parallel fetch; the scraper applies the per-domain limit.
            $fetchResults = $this->scraper->fetchBatch(
                $urlsToFetch,
                $phaseConfig['concurrency'],
                $phaseConfig['timeout']
            );
        }

        // Records are collected here and written with a single batch insert.
        $recordsToInsert = [];
        $urlsSeen = 0;

        foreach ($validUrls as $url) {
            // v4.4: poll for cancellation every CANCEL_CHECK_INTERVAL URLs.
            // URLs not reached before the break are not counted in $results.
            $urlsSeen++;
            if ($urlsSeen % self::CANCEL_CHECK_INTERVAL === 0 && $this->tracker->isCancelled()) {
                $results['cancelled'] = true;
                break;
            }

            $url = trim($url);
            $fetchResult = $fetchResults[$url] ?? ['success' => false, 'error' => 'Unknown'];
            $alreadyExists = isset($existingUrls[$url]);

            $processResult = $this->processUrlOptimized(
                $url,
                $fetchResult,
                $phaseIndex,
                $alreadyExists,
                $recordsToInsert
            );

            $domain = $this->hostOrUnknown($url);
            $responseTime = $fetchResult['response_time'] ?? 0;
            $httpCode = $fetchResult['http_code'] ?? 0;

            // v2.1: track HTTP code and response time for every real response.
            if ($httpCode > 0) {
                $this->tracker->updateRequestStats($httpCode, $responseTime, $domain);
            }

            $errorType = null;
            switch ($processResult['status']) {
                case 'imported':
                    $results['processed']++;
                    $results['imported']++;
                    $this->tracker->updateDomainStats($domain, 'success', $responseTime);
                    break;
                case 'ignored':
                    $results['ignored']++;
                    if (str_contains($processResult['message'] ?? '', 'Already exists')) {
                        $results['ignored_details']['duplicate']++;
                    }
                    $this->tracker->updateDomainStats($domain, 'ignored', $responseTime);
                    break;
                case 'error':
                    $results['errors']++;
                    $errorType = $this->classifyError($processResult['message'] ?? '', $httpCode);
                    $results['error_details'][$errorType]++;
                    $this->tracker->updateDomainStats($domain, 'error', $responseTime, $errorType);
                    break;
                case 'retry':
                    $results['move_to_next_phase'][] = $url;
                    $this->tracker->updateDomainStats($domain, 'retry', $responseTime);
                    // v2.1: track retry count by phase
                    $this->tracker->incrementRetryCount($phaseIndex);
                    break;
            }

            // Detailed entry for the logs page.
            $this->tracker->addDetailedLogEntry([
                'url' => $url,
                'domain' => $domain,
                'status' => $processResult['status'],
                'message' => $processResult['message'] ?? '',
                'class' => $processResult['class'] ?? 'info',
                'phase' => $phaseIndex,
                'response_time' => $responseTime,
                'http_code' => $httpCode,
                'error_type' => $errorType,
            ]);

            $this->processLogs[] = $processResult;
        }

        if ($recordsToInsert !== []) {
            try {
                $this->pinFeedModel->insertBatch($recordsToInsert);
            } catch (\Exception $e) {
                error_log("Batch insert error: " . $e->getMessage());

                // The counters above were incremented before anything was
                // written. How many rows (if any) were persisted before the
                // failure is not visible here, so the whole insert is counted
                // as failed rather than over-reporting imports.
                $failed = $results['imported'];
                $results['processed'] -= $failed;
                $results['imported'] = 0;
                $results['errors'] += $failed;
                $results['error_details']['other'] += $failed;

                $this->processLogs[] = [
                    'url'     => '',
                    'status'  => 'error',
                    'message' => 'Batch insert failed: ' . $e->getMessage(),
                    'class'   => 'error',
                ];
            }
        }

        return $this->finalizeBatch($batchStartTime, $results);
    }

    /**
     * Choose how many simultaneous requests the scraper may send to a single
     * domain, based on how the batch is spread over domains.
     *
     * @param int $domainCount Number of distinct domains in the batch.
     * @param int $urlCount    Number of valid URLs in the batch.
     */
    private function configurePerDomainConcurrency(int $domainCount, int $urlCount): void
    {
        if ($domainCount <= 0 || $urlCount <= 0) {
            $this->scraper->setMaxPerDomain(1);
            return;
        }

        $avgUrlsPerDomain = $urlCount / $domainCount;

        if ($domainCount <= 3 && $avgUrlsPerDomain > 10) {
            // Very few domains carrying many URLs each.
            $this->scraper->setMaxPerDomain(3);
        } elseif ($domainCount <= 10) {
            // Few domains.
            $this->scraper->setMaxPerDomain(2);
        } else {
            // Many different domains: one request per domain at a time.
            $this->scraper->setMaxPerDomain(1);
        }
    }

    /**
     * Log every URL rejected by pre-validation as "ignored", both in the
     * batch logs and in the tracker's domain statistics and detailed log.
     *
     * @param array $invalidUrls Entries with 'url' and 'reason' keys.
     * @param int   $phaseIndex  Phase recorded in the detailed log entry.
     */
    private function recordInvalidUrls(array $invalidUrls, int $phaseIndex): void
    {
        foreach ($invalidUrls as $invalid) {
            $domain = $this->hostOrUnknown($invalid['url']);
            $message = 'Pre-validation: ' . ($invalid['reason'] ?? 'invalid_url');

            $this->processLogs[] = [
                'url'     => $invalid['url'],
                'status'  => 'ignored',
                'message' => $message,
                'class'   => 'warning',
            ];

            $this->tracker->updateDomainStats($domain, 'ignored', 0);

            $this->tracker->addDetailedLogEntry([
                'url' => $invalid['url'],
                'domain' => $domain,
                'status' => 'ignored',
                'message' => $message,
                'class' => 'warning',
                'phase' => $phaseIndex,
                'response_time' => 0,
                'http_code' => 0,
                'error_type' => null,
            ]);
        }
    }

    /**
     * Build the initial counters for a batch, with the pre-validation
     * rejects already counted as ignored and broken down by reason.
     *
     * @param int   $batchSize   Total number of URLs in the batch.
     * @param array $invalidUrls Entries rejected by pre-validation.
     *
     * @return array Counter structure passed to finalizeBatch().
     */
    private function createResults(int $batchSize, array $invalidUrls): array
    {
        $results = [
            'processed' => 0,
            'ignored'   => count($invalidUrls),
            'errors'    => 0,
            'imported'  => 0,
            'total'     => $batchSize,
            'move_to_next_phase' => [],

            'ignored_details' => [
                'duplicate'      => 0,
                'invalid_url'    => 0,
                'blocked_domain' => 0,
                'non_html'       => 0,
            ],
            'error_details' => [
                'timeout'    => 0,
                'http_429'   => 0,
                'http_404'   => 0,
                'http_403'   => 0,
                'http_5xx'   => 0,
                'connection' => 0,
                'metadata'   => 0,
                'other'      => 0,
            ],
        ];

        foreach ($invalidUrls as $invalid) {
            $key = match ($invalid['reason'] ?? 'invalid_url') {
                'blocked_domain', 'blocked' => 'blocked_domain',
                'non_html', 'extension' => 'non_html',
                default => 'invalid_url',
            };
            $results['ignored_details'][$key]++;
        }

        return $results;
    }

    /**
     * Host part of a URL for statistics, or 'unknown' when it has none.
     *
     * parse_url() returns null for a missing host and false for a seriously
     * malformed URL; both map to 'unknown'.
     */
    private function hostOrUnknown(string $url): string
    {
        $host = parse_url($url, PHP_URL_HOST);

        return is_string($host) && $host !== '' ? $host : 'unknown';
    }

    /**
     * Finalize batch processing: push the counters to the tracker and build
     * the response for the caller.
     * v4.4: a cancelled run is reported as complete.
     *
     * @param float $batchStartTime microtime(true) taken when the batch started.
     * @param array $results        Batch counters; may carry 'cancelled' => true.
     *
     * @return array 'complete', 'cancelled', 'data' (tracker progress) and
     *               'logs' (per-URL log entries of this batch).
     */
    private function finalizeBatch(float $batchStartTime, array $results): array
    {
        $batchTime = microtime(true) - $batchStartTime;

        $this->tracker->updateAfterBatch($results, $batchTime);

        // v4.4: cancellation may come from the loop or from the tracker.
        $wasCancelled = ($results['cancelled'] ?? false) || $this->tracker->isCancelled();
        $isComplete = $this->tracker->isComplete() || $wasCancelled;

        return [
            'complete'  => $isComplete,
            'cancelled' => $wasCancelled,
            'data'      => $this->tracker->loadProgress(),
            'logs'      => $this->processLogs,
        ];
    }

    /**
     * Process a single URL (original entry point kept for compatibility).
     *
     * Delegates to processUrlOptimized() with no duplicate information; the
     * collected record is discarded, only the status entry is returned.
     */
    private function processUrl(string $url, array $fetchResult, int $phaseIndex): array
    {
        $recordsToInsert = [];
        return $this->processUrlOptimized($url, $fetchResult, $phaseIndex, false, $recordsToInsert);
    }

    /**
     * Turn one fetch result into a status entry and, on success, append the
     * record to $recordsToInsert (nothing is inserted here).
     *
     * @param string $url             URL being processed.
     * @param array  $fetchResult     Reads 'success', 'html' and 'error'.
     * @param int    $phaseIndex      Current retry phase.
     * @param bool   $alreadyExists   Pre-computed duplicate flag for this URL.
     * @param array  $recordsToInsert Collector for the batch insert (by reference).
     *
     * @return array Entry with 'url', 'status' (imported|ignored|retry|error),
     *               'message' and 'class'.
     */
    private function processUrlOptimized(string $url, array $fetchResult, int $phaseIndex, bool $alreadyExists, array &$recordsToInsert): array
    {
        // Duplicates are decided first: a URL that is already stored must not
        // be sent through the retry phases just because its fetch failed.
        if ($alreadyExists) {
            return [
                'url'     => $url,
                'status'  => 'ignored',
                'message' => 'Already exists in database',
                'class'   => 'warning',
            ];
        }

        if (empty($fetchResult['success']) || empty($fetchResult['html'])) {
            if ($phaseIndex < self::FINAL_PHASE_INDEX) {
                return [
                    'url'     => $url,
                    'status'  => 'retry',
                    'message' => "Fetch failed in phase $phaseIndex, moving to phase " . ($phaseIndex + 1),
                    'class'   => 'warning',
                ];
            }
            return [
                'url'     => $url,
                'status'  => 'error',
                'message' => "Fetch failed in final phase: " . ($fetchResult['error'] ?? 'Unknown'),
                'class'   => 'error',
            ];
        }

        try {
            // Resolve the host before touching the database so a URL without
            // one never creates a user or a feed row.
            $sourceHost = parse_url($url, PHP_URL_HOST);
            if (!is_string($sourceHost) || $sourceHost === '') {
                throw new \RuntimeException('Unable to determine host for URL: ' . $url);
            }

            $metadata = $this->extractor->extract($fetchResult['html'], $url);
            $embedCode = $this->embedGenerator->generate($url);
            $author = $this->authorGenerator->generate();
            $tags = $this->tagExtractor->extract($metadata['title']);

            $userId = $this->userModel->getOrCreate($author);

            $sourceDomainBase = 'https://' . $sourceHost;
            $feedData = $this->feedDataModel->getOrCreate($sourceDomainBase, $url);

            $recordsToInsert[] = [
                'title'            => $metadata['title'],
                'description'      => $metadata['description'],
                'thumbnail'        => $metadata['thumbnail'],
                'link'             => $url,
                'source_website'   => $sourceHost,
                'author'           => $author,
                'favicon'          => $metadata['favicon'],
                'tags'             => implode(', ', $tags),
                'embed_code'       => $embedCode,
                'source_domain'    => $sourceHost,
                'user_id'          => $userId,
                'source_domain_id' => $feedData['id'],
                'main_category_id' => $feedData['main_category_id'] ?? 0,
            ];

            return [
                'url'     => $url,
                'status'  => 'imported',
                'message' => "Prepared for import (phase $phaseIndex)",
                'class'   => 'success',
            ];
        } catch (\Throwable $e) {
            // \Throwable rather than \Exception: a TypeError raised by bad
            // page data must fail this URL only, not abort the whole batch.
            return [
                'url'     => $url,
                'status'  => 'error',
                'message' => 'Processing error: ' . $e->getMessage(),
                'class'   => 'error',
            ];
        }
    }

    /**
     * Map an error message and HTTP code to one of the 'error_details' keys.
     *
     * The first matching rule wins, so a message containing "timeout" is
     * classified as a timeout whatever the HTTP code is.
     *
     * @return string One of: timeout, http_429, http_404, http_403, http_5xx,
     *                connection, metadata, other.
     */
    private function classifyError(string $errorMessage, int $httpCode): string
    {
        $errorLower = strtolower($errorMessage);

        return match (true) {
            str_contains($errorLower, 'timeout') => 'timeout',
            $httpCode === 429 => 'http_429',
            $httpCode === 404 => 'http_404',
            $httpCode === 403 => 'http_403',
            $httpCode >= 500 && $httpCode < 600 => 'http_5xx',
            str_contains($errorLower, 'connection'),
            str_contains($errorLower, 'refused'),
            str_contains($errorLower, 'reset') => 'connection',
            str_contains($errorLower, 'metadata'),
            str_contains($errorLower, 'processing error') => 'metadata',
            default => 'other',
        };
    }
}
