<?php
/**
 * ============================================
 * FLOWBOT DCI - UNIFIED CRAWLER API v1.0
 * ============================================
 * REST API controller for unified crawler operations.
 *
 * Endpoints:
 * - GET  /api/v1/crawler/unified/start     - Start unified crawl (SSE)
 * - GET  /api/v1/crawler/unified/search    - Search + crawl (SSE)
 * - POST /api/v1/crawler/unified/batch     - Batch URL processing
 * - GET  /api/v1/crawler/unified/status/{id} - Get job status
 * - POST /api/v1/crawler/unified/pause/{id}  - Pause job
 * - POST /api/v1/crawler/unified/resume/{id} - Resume job
 * - POST /api/v1/crawler/unified/stop/{id}   - Stop job
 * - DELETE /api/v1/crawler/unified/{id}      - Delete job
 * - GET  /api/v1/crawler/unified/jobs      - List all jobs
 * ============================================
 */

declare(strict_types=1);

namespace FlowbotDCI\Api\v1;

use FlowbotDCI\Core\Database;
use FlowbotDCI\Services\Crawler\UnifiedCrawler;
use FlowbotDCI\Services\Crawler\RelevanceScorer;
use FlowbotDCI\Services\Crawler\ContentExtractor;
use FlowbotDCI\Services\Crawler\DuplicateDetector;
use FlowbotDCI\Services\Crawler\RobotsHandler;
use FlowbotDCI\Services\Crawler\SitemapParser;
use FlowbotDCI\Services\WebScraper;
use FlowbotDCI\Services\DomainRateLimiter;
use FlowbotDCI\Services\CircuitBreaker;
use FlowbotDCI\Services\SearchEngine\SearchEngineAggregator;
use PDO;

/**
 * REST controller for the unified crawler.
 *
 * Every public entry point returns a plain array envelope:
 *   - success: ['success' => true,  'data' => ..., 'meta' => ...]
 *   - failure: ['success' => false, 'error' => ['message', 'code'], 'meta' => ...]
 *
 * The controller itself never emits HTTP output; turning the envelope (and
 * its `error.code`) into an HTTP response is left to the caller.
 */
class UnifiedCrawlerController
{
    /** API version reported in the `meta` section of every response. */
    public const VERSION = '1.0';

    /** Crawl modes accepted by the `start` action; anything else falls back to "deep". */
    private const CRAWL_MODES = ['deep', 'search', 'sitemap', 'infinite', 'hybrid'];

    /** Statuses for which `completed_at` is stamped when a job's status is updated. */
    private const TERMINAL_STATUSES = ['completed', 'failed', 'cancelled'];

    /** Whitelist of sortable columns for the `domains` action (interpolated into SQL). */
    private const DOMAIN_SORT_COLUMNS = [
        'total_visits',
        'successful_visits',
        'failed_visits',
        'avg_response_time',
        'last_visit',
    ];

    private Database $database;
    private PDO $pdo;
    private array $config;

    /**
     * @param Database $database Database wrapper; its PDO connection is used for all queries.
     * @param array    $config   Configuration forwarded to the search engine aggregator.
     */
    public function __construct(Database $database, array $config = [])
    {
        $this->database = $database;
        $this->pdo = $database->getConnection();
        $this->config = $config;
    }

    /**
     * Dispatch an API action to its handler.
     *
     * @param string $action One of: start, search, batch, status, pause, resume,
     *                       stop, delete, jobs, progress, logs, domains, analytics.
     * @param array  $params Request parameters (query string / body), untrusted.
     *
     * @return array Response envelope; unknown actions yield a 400 error envelope.
     */
    public function handle(string $action, array $params = []): array
    {
        switch ($action) {
            case 'start':
                return $this->startCrawl($params);

            case 'search':
                return $this->searchCrawl($params);

            case 'batch':
                return $this->batchProcess($params);

            case 'status':
                return $this->getStatus($this->jobIdFrom($params));

            case 'pause':
                return $this->pauseJob($this->jobIdFrom($params));

            case 'resume':
                return $this->resumeJob($this->jobIdFrom($params));

            case 'stop':
                return $this->stopJob($this->jobIdFrom($params));

            case 'delete':
                return $this->deleteJob($this->jobIdFrom($params));

            case 'jobs':
                return $this->listJobs($params);

            case 'progress':
                return $this->getProgress($this->jobIdFrom($params));

            case 'logs':
                return $this->getLogs($this->jobIdFrom($params), $params);

            case 'domains':
                return $this->getDomainStats($params);

            case 'analytics':
                return $this->getAnalytics($params);

            default:
                return $this->error('Unknown action', 400);
        }
    }

    /**
     * Create and configure a crawl job from seed URLs and/or search terms.
     *
     * List parameters (`seed_urls`, `search_terms`, `blocked_domains`,
     * `forced_domains`) may be newline-separated strings or arrays of strings.
     *
     * @return array Envelope with the job id, resolved mode, config and SSE URL,
     *               or a 400 error when neither seed URLs nor search terms are given.
     */
    private function startCrawl(array $params): array
    {
        $seedUrls = $this->parseLines($params['seed_urls'] ?? '');
        $searchTerms = $this->parseLines($params['search_terms'] ?? '');

        if ($seedUrls === [] && $searchTerms === []) {
            return $this->error('Either seed_urls or search_terms is required', 400);
        }

        $config = [
            'max_pages' => (int) ($params['max_pages'] ?? 100),
            'max_depth' => (int) ($params['max_depth'] ?? 3),
            'parallel_count' => (int) ($params['parallel_count'] ?? 5),
            'timeout' => (int) ($params['timeout'] ?? 10),
            // A plain (bool) cast would turn the query-string value "false" into true.
            'same_domain_only' => $this->toBool($params['same_domain_only'] ?? null, true),
            'relevance_threshold' => (float) ($params['relevance_threshold'] ?? 2.0),
            'robots_policy' => $params['robots_policy'] ?? 'respect',
            'batch_size' => (int) ($params['batch_size'] ?? 20),
        ];

        $blockedDomains = $this->parseLines($params['blocked_domains'] ?? '');
        $forcedDomains = $this->parseLines($params['forced_domains'] ?? '');

        $mode = $params['mode'] ?? 'deep';
        if (!in_array($mode, self::CRAWL_MODES, true)) {
            $mode = 'deep';
        }

        // NOTE(review): $mode is only echoed back in the response; it is not passed
        // to the crawler here. Confirm whether UnifiedCrawler needs to receive it.
        $crawler = $this->createCrawler();
        $crawler->configure($config)
            ->setSeedUrls($seedUrls)
            ->setSearchTerms($searchTerms)
            ->setBlockedDomains($blockedDomains)
            ->setForcedDomains($forcedDomains);

        $jobId = $crawler->getJobId();

        return $this->success([
            'job_id' => $jobId,
            'mode' => $mode,
            'config' => $config,
            'message' => 'Crawl job created. Connect to SSE endpoint for real-time updates.',
            'sse_url' => '/api/v1/crawler/unified/stream?job_id=' . rawurlencode((string) $jobId),
        ]);
    }

    /**
     * Start a crawl in "search" mode from a single query string.
     *
     * @return array Envelope from startCrawl(), or a 400 error for a blank query.
     */
    private function searchCrawl(array $params): array
    {
        $query = $params['query'] ?? '';
        $query = is_scalar($query) ? trim((string) $query) : '';

        // Compare against '' explicitly: empty() would also reject the query "0".
        if ($query === '') {
            return $this->error('Search query is required', 400);
        }

        return $this->startCrawl(array_merge($params, [
            'search_terms' => $query,
            'mode' => 'search',
        ]));
    }

    /**
     * Create a job that fetches exactly the given URLs (depth 0, no link following).
     *
     * @return array Envelope with the job id and URL count, or a 400 error
     *               when no usable URL was supplied.
     */
    private function batchProcess(array $params): array
    {
        $urls = $this->parseLines($params['urls'] ?? '');

        if ($urls === []) {
            return $this->error('URLs are required', 400);
        }

        $crawler = $this->createCrawler();
        $crawler->configure([
            'max_pages' => count($urls),
            'max_depth' => 0, // No deep crawling for batch
        ])
        ->setSeedUrls($urls);

        return $this->success([
            'job_id' => $crawler->getJobId(),
            'total_urls' => count($urls),
            'message' => 'Batch job created',
        ]);
    }

    /**
     * Fetch the stored row for one job.
     *
     * @return array Envelope with the job row; 400 without an id, 404 when unknown.
     */
    private function getStatus(string $jobId): array
    {
        if ($jobId === '') {
            return $this->error('Job ID is required', 400);
        }

        try {
            $stmt = $this->pdo->prepare("
                SELECT
                    id, type, mode, seed_url, search_term, status,
                    pages_crawled, pages_found, errors, max_pages, max_depth,
                    created_at, updated_at, completed_at, last_error
                FROM crawler_jobs
                WHERE id = ?
            ");
            $stmt->execute([$jobId]);
            // Explicit fetch mode: the default FETCH_BOTH would duplicate every column.
            $job = $stmt->fetch(PDO::FETCH_ASSOC);

            if (!$job) {
                return $this->error('Job not found', 404);
            }

            return $this->success($job);
        } catch (\Exception $e) {
            return $this->serverError($e, 'getStatus');
        }
    }

    /**
     * Mark a job as paused.
     */
    private function pauseJob(string $jobId): array
    {
        return $this->updateJobStatus($jobId, 'paused');
    }

    /**
     * Mark a job as running again.
     */
    private function resumeJob(string $jobId): array
    {
        return $this->updateJobStatus($jobId, 'running');
    }

    /**
     * Mark a job as cancelled.
     */
    private function stopJob(string $jobId): array
    {
        return $this->updateJobStatus($jobId, 'cancelled');
    }

    /**
     * Delete a job together with its activity log, checkpoints and URLs.
     *
     * All deletes run in one transaction (unless the connection is already
     * inside one), so a failure part-way through does not leave a half-deleted job.
     *
     * @return array Success envelope; 400 without an id, 404 when no job row was removed.
     */
    private function deleteJob(string $jobId): array
    {
        if ($jobId === '') {
            return $this->error('Job ID is required', 400);
        }

        $ownsTransaction = false;

        try {
            if (!$this->pdo->inTransaction()) {
                $ownsTransaction = $this->pdo->beginTransaction();
            }

            // Dependent rows first, then the job itself.
            $this->pdo->prepare("DELETE FROM crawler_activity_log WHERE job_id = ?")->execute([$jobId]);
            $this->pdo->prepare("DELETE FROM crawler_checkpoints WHERE job_id = ?")->execute([$jobId]);
            $this->pdo->prepare("DELETE FROM crawler_urls WHERE job_id = ?")->execute([$jobId]);

            $stmt = $this->pdo->prepare("DELETE FROM crawler_jobs WHERE id = ?");
            $stmt->execute([$jobId]);
            $deleted = $stmt->rowCount();

            if ($ownsTransaction) {
                $this->pdo->commit();
            }

            if ($deleted === 0) {
                return $this->error('Job not found', 404);
            }

            return $this->success(['message' => 'Job deleted successfully']);
        } catch (\Exception $e) {
            if ($ownsTransaction && $this->pdo->inTransaction()) {
                $this->pdo->rollBack();
            }

            return $this->serverError($e, 'deleteJob');
        }
    }

    /**
     * List jobs, newest first, optionally filtered by `status` and/or `mode`.
     *
     * Supports `page` (>= 1) and `per_page` (1-100, default 20).
     *
     * @return array Envelope with `jobs` and `pagination`.
     */
    private function listJobs(array $params): array
    {
        [$page, $perPage, $offset] = $this->pageWindow($params, 20, 100);

        $where = [];
        $bindings = [];

        foreach (['status', 'mode'] as $column) {
            $value = $params[$column] ?? null;
            if (is_scalar($value) && (string) $value !== '') {
                $where[] = "{$column} = ?";
                $bindings[] = (string) $value;
            }
        }

        $whereClause = $where !== [] ? 'WHERE ' . implode(' AND ', $where) : '';

        try {
            $stmt = $this->pdo->prepare("SELECT COUNT(*) FROM crawler_jobs {$whereClause}");
            $stmt->execute($bindings);
            $total = (int) $stmt->fetchColumn();

            // $perPage and $offset are clamped integers, so interpolating them is safe.
            $stmt = $this->pdo->prepare("
                SELECT
                    id, type, mode, seed_url, search_term, status,
                    pages_crawled, pages_found, errors, max_pages, max_depth,
                    created_at, updated_at, completed_at
                FROM crawler_jobs
                {$whereClause}
                ORDER BY created_at DESC
                LIMIT {$perPage} OFFSET {$offset}
            ");
            $stmt->execute($bindings);
            $jobs = $stmt->fetchAll(PDO::FETCH_ASSOC);

            return $this->success([
                'jobs' => $jobs,
                'pagination' => $this->paginationMeta($total, $page, $perPage),
            ]);
        } catch (\Exception $e) {
            return $this->serverError($e, 'listJobs');
        }
    }

    /**
     * Compute progress, throughput and a remaining-time estimate for one job.
     *
     * The clock stops at `completed_at` when that column is set, so finished
     * jobs report a stable elapsed time and rate and a remaining time of 0.
     *
     * @return array Envelope with progress figures; 400 without an id, 404 when unknown.
     */
    private function getProgress(string $jobId): array
    {
        if ($jobId === '') {
            return $this->error('Job ID is required', 400);
        }

        try {
            $stmt = $this->pdo->prepare("
                SELECT
                    id, status, pages_crawled, pages_found, errors,
                    max_pages, max_depth, created_at, updated_at, completed_at
                FROM crawler_jobs
                WHERE id = ?
            ");
            $stmt->execute([$jobId]);
            $job = $stmt->fetch(PDO::FETCH_ASSOC);

            if (!$job) {
                return $this->error('Job not found', 404);
            }

            $crawled = (int) $job['pages_crawled'];

            // Denominator is at least 1 (no division by zero) and never below the
            // crawled count, so a job that reached max_pages reports exactly 100%.
            $total = max((int) $job['max_pages'], $crawled, 1);
            $progress = round(($crawled / $total) * 100, 2);

            $startedAt = strtotime((string) $job['created_at']);
            $finishedAt = !empty($job['completed_at'])
                ? strtotime((string) $job['completed_at'])
                : false;
            $isFinished = $finishedAt !== false;

            $clockEnd = $isFinished ? $finishedAt : time();
            $elapsed = $startedAt !== false ? max(0, $clockEnd - $startedAt) : 0;

            $rate = $elapsed > 0 ? $crawled / $elapsed : 0;
            $remaining = (!$isFinished && $rate > 0) ? ($total - $crawled) / $rate : 0;

            return $this->success([
                'job_id' => $jobId,
                'status' => $job['status'],
                'progress' => $progress,
                'processed' => $crawled,
                'imported' => (int) $job['pages_found'],
                'errors' => (int) $job['errors'],
                'total' => $total,
                'elapsed_seconds' => $elapsed,
                'remaining_seconds' => (int) round($remaining),
                'processing_rate' => round($rate, 2),
            ]);
        } catch (\Exception $e) {
            return $this->serverError($e, 'getProgress');
        }
    }

    /**
     * Return a job's activity log, newest first, optionally filtered by `level`.
     *
     * Supports `page` (>= 1) and `per_page` (1-500, default 100).
     *
     * @return array Envelope with `logs` and `pagination`; 400 without an id.
     */
    private function getLogs(string $jobId, array $params): array
    {
        if ($jobId === '') {
            return $this->error('Job ID is required', 400);
        }

        [$page, $perPage, $offset] = $this->pageWindow($params, 100, 500);

        $where = ['job_id = ?'];
        $bindings = [$jobId];

        $level = $params['level'] ?? null;
        if (is_scalar($level) && (string) $level !== '') {
            $where[] = 'event_level = ?';
            $bindings[] = (string) $level;
        }

        $whereClause = implode(' AND ', $where);

        try {
            $stmt = $this->pdo->prepare("SELECT COUNT(*) FROM crawler_activity_log WHERE {$whereClause}");
            $stmt->execute($bindings);
            $total = (int) $stmt->fetchColumn();

            $stmt = $this->pdo->prepare("
                SELECT
                    id, event_type, event_level, message, url, http_code, response_time_ms, created_at
                FROM crawler_activity_log
                WHERE {$whereClause}
                ORDER BY created_at DESC
                LIMIT {$perPage} OFFSET {$offset}
            ");
            $stmt->execute($bindings);
            $logs = $stmt->fetchAll(PDO::FETCH_ASSOC);

            return $this->success([
                'logs' => $logs,
                'pagination' => $this->paginationMeta($total, $page, $perPage),
            ]);
        } catch (\Exception $e) {
            return $this->serverError($e, 'getLogs');
        }
    }

    /**
     * Return per-domain crawl statistics with a computed success rate.
     *
     * `sort_by` must be one of DOMAIN_SORT_COLUMNS (default total_visits) and
     * `sort_dir` ASC or DESC (default DESC). Both are validated against fixed
     * values because they are interpolated into the ORDER BY clause.
     *
     * @return array Envelope with `domains` and `pagination`.
     */
    private function getDomainStats(array $params): array
    {
        [$page, $perPage, $offset] = $this->pageWindow($params, 20, 100);

        $sortBy = $params['sort_by'] ?? '';
        if (!is_string($sortBy) || !in_array($sortBy, self::DOMAIN_SORT_COLUMNS, true)) {
            $sortBy = 'total_visits';
        }

        $sortDir = $params['sort_dir'] ?? 'DESC';
        $sortDir = (is_string($sortDir) && strtoupper($sortDir) === 'ASC') ? 'ASC' : 'DESC';

        try {
            $stmt = $this->pdo->query("SELECT COUNT(*) FROM crawler_domain_stats");
            $total = (int) $stmt->fetchColumn();

            $stmt = $this->pdo->query("
                SELECT
                    domain, total_visits, successful_visits, failed_visits,
                    avg_response_time, last_visit, last_http_code, is_blocked, is_social_media,
                    ROUND((successful_visits / NULLIF(total_visits, 0)) * 100, 1) as success_rate
                FROM crawler_domain_stats
                ORDER BY {$sortBy} {$sortDir}
                LIMIT {$perPage} OFFSET {$offset}
            ");
            $domains = $stmt->fetchAll(PDO::FETCH_ASSOC);

            return $this->success([
                'domains' => $domains,
                'pagination' => $this->paginationMeta($total, $page, $perPage),
            ]);
        } catch (\Exception $e) {
            return $this->serverError($e, 'getDomainStats');
        }
    }

    /**
     * Return daily metrics, a job summary and the ten most-visited domains.
     *
     * `days` selects the look-back window (1-365, default 7). The top-domain
     * list is not restricted to that window.
     *
     * @return array Envelope with `summary`, `daily_metrics`, `top_domains`, `period_days`.
     */
    private function getAnalytics(array $params): array
    {
        $days = min(365, max(1, (int) ($params['days'] ?? 7)));
        $startDate = date('Y-m-d', strtotime("-{$days} days"));

        try {
            $stmt = $this->pdo->prepare("
                SELECT
                    metric_date,
                    SUM(total_jobs) as jobs,
                    SUM(completed_jobs) as completed,
                    SUM(failed_jobs) as failed,
                    SUM(total_urls_crawled) as urls_crawled,
                    SUM(total_urls_imported) as urls_imported,
                    SUM(total_urls_error) as urls_error,
                    AVG(avg_response_time_ms) as avg_response_time,
                    SUM(data_volume_bytes) as data_volume
                FROM crawler_metrics
                WHERE metric_date >= ?
                GROUP BY metric_date
                ORDER BY metric_date
            ");
            $stmt->execute([$startDate]);
            $dailyMetrics = $stmt->fetchAll(PDO::FETCH_ASSOC);

            $stmt = $this->pdo->prepare("
                SELECT
                    COUNT(*) as total_jobs,
                    SUM(pages_crawled) as total_crawled,
                    SUM(pages_found) as total_imported,
                    SUM(errors) as total_errors,
                    SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed_jobs,
                    SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed_jobs,
                    SUM(CASE WHEN status = 'running' THEN 1 ELSE 0 END) as running_jobs
                FROM crawler_jobs
                WHERE created_at >= ?
            ");
            $stmt->execute([$startDate . ' 00:00:00']);
            $summary = $stmt->fetch(PDO::FETCH_ASSOC);

            $stmt = $this->pdo->query("
                SELECT domain, total_visits, successful_visits,
                    ROUND((successful_visits / NULLIF(total_visits, 0)) * 100, 1) as success_rate
                FROM crawler_domain_stats
                ORDER BY total_visits DESC
                LIMIT 10
            ");
            $topDomains = $stmt->fetchAll(PDO::FETCH_ASSOC);

            return $this->success([
                'summary' => $summary,
                'daily_metrics' => $dailyMetrics,
                'top_domains' => $topDomains,
                'period_days' => $days,
            ]);
        } catch (\Exception $e) {
            return $this->serverError($e, 'getAnalytics');
        }
    }

    /**
     * Set a job's status; stamps `completed_at` for terminal statuses and
     * clears it otherwise.
     *
     * @return array Envelope echoing the new status; 400 without an id, 404 when unknown.
     */
    private function updateJobStatus(string $jobId, string $status): array
    {
        if ($jobId === '') {
            return $this->error('Job ID is required', 400);
        }

        try {
            // Fixed SQL fragments chosen here, never user input.
            $completedAt = in_array($status, self::TERMINAL_STATUSES, true) ? 'NOW()' : 'NULL';

            $stmt = $this->pdo->prepare("
                UPDATE crawler_jobs
                SET status = ?, completed_at = {$completedAt}, updated_at = NOW()
                WHERE id = ?
            ");
            $stmt->execute([$status, $jobId]);

            // Zero affected rows can also mean "row exists but nothing changed"
            // (depending on the driver), so confirm absence before answering 404.
            if ($stmt->rowCount() === 0 && !$this->jobExists($jobId)) {
                return $this->error('Job not found', 404);
            }

            return $this->success([
                'job_id' => $jobId,
                'status' => $status,
                'message' => "Job {$status} successfully",
            ]);
        } catch (\Exception $e) {
            return $this->serverError($e, 'updateJobStatus');
        }
    }

    /**
     * Check whether a row with the given id exists in crawler_jobs.
     */
    private function jobExists(string $jobId): bool
    {
        $stmt = $this->pdo->prepare("SELECT 1 FROM crawler_jobs WHERE id = ?");
        $stmt->execute([$jobId]);

        return $stmt->fetchColumn() !== false;
    }

    /**
     * Build a UnifiedCrawler wired with its collaborators.
     *
     * The robots handler and sitemap parser are always attached; the scraper,
     * rate limiter, circuit breaker and search aggregator only when their
     * classes can be loaded.
     */
    private function createCrawler(): UnifiedCrawler
    {
        $crawler = new UnifiedCrawler();
        $crawler->setDatabase($this->database);

        if (class_exists(WebScraper::class)) {
            $crawler->setScraper(new WebScraper());
        }

        $robotsHandler = new RobotsHandler();
        $robotsHandler->setDatabase($this->database);
        $crawler->setRobotsHandler($robotsHandler);

        $crawler->setSitemapParser(new SitemapParser());

        if (class_exists(DomainRateLimiter::class)) {
            $crawler->setRateLimiter(new DomainRateLimiter());
        }

        if (class_exists(CircuitBreaker::class)) {
            $crawler->setCircuitBreaker(new CircuitBreaker());
        }

        if (class_exists(SearchEngineAggregator::class)) {
            $crawler->setSearchAggregator(new SearchEngineAggregator($this->config));
        }

        return $crawler;
    }

    /**
     * Extract the job id from request parameters as a trimmed string.
     *
     * Integer ids are accepted (they would otherwise raise a TypeError under
     * strict_types); any other non-string value is treated as missing.
     */
    private function jobIdFrom(array $params): string
    {
        $id = $params['id'] ?? '';

        return (is_string($id) || is_int($id)) ? trim((string) $id) : '';
    }

    /**
     * Normalise a list parameter into a clean list of non-empty strings.
     *
     * Accepts a newline-separated string (LF, CRLF or CR) or an array of
     * strings; entries are trimmed and blank or non-string entries dropped.
     *
     * @param mixed $value Raw parameter value.
     *
     * @return string[] Zero-indexed list.
     */
    private function parseLines($value): array
    {
        if (is_string($value)) {
            $value = explode("\n", str_replace(["\r\n", "\r"], "\n", $value));
        } elseif (!is_array($value)) {
            return [];
        }

        $lines = [];
        foreach ($value as $line) {
            if (!is_string($line)) {
                continue;
            }

            $line = trim($line);
            if ($line !== '') {
                $lines[] = $line;
            }
        }

        return $lines;
    }

    /**
     * Interpret a request value as a boolean.
     *
     * Recognises the spellings understood by FILTER_VALIDATE_BOOLEAN
     * ("1"/"0", "true"/"false", "on"/"off", "yes"/"no", ""); a missing or
     * unrecognised value yields $default.
     *
     * @param mixed $value Raw parameter value.
     */
    private function toBool($value, bool $default): bool
    {
        if ($value === null) {
            return $default;
        }

        if (is_bool($value)) {
            return $value;
        }

        $parsed = filter_var($value, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);

        return $parsed ?? $default;
    }

    /**
     * Resolve `page` / `per_page` parameters into a clamped paging window.
     *
     * @return int[] Tuple of [page, perPage, offset].
     */
    private function pageWindow(array $params, int $defaultPerPage, int $maxPerPage): array
    {
        $perPage = min($maxPerPage, max(1, (int) ($params['per_page'] ?? $defaultPerPage)));
        $page = max(1, (int) ($params['page'] ?? 1));

        // Cap the page so the offset stays an integer; an overflow would turn it
        // into a float that is then interpolated into the LIMIT/OFFSET clause.
        $page = min($page, intdiv(PHP_INT_MAX, $perPage));

        return [$page, $perPage, ($page - 1) * $perPage];
    }

    /**
     * Build the `pagination` section of a list response (all values are integers).
     */
    private function paginationMeta(int $total, int $page, int $perPage): array
    {
        return [
            'total' => $total,
            'page' => $page,
            'per_page' => $perPage,
            'total_pages' => (int) ceil($total / $perPage),
        ];
    }

    /**
     * Log an unexpected failure and return a generic 500 envelope.
     *
     * The exception message goes to the error log only, so internal details
     * such as SQL text are not exposed to API clients.
     */
    private function serverError(\Throwable $e, string $operation): array
    {
        error_log(sprintf('[UnifiedCrawlerController] %s failed: %s', $operation, $e->getMessage()));

        return $this->error('Internal server error', 500);
    }

    /**
     * Wrap a payload in the success envelope.
     */
    private function success(array $data): array
    {
        return [
            'success' => true,
            'data' => $data,
            'meta' => $this->meta(),
        ];
    }

    /**
     * Build the error envelope.
     *
     * @param string $message Client-facing description of the failure.
     * @param int    $code    Error code placed in the envelope (HTTP-style status values).
     */
    private function error(string $message, int $code = 400): array
    {
        return [
            'success' => false,
            'error' => [
                'message' => $message,
                'code' => $code,
            ],
            'meta' => $this->meta(),
        ];
    }

    /**
     * Metadata attached to every response: API version and an ISO 8601 timestamp.
     */
    private function meta(): array
    {
        return [
            'api_version' => self::VERSION,
            'timestamp' => date('c'),
        ];
    }
}
