<?php
/**
 * ===========================================
 * FLOWBOT DCI - CRAWLER API CONTROLLER v1.0
 * ===========================================
 * Handles crawler-related API endpoints including SSE streaming
 *
 * Endpoints:
 * - GET  /api/v1/crawler/start     - Start crawl with SSE stream
 * - POST /api/v1/crawler/search    - Search using search engines
 * - POST /api/v1/crawler/sitemap   - Import from sitemap
 * - GET  /api/v1/crawler/status    - Get crawl status
 * - POST /api/v1/crawler/stop      - Stop running crawl
 * - GET  /api/v1/crawler/engines   - List available search engines (HTTP verb assumed; confirm in router)
 */

declare(strict_types=1);

namespace FlowbotDCI\Api\v1;

use FlowbotDCI\Services\CrawlManager;
use FlowbotDCI\Services\LinkExtractor;
use FlowbotDCI\Services\WebScraper;
use FlowbotDCI\Services\SitemapFetcher;
use FlowbotDCI\Services\SearchEngine\SearchEngineAggregator;

/**
 * API controller for crawler endpoints.
 *
 * JSON-style actions are dispatched through handle() and return a response
 * array built by success() / error(). The streaming entry points
 * (streamCrawl(), streamSearch()) write Server-Sent Events directly to the
 * output and return nothing.
 */
class CrawlerController
{
    public const VERSION = '1.0';

    /** Number of search results requested when the caller gives none. */
    private const DEFAULT_MAX_RESULTS = 30;

    /** Hard upper bound on search results per request. */
    private const MAX_RESULTS_LIMIT = 100;

    private ?\PDO $database = null;
    private array $config = [];
    private array $dbConfig = [];

    /**
     * Constructor accepts config arrays.
     *
     * A database connection is attempted only when both `host` and `name`
     * are present. A failed connection is logged and the controller keeps
     * working without a database (status/stop then answer with 503).
     *
     * @param array $crawlerConfig Crawler configuration (search_engines, etc)
     * @param array $dbConfig Database configuration (host, name, user, password,
     *                        optional charset and port)
     */
    public function __construct(array $crawlerConfig = [], array $dbConfig = [])
    {
        $this->config = $crawlerConfig;
        $this->dbConfig = $dbConfig;

        if (empty($dbConfig['host']) || empty($dbConfig['name'])) {
            return;
        }

        try {
            $dsn = sprintf(
                'mysql:host=%s;dbname=%s;charset=%s',
                $dbConfig['host'],
                $dbConfig['name'],
                $dbConfig['charset'] ?? 'utf8mb4'
            );

            // Optional non-default port; omitted entirely when not configured.
            if (!empty($dbConfig['port'])) {
                $dsn .= ';port=' . (int)$dbConfig['port'];
            }

            $this->database = new \PDO(
                $dsn,
                $dbConfig['user'] ?? '',
                $dbConfig['password'] ?? '',
                [
                    \PDO::ATTR_ERRMODE => \PDO::ERRMODE_EXCEPTION,
                    \PDO::ATTR_DEFAULT_FETCH_MODE => \PDO::FETCH_ASSOC,
                ]
            );
        } catch (\PDOException $e) {
            // Database connection is optional for the crawler.
            error_log("CrawlerController: DB connection failed: " . $e->getMessage());
        }
    }

    /**
     * Dispatch an API action to its handler.
     *
     * @param string $action Action name, e.g. "crawler/search"
     * @param array  $params Request parameters
     * @return array Response array from success() or error(); unknown actions
     *               yield a 404 error and any \Exception yields a 500 error
     */
    public function handle(string $action, array $params = []): array
    {
        try {
            switch ($action) {
                case 'crawler/start':
                    return $this->startCrawlSSE($params);

                case 'crawler/search':
                    return $this->searchEngines($params);

                case 'crawler/sitemap':
                    return $this->importSitemap($params);

                case 'crawler/status':
                    // Coerce so a numeric id does not raise a TypeError under strict_types.
                    return $this->getCrawlStatus($this->stringParam($params, 'id'));

                case 'crawler/stop':
                    return $this->stopCrawl($this->stringParam($params, 'id'));

                case 'crawler/engines':
                    return $this->getAvailableEngines();

                default:
                    return $this->error("Unknown action: {$action}", 404);
            }
        } catch (\Exception $e) {
            return $this->error($e->getMessage(), 500);
        }
    }

    /**
     * Run the SSE crawl when it is requested through handle().
     *
     * All crawl output (including validation errors) is written to the event
     * stream by streamCrawl(); the returned array only tells the caller that
     * the response body has already been streamed.
     */
    private function startCrawlSSE(array $params): array
    {
        $this->streamCrawl($params);

        return $this->success(['streamed' => true]);
    }

    /**
     * Start crawl with SSE streaming.
     *
     * Reads `startUrl` (required) plus the optional `maxPages`, `maxDepth`,
     * `maxParallel`, `sameDomainOnly`, `searchTerm`, `respectRobots`,
     * `autoProcess` and `timeout` parameters, then forwards every crawler
     * event to the client until the crawl ends or the client disconnects.
     */
    public function streamCrawl(array $params): void
    {
        $this->setupSSE();

        $startUrl = $this->stringParam($params, 'startUrl');
        if ($startUrl === '') {
            $this->sendSSE('error', ['message' => 'startUrl is required']);
            return;
        }
        $startUrl = $this->normalizeUrl($startUrl);

        $scraper = new WebScraper();
        $extractor = new LinkExtractor();
        $crawler = new CrawlManager($scraper, $extractor);

        if ($this->database) {
            $crawler->setDatabase($this->database);
        }

        // Lower bounds keep nonsensical (zero/negative) limits out of the crawler.
        $crawler->configure([
            'maxPages' => $this->intParam($params, 'maxPages', 100, 1),
            'maxDepth' => $this->intParam($params, 'maxDepth', 3, 0),
            'maxParallel' => $this->intParam($params, 'maxParallel', 5, 1),
            'sameDomainOnly' => $this->boolParam($params, 'sameDomainOnly', true),
            'searchTerm' => $this->stringParam($params, 'searchTerm'),
            'respectRobots' => $this->boolParam($params, 'respectRobots', true),
            'autoProcess' => $this->boolParam($params, 'autoProcess', true),
            'timeout' => $this->intParam($params, 'timeout', 10, 1),
        ]);

        $crawler->setEventCallback(function ($event, $data) {
            $this->sendSSE($event, $data);
        });

        foreach ($crawler->startCrawl($startUrl) as $event) {
            $this->sendSSE($event['event'], $event['data']);

            // Stop crawling as soon as nobody is listening any more.
            if (connection_aborted()) {
                $crawler->stop();
                break;
            }
        }
    }

    /**
     * Search using search engines (non-SSE).
     *
     * Parameters: `query` (or `searchTerm`), optional `maxResults`
     * (clamped to 1..100) and optional `engines`, given either as a
     * comma-separated string or as an array of engine names.
     */
    private function searchEngines(array $params): array
    {
        $query = $this->queryParam($params);
        if ($query === '') {
            return $this->error('query is required', 400);
        }

        $maxResults = $this->maxResultsParam($params);
        $engineList = $this->parseEngineList($params['engines'] ?? null);

        $aggregator = new SearchEngineAggregator();
        $aggregator->registerFromConfig($this->config['search_engines'] ?? []);

        if (!$aggregator->hasAvailableEngines()) {
            return $this->error('No search engines available', 503);
        }

        $results = $aggregator->searchAll($query, $maxResults, $engineList);

        return $this->success([
            'query' => $query,
            'results' => $results,
            'count' => count($results),
            'stats' => $aggregator->getStats(),
            'errors' => $aggregator->getErrors(),
            'enginesUsed' => $aggregator->getAvailableEngines(),
        ]);
    }

    /**
     * Stream search results via SSE.
     *
     * Emits `status` events, one `result` event per hit and a final `done`
     * event. When `crawlResults` is true, each result URL is additionally
     * crawled (10 pages per result, same domain only) and the crawler events
     * are forwarded to the client.
     */
    public function streamSearch(array $params): void
    {
        $this->setupSSE();

        $query = $this->queryParam($params);
        if ($query === '') {
            $this->sendSSE('error', ['message' => 'query is required']);
            return;
        }

        $maxResults = $this->maxResultsParam($params);
        $crawlResults = $this->boolParam($params, 'crawlResults', false);
        $maxDepth = $this->intParam($params, 'maxDepth', 2, 0);

        $aggregator = new SearchEngineAggregator();
        $aggregator->registerFromConfig($this->config['search_engines'] ?? []);

        // Same guard as the non-streaming search endpoint.
        if (!$aggregator->hasAvailableEngines()) {
            $this->sendSSE('error', ['message' => 'No search engines available']);
            return;
        }

        $this->sendSSE('status', [
            'message' => "Searching for: {$query}",
            'engines' => $aggregator->getAvailableEngines(),
        ]);

        $results = $aggregator->searchAll($query, $maxResults);
        $total = count($results);

        $this->sendSSE('status', [
            'message' => "Found " . $total . " results",
            'count' => $total,
            'stats' => $aggregator->getStats(),
        ]);

        foreach ($results as $result) {
            $this->sendSSE('result', $result);
        }

        if ($crawlResults && !empty($results)) {
            $this->sendSSE('status', [
                'message' => "Starting deep crawl of search results...",
            ]);

            $scraper = new WebScraper();
            $extractor = new LinkExtractor();
            $crawler = new CrawlManager($scraper, $extractor);

            $crawler->configure([
                'maxPages' => 10, // Limit per result
                'maxDepth' => $maxDepth,
                'maxParallel' => 3,
                'sameDomainOnly' => true,
            ]);

            foreach ($results as $index => $result) {
                if (connection_aborted()) {
                    break;
                }

                // A result without a URL cannot be crawled; skip it.
                if (empty($result['url'])) {
                    continue;
                }

                $this->sendSSE('status', [
                    'message' => "Crawling result " . ($index + 1) . "/" . $total,
                    'url' => $result['url'],
                ]);

                foreach ($crawler->startCrawl($result['url']) as $event) {
                    $this->sendSSE($event['event'], $event['data']);

                    // Abort mid-crawl too, not only between results.
                    if (connection_aborted()) {
                        $crawler->stop();
                        break 2;
                    }
                }
            }
        }

        $this->sendSSE('done', [
            'message' => 'Search completed',
            'totalResults' => $total,
            'stats' => $aggregator->getStats(),
        ]);
    }

    /**
     * Import URLs from sitemap.
     *
     * The `url` parameter is required. A URL without an http(s) scheme gets
     * `https://` prepended, so only http(s) URLs are handed to the fetcher.
     *
     * @return array Success response with `url`, `urls` and `count`, or a
     *               400 error when the URL is missing or the fetcher reports
     *               an error
     */
    public function importSitemap(array $params): array
    {
        $url = $this->stringParam($params, 'url');
        if ($url === '') {
            return $this->error('url is required', 400);
        }
        $url = $this->normalizeUrl($url);

        $fetcher = new SitemapFetcher();
        $urls = $fetcher->fetch($url);

        $fetchError = $fetcher->getLastError();
        if ($fetchError) {
            return $this->error($fetchError, 400);
        }

        return $this->success([
            'url' => $url,
            'urls' => $urls,
            'count' => count($urls),
        ]);
    }

    /**
     * Get crawl status.
     *
     * Looks up the job row in `crawler_jobs` and the per-status URL counts
     * from `crawler_urls`.
     *
     * @return array Success response with `job` and `urlStats`, or an error
     *               (400 missing id, 503 no database, 404 unknown job)
     */
    private function getCrawlStatus(string $crawlId): array
    {
        if ($crawlId === '') {
            return $this->error('crawlId is required', 400);
        }

        if (!$this->database) {
            return $this->error('Database not available', 503);
        }

        $stmt = $this->database->prepare("
            SELECT * FROM crawler_jobs WHERE id = ?
        ");
        $stmt->execute([$crawlId]);
        $job = $stmt->fetch(\PDO::FETCH_ASSOC);

        if (!$job) {
            return $this->error('Crawl job not found', 404);
        }

        // status => count map for the URLs belonging to this job
        $stmt = $this->database->prepare("
            SELECT status, COUNT(*) as count
            FROM crawler_urls
            WHERE job_id = ?
            GROUP BY status
        ");
        $stmt->execute([$crawlId]);
        $urlStats = $stmt->fetchAll(\PDO::FETCH_KEY_PAIR);

        return $this->success([
            'job' => $job,
            'urlStats' => $urlStats,
        ]);
    }

    /**
     * Stop a running crawl.
     *
     * Marks the job as `stopped` in `crawler_jobs`; only rows currently in
     * status `running` are affected.
     *
     * @return array Success response, or an error (400 missing id, 503 no
     *               database, 404 when no running job matched)
     */
    private function stopCrawl(string $crawlId): array
    {
        if ($crawlId === '') {
            return $this->error('crawlId is required', 400);
        }

        if (!$this->database) {
            return $this->error('Database not available', 503);
        }

        $stmt = $this->database->prepare("
            UPDATE crawler_jobs
            SET status = 'stopped', completed_at = NOW()
            WHERE id = ? AND status = 'running'
        ");
        $stmt->execute([$crawlId]);

        if ($stmt->rowCount() === 0) {
            return $this->error('Crawl job not found or not running', 404);
        }

        return $this->success([
            'message' => 'Crawl stopped',
            'crawlId' => $crawlId,
        ]);
    }

    /**
     * Public method to get job status (called from Application.php)
     */
    public function getStatus(string $jobId): array
    {
        return $this->getCrawlStatus($jobId);
    }

    /**
     * Public method to stop a job (called from Application.php)
     */
    public function stopJob(string $jobId): array
    {
        return $this->stopCrawl($jobId);
    }

    /**
     * Public method to get engines (called from Application.php)
     */
    public function getEngines(): array
    {
        return $this->getAvailableEngines();
    }

    /**
     * Get available search engines as configured under `search_engines`.
     */
    private function getAvailableEngines(): array
    {
        $aggregator = new SearchEngineAggregator();
        $aggregator->registerFromConfig($this->config['search_engines'] ?? []);

        return $this->success([
            'engines' => $aggregator->getAvailableEngines(),
        ]);
    }

    /**
     * Setup SSE headers and switch the response to unbuffered output.
     */
    private function setupSSE(): void
    {
        // Headers must be queued before buffered output is flushed: flushing
        // first would send the response headers and make header() fail.
        if (!headers_sent()) {
            header('Content-Type: text/event-stream');
            header('Cache-Control: no-cache');
            header('Connection: keep-alive');
            header('X-Accel-Buffering: no'); // Nginx
        }

        // Unwind output buffers, stopping at one that cannot be removed so
        // the loop cannot spin forever.
        while (ob_get_level() > 0) {
            $status = ob_get_status();
            $removable = (($status['flags'] ?? 0) & PHP_OUTPUT_HANDLER_REMOVABLE) !== 0;
            if (!$removable || !ob_end_flush()) {
                break;
            }
        }

        // Streams may run for a long time.
        set_time_limit(0);

        flush();
    }

    /**
     * Send one SSE event (`event:` line, `data:` line with JSON, blank line).
     *
     * @param string $event Event name; CR/LF are stripped so the name cannot
     *                      break the event framing
     * @param array  $data  Payload, JSON-encoded; invalid UTF-8 sequences are
     *                      substituted instead of failing the whole event
     */
    private function sendSSE(string $event, array $data): void
    {
        $eventName = str_replace(["\r", "\n"], '', $event);

        $payload = json_encode($data, JSON_INVALID_UTF8_SUBSTITUTE | JSON_PARTIAL_OUTPUT_ON_ERROR);
        if ($payload === false) {
            // Never emit an empty data line; tell the client what happened.
            $payload = '{"message":"Failed to encode event payload"}';
        }

        echo "event: {$eventName}\n";
        echo "data: {$payload}\n\n";
        flush();
    }

    /**
     * Create success response
     */
    private function success(array $data): array
    {
        return [
            'success' => true,
            'data' => $data,
            'meta' => [
                'timestamp' => gmdate('Y-m-d\TH:i:s\Z'),
                'version' => self::VERSION,
            ],
        ];
    }

    /**
     * Create error response
     */
    private function error(string $message, int $code = 400): array
    {
        return [
            'success' => false,
            'error' => [
                'message' => $message,
                'code' => $code,
            ],
            'meta' => [
                'timestamp' => gmdate('Y-m-d\TH:i:s\Z'),
            ],
            'http_code' => $code,
        ];
    }

    /**
     * Read a parameter as a trimmed string; missing or non-scalar values
     * become the empty string.
     */
    private function stringParam(array $params, string $key): string
    {
        $value = $params[$key] ?? '';

        return is_scalar($value) ? trim((string)$value) : '';
    }

    /**
     * Read the search query from `query`, falling back to `searchTerm` when
     * `query` is not set.
     */
    private function queryParam(array $params): string
    {
        $value = $params['query'] ?? $params['searchTerm'] ?? '';

        return is_scalar($value) ? trim((string)$value) : '';
    }

    /**
     * Read an integer parameter, using $default for missing or non-numeric
     * values and never returning less than $min.
     */
    private function intParam(array $params, string $key, int $default, int $min): int
    {
        $value = $params[$key] ?? null;
        $number = is_numeric($value) ? (int)$value : $default;

        return max($min, $number);
    }

    /**
     * Read `maxResults`, clamped to 1..MAX_RESULTS_LIMIT.
     */
    private function maxResultsParam(array $params): int
    {
        $requested = $this->intParam($params, 'maxResults', self::DEFAULT_MAX_RESULTS, 1);

        return min($requested, self::MAX_RESULTS_LIMIT);
    }

    /**
     * Read a boolean flag. Accepts real booleans (JSON bodies) as well as the
     * strings "true"/"1" (query strings); any other present value is false.
     */
    private function boolParam(array $params, string $key, bool $default): bool
    {
        $value = $params[$key] ?? null;

        if ($value === null) {
            return $default;
        }
        if (is_bool($value)) {
            return $value;
        }
        if (is_int($value)) {
            return $value === 1;
        }
        if (is_string($value)) {
            return in_array(strtolower(trim($value)), ['true', '1'], true);
        }

        return false;
    }

    /**
     * Prepend `https://` to a URL that has no http(s) scheme.
     */
    private function normalizeUrl(string $url): string
    {
        $url = trim($url);
        if ($url === '' || preg_match('#^https?://#i', $url) === 1) {
            return $url;
        }

        return 'https://' . $url;
    }

    /**
     * Turn the `engines` parameter into a list of engine names.
     *
     * @param mixed $engines Comma-separated string, array of names, or null
     * @return array|null Trimmed, de-duplicated names, or null when none were
     *                    given (meaning: no engine restriction)
     */
    private function parseEngineList($engines): ?array
    {
        if (is_string($engines)) {
            $engines = explode(',', $engines);
        }
        if (!is_array($engines)) {
            return null;
        }

        $names = [];
        foreach ($engines as $engine) {
            if (!is_scalar($engine)) {
                continue;
            }
            $name = trim((string)$engine);
            if ($name !== '') {
                $names[] = $name;
            }
        }

        return $names === [] ? null : array_values(array_unique($names));
    }
}
