<?php
/************************************************************
 * FLOWBOT DCI v7.0 - CRAWLER UNIFICADO SIMPLES
 * -------------------------------------------------------------
 * Versão simplificada que funciona igual ao crawler original
 * - Suporte a múltiplos buscadores (Bing, Yahoo, DuckDuckGo)
 * - Deep crawling com profundidade configurável
 * - Gerenciador: pause, resume, stop
 * - Design responsivo moderno
 ************************************************************/

// Runtime setup: UTF-8 for mb_* functions, report every error, keep running
// after the client disconnects, no execution time limit, 2 GB memory ceiling.
mb_internal_encoding("UTF-8");
error_reporting(E_ALL);
ignore_user_abort(true);
set_time_limit(0);
ini_set('memory_limit', '2048M');

/**********************************************
 * 1) DATABASE SETTINGS
 **********************************************/
// NOTE(review): credentials are hard-coded in the source file; consider moving
// them to environment variables or a config file outside the web root.
$host    = 'localhost';
$db      = 'digupdog_FEED';
$user    = 'digupdog_FEEDadmin';
$pass    = 'Raimundinho1';
$charset = 'utf8mb4';

// PDO behaviour: throw exceptions on error, fetch rows as associative arrays,
// and use native (non-emulated) prepared statements.
$options = [
    PDO::ATTR_ERRMODE            => PDO::ERRMODE_EXCEPTION,
    PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
    PDO::ATTR_EMULATE_PREPARES   => false,
];

// Open the shared connection; the script cannot continue without it.
// NOTE(review): the raw exception message is sent to the client on failure.
try {
    $pdo = new PDO("mysql:host=$host;dbname=$db;charset=$charset", $user, $pass, $options);
} catch (Exception $e) {
    die("Falha na conexão: " . $e->getMessage());
}

/**********************************************
 * Tabela para evitar duplicados
 **********************************************/
/**
 * Create the `crawler_seen_links_v7` table when it does not exist yet.
 *
 * The table stores one row per (process_id, link) pair; uniqueness is enforced
 * on the first 100 characters of the process id and the first 500 of the link.
 *
 * @param PDO $pdo open database connection
 * @return void
 */
function setupCrawlerSeenLinks(PDO $pdo) {
    $pdo->exec("CREATE TABLE IF NOT EXISTS `crawler_seen_links_v7` (
        `id` BIGINT AUTO_INCREMENT PRIMARY KEY,
        `process_id` VARCHAR(255) NOT NULL,
        `link` VARCHAR(2048) NOT NULL,
        `depth` INT NOT NULL DEFAULT 0,
        `created_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        UNIQUE KEY `unique_link` (`process_id`(100), `link`(500)),
        INDEX `idx_process` (`process_id`(100))
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;");
}
// Runs on every request; the CREATE TABLE IF NOT EXISTS makes it a no-op once
// the table is in place.
setupCrawlerSeenLinks($pdo);

// Name of the seen-links table, concatenated into the SQL of the crawl code below.
define('TABLE_SEEN_LINKS', 'crawler_seen_links_v7');

/**********************************************
 * 2) FUNÇÕES AUXILIARES
 **********************************************/
/**
 * Validate a URL, retrying once with an "https://" prefix.
 *
 * @param string $url candidate URL, with or without a scheme
 * @return string|false the URL that passed validation (the input itself, or
 *                      the input prefixed with "https://"), or false when
 *                      neither form is valid
 */
function isValidUrl($url) {
    if (filter_var($url, FILTER_VALIDATE_URL)) {
        return $url;
    }

    // No luck as given: strip leading slashes and assume https.
    $candidate = 'https://' . ltrim($url, '/');

    return filter_var($candidate, FILTER_VALIDATE_URL) ? $candidate : false;
}

/**
 * Normalise a link taken from a results page.
 *
 * Redirect-style links of the form "/url?q=<target>&<tracking...>" are
 * unwrapped: the prefix is removed, everything from the first "&" on is
 * dropped and the target is percent-decoded. Any other URL is returned
 * unchanged apart from surrounding whitespace, so ordinary query strings
 * ("?a=1&b=2") are no longer truncated at the first "&".
 *
 * @param string $url raw href value
 * @return string cleaned URL
 */
function cleanUrl($url) {
    $url = trim((string) $url);
    $prefix = '/url?q=';

    if (strpos($url, $prefix) !== 0) {
        return $url;
    }

    $target = substr($url, strlen($prefix));

    // Parameters after the first "&" belong to the redirector, not the target.
    $amp = strpos($target, '&');
    if ($amp !== false) {
        $target = substr($target, 0, $amp);
    }

    // rawurldecode (not urldecode) so a literal "+" in the target is preserved.
    return trim(rawurldecode($target));
}

/**
 * Decide whether a link should be skipped by the crawler.
 *
 * A link is ignored when it is a mailto:/javascript: link, a pure fragment
 * ("#..."), or when its path ends in one of the listed document/archive/binary
 * extensions. The extension is checked against the URL path only, so host
 * names such as "www.executive.com" or query strings that merely contain
 * ".exe"/".doc" no longer trigger a false positive.
 *
 * @param string $url absolute or relative URL
 * @return bool true when the link must not be crawled
 */
function shouldIgnoreLink($url) {
    $lower = strtolower(trim((string) $url));

    foreach (['mailto:', 'javascript:', '#'] as $prefix) {
        if (strpos($lower, $prefix) === 0) return true;
    }

    $path = parse_url($lower, PHP_URL_PATH);
    if ($path === null) {
        // URL without a path component (e.g. "https://example.com").
        return false;
    }
    if (!is_string($path)) {
        // parse_url gave up on a malformed URL: drop query/fragment by hand.
        $path = (string) preg_replace('/[?#].*$/s', '', $lower);
    }

    $exts = ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.zip', '.rar', '.7z', '.exe', '.dmg'];
    foreach ($exts as $e) {
        if (substr($path, -strlen($e)) === $e) return true;
    }
    return false;
}

/**
 * Tell whether a URL points at one of the known search engines.
 *
 * The engine name must match whole host labels: "www.bing.com",
 * "r.search.yahoo.com" and "google.com.br" match, while "plumbing.com"
 * (which merely contains the text "bing.com") does not.
 *
 * @param string $url absolute URL
 * @return bool true when the host belongs to a listed search engine; false
 *              otherwise, including when the URL has no host
 */
function isSearchEngineUrl($url) {
    $engines = ['bing.com', 'google.com', 'yahoo.com', 'duckduckgo.com', 'yandex.com', 'baidu.com'];
    $host = parse_url($url, PHP_URL_HOST);
    if (!$host) return false;

    $host = strtolower($host);
    foreach ($engines as $e) {
        // Anchor on label boundaries: start-of-host or ".", then "." or end-of-host.
        $pattern = '/(^|\.)' . preg_quote($e, '/') . '(\.|$)/';
        if (preg_match($pattern, $host) === 1) return true;
    }
    return false;
}

/**********************************************
 * 2.1) Funções de relevância
 **********************************************/
/**
 * Score how relevant an HTML page is to a list of search terms.
 *
 * Case-insensitive occurrences of every term are counted in four places and
 * weighted: <title> x4, meta description x3, all <h1> x2, all <p> x1.
 *
 * @param string $html        page markup
 * @param array  $searchTerms terms to look for; blank entries are skipped
 * @return int weighted occurrence count, or 999 when no terms were supplied
 */
function calculateRelevanceScore($html, array $searchTerms) {
    if (empty($searchTerms)) {
        return 999;
    }

    $document = new DOMDocument();
    @$document->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($document);

    // Concatenate the lower-cased text of every node matched by an expression.
    $collect = function ($expression) use ($xpath) {
        $text = '';
        foreach ($xpath->query($expression) as $node) {
            $text .= ' ' . mb_strtolower($node->textContent);
        }
        return $text;
    };

    $titles = $xpath->query('//title');
    $title = '';
    if ($titles->length) {
        $title = mb_strtolower($titles->item(0)->textContent);
    }

    $descriptions = $xpath->query('//meta[@name="description"]/@content');
    $description = '';
    if ($descriptions->length) {
        $description = mb_strtolower($descriptions->item(0)->nodeValue);
    }

    // Each entry: [haystack, weight].
    $sections = [
        [$title, 4],
        [$description, 3],
        [$collect('//h1'), 2],
        [$collect('//p'), 1],
    ];

    $total = 0;
    foreach ($searchTerms as $rawTerm) {
        $needle = mb_strtolower(trim($rawTerm));
        if (empty($needle)) {
            continue;
        }
        foreach ($sections as $section) {
            $total += substr_count($section[0], $needle) * $section[1];
        }
    }

    return $total;
}

/**********************************************
 * 3) Extração de metadados
 **********************************************/
/**
 * Extract title, description, lead image and domain from an HTML page.
 *
 * - title: first non-empty of og:title, <title>, first <h1>; else "No title".
 * - description: first non-empty of og:description, meta description, first
 *   <p>, cut to 500 characters (not bytes, so multibyte text is never split);
 *   else "No description".
 * - image: first of og:image, twitter:image, first <img src> that yields a
 *   valid absolute URL; protocol-relative ("//host/...") and site-relative
 *   values are resolved against $url. Empty string when none qualifies.
 * - domain: host part of $url.
 *
 * @param string $html page markup
 * @param string $url  URL the page was fetched from
 * @return array{title:string,description:string,image:string,domain:mixed}
 */
function extractMetadata($html, $url) {
    $dom = new DOMDocument();
    @$dom->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xp = new DOMXPath($dom);

    // Trimmed value of the first node matched by each query, in order.
    $candidates = function (array $queries) use ($xp) {
        $values = [];
        foreach ($queries as $q) {
            $n = $xp->query($q);
            if ($n->length > 0) {
                $val = trim($n->item(0)->nodeValue);
                if ($val !== '') {
                    $values[] = $val;
                }
            }
        }
        return $values;
    };

    // Title
    $titles = $candidates(['//meta[@property="og:title"]/@content', '//title', '//h1']);
    $title = $titles ? $titles[0] : 'No title';

    // Description (character-based cut keeps the string valid UTF-8)
    $descs = $candidates([
        '//meta[@property="og:description"]/@content',
        '//meta[@name="description"]/@content',
        '//p'
    ]);
    $desc = $descs ? mb_substr($descs[0], 0, 500, 'UTF-8') : 'No description';

    // Image
    $base = parse_url($url);
    if (!is_array($base)) {
        $base = [];
    }
    $baseScheme = $base['scheme'] ?? 'https';
    $baseHost = $base['host'] ?? '';

    $image = '';
    $imageCandidates = $candidates([
        '//meta[@property="og:image"]/@content',
        '//meta[@name="twitter:image"]/@content',
        '//img/@src'
    ]);
    foreach ($imageCandidates as $val) {
        if (strpos($val, '//') === 0) {
            // Protocol-relative: keep its own host, borrow the page's scheme.
            $val = $baseScheme . ':' . $val;
        } elseif (parse_url($val, PHP_URL_SCHEME) === null) {
            if ($baseHost === '') {
                continue; // nothing to resolve a relative path against
            }
            $val = $baseScheme . '://' . $baseHost . '/' . ltrim($val, '/');
        }
        if (filter_var($val, FILTER_VALIDATE_URL)) {
            $image = $val;
            break;
        }
    }

    return [
        'title' => $title,
        'description' => $desc,
        'image' => $image,
        'domain' => parse_url($url, PHP_URL_HOST)
    ];
}

/**********************************************
 * 3.1) Extrair links de uma página
 **********************************************/
/**
 * Collect crawlable links from the <a href> attributes of a page.
 *
 * Each href is made absolute against the origin (scheme, host and port) of
 * $baseUrl, cleaned with cleanUrl() and kept only when it validates, is not a
 * search-engine URL and (optionally) stays on the base host. Hrefs using a
 * scheme other than http/https (tel:, ftp:, data:, ...) are skipped instead of
 * being glued onto the base host as if they were relative paths.
 *
 * Relative paths are resolved against the site root, not the directory of
 * $baseUrl.
 *
 * @param string $html           page markup
 * @param string $baseUrl        URL the page was fetched from
 * @param bool   $sameDomainOnly keep only links whose host equals the base host
 * @return array list of unique absolute URLs, in document order
 */
function extractLinks($html, $baseUrl, $sameDomainOnly = true) {
    $links = [];
    $dom = new DOMDocument();
    @$dom->loadHTML('<?xml encoding="UTF-8">' . $html);

    $baseDomain = parse_url($baseUrl, PHP_URL_HOST);
    $baseScheme = parse_url($baseUrl, PHP_URL_SCHEME) ?: 'https';
    $basePort = parse_url($baseUrl, PHP_URL_PORT);
    // Keep a non-default port so links on e.g. host:8080 stay reachable.
    $origin = $baseScheme . '://' . $baseDomain . ($basePort ? ':' . $basePort : '');

    foreach ($dom->getElementsByTagName('a') as $a) {
        $href = trim($a->getAttribute('href'));
        if ($href === '' || shouldIgnoreLink($href)) continue;

        // Normalise to an absolute http(s) URL
        if (strpos($href, '//') === 0) {
            $href = $baseScheme . ':' . $href;
        } elseif (strpos($href, '/') === 0) {
            $href = $origin . $href;
        } elseif (preg_match('#^https?://#i', $href)) {
            // already absolute
        } elseif (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $href)) {
            continue; // some other scheme: not crawlable
        } else {
            $href = $origin . '/' . $href;
        }

        $href = cleanUrl($href);
        if (!isValidUrl($href)) continue;
        if (isSearchEngineUrl($href)) continue;

        if ($sameDomainOnly) {
            $linkDomain = parse_url($href, PHP_URL_HOST);
            if ($linkDomain !== $baseDomain) continue;
        }

        $links[] = $href;
    }

    return array_values(array_unique($links));
}

/**********************************************
 * 4) Funções de busca
 **********************************************/
/**
 * Download a URL with cURL using a desktop-browser user agent.
 *
 * Redirects are followed, gzip/deflate responses are decoded, the connect
 * timeout is 5 seconds and TLS peer verification is turned off.
 *
 * @param string $url     address to fetch
 * @param int    $timeout total request timeout in seconds
 * @return array ['html' => string|false body, 'error' => string cURL error
 *               text ('' when none), 'code' => int HTTP status]
 */
function fetchUrl($url, $timeout = 10) {
    $requestHeaders = [
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language: en-US,en;q=0.5'
    ];
    $userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36';

    $handle = curl_init($url);
    curl_setopt_array($handle, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_TIMEOUT => $timeout,
        CURLOPT_CONNECTTIMEOUT => 5,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_USERAGENT => $userAgent,
        CURLOPT_HTTPHEADER => $requestHeaders,
        CURLOPT_ENCODING => 'gzip,deflate'
    ]);

    // Read body, error text and status before the handle is released.
    $body = curl_exec($handle);
    $response = [
        'html' => $body,
        'error' => curl_error($handle),
        'code' => curl_getinfo($handle, CURLINFO_HTTP_CODE)
    ];
    curl_close($handle);

    return $response;
}

/**
 * Query Bing and harvest outbound links from the result pages.
 *
 * Pages are requested ten results apart ("first" = 1, 11, 21, ...). The loop
 * stops early on the first page that comes back empty. Only absolute http(s)
 * links that are not search-engine URLs, validate and are not on the ignore
 * list are kept.
 *
 * @param string $query    search phrase
 * @param int    $maxPages maximum number of result pages to request
 * @return array unique result URLs (original keys preserved by array_unique)
 */
function searchBing($query, $maxPages = 3) {
    $collected = [];
    $encodedQuery = urlencode($query);

    $page = 0;
    while ($page < $maxPages) {
        $firstResult = $page * 10 + 1;
        $response = fetchUrl("https://www.bing.com/search?q=" . $encodedQuery . "&first=" . $firstResult, 10);
        if (!$response['html']) {
            break;
        }

        $document = new DOMDocument();
        @$document->loadHTML($response['html']);

        foreach ($document->getElementsByTagName('a') as $anchor) {
            $link = $anchor->getAttribute('href');
            if (!$link || strpos($link, 'http') !== 0 || isSearchEngineUrl($link)) {
                continue;
            }
            $link = cleanUrl($link);
            if (!isValidUrl($link) || shouldIgnoreLink($link)) {
                continue;
            }
            $collected[] = $link;
        }

        $page++;
    }

    return array_unique($collected);
}

/**
 * Query the DuckDuckGo HTML endpoint and harvest outbound links.
 *
 * A single results page is requested. Only absolute http(s) links that are not
 * search-engine URLs, validate and are not on the ignore list are kept.
 *
 * @param string $query search phrase
 * @return array unique result URLs (original keys preserved by array_unique);
 *               empty when the request returned no body
 */
function searchDuckDuckGo($query) {
    $response = fetchUrl("https://html.duckduckgo.com/html/?q=" . urlencode($query), 10);
    if (!$response['html']) {
        return [];
    }

    $document = new DOMDocument();
    @$document->loadHTML($response['html']);

    $collected = [];
    foreach ($document->getElementsByTagName('a') as $anchor) {
        $link = $anchor->getAttribute('href');
        if (!$link || strpos($link, 'http') !== 0 || isSearchEngineUrl($link)) {
            continue;
        }
        $link = cleanUrl($link);
        if (!isValidUrl($link) || shouldIgnoreLink($link)) {
            continue;
        }
        $collected[] = $link;
    }

    return array_unique($collected);
}

/**********************************************
 * 5) Inserir no banco pinfeeds
 **********************************************/
/**
 * Insert a crawled page into the `pinfeeds` table unless its link is already there.
 *
 * Up to five tags are derived from the title. The row is attributed to
 * 'Crawler Bot' / user id 1, and 'default_image.jpg' is used when the page has
 * no image.
 *
 * @param PDO   $pdo  open database connection
 * @param array $data keys read: 'url', 'title', 'description', 'image'
 * @return bool true when a row was inserted; false when the link already
 *              exists or the INSERT raised an exception
 */
function insertPinFeed(PDO $pdo, $data) {
    // Skip links that are already stored.
    $lookup = $pdo->prepare("SELECT id FROM pinfeeds WHERE link = ? LIMIT 1");
    $lookup->execute([$data['url']]);
    if ($lookup->fetch()) {
        return false;
    }

    $tagList = implode(',', array_slice(extractTagsFromTitle($data['title']), 0, 5));

    $insert = $pdo->prepare("
        INSERT INTO pinfeeds (title, description, link, thumbnail, source_favicon, tags, author, user_id, created_at, status)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, NOW(), 'active')
    ");

    $row = [
        $data['title'],
        $data['description'],
        $data['url'],
        $data['image'] ?: 'default_image.jpg',
        '',
        $tagList,
        'Crawler Bot',
        1
    ];

    try {
        $insert->execute($row);
    } catch (Exception $e) {
        return false;
    }

    return true;
}

/**
 * Turn a title into a list of keyword tags.
 *
 * Punctuation is stripped, the text is lower-cased and split on single spaces;
 * English stop words and words of two characters or fewer are dropped.
 *
 * @param string $title page title
 * @return array list of tag words in title order
 */
function extractTagsFromTitle($title) {
    $stopWords = ["a", "an", "the", "and", "or", "but", "in", "at", "on", "with", "to", "for", "is", "of", "that", "it", "by", "from", "as", "are", "was", "be", "has", "have", "will", "this"];

    $stripped = preg_replace("/[.,\/#!\$%\^&\*;:{}=\-_~()\[\]\"']/", "", $title);
    $normalized = mb_strtolower($stripped);

    $tags = [];
    foreach (explode(' ', $normalized) as $word) {
        if (in_array($word, $stopWords)) {
            continue;
        }
        if (mb_strlen($word) > 2) {
            $tags[] = $word;
        }
    }

    return $tags;
}

/**********************************************
 * 6) DIRETÓRIO DOS PROCESSOS
 **********************************************/
// Root directory holding one sub-directory per crawl process.
$baseTempDir = __DIR__ . '/../temp/crawler_v7/';
if (!is_dir($baseTempDir)) @mkdir($baseTempDir, 0777, true);

// Remove process directories untouched for more than one day.
// glob() may return false on error, hence the "?: []" guards; every regular
// file is removed (not only names containing a dot) so rmdir() can succeed.
foreach (glob($baseTempDir . '*') ?: [] as $f) {
    if (is_dir($f) && (time() - filemtime($f) > 86400)) {
        foreach (glob($f . '/*') ?: [] as $staleFile) {
            if (is_file($staleFile)) {
                @unlink($staleFile);
            }
        }
        @rmdir($f);
    }
}

// Resolve the process id: reuse the one supplied by the request (restricted to
// letters, digits, "_" and "-" because it becomes a directory name), otherwise
// generate a fresh one.
if (!empty($_REQUEST['process_id'])) {
    $processID = preg_replace('/[^a-zA-Z0-9_\-]/', '', $_REQUEST['process_id']);
    if (!$processID) $processID = uniqid('crawl_', true);
} else {
    $processID = uniqid('crawl_', true);
}

// Per-process working directory and its state file.
$procDir = $baseTempDir . $processID . '/';
$progressFilePath = $procDir . 'progress.json';

/**********************************************
 * 7) AÇÕES DO MANAGER
 **********************************************/
// Dispatch the ?action=... requests issued by the process manager page.
// NOTE(review): these state-changing actions are triggered by plain GET links
// with no authentication or CSRF token visible in this file.
if (isset($_GET['action'])) {
    $action = $_GET['action'];

    // List every known process.
    if ($action === 'manager') {
        renderManager($baseTempDir);
        exit;
    }

    // Pause / resume: flip the is_paused flag inside the progress file.
    if (($action === 'pause' || $action === 'resume') && file_exists($progressFilePath)) {
        $state = json_decode((string) file_get_contents($progressFilePath), true);
        // Only write back when the file held a valid state; otherwise an
        // unreadable file would be replaced by a stub containing just the flag.
        if (is_array($state)) {
            $state['is_paused'] = ($action === 'pause');
            $encoded = json_encode($state);
            if ($encoded !== false) {
                file_put_contents($progressFilePath, $encoded, LOCK_EX);
            }
        }
        header("Location: ?action=manager");
        exit;
    }

    // Stop: delete the process directory and everything in it.
    if ($action === 'stop') {
        if (is_dir($procDir)) {
            foreach (glob($procDir . '*') ?: [] as $procFile) {
                if (is_file($procFile)) {
                    @unlink($procFile);
                }
            }
            @rmdir($procDir);
        }
        header("Location: ?action=manager");
        exit;
    }
}

/**********************************************
 * 8) PROCESSAR FORMULÁRIO
 **********************************************/
// Handle the "start crawler" form: read the settings, gather the initial
// links (seed URLs plus search-engine results), write the first progress
// file and redirect to the progress view.
if ($_SERVER['REQUEST_METHOD'] === 'POST' && isset($_POST['search_terms'])) {

    // Read an integer field and force it into [$min, $max]. The HTML form only
    // enforces these limits client-side, so they are re-applied here.
    $intField = function ($key, $default, $min, $max) {
        $raw = $_POST[$key] ?? $default;
        $value = is_scalar($raw) ? (int) $raw : (int) $default;
        return max($min, min($max, $value));
    };
    // Read a text field; array-valued input is treated as empty.
    $textField = function ($key) {
        $raw = $_POST[$key] ?? '';
        return is_string($raw) ? trim($raw) : '';
    };

    $searchTermsRaw = $textField('search_terms');
    $seedUrlsRaw = $textField('seed_urls');
    $maxDepth = $intField('max_depth', 2, 1, 5);
    $maxPosts = $intField('max_posts', 100, 1, 10000);
    $maxSearchPages = $intField('max_search_pages', 3, 1, 10);
    $minRelevance = $intField('min_relevance', 2, 0, 20);
    $sameDomainOnly = isset($_POST['same_domain_only']);

    $engines = [];
    if (!empty($_POST['engine_bing'])) $engines[] = 'bing';
    if (!empty($_POST['engine_ddg'])) $engines[] = 'duckduckgo';

    // Search terms: one per line, blanks removed, re-indexed so the list is
    // stored as a JSON array.
    $searchTerms = array_values(array_filter(array_map('trim', explode("\n", $searchTermsRaw))));

    // Seed URLs: one per line, validated and filtered.
    $seedUrls = [];
    foreach (explode("\n", $seedUrlsRaw) as $line) {
        $line = trim($line);
        if ($line && ($v = isValidUrl($line)) && !shouldIgnoreLink($v)) {
            $seedUrls[] = $v;
        }
    }

    // Prepare the process directory (emptied when it already exists).
    if (is_dir($procDir)) {
        foreach (glob($procDir . '*') ?: [] as $oldFile) {
            if (is_file($oldFile)) {
                @unlink($oldFile);
            }
        }
    } else {
        @mkdir($procDir, 0777, true);
    }

    // Forget links seen by a previous run with the same process id.
    $pdo->prepare("DELETE FROM " . TABLE_SEEN_LINKS . " WHERE process_id = ?")->execute([$processID]);

    // Initial queue, keyed by URL while building so duplicates collapse.
    $allLinks = [];

    foreach ($seedUrls as $url) {
        $allLinks[$url] = ['url' => $url, 'depth' => 0];
    }

    // Run every term through every selected engine.
    foreach ($searchTerms as $term) {
        foreach ($engines as $engine) {
            $found = [];
            if ($engine === 'bing') {
                $found = searchBing($term, $maxSearchPages);
            } elseif ($engine === 'duckduckgo') {
                $found = searchDuckDuckGo($term);
            }
            foreach ($found as $url) {
                if (!isset($allLinks[$url])) {
                    $allLinks[$url] = ['url' => $url, 'depth' => 0];
                }
            }
        }
    }

    $allLinks = array_values($allLinks);

    // Initial state persisted for the crawl loop.
    $data = [
        'total_links' => count($allLinks),
        'processed_links' => 0,
        'imported_links' => 0,
        'ignored_links' => 0,
        'error_links' => 0,
        'queue' => $allLinks,
        'search_terms' => $searchTerms,
        'max_depth' => $maxDepth,
        'max_posts' => $maxPosts,
        'min_relevance' => $minRelevance,
        'same_domain_only' => $sameDomainOnly,
        'start_time' => time(),
        'is_paused' => false,
        'logs' => []
    ];

    file_put_contents($progressFilePath, json_encode($data), LOCK_EX);

    header("Location: ?process_id=" . urlencode($processID));
    exit;
}

/**********************************************
 * 9) PROCESSAR CRAWLING (se já existe processo)
 **********************************************/
// Crawl step: each request to an existing process handles one small batch of
// queued links, saves the state and renders the progress page.
if (file_exists($progressFilePath)) {
    $data = json_decode((string) file_get_contents($progressFilePath), true);

    if (!is_array($data) || !$data) {
        renderForm($processID);
        exit;
    }

    // Save the state and return it as stored.
    // - A pause requested while this request was busy is kept rather than
    //   overwritten by the in-memory copy.
    // - Nothing is written when the file has vanished (process was stopped) or
    //   when encoding fails, so a bad string can never blank the progress file.
    $persist = function (array $state) use ($progressFilePath) {
        clearstatcache(true, $progressFilePath);
        if (!file_exists($progressFilePath)) {
            return $state;
        }
        $onDisk = json_decode((string) file_get_contents($progressFilePath), true);
        if (is_array($onDisk) && !empty($onDisk['is_paused'])) {
            $state['is_paused'] = true;
        }
        $encoded = json_encode($state);
        if ($encoded !== false) {
            file_put_contents($progressFilePath, $encoded, LOCK_EX);
        }
        return $state;
    };

    // Paused: only show the progress page.
    if (!empty($data['is_paused'])) {
        renderProgress($data, $processID);
        exit;
    }

    // Import target reached.
    if ($data['imported_links'] >= $data['max_posts']) {
        $data['logs'][] = ['type' => 'success', 'msg' => "Meta de {$data['max_posts']} posts atingida!"];
        $data = $persist($data);
        renderProgress($data, $processID, true);
        exit;
    }

    // Queue exhausted.
    if (empty($data['queue'])) {
        renderProgress($data, $processID, true);
        exit;
    }

    // Take the next batch off the front of the queue.
    $batchSize = 5;
    $batch = array_splice($data['queue'], 0, $batchSize);

    // URL => true for everything queued or in this batch. The queue is a plain
    // list, so membership cannot be tested with isset() on the queue itself.
    $known = [];
    foreach ($data['queue'] as $queuedItem) {
        $known[$queuedItem['url']] = true;
    }
    foreach ($batch as $batchItem) {
        $known[$batchItem['url']] = true;
    }

    // Prepared once and reused for every link in the batch.
    $seenCheck = $pdo->prepare("SELECT id FROM " . TABLE_SEEN_LINKS . " WHERE process_id = ? AND link = ? LIMIT 1");
    $seenInsert = $pdo->prepare("INSERT INTO " . TABLE_SEEN_LINKS . " (process_id, link, depth) VALUES (?, ?, ?)");

    foreach ($batch as $position => $item) {
        // Stop as soon as the target is met and put the unprocessed remainder
        // back at the front of the queue, so max_posts is never overshot.
        if ($data['imported_links'] >= $data['max_posts']) {
            $data['queue'] = array_merge(array_slice($batch, $position), $data['queue']);
            break;
        }

        $url = $item['url'];
        $depth = $item['depth'];

        // Already handled by this process?
        $seenCheck->execute([$processID, $url]);
        if ($seenCheck->fetch()) {
            $data['ignored_links']++;
            continue;
        }

        // Mark as seen; a failed insert (e.g. unique-key clash) counts as ignored.
        try {
            $seenInsert->execute([$processID, $url, $depth]);
        } catch (Exception $e) {
            $data['ignored_links']++;
            continue;
        }

        // Fetch the page
        $result = fetchUrl($url);
        $data['processed_links']++;

        if (!$result['html'] || $result['code'] >= 400) {
            $data['error_links']++;
            $data['logs'][] = ['type' => 'error', 'msg' => "Erro: $url (HTTP {$result['code']})"];
            continue;
        }

        // Relevance filter
        $score = calculateRelevanceScore($result['html'], $data['search_terms']);

        if ($score < $data['min_relevance']) {
            $data['ignored_links']++;
            $data['logs'][] = ['type' => 'warning', 'msg' => "Baixa relevância ($score): $url"];
            continue;
        }

        // Metadata
        $meta = extractMetadata($result['html'], $url);
        $meta['url'] = $url;

        // Store. The title is cut by characters: a byte-wise cut could split a
        // multibyte character and make the whole log un-encodable as JSON.
        if (insertPinFeed($pdo, $meta)) {
            $data['imported_links']++;
            $data['logs'][] = ['type' => 'success', 'msg' => "Importado: " . mb_substr($meta['title'], 0, 50, 'UTF-8')];
        } else {
            $data['ignored_links']++;
            $data['logs'][] = ['type' => 'warning', 'msg' => "Duplicado: $url"];
        }

        // Deep crawl: queue up to 20 links from this page while depth and the
        // import target allow it.
        if ($depth < $data['max_depth'] && $data['imported_links'] < $data['max_posts']) {
            $newLinks = extractLinks($result['html'], $url, $data['same_domain_only']);
            $added = 0;
            foreach (array_slice($newLinks, 0, 20) as $newUrl) {
                if (!isset($known[$newUrl])) {
                    $known[$newUrl] = true;
                    $data['queue'][] = ['url' => $newUrl, 'depth' => $depth + 1];
                    $data['total_links']++;
                    $added++;
                }
            }
            if ($added > 0) {
                $data['logs'][] = ['type' => 'info', 'msg' => "Encontrados $added novos links em $url"];
            }
        }
    }

    // Keep only the 50 most recent log entries.
    $data['logs'] = array_slice($data['logs'], -50);

    // Save progress
    $data = $persist($data);

    // Render; the page reloads itself to run the next batch.
    renderProgress($data, $processID);
    exit;
}

// No process yet: show the start form.
renderForm($processID);
exit;

/**********************************************
 * FUNÇÕES DE RENDERIZAÇÃO
 **********************************************/

/**
 * Output the process manager page.
 *
 * Lists every sub-directory of $baseTempDir that contains a decodable
 * progress.json, showing imported/total counts, paused/running status and
 * links to pause or resume, view and stop the process.
 *
 * @param string $baseTempDir directory holding one sub-directory per process
 *                            (concatenated directly with '*', so it is
 *                            expected to end with a slash)
 * @return void the HTML is written straight to the output
 */
function renderManager($baseTempDir) {
    // One directory per process.
    $dirs = glob($baseTempDir . '*', GLOB_ONLYDIR);
    ?>
    <!DOCTYPE html>
    <html lang="pt-BR">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Gerenciador - Flowbot v7.0</title>
        <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
        <style>
            * { box-sizing: border-box; margin: 0; padding: 0; }
            body {
                font-family: 'Inter', sans-serif;
                background: linear-gradient(135deg, #0f0f23 0%, #1a1a3e 100%);
                color: #e0e0e0;
                min-height: 100vh;
                padding: 20px;
            }
            .container {
                max-width: 1000px;
                margin: 0 auto;
            }
            h1 {
                color: #00ff88;
                margin-bottom: 20px;
                font-size: 28px;
            }
            .btn {
                display: inline-block;
                padding: 8px 16px;
                border-radius: 6px;
                text-decoration: none;
                font-weight: 500;
                font-size: 14px;
                margin: 2px;
                transition: all 0.2s;
            }
            .btn-primary { background: #3b82f6; color: white; }
            .btn-success { background: #22c55e; color: white; }
            .btn-warning { background: #f59e0b; color: white; }
            .btn-danger { background: #ef4444; color: white; }
            .btn:hover { transform: translateY(-1px); opacity: 0.9; }
            table {
                width: 100%;
                border-collapse: collapse;
                background: rgba(255,255,255,0.05);
                border-radius: 12px;
                overflow: hidden;
            }
            th, td {
                padding: 15px;
                text-align: left;
                border-bottom: 1px solid rgba(255,255,255,0.1);
            }
            th { background: rgba(0,255,136,0.1); color: #00ff88; }
            .status-running { color: #22c55e; }
            .status-paused { color: #f59e0b; }
            .empty { text-align: center; padding: 40px; color: #888; }
        </style>
    </head>
    <body>
        <div class="container">
            <h1>🚀 Gerenciador de Processos</h1>
            <p style="margin-bottom: 20px;">
                <a href="?" class="btn btn-primary">+ Novo Processo</a>
            </p>

            <?php if (empty($dirs)): ?>
                <div class="empty">Nenhum processo ativo.</div>
            <?php else: ?>
                <table>
                    <thead>
                        <tr>
                            <th>Processo</th>
                            <th>Progresso</th>
                            <th>Status</th>
                            <th>Ações</th>
                        </tr>
                    </thead>
                    <tbody>
                    <?php foreach ($dirs as $d):
                        // The directory name is the process id.
                        $pid = basename($d);
                        $pf = $d . "/progress.json";
                        // Skip directories without a readable, valid state file.
                        if (!file_exists($pf)) continue;
                        $dat = json_decode(file_get_contents($pf), true);
                        if (!$dat) continue;

                        $isPaused = !empty($dat['is_paused']);
                        $imported = $dat['imported_links'] ?? 0;
                        $total = $dat['total_links'] ?? 0;
                        // Percentage is imported links over total links discovered.
                        $percent = $total > 0 ? round(($imported / $total) * 100) : 0;
                    ?>
                        <tr>
                            <td><strong><?= htmlspecialchars($pid) ?></strong></td>
                            <td><?= $imported ?> / <?= $total ?> (<?= $percent ?>%)</td>
                            <td class="<?= $isPaused ? 'status-paused' : 'status-running' ?>">
                                <?= $isPaused ? '⏸️ Pausado' : '▶️ Executando' ?>
                            </td>
                            <td>
                                <?php if ($isPaused): ?>
                                    <a href="?action=resume&process_id=<?= urlencode($pid) ?>" class="btn btn-success">▶ Resume</a>
                                <?php else: ?>
                                    <a href="?action=pause&process_id=<?= urlencode($pid) ?>" class="btn btn-warning">⏸ Pause</a>
                                <?php endif; ?>
                                <a href="?process_id=<?= urlencode($pid) ?>" class="btn btn-primary">👁 Ver</a>
                                <a href="?action=stop&process_id=<?= urlencode($pid) ?>" class="btn btn-danger" onclick="return confirm('Tem certeza?')">⏹ Stop</a>
                            </td>
                        </tr>
                    <?php endforeach; ?>
                    </tbody>
                </table>
            <?php endif; ?>
        </div>
    </body>
    </html>
    <?php
}

/**
 * Output the "start crawler" form.
 *
 * The form POSTs back to the same script with the process id in the query
 * string. Fields: search_terms, seed_urls, engine_bing, engine_ddg, max_depth,
 * max_posts, max_search_pages, min_relevance and same_domain_only.
 *
 * @param string $processID process id embedded (HTML-escaped) in the form action
 * @return void the HTML is written straight to the output
 */
function renderForm($processID) {
    ?>
    <!DOCTYPE html>
    <html lang="pt-BR">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Crawler Unificado - Flowbot v7.0</title>
        <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
        <style>
            * { box-sizing: border-box; margin: 0; padding: 0; }
            body {
                font-family: 'Inter', sans-serif;
                background: linear-gradient(135deg, #0f0f23 0%, #1a1a3e 100%);
                color: #e0e0e0;
                min-height: 100vh;
                display: flex;
                align-items: center;
                justify-content: center;
                padding: 20px;
            }
            .form-container {
                background: rgba(255,255,255,0.05);
                backdrop-filter: blur(10px);
                border: 1px solid rgba(255,255,255,0.1);
                border-radius: 20px;
                padding: 40px;
                max-width: 600px;
                width: 100%;
            }
            h1 {
                color: #00ff88;
                margin-bottom: 10px;
                font-size: 28px;
                display: flex;
                align-items: center;
                gap: 10px;
            }
            .subtitle {
                color: #888;
                margin-bottom: 30px;
            }
            .form-group {
                margin-bottom: 20px;
            }
            label {
                display: block;
                margin-bottom: 8px;
                font-weight: 500;
                color: #ccc;
            }
            textarea, input[type="number"], input[type="text"] {
                width: 100%;
                padding: 12px 15px;
                border: 1px solid rgba(255,255,255,0.2);
                border-radius: 10px;
                background: rgba(0,0,0,0.3);
                color: #fff;
                font-family: 'Inter', sans-serif;
                font-size: 14px;
                transition: border-color 0.2s;
            }
            textarea:focus, input:focus {
                outline: none;
                border-color: #00ff88;
            }
            textarea { resize: vertical; min-height: 100px; }
            .checkbox-group {
                display: flex;
                flex-wrap: wrap;
                gap: 15px;
                margin-top: 10px;
            }
            .checkbox-label {
                display: flex;
                align-items: center;
                gap: 8px;
                cursor: pointer;
                padding: 10px 15px;
                background: rgba(0,0,0,0.2);
                border-radius: 8px;
                border: 1px solid rgba(255,255,255,0.1);
                transition: all 0.2s;
            }
            .checkbox-label:hover {
                border-color: #00ff88;
            }
            .checkbox-label input:checked + span {
                color: #00ff88;
            }
            .row {
                display: grid;
                grid-template-columns: 1fr 1fr;
                gap: 15px;
            }
            .btn {
                display: inline-block;
                padding: 15px 30px;
                border: none;
                border-radius: 10px;
                font-weight: 600;
                font-size: 16px;
                cursor: pointer;
                transition: all 0.2s;
                text-decoration: none;
            }
            .btn-primary {
                background: linear-gradient(135deg, #00ff88, #00cc6a);
                color: #000;
                width: 100%;
            }
            .btn-primary:hover {
                transform: translateY(-2px);
                box-shadow: 0 5px 20px rgba(0,255,136,0.3);
            }
            .btn-link {
                color: #00ff88;
                background: none;
                padding: 10px;
            }
            .actions {
                display: flex;
                justify-content: space-between;
                align-items: center;
                margin-top: 20px;
            }
            @media (max-width: 500px) {
                .row { grid-template-columns: 1fr; }
                .form-container { padding: 20px; }
            }
        </style>
    </head>
    <body>
        <div class="form-container">
            <h1>🚀 Crawler Unificado v7.0</h1>
            <p class="subtitle">Busca inteligente com múltiplos motores</p>

            <form method="POST" action="?process_id=<?= htmlspecialchars($processID) ?>">
                <div class="form-group">
                    <label>🔍 Termos de busca (um por linha)</label>
                    <textarea name="search_terms" placeholder="web scraping tutorial&#10;python crawler&#10;data extraction"></textarea>
                </div>

                <div class="form-group">
                    <label>🌐 URLs seed (opcional, um por linha)</label>
                    <textarea name="seed_urls" rows="3" placeholder="https://example.com&#10;https://blog.example.org"></textarea>
                </div>

                <div class="form-group">
                    <label>🔎 Motores de busca</label>
                    <div class="checkbox-group">
                        <label class="checkbox-label">
                            <input type="checkbox" name="engine_bing" value="1" checked>
                            <span>🔵 Bing</span>
                        </label>
                        <label class="checkbox-label">
                            <input type="checkbox" name="engine_ddg" value="1" checked>
                            <span>🦆 DuckDuckGo</span>
                        </label>
                    </div>
                </div>

                <div class="row">
                    <div class="form-group">
                        <label>📊 Profundidade máxima</label>
                        <input type="number" name="max_depth" value="2" min="1" max="5">
                    </div>
                    <div class="form-group">
                        <label>📝 Máx. posts a importar</label>
                        <input type="number" name="max_posts" value="100" min="1" max="10000">
                    </div>
                </div>

                <div class="row">
                    <div class="form-group">
                        <label>📄 Páginas por buscador</label>
                        <input type="number" name="max_search_pages" value="3" min="1" max="10">
                    </div>
                    <div class="form-group">
                        <label>⭐ Relevância mínima</label>
                        <input type="number" name="min_relevance" value="2" min="0" max="20">
                    </div>
                </div>

                <div class="form-group">
                    <div class="checkbox-group">
                        <label class="checkbox-label">
                            <input type="checkbox" name="same_domain_only" value="1">
                            <span>🔒 Apenas mesmo domínio (deep crawl)</span>
                        </label>
                    </div>
                </div>

                <div class="actions">
                    <a href="?action=manager" class="btn btn-link">📋 Gerenciador</a>
                    <button type="submit" class="btn btn-primary">▶️ Iniciar Crawler</button>
                </div>
            </form>
        </div>
    </body>
    </html>
    <?php
}

/**
 * Renders the complete HTML progress page for a single crawler process.
 *
 * The page is echoed directly to the output buffer. While the process is
 * neither finished nor paused, a meta-refresh tag reloads the page every
 * 2 seconds for the same process id.
 *
 * Keys read from $data (all optional, missing counters default to 0):
 *   - total_links, processed_links, imported_links, ignored_links, error_links
 *   - queue      (array; only its size is displayed)
 *   - start_time (Unix timestamp; defaults to "now" so elapsed is 0)
 *   - is_paused  (truthy when the process is paused)
 *   - logs       (list of arrays with 'type' and 'msg'; the 30 newest are shown)
 *
 * @param array  $data      Process state as described above.
 * @param string $processID Identifier of the process; escaped before output.
 * @param bool   $finished  True when the process has completed.
 */
function renderProgress($data, $processID, $finished = false) {
    // Default every counter so a partially-initialised state does not emit
    // PHP warnings into the middle of the HTML document.
    $total = max(1, (int)($data['total_links'] ?? 0));
    $processed = (int)($data['processed_links'] ?? 0);
    $imported = (int)($data['imported_links'] ?? 0);
    $ignored = (int)($data['ignored_links'] ?? 0);
    $errors = (int)($data['error_links'] ?? 0);
    $queued = count($data['queue'] ?? []);

    // Clamp to 0..100: processed can exceed total_links, and an unclamped
    // value would make the progress bar overflow its container.
    $percent = (int) min(100, max(0, round(($processed / $total) * 100)));

    // Guard against a missing start_time and against clock skew.
    $elapsed = max(0, time() - (int)($data['start_time'] ?? time()));
    $rate = $elapsed > 0 ? round($processed / $elapsed, 2) : 0;

    // gmdate("H:i:s") wraps at 24h; format manually so long runs stay correct.
    $elapsedLabel = sprintf(
        '%02d:%02d:%02d',
        intdiv($elapsed, 3600),
        intdiv($elapsed % 3600, 60),
        $elapsed % 60
    );

    $isPaused = !empty($data['is_paused']);
    ?>
    <!DOCTYPE html>
    <html lang="pt-BR">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <?php if (!$finished && !$isPaused): ?>
        <meta http-equiv="refresh" content="2;url=?process_id=<?= urlencode($processID) ?>">
        <?php endif; ?>
        <title>Progresso - <?= htmlspecialchars($processID) ?></title>
        <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono&display=swap" rel="stylesheet">
        <style>
            * { box-sizing: border-box; margin: 0; padding: 0; }
            body {
                font-family: 'Inter', sans-serif;
                background: linear-gradient(135deg, #0f0f23 0%, #1a1a3e 100%);
                color: #e0e0e0;
                min-height: 100vh;
                padding: 20px;
            }
            .container {
                max-width: 900px;
                margin: 0 auto;
            }
            h1 {
                color: #00ff88;
                margin-bottom: 5px;
                font-size: 24px;
            }
            .subtitle { color: #888; margin-bottom: 20px; font-size: 14px; }
            .status-bar {
                display: flex;
                gap: 10px;
                margin-bottom: 20px;
            }
            .status-badge {
                padding: 8px 16px;
                border-radius: 20px;
                font-size: 14px;
                font-weight: 500;
            }
            .status-running { background: rgba(34,197,94,0.2); color: #22c55e; }
            .status-paused { background: rgba(245,158,11,0.2); color: #f59e0b; }
            .status-done { background: rgba(59,130,246,0.2); color: #3b82f6; }
            .btn {
                display: inline-block;
                padding: 8px 16px;
                border-radius: 6px;
                text-decoration: none;
                font-weight: 500;
                font-size: 14px;
                transition: all 0.2s;
            }
            .btn-primary { background: #3b82f6; color: white; }
            .btn-success { background: #22c55e; color: white; }
            .btn-warning { background: #f59e0b; color: white; }
            .btn-danger { background: #ef4444; color: white; }
            .progress-container {
                background: rgba(255,255,255,0.05);
                border-radius: 15px;
                padding: 30px;
                margin-bottom: 20px;
            }
            .progress-bar {
                height: 30px;
                background: rgba(0,0,0,0.3);
                border-radius: 15px;
                overflow: hidden;
                margin-bottom: 20px;
            }
            .progress-fill {
                height: 100%;
                background: linear-gradient(90deg, #00ff88, #00cc6a);
                border-radius: 15px;
                display: flex;
                align-items: center;
                justify-content: center;
                color: #000;
                font-weight: 600;
                transition: width 0.5s;
            }
            .stats-grid {
                display: grid;
                grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
                gap: 15px;
            }
            .stat-card {
                background: rgba(0,0,0,0.2);
                padding: 15px;
                border-radius: 10px;
                text-align: center;
            }
            .stat-value {
                font-size: 28px;
                font-weight: 700;
                color: #00ff88;
            }
            .stat-value.imported { color: #22c55e; }
            .stat-value.ignored { color: #f59e0b; }
            .stat-value.errors { color: #ef4444; }
            .stat-label {
                font-size: 12px;
                color: #888;
                margin-top: 5px;
            }
            .terminal {
                background: #0a0a15;
                border-radius: 10px;
                padding: 15px;
                font-family: 'JetBrains Mono', monospace;
                font-size: 13px;
                max-height: 300px;
                overflow-y: auto;
            }
            .log-line {
                padding: 4px 0;
                border-bottom: 1px solid rgba(255,255,255,0.05);
            }
            .log-success { color: #22c55e; }
            .log-warning { color: #f59e0b; }
            .log-error { color: #ef4444; }
            .log-info { color: #3b82f6; }
            .actions {
                display: flex;
                gap: 10px;
                margin-bottom: 20px;
            }
        </style>
    </head>
    <body>
        <div class="container">
            <h1>🚀 Crawler v7.0</h1>
            <p class="subtitle">Processo: <?= htmlspecialchars($processID) ?></p>

            <div class="status-bar">
                <?php if ($finished): ?>
                    <span class="status-badge status-done">✅ Concluído</span>
                <?php elseif ($isPaused): ?>
                    <span class="status-badge status-paused">⏸️ Pausado</span>
                <?php else: ?>
                    <span class="status-badge status-running">▶️ Executando</span>
                <?php endif; ?>
            </div>

            <div class="actions">
                <?php if (!$finished): ?>
                    <?php if ($isPaused): ?>
                        <a href="?action=resume&process_id=<?= urlencode($processID) ?>" class="btn btn-success">▶ Continuar</a>
                    <?php else: ?>
                        <a href="?action=pause&process_id=<?= urlencode($processID) ?>" class="btn btn-warning">⏸ Pausar</a>
                    <?php endif; ?>
                <?php endif; ?>
                <a href="?action=stop&process_id=<?= urlencode($processID) ?>" class="btn btn-danger">⏹ Parar</a>
                <a href="?action=manager" class="btn btn-primary">📋 Gerenciador</a>
                <a href="?" class="btn btn-primary">+ Novo</a>
            </div>

            <div class="progress-container">
                <div class="progress-bar">
                    <div class="progress-fill" style="width: <?= $percent ?>%">
                        <?= $percent ?>%
                    </div>
                </div>

                <div class="stats-grid">
                    <div class="stat-card">
                        <div class="stat-value"><?= $processed ?></div>
                        <div class="stat-label">Processados</div>
                    </div>
                    <div class="stat-card">
                        <div class="stat-value imported"><?= $imported ?></div>
                        <div class="stat-label">Importados</div>
                    </div>
                    <div class="stat-card">
                        <div class="stat-value ignored"><?= $ignored ?></div>
                        <div class="stat-label">Ignorados</div>
                    </div>
                    <div class="stat-card">
                        <div class="stat-value errors"><?= $errors ?></div>
                        <div class="stat-label">Erros</div>
                    </div>
                    <div class="stat-card">
                        <div class="stat-value"><?= $queued ?></div>
                        <div class="stat-label">Na fila</div>
                    </div>
                    <div class="stat-card">
                        <div class="stat-value"><?= $rate ?></div>
                        <div class="stat-label">Links/seg</div>
                    </div>
                    <div class="stat-card">
                        <div class="stat-value"><?= $elapsedLabel ?></div>
                        <div class="stat-label">Tempo</div>
                    </div>
                    <div class="stat-card">
                        <div class="stat-value"><?= round(memory_get_usage(true)/1024/1024, 1) ?></div>
                        <div class="stat-label">MB RAM</div>
                    </div>
                </div>
            </div>

            <div class="terminal">
                <?php
                // Newest entries first; only the 30 most recent are rendered.
                $logs = array_reverse($data['logs'] ?? []);
                foreach (array_slice($logs, 0, 30) as $log):
                    // The type ends up inside an HTML attribute, so it must be
                    // escaped (quotes included) to prevent attribute injection.
                    $class = 'log-' . htmlspecialchars((string)($log['type'] ?? 'info'), ENT_QUOTES, 'UTF-8');
                ?>
                    <div class="log-line <?= $class ?>"><?= htmlspecialchars($log['msg'] ?? '') ?></div>
                <?php endforeach; ?>
                <?php if (empty($logs)): ?>
                    <div class="log-line log-info">Aguardando processamento...</div>
                <?php endif; ?>
            </div>
        </div>
    </body>
    </html>
    <?php
}
