<?php
// Start (or resume) the PHP session.
// NOTE(review): nothing visible in this file reads or writes $_SESSION —
// confirm whether this call is still needed for a long-running batch page.
session_start();

// Database connection settings. Each value may be overridden through an
// environment variable so credentials do not have to live in the source tree;
// the literals below are fallbacks kept only for backward compatibility.
// NOTE(review): the fallback password is stored in plain text — rotate it and
// remove the literal once the environment variables are configured.
$host = getenv('FEED_DB_HOST') ?: 'localhost';
$db = getenv('FEED_DB_NAME') ?: 'digupdog_FEED';
$user = getenv('FEED_DB_USER') ?: 'digupdog_FEEDadmin';
$pass = getenv('FEED_DB_PASS') ?: 'Raimundinho1';
$charset = 'utf8mb4';

// Number of pinfeeds rows fetched per batch (used as the LIMIT row count).
$batchSize = 1000;
// File holding the offset of the next batch, so a restarted run can resume.
$progressFile = 'progress_index_2.txt';
// Seconds of wall-clock time after which the script saves progress and reloads.
$maxExecutionTime = 120;

// Report every error directly in the page output.
ini_set('display_errors', 1);
ini_set('display_startup_errors', 1);
error_reporting(E_ALL);

// Lift PHP's own execution-time limit; restarts are driven by $maxExecutionTime instead.
set_time_limit(0);
ini_set('memory_limit', '3G');

// Buffer output so progress lines can be pushed explicitly with ob_flush()/flush().
ob_start();

// Wall-clock start of this run, compared against $maxExecutionTime by shouldRestart().
$startTime = time();
/**
 * Reports whether the current run has used up its wall-clock budget.
 *
 * @param int $startTime        Unix timestamp at which the run began.
 * @param int $maxExecutionTime Budget in seconds.
 * @return bool True only when strictly more than $maxExecutionTime seconds
 *              have elapsed since $startTime.
 */
function shouldRestart($startTime, $maxExecutionTime) {
    $elapsedSeconds = time() - $startTime;

    if ($elapsedSeconds > $maxExecutionTime) {
        return true;
    }

    return false;
}

// Page header.
// The fallback reload (for a run that dies without finishing) is armed from the
// window "load" event and is skipped once the run has flagged itself as done.
// A plain <meta http-equiv='refresh'> would also fire after a successful run;
// by then the progress file has been deleted, so the whole table would be
// indexed again from offset 0 and every occurrence_count inflated.
echo "<html><head><script>window.addEventListener('load', function () { if (!window.indexingDone) { setTimeout(function () { window.location.reload(); }, 120000); } });</script></head><body><pre>";

try {
    $conn = new PDO("mysql:host=$host;dbname=$db;charset=$charset", $user, $pass);
    $conn->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

    // Total number of rows to walk through; cast so later comparisons are int-to-int.
    $totalRecords = (int) $conn->query("SELECT COUNT(*) FROM pinfeeds")->fetchColumn();
    echo "Total de registros a processar: $totalRecords\n";
    ob_flush();
    flush();

    // Resume from the saved offset if a previous run left one behind.
    // A corrupt or negative value falls back to 0 instead of producing an invalid LIMIT.
    $offset = file_exists($progressFile) ? max(0, (int) file_get_contents($progressFile)) : 0;
    $processedRecords = $offset;

    // Prepared once and re-executed per batch. ORDER BY id gives the paging a
    // stable row order; without it consecutive LIMIT windows are not guaranteed
    // to be disjoint, so rows could be skipped or indexed twice.
    $batchStmt = $conn->prepare(
        "SELECT id, title, description FROM pinfeeds ORDER BY id LIMIT :offset, :batchSize"
    );

    for (; $offset < $totalRecords; $offset += $batchSize) {
        // Time budget exhausted: persist where we are and let the browser start a fresh request.
        if (shouldRestart($startTime, $maxExecutionTime)) {
            file_put_contents($progressFile, $offset);
            echo "Reiniciando o script para evitar timeout...\n<script>window.location.reload();</script>";
            exit;
        }

        $batchStmt->bindValue(':offset', $offset, PDO::PARAM_INT);
        $batchStmt->bindValue(':batchSize', $batchSize, PDO::PARAM_INT);
        $batchStmt->execute();
        $feeds = $batchStmt->fetchAll(PDO::FETCH_ASSOC);

        foreach ($feeds as $feed) {
            indexContent($conn, $feed['id'], $feed['title'], $feed['description']);
            $processedRecords++;
        }

        // Record the offset of the NEXT batch only after this one is fully indexed.
        file_put_contents($progressFile, $offset + $batchSize);

        echo sprintf(
            "Lote processado: %d de %d | Total processado: %d\n",
            min($offset + $batchSize, $totalRecords),
            $totalRecords,
            $processedRecords
        );
        ob_flush();
        flush();
    }

    // Finished: drop the progress marker so a later manual run starts from the beginning.
    if (file_exists($progressFile)) {
        unlink($progressFile);
    }

    // Flag completion for the page script so the fallback reload is not armed.
    echo "Processamento concluído.\n<script>window.indexingDone = true;</script>";

} catch (PDOException $e) {
    // Any PDO failure (connection or query) ends the run; the fallback reload retries later.
    echo 'Erro na conexão com o banco de dados: ' . $e->getMessage();
}
echo "</pre></body></html>";

/**
 * Normalises a text field before it is tokenised.
 *
 * Trims surrounding whitespace, re-encodes UTF-8 to UTF-8 (which replaces
 * byte sequences that are not valid UTF-8), and keeps at most the first
 * 1024 characters.
 *
 * @param string|null $text Raw text; null is treated as an empty string.
 * @return string Cleaned text, at most 1024 characters long.
 */
function sanitizeText($text) {
    // Cast first: passing null to trim() is deprecated since PHP 8.1, and the
    // caller feeds database columns straight in (they may be NULL — schema not visible here).
    $trimmed = trim((string) $text);

    $validUtf8 = mb_convert_encoding($trimmed, 'UTF-8', 'UTF-8');

    // Name the encoding explicitly so the cut does not depend on mbstring's configured default.
    return mb_substr($validUtf8, 0, 1024, 'UTF-8');
}

/**
 * Builds all word n-grams of length 1 up to $n from a piece of text.
 *
 * The text is split on runs of whitespace (pattern /\s+/u, empty pieces
 * discarded). The result lists every 1-gram in text order, then every
 * 2-gram, and so on up to $n; words within an n-gram are joined by a
 * single space.
 *
 * @param string $text Text to tokenise.
 * @param int    $n    Maximum n-gram length in words (default 3).
 * @return string[] The n-grams; empty when the text has no words or cannot be split.
 */
function generateNgrams($text, $n = 3) {
    $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);

    // preg_split() returns false on failure (with /u that includes a subject
    // that is not valid UTF-8); treat that as "no words" instead of letting
    // count() fail on a non-array.
    if ($words === false) {
        return [];
    }

    $wordCount = count($words);
    $ngrams = [];

    for ($size = 1; $size <= $n; $size++) {
        // Last valid start index is the one where $size words still fit.
        for ($start = 0; $start + $size <= $wordCount; $start++) {
            $ngrams[] = implode(' ', array_slice($words, $start, $size));
        }
    }

    return $ngrams;
}

/**
 * Indexes one feed: stores every n-gram of its title and description in
 * `words` and bumps the per-feed counter in `title_description_index_2`.
 *
 * For each phrase, the phrase is upserted into `words`; the
 * `id=LAST_INSERT_ID(id)` clause makes lastInsertId() return the row id
 * whether the phrase was newly inserted or already existed. The
 * (word_id, feed_id) mapping is then inserted with a count of 1, or its
 * occurrence_count is incremented when the mapping already exists.
 *
 * @param PDO         $conn        Open database connection.
 * @param int|string  $feedId      Id of the feed row being indexed.
 * @param string|null $title       Feed title.
 * @param string|null $description Feed description.
 * @return void
 */
function indexContent($conn, $feedId, $title, $description) {
    $insertPhraseQuery = "INSERT INTO words (phrase) VALUES (:phrase) ON DUPLICATE KEY UPDATE id=LAST_INSERT_ID(id)";
    $insertMappingQuery = "INSERT INTO title_description_index_2 (word_id, feed_id, occurrence_count)
                           VALUES (:word_id, :feed_id, 1)
                           ON DUPLICATE KEY UPDATE occurrence_count = occurrence_count + 1";

    $phraseStmt = $conn->prepare($insertPhraseQuery);
    $mappingStmt = $conn->prepare($insertMappingQuery);

    // Title n-grams first, then description n-grams; duplicates are kept on
    // purpose so each occurrence increments the counter once.
    $phrases = array_merge(
        generateNgrams(sanitizeText($title)),
        generateNgrams(sanitizeText($description))
    );

    foreach ($phrases as $phrase) {
        // Compare against '' explicitly: empty() is also true for the string
        // "0", which would silently drop that token from the index.
        if ($phrase === '') {
            continue;
        }

        $phraseStmt->execute([':phrase' => $phrase]);
        $wordId = $conn->lastInsertId();
        $mappingStmt->execute([':word_id' => $wordId, ':feed_id' => $feedId]);
    }
}
?>
