F-002 fix: Remove secrets and externalize config
This commit is contained in:
255
project/web/index/new/worker_bulk.php
Executable file
255
project/web/index/new/worker_bulk.php
Executable file
@@ -0,0 +1,255 @@
|
||||
<?php
|
||||
// ============================================================
// worker_bulk.php — dual prompt: English and Spanish generated independently
// ============================================================

require_once __DIR__ . '/bootstrap.php';

date_default_timezone_set('Europe/Madrid');
mb_internal_encoding('UTF-8');

// A batch spends most of its time waiting on HTTP calls, so the execution
// time limit is lifted. NOTE(review): the '@' presumably hides warnings on
// hosts where these functions are disabled — confirm before removing it.
@ini_set('max_execution_time', '0');
@set_time_limit(0);

/* === CONFIG === */
// Every setting is read through legacy_config(key, fallback); that helper is
// not defined in this file (presumably it comes from bootstrap.php).
$OPENAI_API_KEY = trim((string) legacy_config('openai.api_key', ''));
$OPENAI_MODEL = legacy_config('openai.model', 'gpt-4o-mini');
$OPENAI_ENDPOINT = legacy_config('openai.endpoint', 'https://api.openai.com/v1/chat/completions');

// Store language ids and the store name appended to generated titles.
$LANG_ES = (int) legacy_config('store.language_es', 4);
$LANG_EN = (int) legacy_config('store.language_en', 1);
$STORE_NAME = legacy_config('store.name', 'Natural - Mercado de Vida');

// Log file and the two prompt templates.
$LOG_FILE = legacy_config('paths.worker_log', __DIR__ . '/logs/worker.log');
$PROMPT_EN_FILE = legacy_config('paths.prompt_en', __DIR__ . '/inc/prompt_en.md');
$PROMPT_ES_FILE = legacy_config('paths.prompt_es', __DIR__ . '/inc/prompt_es.md');

// The batch size is interpolated into a SQL LIMIT clause. A configured value
// of 0 would select nothing and a negative one would make the query invalid;
// either way the run would look like an empty queue. Enforce at least 1 row.
$BATCH_SIZE = max(1, (int) legacy_config('worker.batch_size', 20));

// Generated HTML shorter than this many characters is rejected.
$MIN_HTML_LENGTH = (int) legacy_config('worker.min_html_length', 500);
|
||||
|
||||
// Sharding defaults: a single worker that owns the whole queue.
$SHARD_TOTAL = 1;
$SHARD_INDEX = 0;

// CLI flags: --shards=N sets how many shards exist, --shard=K selects the
// zero-based shard handled by this process.
if (PHP_SAPI === 'cli' && isset($argv)) {
    foreach ($argv as $cliArg) {
        if (strncmp($cliArg, '--shards=', 9) === 0) {
            $requested = (int) substr($cliArg, 9);
            if ($requested > 0) {
                $SHARD_TOTAL = min($requested, 16); // capped to avoid excessive load
            }
            continue;
        }

        if (strncmp($cliArg, '--shard=', 8) === 0) {
            $requested = (int) substr($cliArg, 8);
            if ($requested >= 0) {
                $SHARD_INDEX = $requested;
            }
        }
    }
}

// Keep the index inside 0 .. SHARD_TOTAL - 1.
$SHARD_INDEX = max(0, min($SHARD_INDEX, $SHARD_TOTAL - 1));
|
||||
|
||||
/* === FUNCIONES === */
|
||||
/**
 * Appends one timestamped line to the worker log.
 *
 * The destination path is taken from the global $LOG_FILE. Each entry has the
 * form "[Y-m-d H:i:s] message" followed by a newline.
 *
 * @param string $msg Text to record; written verbatim after the timestamp.
 * @return void
 */
function log_msg($msg) {
    global $LOG_FILE;

    $time = date('Y-m-d H:i:s');

    // LOCK_EX: this worker can run as several shard processes appending to the
    // same file, so take an exclusive lock to keep entries from interleaving.
    file_put_contents($LOG_FILE, "[$time] $msg\n", FILE_APPEND | LOCK_EX);
}
|
||||
|
||||
/**
 * Sends a single-message chat completion request and returns the generated text.
 *
 * The endpoint is read from legacy_config('openai.endpoint'). The request is
 * attempted up to $retries times; an attempt counts as successful only when
 * the HTTP status is 200 and the returned message content is longer than
 * 50 characters after trimming.
 *
 * @param string $prompt     Text sent as the only "user" message.
 * @param string $key        API key used as the Bearer token.
 * @param string $model      Model identifier placed in the request body.
 * @param int    $max_tokens Value for the request's max_tokens field.
 * @param int    $retries    Maximum number of attempts.
 * @return string Trimmed response text, or '' when the key is missing or still
 *                a CHANGE_ME_ placeholder, the request cannot be encoded, the
 *                server answers with a non-retryable status, or every attempt
 *                fails.
 */
function obtener_respuesta($prompt, $key, $model, $max_tokens = 2000, $retries = 3) {
    $endpoint = legacy_config('openai.endpoint', 'https://api.openai.com/v1/chat/completions');

    $key = trim((string) $key);
    if ($key === '' || strpos($key, 'CHANGE_ME_') === 0) {
        log_msg('❌ Missing openai.api_key in config/local.php');
        return '';
    }

    // The body is identical on every attempt, so encode it once. json_encode()
    // returns false (e.g. on invalid UTF-8 in the prompt); sending that would
    // only produce a run of HTTP errors.
    $payload = json_encode([
        'model' => $model,
        'messages' => [['role' => 'user', 'content' => $prompt]],
        'temperature' => 0.6,
        'max_tokens' => $max_tokens
    ]);
    if ($payload === false) {
        log_msg('❌ Could not encode request body: ' . json_last_error_msg());
        return '';
    }

    // Statuses for which repeating the very same request cannot succeed.
    $fatal_statuses = [400, 401, 403, 404];

    for ($i = 1; $i <= $retries; $i++) {
        $pause = 2;
        $ch = curl_init($endpoint);

        if ($ch === false) {
            log_msg("⚠️ cURL error ($i/$retries): could not initialise handle");
        } else {
            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_HTTPHEADER => [
                    'Content-Type: application/json',
                    'Authorization: Bearer ' . $key
                ],
                CURLOPT_POST => true,
                CURLOPT_POSTFIELDS => $payload,
                CURLOPT_TIMEOUT => 180
            ]);

            $result = curl_exec($ch);
            $err = curl_error($ch);
            $http = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            if ($err) {
                log_msg("⚠️ cURL error ($i/$retries): $err");
            } else {
                $json = is_string($result) ? json_decode($result, true) : null;

                if ($http !== 200) {
                    // Surface the server's own explanation when the body carries one.
                    $detail = is_array($json) ? ($json['error']['message'] ?? '') : '';
                    if (!is_string($detail)) {
                        $detail = '';
                    }
                    log_msg("⚠️ HTTP $http on attempt $i" . ($detail !== '' ? ': ' . mb_substr($detail, 0, 300) : ''));

                    if (in_array($http, $fatal_statuses, true)) {
                        log_msg("❌ HTTP $http is not retryable, giving up");
                        return '';
                    }
                    $pause = 3;
                } else {
                    $txt = is_array($json) ? ($json['choices'][0]['message']['content'] ?? '') : '';

                    // Only a string can be trimmed and measured; anything else is treated as empty.
                    if (is_string($txt) && mb_strlen(trim($txt)) > 50) {
                        return trim($txt);
                    }
                    log_msg("⚠️ Empty response attempt $i");
                }
            }
        }

        // Back off only when another attempt will actually follow.
        if ($i < $retries) {
            sleep($pause);
        }
    }

    log_msg("❌ No response after $retries attempts");
    return '';
}
|
||||
|
||||
/**
 * Cleans model-generated HTML before it is stored.
 *
 * Steps, in order: strip Markdown code fences, drop <h1>/<h2> tags while
 * keeping their inner text, turn <div>/<section> tags into <p> tags, remove
 * <script> and <style> elements with their content, delete any leftover
 * triple backticks, collapse runs of spaces/tabs to one space and runs of
 * newlines to one newline, then trim.
 *
 * @param string $t Raw text returned by the model.
 * @return string Cleaned text, or '' when the input is falsy.
 */
function limpiar_html($t) {
    if (!$t) {
        return '';
    }

    // Applied one after another; each pattern sees the previous one's output.
    $markup_rules = [
        // Markdown fences (```html ... ```)
        ['/^```[a-zA-Z]*\s*/m', ''],
        ['/```$/m', ''],
        ['/```[\s\S]*?```/', ''],
        // h1/h2 tags are removed, their content stays
        ['/<\/?h1[^>]*>/i', ''],
        ['/<\/?h2[^>]*>/i', ''],
        // div and section become <p>
        ['/<\s*div[^>]*>/i', '<p>'],
        ['/<\s*\/div\s*>/i', '</p>'],
        ['/<\s*section[^>]*>/i', '<p>'],
        ['/<\s*\/section\s*>/i', '</p>'],
        // scripts and styles are removed together with their content
        ['/<script.*?<\/script>/is', ''],
        ['/<style.*?<\/style>/is', ''],
    ];
    foreach ($markup_rules as $rule) {
        $t = preg_replace($rule[0], $rule[1], $t);
    }

    // Residual Markdown fence markers
    $t = str_replace('```', '', $t);

    // Whitespace clean-up
    $spacing_rules = [
        ['/[ \t]+/', ' '],
        ['/\n{2,}/', "\n"],
    ];
    foreach ($spacing_rules as $rule) {
        $t = preg_replace($rule[0], $rule[1], $t);
    }

    return trim($t);
}
|
||||
|
||||
/**
 * Prepares text for storage: removes characters above the Basic Multilingual
 * Plane and normalises whitespace.
 *
 * - Code points U+10000–U+10FFFF are deleted (this is how emoji are removed;
 *   symbols encoded below U+10000 are left alone).
 * - Every run of whitespace, newlines included, becomes a single space.
 * - The result is trimmed.
 *
 * Input that is not valid UTF-8 is repaired first. Without that step the
 * /u regex would fail, return null, and the whole text would silently be
 * lost as ''.
 *
 * @param string|null $text Text to clean.
 * @return string Cleaned text; '' for null or empty input.
 */
function sanitize_for_db($text) {
    if ($text === null || $text === '') {
        return '';
    }
    $text = (string) $text;

    // Re-encoding UTF-8 to UTF-8 replaces malformed byte sequences with the
    // mbstring substitute character instead of rejecting the whole string.
    if (!mb_check_encoding($text, 'UTF-8')) {
        $text = mb_convert_encoding($text, 'UTF-8', 'UTF-8');
    }

    $without_astral = preg_replace('/[\x{10000}-\x{10FFFF}]/u', '', $text);
    if ($without_astral !== null) {
        $text = $without_astral;
    }

    // /u keeps \s from matching single bytes inside multibyte characters,
    // which byte mode can do under some locales. In UTF-8 mode \s may also
    // match Unicode spaces such as NBSP; those collapse to a plain space too.
    $collapsed = preg_replace('/\s+/u', ' ', $text);
    if ($collapsed !== null) {
        $text = $collapsed;
    }

    return trim($text);
}
|
||||
|
||||
/**
 * Converts text to sentence case: everything lower-cased, surrounding
 * whitespace trimmed, and the first character upper-cased (UTF-8 aware).
 *
 * @param string $text Text to convert.
 * @return string Converted text; '' when the input is empty().
 */
function sentence_case($text) {
    if (empty($text)) {
        return '';
    }

    $enc = 'UTF-8';
    $lowered = trim(mb_strtolower($text, $enc));

    $head = mb_substr($lowered, 0, 1, $enc);
    $tail = mb_substr($lowered, 1, null, $enc);

    return mb_strtoupper($head, $enc) . $tail;
}
|
||||
|
||||
/* === DB === */
// Startup failures below exit with status 1 so that cron jobs or process
// supervisors can tell a broken run from a normal one.
$db = legacy_new_mysqli();
if ($db->connect_errno) {
    log_msg('❌ DB: ' . $db->connect_error);
    exit(1);
}

/* === Base prompts === */
if (!file_exists($PROMPT_EN_FILE) || !file_exists($PROMPT_ES_FILE)) {
    log_msg("❌ Missing prompt files.");
    exit(1);
}

$PROMPT_EN = file_get_contents($PROMPT_EN_FILE);
$PROMPT_ES = file_get_contents($PROMPT_ES_FILE);

// file_get_contents() returns false when a file cannot be read; the cast makes
// that case fall into the same "empty" branch as a blank template.
if (trim((string) $PROMPT_EN) === '' || trim((string) $PROMPT_ES) === '') {
    log_msg("❌ Empty prompt files.");
    exit(1);
}
|
||||
|
||||
/* === Worker === */
$shardLabel = $SHARD_TOTAL > 1 ? " | shard {$SHARD_INDEX}/{$SHARD_TOTAL}" : '';
log_msg("🚀 Worker iniciado (modo doble prompt, batch={$BATCH_SIZE}{$shardLabel})");

// With more than one shard, each process only takes the queue rows whose id
// leaves its own remainder when divided by the shard count.
$shardFilter = $SHARD_TOTAL > 1 ? " AND MOD(id, {$SHARD_TOTAL}) = {$SHARD_INDEX}" : '';
$q = $db->query("SELECT * FROM oc_product_queue WHERE processed=0{$shardFilter} ORDER BY id ASC LIMIT $BATCH_SIZE");

// A failed query is not an empty queue: report the SQL error and exit non-zero.
if ($q === false) {
    log_msg('❌ Queue query failed: ' . $db->error);
    exit(1);
}

if ($q->num_rows === 0) {
    log_msg("⏸️ Cola vacía.");
    exit;
}
|
||||
|
||||
/**
 * Writes one language's generated copy into oc_product_description.
 *
 * Failures to prepare or execute the statement are logged with the same
 * "Error <TAG> <pid>" prefix and reported through the return value, so the
 * caller can record the real outcome in the queue.
 *
 * @param mysqli $db          Open database connection.
 * @param int    $pid         Product id of the row to update.
 * @param int    $language_id Language id of the row to update.
 * @param string $tag         Language label used in log messages ("EN" / "ES").
 * @param string $html        Value for the description column.
 * @param string $meta        Value for the meta_description column.
 * @param string $title       Value for the u_title column.
 * @param string $h1          Value for the u_h1 column.
 * @param string $h2          Value for the u_h2 column.
 * @return bool True when the UPDATE executed without error.
 */
function worker_guardar_descripcion($db, $pid, $language_id, $tag, $html, $meta, $title, $h1, $h2) {
    $stmt = $db->prepare("UPDATE oc_product_description
        SET description=?, meta_description=?, u_title=?, u_h1=?, u_h2=?
        WHERE product_id=? AND language_id=?");

    // prepare() returns false on failure; calling bind_param() on it would be fatal.
    if (!$stmt) {
        log_msg("❌ Error $tag $pid: " . $db->error);
        return false;
    }

    // Five string columns followed by the two integer keys.
    $stmt->bind_param('sssssii', $html, $meta, $title, $h1, $h2, $pid, $language_id);

    $ok = $stmt->execute();
    if (!$ok) {
        log_msg("❌ Error $tag $pid: " . $stmt->error);
    }
    $stmt->close();

    return $ok;
}

while ($row = $q->fetch_assoc()) {
    $pid = (int)$row['product_id'];
    log_msg("🔄 Procesando producto $pid...");

    // The Spanish name is the one substituted into both prompts.
    $r = $db->query("
        SELECT p.ean, d.name
        FROM oc_product p
        LEFT JOIN oc_product_description d ON p.product_id=d.product_id AND d.language_id=$LANG_ES
        WHERE p.product_id=$pid
    ");
    if (!$r || !$prod = $r->fetch_assoc()) {
        log_msg("⚠️ Producto $pid no encontrado");
        $db->query("UPDATE oc_product_queue SET processed=1, log='No encontrado' WHERE product_id=$pid");
        continue;
    }

    $producto = $prod['name'];
    $ean = $prod['ean'];

    // The LEFT JOIN yields a NULL name when the product has no Spanish
    // description row. Generating copy for an unnamed product would spend two
    // API calls and write meaningless text and an empty H1, so flag it instead.
    if ($producto === null || trim((string) $producto) === '') {
        log_msg("⚠️ Producto $pid sin nombre");
        $db->query("UPDATE oc_product_queue
            SET processed=1, processed_at=NOW(), result_en=0, result_es=0, needs_verify=1, log='Sin nombre'
            WHERE product_id=$pid");
        continue;
    }

    // === Per-product prompts ===
    $prompt_en = str_replace(['$producto', '$ean'], [$producto, $ean], $PROMPT_EN);
    $prompt_es = str_replace(['$producto', '$ean'], [$producto, $ean], $PROMPT_ES);

    // === Generate EN ===
    $raw_en = obtener_respuesta($prompt_en, $OPENAI_API_KEY, $OPENAI_MODEL, 2200);
    file_put_contents(__DIR__ . "/logs/raw_openai_en_$pid.txt", $raw_en);

    $clean_en = limpiar_html($raw_en);
    $html_en = sanitize_for_db($clean_en);
    $meta_en = sanitize_for_db(mb_substr(strip_tags($clean_en), 0, 255, 'UTF-8'));

    // === Generate ES ===
    $raw_es = obtener_respuesta($prompt_es, $OPENAI_API_KEY, $OPENAI_MODEL, 2200);
    file_put_contents(__DIR__ . "/logs/raw_openai_es_$pid.txt", $raw_es);

    $clean_es = limpiar_html($raw_es);
    $html_es = sanitize_for_db($clean_es);
    $meta_es = sanitize_for_db(mb_substr(strip_tags($clean_es), 0, 255, 'UTF-8'));

    // === Content length ===
    $len_en = mb_strlen($html_en);
    $len_es = mb_strlen($html_es);
    file_put_contents(__DIR__ . "/logs/html_debug_$pid.txt",
        "EN ($len_en):\n$html_en\n\nES ($len_es):\n$html_es"
    );

    // Both languages must reach the minimum length; otherwise nothing is saved.
    if ($len_en < $MIN_HTML_LENGTH || $len_es < $MIN_HTML_LENGTH) {
        log_msg("❌ Texto demasiado corto (EN=$len_en / ES=$len_es) PID $pid");
        $db->query("UPDATE oc_product_queue
            SET processed=1, processed_at=NOW(), result_en=0, result_es=0, needs_verify=1, log='Texto corto'
            WHERE product_id=$pid");
        continue;
    }

    // === Save ===
    $u_title_en = sentence_case("$producto | $STORE_NAME");
    $u_h1_en = $producto;
    $u_h2_en = sentence_case("benefits and properties of $producto");

    $u_title_es = sentence_case("comprar $producto | $STORE_NAME");
    $u_h1_es = $producto;
    $u_h2_es = sentence_case("propiedades y beneficios de $producto");

    $saved_en = worker_guardar_descripcion($db, $pid, $LANG_EN, 'EN', $html_en, $meta_en, $u_title_en, $u_h1_en, $u_h2_en);
    $saved_es = worker_guardar_descripcion($db, $pid, $LANG_ES, 'ES', $html_es, $meta_es, $u_title_es, $u_h1_es, $u_h2_es);

    // Record what actually happened: a failed write must not be marked as OK.
    if (!$saved_en || !$saved_es) {
        $res_en = $saved_en ? 1 : 0;
        $res_es = $saved_es ? 1 : 0;
        $db->query("UPDATE oc_product_queue
            SET processed=1, processed_at=NOW(), result_en=$res_en, result_es=$res_es, needs_verify=1, log='Error DB'
            WHERE product_id=$pid");
        continue;
    }

    $db->query("UPDATE oc_product_queue
        SET processed=1, processed_at=NOW(), result_en=1, result_es=1, needs_verify=0, log='OK doble prompt'
        WHERE product_id=$pid");

    log_msg("✅ $pid completado EN/ES (len EN=$len_en | ES=$len_es)");

    // Short pause between products.
    usleep(100000);
}

log_msg("🏁 Worker finalizado.");
|
||||
Reference in New Issue
Block a user