0) $SHARD_TOTAL = min($value, 16); // avoid over-saturating
        } elseif (strpos($arg, '--shard=') === 0) {
            $value = (int)substr($arg, 8);
            if ($value >= 0) $SHARD_INDEX = $value;
        }
    }
}

// Keep the shard index inside the valid range [0, SHARD_TOTAL - 1].
if ($SHARD_INDEX >= $SHARD_TOTAL) {
    $SHARD_INDEX = $SHARD_TOTAL - 1;
}
if ($SHARD_INDEX < 0) $SHARD_INDEX = 0;

/* === FUNCTIONS === */

/**
 * Appends a timestamped line to the file named by the global $LOG_FILE.
 *
 * @param string $msg Message to record.
 * @return void
 */
function log_msg($msg)
{
    global $LOG_FILE;
    $time = date('Y-m-d H:i:s');
    file_put_contents($LOG_FILE, "[$time] $msg\n", FILE_APPEND);
}

/**
 * Sends a single-message chat completion request and returns the reply text.
 *
 * The request is attempted up to $retries times. An attempt is discarded when
 * cURL reports an error, when the HTTP status is not 200, or when the trimmed
 * reply is 50 characters or shorter.
 *
 * @param string $prompt     Content of the single "user" message.
 * @param string $key        API key, sent as a Bearer token.
 * @param string $model      Value of the request's "model" field.
 * @param int    $max_tokens Value of the request's "max_tokens" field.
 * @param int    $retries    Maximum number of attempts.
 * @return string Trimmed reply text, or '' when no usable reply was obtained.
 */
function obtener_respuesta($prompt, $key, $model, $max_tokens = 2000, $retries = 3)
{
    $endpoint = legacy_config('openai.endpoint', 'https://api.openai.com/v1/chat/completions');

    // Trim before validating so a whitespace-only key is reported as missing.
    $key = trim((string)$key);
    if ($key === '' || strpos($key, 'CHANGE_ME_') === 0) {
        log_msg('❌ Missing openai.api_key in config/local.php');
        return '';
    }

    // The payload is the same for every attempt, so it is encoded once.
    $payload = json_encode([
        'model' => $model,
        'messages' => [['role' => 'user', 'content' => $prompt]],
        'temperature' => 0.6,
        'max_tokens' => $max_tokens
    ]);
    if ($payload === false) {
        // Retrying cannot help: the request body itself cannot be built.
        log_msg('❌ Could not encode request: ' . json_last_error_msg());
        return '';
    }

    for ($i = 1; $i <= $retries; $i++) {
        $delay = 2; // seconds to wait before the next attempt

        $ch = curl_init($endpoint);
        if ($ch === false) {
            log_msg("⚠️ cURL init failed ($i/$retries)");
        } else {
            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_HTTPHEADER => [
                    'Content-Type: application/json',
                    'Authorization: Bearer ' . $key
                ],
                CURLOPT_POST => true,
                CURLOPT_POSTFIELDS => $payload,
                CURLOPT_TIMEOUT => 180
            ]);
            $result = curl_exec($ch);
            $err = curl_error($ch);
            $http = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            if ($err) {
                log_msg("⚠️ cURL error ($i/$retries): $err");
            } elseif ($http !== 200) {
                log_msg("⚠️ HTTP $http on attempt $i");
                $delay = 3;
            } else {
                $json = json_decode($result, true);
                $txt = $json['choices'][0]['message']['content'] ?? '';
                // Only a plain string can be trimmed and measured.
                if (is_string($txt) && $txt && mb_strlen(trim($txt)) > 50) {
                    return trim($txt);
                }
                log_msg("⚠️ Empty response attempt $i");
            }
        }

        // No point in waiting once the last attempt has been used.
        if ($i < $retries) sleep($delay);
    }

    log_msg("❌ No response after $retries attempts");
    return '';
}

/**
 * Cleans model output before it is stored as HTML.
 *
 * Removes Markdown code fences, unwraps h1/h2 tags (keeping their text),
 * turns div/section wrappers into paragraphs, removes script and style
 * elements together with their content, and normalizes whitespace.
 *
 * @param string $t Raw text.
 * @return string Cleaned text, or '' for empty input.
 */
function limpiar_html($t)
{
    if (!$t) return '';

    // Strip Markdown fences: an opening fence at the start of a line, a closing
    // fence at the end of a line, then any remaining fenced span with its content.
    $t = preg_replace('/^```[a-zA-Z]*\s*/m', '', $t);
    $t = preg_replace('/```$/m', '', $t);
    $t = preg_replace('/```[\s\S]*?```/', '', $t);

    // Drop h1/h2 tags but keep what they wrap.
    $t = preg_replace('/<\/?h1[^>]*>/i', '', $t);
    $t = preg_replace('/<\/?h2[^>]*>/i', '', $t);

    // Convert div and section wrappers to paragraphs.
    // NOTE(review): the replacement strings had lost their markup (they were
    // blank lines, which the whitespace cleanup below erased); restored as
    // paragraph tags to match the stated intent - confirm.
    $t = preg_replace('/<\s*div[^>]*>/i', '<p>', $t);
    $t = preg_replace('/<\s*\/div\s*>/i', '</p>', $t);
    $t = preg_replace('/<\s*section[^>]*>/i', '<p>', $t);
    $t = preg_replace('/<\s*\/section\s*>/i', '</p>', $t);

    // Remove script and style elements including their content. The patterns
    // used to be empty, which made both calls no-ops.
    $t = preg_replace('/<script\b[^>]*>.*?<\/script\s*>/is', '', $t);
    $t = preg_replace('/<style\b[^>]*>.*?<\/style\s*>/is', '', $t);

    // Remove any leftover fence markers.
    $t = str_replace('```', '', $t);

    // Whitespace cleanup: collapse runs of spaces/tabs and of blank lines.
    $t = preg_replace('/[ \t]+/', ' ', $t);
    $t = preg_replace('/\n{2,}/', "\n", $t);

    return trim($t);
}

/**
 * Removes every character in the range U+10000-U+10FFFF (this includes most,
 * but not all, emoji) and collapses each whitespace run into a single space.
 *
 * @param string|null $text Input text.
 * @return string Trimmed result; '' for null or empty input.
 */
function sanitize_for_db($text)
{
    if ($text === null || $text === '') return '';
    $text = preg_replace('/[\x{10000}-\x{10FFFF}]/u', '', $text);
    $text = preg_replace('/\s+/', ' ', $text);
    return trim($text);
}

/**
 * Lower-cases the text and upper-cases only its first character (UTF-8 aware).
 *
 * @param string $text Input text.
 * @return string Converted text; '' when the input is empty() ('0' included).
 */
function sentence_case($text)
{
    if (empty($text)) return '';
    $text = trim(mb_strtolower($text, 'UTF-8'));
    $first = mb_strtoupper(mb_substr($text, 0, 1, 'UTF-8'), 'UTF-8');
    return $first . mb_substr($text, 1, null, 'UTF-8');
}

/* === DB === */
$db = legacy_new_mysqli();
if ($db->connect_errno) {
    log_msg('❌ DB: ' . $db->connect_error);
    exit;
}

/* === Base prompts === */
if (!file_exists($PROMPT_EN_FILE) || !file_exists($PROMPT_ES_FILE)) {
    log_msg("❌ Missing prompt files.");
    exit;
}
$PROMPT_EN = file_get_contents($PROMPT_EN_FILE);
$PROMPT_ES = file_get_contents($PROMPT_ES_FILE);
if (trim($PROMPT_EN) === '' || trim($PROMPT_ES) === '') {
    log_msg("❌ Empty prompt files.");
    exit;
}

/* === Worker === */
$shardLabel = $SHARD_TOTAL > 1 ? " | shard {$SHARD_INDEX}/{$SHARD_TOTAL}" : '';
log_msg("🚀 Worker iniciado (modo doble prompt, batch={$BATCH_SIZE}{$shardLabel})");

// With more than one shard, each worker only takes queue rows whose id falls
// in its own residue class.
$shardFilter = $SHARD_TOTAL > 1 ? " AND MOD(id, {$SHARD_TOTAL}) = {$SHARD_INDEX}" : '';
$q = $db->query("SELECT * FROM oc_product_queue WHERE processed=0{$shardFilter} ORDER BY id ASC LIMIT $BATCH_SIZE");
if (!$q) {
    // A failed query is not an empty queue; report it as an error.
    log_msg('❌ DB: ' . $db->error);
    exit;
}
if ($q->num_rows === 0) {
    log_msg("⏸️ Cola vacía.");
    exit;
}

// Both languages use the same UPDATE, so it is prepared once for the whole batch.
$stmt = $db->prepare("UPDATE oc_product_description SET description=?, meta_description=?, u_title=?, u_h1=?, u_h2=? WHERE product_id=? AND language_id=?");
if (!$stmt) {
    log_msg('❌ DB prepare: ' . $db->error);
    exit;
}

while ($row = $q->fetch_assoc()) {
    $pid = (int)$row['product_id'];
    log_msg("🔄 Procesando producto $pid...");

    $r = $db->query("
        SELECT p.ean, d.name
        FROM oc_product p
        LEFT JOIN oc_product_description d
               ON p.product_id=d.product_id AND d.language_id=$LANG_ES
        WHERE p.product_id=$pid
    ");
    if (!$r || !$prod = $r->fetch_assoc()) {
        log_msg("⚠️ Producto $pid no encontrado");
        $db->query("UPDATE oc_product_queue SET processed=1, log='No encontrado' WHERE product_id=$pid");
        continue;
    }

    $producto = $prod['name'];
    $ean = $prod['ean'];

    // The LEFT JOIN yields a NULL name when the product has no description row
    // for $LANG_ES. Generating text for a nameless product would spend two API
    // calls and store empty headings, so flag the row for review instead.
    if (trim((string)$producto) === '') {
        log_msg("⚠️ Producto $pid sin nombre");
        $db->query("UPDATE oc_product_queue SET processed=1, processed_at=NOW(), result_en=0, result_es=0, needs_verify=1, log='Sin nombre' WHERE product_id=$pid");
        continue;
    }

    // === Per-product prompts ===
    $prompt_en = str_replace(['$producto', '$ean'], [$producto, $ean], $PROMPT_EN);
    $prompt_es = str_replace(['$producto', '$ean'], [$producto, $ean], $PROMPT_ES);

    // === Generate EN ===
    $raw_en = obtener_respuesta($prompt_en, $OPENAI_API_KEY, $OPENAI_MODEL, 2200);
    file_put_contents(__DIR__ . "/logs/raw_openai_en_$pid.txt", $raw_en);
    $clean_en = limpiar_html($raw_en);
    $html_en = sanitize_for_db($clean_en);
    $meta_en = sanitize_for_db(mb_substr(strip_tags($clean_en), 0, 255, 'UTF-8'));

    // === Generate ES ===
    $raw_es = obtener_respuesta($prompt_es, $OPENAI_API_KEY, $OPENAI_MODEL, 2200);
    file_put_contents(__DIR__ . "/logs/raw_openai_es_$pid.txt", $raw_es);
    $clean_es = limpiar_html($raw_es);
    $html_es = sanitize_for_db($clean_es);
    $meta_es = sanitize_for_db(mb_substr(strip_tags($clean_es), 0, 255, 'UTF-8'));

    // === Content length ===
    $len_en = mb_strlen($html_en);
    $len_es = mb_strlen($html_es);
    file_put_contents(
        __DIR__ . "/logs/html_debug_$pid.txt",
        "EN ($len_en):\n$html_en\n\nES ($len_es):\n$html_es"
    );

    if ($len_en < $MIN_HTML_LENGTH || $len_es < $MIN_HTML_LENGTH) {
        log_msg("❌ Texto demasiado corto (EN=$len_en / ES=$len_es) PID $pid");
        $db->query("UPDATE oc_product_queue SET processed=1, processed_at=NOW(), result_en=0, result_es=0, needs_verify=1, log='Texto corto' WHERE product_id=$pid");
        continue;
    }

    // === Save ===
    $u_title_en = sentence_case("$producto | $STORE_NAME");
    $u_h1_en = $producto;
    $u_h2_en = sentence_case("benefits and properties of $producto");
    $u_title_es = sentence_case("comprar $producto | $STORE_NAME");
    $u_h1_es = $producto;
    $u_h2_es = sentence_case("propiedades y beneficios de $producto");

    $ok_en = $stmt->bind_param('sssssii', $html_en, $meta_en, $u_title_en, $u_h1_en, $u_h2_en, $pid, $LANG_EN)
        && $stmt->execute();
    if (!$ok_en) log_msg("❌ Error EN $pid: " . $stmt->error);

    $ok_es = $stmt->bind_param('sssssii', $html_es, $meta_es, $u_title_es, $u_h1_es, $u_h2_es, $pid, $LANG_ES)
        && $stmt->execute();
    if (!$ok_es) log_msg("❌ Error ES $pid: " . $stmt->error);

    // Record what actually happened: a failed UPDATE must not be stored as a
    // success in the queue.
    $res_en = $ok_en ? 1 : 0;
    $res_es = $ok_es ? 1 : 0;
    $all_ok = $ok_en && $ok_es;
    $verify = $all_ok ? 0 : 1;
    $queue_log = $all_ok ? 'OK doble prompt' : 'Error al guardar';
    $db->query("UPDATE oc_product_queue SET processed=1, processed_at=NOW(), result_en=$res_en, result_es=$res_es, needs_verify=$verify, log='$queue_log' WHERE product_id=$pid");

    if ($all_ok) {
        log_msg("✅ $pid completado EN/ES (len EN=$len_en | ES=$len_es)");
    } else {
        log_msg("⚠️ $pid guardado con errores (EN=$res_en / ES=$res_es)");
    }

    usleep(100000); // 0.1 s pause between products
}

$stmt->close();
log_msg("🏁 Worker finalizado.");