cambios en la web

2025-10-12 17:51:14 +02:00 · 2025-10-12 17:51:14 +02:00 · a9c1e16bdd
commit a9c1e16bdd
parent 046a5ff369
6 changed files with 283 additions and 131 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -20,7 +20,7 @@ services:
    build:
      context: .
      args:
-        # La imagen llevará torch-cu121 por reutilizar Dockerfile; web no usa GPU.
+        # Reutiliza Dockerfile con torch-cu121; la web no usa GPU.
        TORCH_CUDA: cu121
    container_name: rss_web
    command: gunicorn --bind 0.0.0.0:8000 --workers 3 app:app
@@ -33,9 +33,8 @@ services:
      - DB_USER=${DB_USER}
      - DB_PASS=${DB_PASS}
      - SECRET_KEY=${SECRET_KEY}
-      # Opcionales UI
+      # UI opcional
      # - NEWS_PER_PAGE=20
-      # Mostrar traducciones por defecto en la web
      - WEB_TRANSLATED_DEFAULT=1
      - DEFAULT_LANG=es
      - TRANSLATION_PREFERRED_LANGS=es
@@ -78,31 +77,38 @@ services:
      - DB_USER=${DB_USER}
      - DB_PASS=${DB_PASS}

-      # --- Worker ---
+      # --- Worker (ajustes estables para la VRAM) ---
      - TARGET_LANGS=es
-      - TRANSLATOR_BATCH=4          # estable con 1.3B en 12 GB; ajusta si cambia la VRAM disponible
+      - TRANSLATOR_BATCH=8           # cuántas filas toma por ciclo
      - ENQUEUE=200
      - TRANSLATOR_SLEEP_IDLE=5

-      # Tokens (equilibrio calidad/VRAM ~<7GB)
-      - MAX_SRC_TOKENS=512
-      - MAX_NEW_TOKENS=256
+      # Tokens (seguro para NLLB-1.3B; evita >1024)
+      - MAX_SRC_TOKENS=680           # margen bajo el límite real del modelo
+      - MAX_NEW_TOKENS=400           # permite salidas más largas en cuerpos

-      # Beams: mejor título, cuerpo eficiente
-      - NUM_BEAMS_TITLE=3
-      - NUM_BEAMS_BODY=2
+      # Beams: mejor en títulos, eficiente en cuerpo
+      - NUM_BEAMS_TITLE=2
+      - NUM_BEAMS_BODY=1

      # Modelo NLLB 1.3B
      - UNIVERSAL_MODEL=facebook/nllb-200-1.3B

-      # Dispositivo (forzar GPU si está disponible; el worker cae a CPU si hay OOM)
+      # Chunking por frases (mejor coherencia en artículos largos)
+      - CHUNK_BY_SENTENCES=True
+      - CHUNK_MAX_TOKENS=700         # OJO: debería ser <= MAX_SRC_TOKENS (680) y 700 lo supera; revisar
+      - CHUNK_OVERLAP_SENTS=1        # solape de 1 frase para evitar cortes bruscos
+      - CLEAN_ARTICLE=1              # limpia “The post…”, “Læs også…”, etc.
+
+      # Dispositivo (usa GPU si hay; cae a CPU si hay OOM)
      - DEVICE=cuda

      # Rendimiento / estabilidad
      - PYTHONUNBUFFERED=1
      - HF_HOME=/root/.cache/huggingface
      - TOKENIZERS_PARALLELISM=false
-      - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:64,garbage_collection_threshold:0.9
+      # Sin expandable_segments: evita el assert del allocator de PyTorch
+      - PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:64,garbage_collection_threshold:0.9

      # GPU (requiere NVIDIA Container Toolkit en el host)
      - NVIDIA_VISIBLE_DEVICES=all