Preparar repositorio para despliegue: código fuente limpio

2026-01-23 02:00:40 +01:00 · 2026-01-23 02:00:40 +01:00 · 3eca832c1a
commit 3eca832c1a
parent 866f5c432d
76 changed files with 5434 additions and 3496 deletions
--- a/workers/llm_categorizer_worker.py
+++ b/workers/llm_categorizer_worker.py
@ -0,0 +1,482 @@
+#!/usr/bin/env python3
+"""
+LLM Categorizer Worker - Categoriza noticias usando ExLlamaV2 (local)
+
+Este worker:
+1. Lee 10 noticias sin categorizar de la base de datos
+2. Las envía a un LLM local (ExLlamaV2) para que determine la categoría
+3. Actualiza la base de datos con las categorías asignadas
+
+Modelo recomendado para RTX 3060 12GB:
+- Mistral-7B-Instruct-v0.2 (GPTQ/AWQ/EXL2)
+- OpenHermes-2.5-Mistral-7B
+- Neural-Chat-7B
+"""
+
+import os
+import sys
+import time
+import logging
+import json
+from typing import List, Dict, Optional
+import psycopg2
+from psycopg2.extras import execute_values
+
+# Configuración de logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='[llm_categorizer] %(asctime)s %(levelname)s: %(message)s'
+)
+log = logging.getLogger(__name__)
+
+# Configuración de base de datos
+DB_CONFIG = {
+    "host": os.environ.get("DB_HOST", "localhost"),
+    "port": int(os.environ.get("DB_PORT", 5432)),
+    "dbname": os.environ.get("DB_NAME", "rss"),
+    "user": os.environ.get("DB_USER", "rss"),
+    "password": os.environ.get("DB_PASS", ""),
+}
+
+# Configuración del worker
+BATCH_SIZE = int(os.environ.get("LLM_BATCH_SIZE", 10))  # 10 noticias por lote
+SLEEP_IDLE = int(os.environ.get("LLM_SLEEP_IDLE", 30))  # segundos
+MODEL_PATH = os.environ.get("LLM_MODEL_PATH", "/app/models/llm")
+GPU_SPLIT = os.environ.get("LLM_GPU_SPLIT", "auto")
+MAX_SEQ_LEN = int(os.environ.get("LLM_MAX_SEQ_LEN", 4096))
+CACHE_MODE = os.environ.get("LLM_CACHE_MODE", "FP16")
+
+# Categorías predefinidas
+CATEGORIES = [
+    "Política",
+    "Economía",
+    "Tecnología",
+    "Ciencia",
+    "Salud",
+    "Deportes",
+    "Entretenimiento",
+    "Internacional",
+    "Nacional",
+    "Sociedad",
+    "Cultura",
+    "Medio Ambiente",
+    "Educación",
+    "Seguridad",
+    "Otros"
+]
+
+class ExLlamaV2Categorizer:
+    """Wrapper para el modelo ExLlamaV2"""
+    
+    def __init__(self, model_path: str):
+        """
+        Inicializa el modelo ExLlamaV2
+        
+        Args:
+            model_path: Ruta al modelo descargado (formato EXL2, GPTQ, etc.)
+        """
+        self.model_path = model_path
+        self.model = None
+        self.tokenizer = None
+        self.cache = None
+        self.generator = None
+        
+        log.info(f"Inicializando ExLlamaV2 desde: {model_path}")
+        self._load_model()
+    
+    def _load_model(self):
+        """Carga el modelo y componentes necesarios"""
+        try:
+            from exllamav2 import (
+                ExLlamaV2,
+                ExLlamaV2Config,
+                ExLlamaV2Cache,
+                ExLlamaV2Tokenizer,
+            )
+            from exllamav2.generator import (
+                ExLlamaV2StreamingGenerator,
+                ExLlamaV2Sampler
+            )
+            
+            # Configuración del modelo
+            config = ExLlamaV2Config()
+            config.model_dir = self.model_path
+            config.prepare()
+            
+            # Optimizaciones para RTX 3060 12GB
+            config.max_seq_len = MAX_SEQ_LEN
+            config.scale_pos_emb = 1.0
+            config.scale_alpha_value = 1.0
+            
+            # Cargar modelo
+            self.model = ExLlamaV2(config)
+            
+            log.info("Cargando modelo en GPU...")
+            
+            # Configurar GPU split (auto para single GPU)
+            if GPU_SPLIT.lower() == "auto":
+                self.model.load_autosplit(cache=None)
+            else:
+                split = [float(x.strip()) for x in GPU_SPLIT.split(",")]
+                self.model.load(split)
+            
+            # Tokenizer
+            self.tokenizer = ExLlamaV2Tokenizer(config)
+            
+            # Cache
+            if CACHE_MODE == "FP16":
+                self.cache = ExLlamaV2Cache(self.model, lazy=True)
+            elif CACHE_MODE == "Q4":
+                from exllamav2 import ExLlamaV2Cache_Q4
+                self.cache = ExLlamaV2Cache_Q4(self.model, lazy=True)
+            else:
+                self.cache = ExLlamaV2Cache(self.model, lazy=True)
+            
+            # Generator
+            self.generator = ExLlamaV2StreamingGenerator(
+                self.model, 
+                self.cache, 
+                self.tokenizer
+            )
+            
+            # Configuración de sampling
+            self.settings = ExLlamaV2Sampler.Settings()
+            self.settings.temperature = 0.1  # Determinista para clasificación
+            self.settings.top_k = 10
+            self.settings.top_p = 0.9
+            self.settings.token_repetition_penalty = 1.05
+            
+            log.info("✓ Modelo cargado exitosamente")
+            
+        except ImportError as e:
+            log.error(f"Error: ExLlamaV2 no está instalado. Instalar con: pip install exllamav2")
+            log.error(f"Detalles: {e}")
+            raise
+        except Exception as e:
+            log.error(f"Error cargando modelo: {e}")
+            raise
+    
+    def categorize_news(self, news_items: List[Dict]) -> List[Dict]:
+        """
+        Categoriza un lote de noticias
+        
+        Args:
+            news_items: Lista de diccionarios con 'id', 'titulo', 'resumen'
+            
+        Returns:
+            Lista de diccionarios con 'id', 'categoria', 'confianza'
+        """
+        results = []
+        
+        for item in news_items:
+            categoria, confianza = self._categorize_single(
+                item['titulo'], 
+                item['resumen']
+            )
+            
+            results.append({
+                'id': item['id'],
+                'categoria': categoria,
+                'confianza': confianza
+            })
+            
+            log.info(f"Noticia {item['id']}: {categoria} (confianza: {confianza:.2f})")
+        
+        return results
+    
+    def _categorize_single(self, titulo: str, resumen: str) -> tuple:
+        """
+        Categoriza una noticia individual
+        
+        Returns:
+            (categoria, confianza)
+        """
+        # Construir prompt
+        prompt = self._build_prompt(titulo, resumen)
+        
+        # Generar respuesta
+        try:
+            self.generator.set_stop_conditions([self.tokenizer.eos_token_id])
+            
+            output = self.generator.generate_simple(
+                prompt,
+                self.settings,
+                max_new_tokens=50,  # Solo necesitamos la categoría
+                seed=1234
+            )
+            
+            # Parsear respuesta
+            categoria, confianza = self._parse_response(output)
+            
+            return categoria, confianza
+            
+        except Exception as e:
+            log.error(f"Error durante la generación: {e}")
+            return "Otros", 0.0
+    
+    def _build_prompt(self, titulo: str, resumen: str) -> str:
+        """
+        Construye el prompt para el LLM
+        
+        Usa el formato Mistral/ChatML
+        """
+        categories_str = ", ".join(CATEGORIES)
+        
+        # Prompt optimizado para clasificación
+        prompt = f"""<s>[INST] Eres un asistente experto en clasificación de noticias.
+
+Tu tarea es categorizar la siguiente noticia en UNA de estas categorías:
+{categories_str}
+
+Reglas:
+1. Responde SOLO con el nombre de la categoría
+2. Elige la categoría que MEJOR represente el contenido principal
+3. Si no estás seguro, usa "Otros"
+
+Noticia:
+Título: {titulo}
+Contenido: {resumen[:500]}
+
+Categoría: [/INST]"""
+        
+        return prompt
+    
+    def _parse_response(self, output: str) -> tuple:
+        """
+        Parsea la respuesta del LLM
+        
+        Returns:
+            (categoria, confianza)
+        """
+        # Limpiar respuesta
+        response = output.strip()
+        
+        # Buscar la categoría en la respuesta
+        for cat in CATEGORIES:
+            if cat.lower() in response.lower():
+                # Confianza simple basada en si es exacta
+                confianza = 0.9 if cat in response else 0.7
+                return cat, confianza
+        
+        # Si no se encuentra, usar "Otros"
+        return "Otros", 0.5
+
+
+def get_db_connection():
+    """Obtiene conexión a la base de datos"""
+    return psycopg2.connect(**DB_CONFIG)
+
+
+def initialize_schema(conn):
+    """
+    Asegura que existan las tablas necesarias
+    """
+    log.info("Verificando esquema de base de datos...")
+    
+    with conn.cursor() as cur:
+        # Agregar columnas si no existen
+        cur.execute("""
+            ALTER TABLE noticias 
+            ADD COLUMN IF NOT EXISTS llm_categoria VARCHAR(100),
+            ADD COLUMN IF NOT EXISTS llm_confianza FLOAT,
+            ADD COLUMN IF NOT EXISTS llm_processed BOOLEAN DEFAULT FALSE,
+            ADD COLUMN IF NOT EXISTS llm_processed_at TIMESTAMP;
+        """)
+        
+        # Crear índice para procesamiento eficiente
+        cur.execute("""
+            CREATE INDEX IF NOT EXISTS idx_noticias_llm_processed 
+            ON noticias(llm_processed) 
+            WHERE llm_processed = FALSE;
+        """)
+        
+        conn.commit()
+    
+    log.info("✓ Esquema verificado")
+
+
+def fetch_unprocessed_news(conn, limit: int = 10) -> List[Dict]:
+    """
+    Obtiene noticias sin procesar, agrupadas por feed_id
+    
+    Estrategia:
+    1. Obtiene una muestra de feeds con noticias pendientes
+    2. Selecciona un feed aleatorio de esa muestra
+    3. Obtiene hasta 'limit' noticias de ese feed específico
+    
+    Args:
+        conn: Conexión a la base de datos
+        limit: Número máximo de noticias a obtener
+        
+    Returns:
+        Lista de diccionarios con noticias
+    """
+    import random
+    
+    with conn.cursor() as cur:
+        # Paso 1: Identificar feeds candidatos
+        # Tomamos una muestra de las noticias más recientes pendientes
+        cur.execute("""
+            SELECT feed_id
+            FROM noticias
+            WHERE llm_processed = FALSE
+            ORDER BY fecha DESC
+            LIMIT 100
+        """)
+        
+        candidates = cur.fetchall()
+        
+        if not candidates:
+            return []
+            
+        # Extraer IDs únicos de feeds y elegir uno al azar
+        # Esto evita que un solo feed sature el worker (Round Robin pseudo-aleatorio)
+        unique_feeds = list(set(r[0] for r in candidates if r[0] is not None))
+        
+        if not unique_feeds:
+            return []
+            
+        target_feed_id = random.choice(unique_feeds)
+        
+        # Paso 2: Obtener lote del feed seleccionado
+        cur.execute("""
+            SELECT id, titulo, resumen
+            FROM noticias
+            WHERE llm_processed = FALSE
+              AND feed_id = %s
+              AND titulo IS NOT NULL
+              AND resumen IS NOT NULL
+            ORDER BY fecha DESC
+            LIMIT %s
+        """, (target_feed_id, limit))
+        
+        rows = cur.fetchall()
+        
+        log.info(f"Seleccionado feed_id {target_feed_id} para procesamiento ({len(rows)} items)")
+    
+    return [
+        {
+            'id': row[0],
+            'titulo': row[1],
+            'resumen': row[2]
+        }
+        for row in rows
+    ]
+
+
+def update_categorizations(conn, results: List[Dict]):
+    """
+    Actualiza las categorizaciones en la base de datos
+    
+    Args:
+        conn: Conexión a la base de datos
+        results: Lista de resultados de categorización
+    """
+    if not results:
+        return
+    
+    with conn.cursor() as cur:
+        # Preparar datos para update
+        update_data = [
+            (
+                r['categoria'],
+                r['confianza'],
+                r['id']
+            )
+            for r in results
+        ]
+        
+        # Actualizar en lote
+        execute_values(cur, """
+            UPDATE noticias AS n
+            SET 
+                llm_categoria = v.categoria,
+                llm_confianza = v.confianza,
+                llm_processed = TRUE,
+                llm_processed_at = NOW()
+            FROM (VALUES %s) AS v(categoria, confianza, id)
+            WHERE n.id = v.id
+        """, update_data)
+        
+        conn.commit()
+    
+    log.info(f"✓ Actualizadas {len(results)} noticias")
+
+
+def main():
+    """Main loop del worker"""
+    log.info("=== Iniciando LLM Categorizer Worker ===")
+    log.info(f"Batch size: {BATCH_SIZE}")
+    log.info(f"Model path: {MODEL_PATH}")
+    
+    # Verificar que existe el modelo
+    if not os.path.exists(os.path.join(MODEL_PATH, "config.json")):
+        log.error(f"❌ Error: No se encuentra el modelo (config.json) en {MODEL_PATH}")
+        log.error(f"Por favor descarga un modelo compatible (ej: Mistral-7B-Instruct-v0.2-GPTQ)")
+        log.error(f"Ejecuta: ./scripts/download_llm_model.sh")
+        # Dormir para no saturar logs si reinicia rápido
+        time.sleep(60)
+        sys.exit(1)
+    
+    # Inicializar esquema de base de datos
+    try:
+        with get_db_connection() as conn:
+            initialize_schema(conn)
+    except Exception as e:
+        log.error(f"❌ Error inicializando esquema: {e}")
+        sys.exit(1)
+    
+    # Cargar modelo
+    try:
+        categorizer = ExLlamaV2Categorizer(MODEL_PATH)
+    except Exception as e:
+        log.error(f"❌ Error cargando modelo: {e}")
+        sys.exit(1)
+    
+    log.info("✓ Worker inicializado correctamente")
+    log.info("Entrando en loop principal...")
+    
+    # Main loop
+    while True:
+        try:
+            with get_db_connection() as conn:
+                # Obtener noticias sin procesar
+                news_items = fetch_unprocessed_news(conn, BATCH_SIZE)
+                
+                if not news_items:
+                    log.debug(f"No hay noticias pendientes. Esperando {SLEEP_IDLE}s...")
+                    time.sleep(SLEEP_IDLE)
+                    continue
+                
+                log.info(f"Procesando {len(news_items)} noticias...")
+                
+                # Categorizar
+                results = categorizer.categorize_news(news_items)
+                
+                # Actualizar base de datos
+                update_categorizations(conn, results)
+                
+                # Estadísticas
+                categories_count = {}
+                for r in results:
+                    cat = r['categoria']
+                    categories_count[cat] = categories_count.get(cat, 0) + 1
+                
+                log.info(f"Distribución: {categories_count}")
+                
+                # Si procesamos el lote completo, continuar inmediatamente
+                if len(news_items) < BATCH_SIZE:
+                    time.sleep(SLEEP_IDLE)
+                    
+        except KeyboardInterrupt:
+            log.info("Deteniendo worker...")
+            break
+        except Exception as e:
+            log.exception(f"❌ Error en loop principal: {e}")
+            time.sleep(SLEEP_IDLE)
+    
+    log.info("Worker finalizado")
+
+
+if __name__ == "__main__":
+    main()
--- a/workers/simple_categorizer_worker.py
+++ b/workers/simple_categorizer_worker.py
@ -0,0 +1,226 @@
+#!/usr/bin/env python3
+"""
+Simple Categorizer Worker - Categoriza noticias usando keywords
+Procesa 10 noticias por feed de manera balanceada
+"""
+
+import os
+import sys
+import time
+import logging
+import random
+import psycopg2
+from psycopg2.extras import execute_values
+from typing import List, Dict
+
+logging.basicConfig(
+    level=logging.INFO,
+    format='[simple_categorizer] %(asctime)s %(levelname)s: %(message)s'
+)
+log = logging.getLogger(__name__)
+
+DB_CONFIG = {
+    "host": os.environ.get("DB_HOST", "localhost"),
+    "port": int(os.environ.get("DB_PORT", 5432)),
+    "dbname": os.environ.get("DB_NAME", "rss"),
+    "user": os.environ.get("DB_USER", "rss"),
+    "password": os.environ.get("DB_PASS", ""),
+}
+
+BATCH_SIZE = int(os.environ.get("CATEGORIZER_BATCH_SIZE", 10))
+SLEEP_IDLE = int(os.environ.get("CATEGORIZER_SLEEP_IDLE", 5))
+
+# Mapeo de keywords a categorías
+CATEGORY_KEYWORDS = {
+    "Ciencia": ["científico", "investigación", "estudio", "descubrimiento", "laboratorio", "experimento", "universidad", "研究", "science"],
+    "Cultura": ["museo", "arte", "exposición", "artista", "cultura", "patrimonio", "文化", "culture"],
+    "Deportes": ["fútbol", "deporte", "equipo", "partido", "jugador", "liga", "campeonato", "运动", "sport", "football"],
+    "Economía": ["economía", "mercado", "empresa", "inversión", "bolsa", "financiero", "banco", "经济", "economy"],
+    "Educación": ["educación", "escuela", "universidad", "estudiante", "profesor", "教育", "education"],
+    "Entretenimiento": ["cine", "película", "actor", "música", "concierto", "娱乐", "entertainment", "film"],
+    "Internacional": ["internacional", "país", "gobierno", "ministro", "外交", "international", "foreign"],
+    "Medio Ambiente": ["clima", "ambiental", "contaminación", "ecología", "sostenible", "环境", "environment", "climate"],
+    "Política": ["político", "gobierno", "presidente", "ministro", "parlamento", "elecciones", "政治", "politics"],
+    "Salud": ["salud", "hospital", "médico", "enfermedad", "tratamiento", "健康", "health"],
+    "Sociedad": ["social", "comunidad", "ciudadano", "población", "社会", "society"],
+    "Tecnología": ["tecnología", "digital", "software", "internet", "app", "技术", "technology", "tech"],
+}
+
+
+def get_db_connection():
+    return psycopg2.connect(**DB_CONFIG)
+
+
+def categorize_by_keywords(titulo: str, resumen: str) -> tuple:
+    """
+    Returns: (category_name, confidence)
+    """
+    text = f"{titulo} {resumen}".lower()
+    
+    scores = {}
+    for category, keywords in CATEGORY_KEYWORDS.items():
+        score = sum(1 for kw in keywords if kw.lower() in text)
+        if score > 0:
+            scores[category] = score
+    
+    if not scores:
+        return "Sociedad", 0.3  # Default
+    
+    best_category = max(scores, key=scores.get)
+    max_score = scores[best_category]
+    confidence = min(0.95, 0.5 + (max_score * 0.1))
+    
+    return best_category, confidence
+
+
+def fetch_unprocessed_news(conn, limit: int = 10) -> List[Dict]:
+    """Obtiene noticias que tienen traducción al español pero no han sido categorizadas"""
+    with conn.cursor() as cur:
+        # Obtener fuentes con noticias pendientes que tengan traducción 'done' en español
+        cur.execute("""
+            SELECT n.fuente_nombre
+            FROM noticias n
+            JOIN traducciones t ON n.id = t.noticia_id
+            WHERE n.llm_processed = FALSE 
+              AND t.lang_to = 'es'
+              AND t.status = 'done'
+            ORDER BY n.fecha DESC
+            LIMIT 100
+        """)
+        
+        candidates = cur.fetchall()
+        if not candidates:
+            return []
+        
+        unique_sources = list(set(r[0] for r in candidates if r[0] is not None))
+        if not unique_sources:
+            return []
+        
+        target_source = random.choice(unique_sources)
+        
+        # Obtener lote de la fuente seleccionada usando el texto traducido
+        cur.execute("""
+            SELECT n.id, t.titulo_trad, t.resumen_trad
+            FROM noticias n
+            JOIN traducciones t ON n.id = t.noticia_id
+            WHERE n.llm_processed = FALSE
+              AND n.fuente_nombre = %s
+              AND t.lang_to = 'es'
+              AND t.status = 'done'
+              AND t.titulo_trad IS NOT NULL
+            ORDER BY n.fecha DESC
+            LIMIT %s
+        """, (target_source, limit))
+        
+        rows = cur.fetchall()
+        log.info(f"Seleccionada fuente '{target_source}' → {len(rows)} items (USANDO TRADUCCIÓN ES)")
+    
+    return [{'id': r[0], 'titulo': r[1], 'resumen': r[2]} for r in rows]
+
+
+def update_categorizations(conn, results: List[Dict]):
+    """Actualiza las categorizaciones"""
+    if not results:
+        return
+    
+    with conn.cursor() as cur:
+        update_data = [
+            (r['categoria'], r['confianza'], r['id'])
+            for r in results
+        ]
+        
+        execute_values(cur, """
+            UPDATE noticias AS n
+            SET 
+                llm_categoria = v.categoria,
+                llm_confianza = v.confianza,
+                llm_processed = TRUE,
+                llm_processed_at = NOW()
+            FROM (VALUES %s) AS v(categoria, confianza, id)
+            WHERE n.id = v.id
+        """, update_data)
+        
+        conn.commit()
+    
+    log.info(f"✓ {len(results)} noticias categorizadas")
+
+
+def main():
+    log.info("=== Simple Categorizer Worker ===")
+    log.info(f"Batch: {BATCH_SIZE} | Sleep: {SLEEP_IDLE}s")
+    
+    # Inicializar esquema
+    try:
+        log.info("Conectando a la base de datos para verificar esquema...")
+        with get_db_connection() as conn:
+            log.info("Conexión establecida. Verificando columnas...")
+            with conn.cursor() as cur:
+                cur.execute("""
+                    ALTER TABLE noticias 
+                    ADD COLUMN IF NOT EXISTS llm_categoria VARCHAR(100),
+                    ADD COLUMN IF NOT EXISTS llm_confianza FLOAT,
+                    ADD COLUMN IF NOT EXISTS llm_processed BOOLEAN DEFAULT FALSE,
+                    ADD COLUMN IF NOT EXISTS llm_processed_at TIMESTAMP;
+                """)
+                cur.execute("""
+                    CREATE INDEX IF NOT EXISTS idx_noticias_llm_processed 
+                    ON noticias(llm_processed) 
+                    WHERE llm_processed = FALSE;
+                """)
+                conn.commit()
+        log.info("✓ Esquema verificado")
+    except Exception as e:
+        log.error(f"❌ Error inicializando: {e}")
+        sys.exit(1)
+    
+    log.info("Entrando en loop principal...")
+    
+    while True:
+        try:
+            with get_db_connection() as conn:
+                news_items = fetch_unprocessed_news(conn, BATCH_SIZE)
+                
+                if not news_items:
+                    log.debug(f"Sin noticias pendientes. Sleep {SLEEP_IDLE}s...")
+                    time.sleep(SLEEP_IDLE)
+                    continue
+                
+                log.info(f"Procesando {len(news_items)} noticias...")
+                
+                results = []
+                for item in news_items:
+                    categoria, confianza = categorize_by_keywords(
+                        item['titulo'], 
+                        item['resumen']
+                    )
+                    results.append({
+                        'id': item['id'],
+                        'categoria': categoria,
+                        'confianza': confianza
+                    })
+                
+                update_categorizations(conn, results)
+                
+                # Estadísticas
+                stats = {}
+                for r in results:
+                    cat = r['categoria']
+                    stats[cat] = stats.get(cat, 0) + 1
+                
+                log.info(f"Distribución: {stats}")
+                
+                if len(news_items) < BATCH_SIZE:
+                    time.sleep(SLEEP_IDLE)
+                    
+        except KeyboardInterrupt:
+            log.info("Deteniendo worker...")
+            break
+        except Exception as e:
+            log.exception(f"❌ Error: {e}")
+            time.sleep(SLEEP_IDLE)
+    
+    log.info("Worker finalizado")
+
+
+if __name__ == "__main__":
+    main()
--- a/workers/translation_worker.py
+++ b/workers/translation_worker.py
@ -73,7 +73,7 @@ MAX_NEW_TOKENS_TITLE = _env_int("MAX_NEW_TOKENS_TITLE", 96)
 MAX_NEW_TOKENS_BODY = _env_int("MAX_NEW_TOKENS_BODY", 512)

 NUM_BEAMS_TITLE = _env_int("NUM_BEAMS_TITLE", 2)
-NUM_BEAMS_BODY = _env_int("NUM_BEAMS_BODY", 1)
+NUM_BEAMS_BODY = _env_int("NUM_BEAMS_BODY", 2)

 # HuggingFace model name (used for tokenizer)
 UNIVERSAL_MODEL = _env_str("UNIVERSAL_MODEL", "facebook/nllb-200-distilled-600M")
@ -304,6 +304,8 @@ def _translate_texts(src, tgt, texts, beams, max_new_tokens):
        target_prefix=target_prefix,
        beam_size=beams,
        max_decoding_length=max_new,
+        repetition_penalty=1.1,
+        no_repeat_ngram_size=4,
    )
    dt = time.time() - start