Preparar repositorio para despliegue: código fuente limpio

This commit is contained in:
jlimolina 2026-01-23 02:00:40 +01:00
parent 866f5c432d
commit 3eca832c1a
76 changed files with 5434 additions and 3496 deletions

View file

@@ -6,14 +6,14 @@ from utils.auth import get_current_user
from config import DEFAULT_TRANSLATION_LANG, DEFAULT_LANG, NEWS_PER_PAGE_DEFAULT
from models.categorias import get_categorias
from models.paises import get_paises
from models.noticias import buscar_noticias, buscar_noticias_semantica
from cache import cached
from models.noticias import buscar_noticias
home_bp = Blueprint("home", __name__)
@home_bp.route("/")
@home_bp.route("/home")
def home():
"""Simplified home page to avoid timeouts."""
page = max(int(request.args.get("page", 1)), 1)
per_page = int(request.args.get("per_page", NEWS_PER_PAGE_DEFAULT))
per_page = min(max(per_page, 10), 100)
@@ -27,7 +26,6 @@ def home():
lang = (request.args.get("lang") or DEFAULT_TRANSLATION_LANG or DEFAULT_LANG).lower()[:5]
use_tr = not bool(request.args.get("orig"))
fecha_str = request.args.get("fecha") or ""
fecha_filtro = None
if fecha_str:
try:
@@ -35,129 +34,28 @@ def home():
except ValueError:
fecha_filtro = None
from utils.qdrant_search import semantic_search
# Búsqueda semántica solo si se solicita explícitamente y hay query
use_semantic = bool(request.args.get("semantic")) and bool(q)
# Semantic search defaults to ON whenever a query is present, for better UX.
# The flag is only turned off when the client sends an explicit falsy value
# ("", "false", "0", "off"); a missing parameter means "use the default".
# Note: HTML checkboxes send nothing when unchecked and "on" when checked,
# so an empty string (e.g. from a hidden input) is treated as an explicit
# opt-out rather than as the default.
raw_semantic = request.args.get("semantic")
if raw_semantic is None:
use_semantic = True # Default to semantic if not specified
elif raw_semantic == "" or raw_semantic.lower() in ["false", "0", "off"]:
use_semantic = False
else:
use_semantic = True
with get_read_conn() as conn:
conn.autocommit = True
categorias = get_categorias(conn)
paises = get_paises(conn)
noticias = []
total_results = 0
total_pages = 0
tags_por_tr = {}
# 1. Intentar búsqueda semántica si hay query y está habilitado
semantic_success = False
if use_semantic and q:
try:
# Obtener más resultados para 'llenar' la página si hay IDs no encontrados
limit_fetch = per_page * 2
sem_results = semantic_search(
query=q,
limit=limit_fetch, # Pedimos más para asegurar
score_threshold=0.30
)
if sem_results:
# Extraer IDs
news_ids = [r['news_id'] for r in sem_results]
# Traer datos completos de PostgreSQL (igual que en search.py)
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
query_sql = """
SELECT
n.id,
n.titulo,
n.resumen,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
c.nombre AS categoria,
p.nombre AS pais,
-- traducciones
t.id AS traduccion_id,
t.titulo_trad AS titulo_traducido,
t.resumen_trad AS resumen_traducido,
CASE WHEN t.id IS NOT NULL THEN TRUE ELSE FALSE END AS tiene_traduccion,
-- originales
n.titulo AS titulo_original,
n.resumen AS resumen_original
FROM noticias n
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
LEFT JOIN traducciones t
ON t.noticia_id = n.id
AND t.lang_to = %s
AND t.status = 'done'
WHERE n.id = ANY(%s)
"""
cur.execute(query_sql, (lang, news_ids))
rows = cur.fetchall()
# Convertimos a lista para poder ordenar por fecha
rows_list = list(rows)
# Ordenar cronológicamente (más reciente primero)
sorted_rows = sorted(
rows_list,
key=lambda x: x['fecha'] if x['fecha'] else datetime.min,
reverse=True
)
# Aplicar paginación manual sobre los resultados ordenados
# NOTE: semantic_search already returned the (approximate) global top
# results. Real deep pagination would require scroll/offset support in
# Qdrant, which utils/qdrant_search.py does not implement. Temporary
# workaround: only the first batch of semantic results is shown, so this
# path is only correct for page 1. Deeper pages need either Qdrant offset
# support (preferred) or a fallback to the traditional search.
if len(sorted_rows) > 0:
noticias = sorted_rows
total_results = len(noticias) # Aproximado
total_pages = 1 # Qdrant simple no pagina bien aun
# Extraer tags
tr_ids = [n["traduccion_id"] for n in noticias if n["traduccion_id"]]
from models.noticias import _extraer_tags_por_traduccion
tags_por_tr = _extraer_tags_por_traduccion(cur, tr_ids)
semantic_success = True
except Exception as e:
print(f"⚠️ Error en semántica home, fallback: {e}")
semantic_success = False
# 2. Si no hubo búsqueda semántica (o falló, o no había query, o usuario la desactivó), usar la tradicional
if not semantic_success:
if use_semantic:
from models.noticias import buscar_noticias_semantica
noticias, total_results, total_pages, tags_por_tr = buscar_noticias_semantica(
conn=conn,
page=page,
per_page=per_page,
q=q,
categoria_id=categoria_id,
continente_id=continente_id,
pais_id=pais_id,
fecha=fecha_filtro,
lang=lang,
)
else:
noticias, total_results, total_pages, tags_por_tr = buscar_noticias(
conn=conn,
page=page,
@@ -171,82 +69,22 @@ def home():
use_tr=use_tr,
)
# Record search history for logged-in users (only on first page to avoid dupes)
if (q or categoria_id or pais_id) and page == 1:
# Historial de búsqueda (solo para usuarios logueados y en primera página)
recent_searches_with_results = []
user = get_current_user()
if user:
try:
with get_write_conn() as w_conn:
with w_conn.cursor() as w_cur:
# Check if it's the same as the last search to avoid immediate duplicates
w_cur.execute("""
SELECT query, pais_id, categoria_id
FROM search_history
WHERE user_id = %s
ORDER BY searched_at DESC LIMIT 1
""", (user['id'],))
last_search = w_cur.fetchone()
current_search = (q or None, int(pais_id) if pais_id else None, int(categoria_id) if categoria_id else None)
if not last_search or (last_search[0], last_search[1], last_search[2]) != current_search:
w_cur.execute("""
INSERT INTO search_history (user_id, query, pais_id, categoria_id, results_count)
VALUES (%s, %s, %s, %s, %s)
""", (user['id'], current_search[0], current_search[1], current_search[2], total_results))
w_conn.commit()
except Exception as e:
# Log error but don't break the page load
print(f"Error saving search history: {e}")
pass
user = get_current_user()
recent_searches_with_results = []
if user and not q and not categoria_id and not pais_id and page == 1:
with get_read_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Fetch unique latest searches using DISTINCT ON
cur.execute("""
SELECT sub.id, query, pais_id, categoria_id, results_count, searched_at,
p.nombre as pais_nombre, c.nombre as categoria_nombre
FROM (
SELECT DISTINCT ON (COALESCE(query, ''), COALESCE(pais_id, 0), COALESCE(categoria_id, 0))
id, query, pais_id, categoria_id, results_count, searched_at
FROM search_history
WHERE user_id = %s
ORDER BY COALESCE(query, ''), COALESCE(pais_id, 0), COALESCE(categoria_id, 0), searched_at DESC
) sub
LEFT JOIN paises p ON p.id = sub.pais_id
LEFT JOIN categorias c ON c.id = sub.categoria_id
ORDER BY searched_at DESC
LIMIT 6
""", (user['id'],))
recent_searches = cur.fetchall()
for s in recent_searches:
# Fetch top 6 news for this search
news_items, _, _, _ = buscar_noticias(
conn=conn,
page=1,
per_page=6,
q=s['query'] or "",
pais_id=s['pais_id'],
categoria_id=s['categoria_id'],
lang=lang,
use_tr=use_tr,
skip_count=True
)
recent_searches_with_results.append({
'id': s['id'],
'query': s['query'],
'pais_id': s['pais_id'],
'pais_nombre': s['pais_nombre'],
'categoria_id': s['categoria_id'],
'categoria_nombre': s['categoria_nombre'],
'results_count': s['results_count'],
'searched_at': s['searched_at'],
'noticias': news_items
})
if user and page == 1 and not q:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("""
SELECT sh.id, sh.query, sh.searched_at, sh.results_count,
p.nombre as pais_nombre, c.nombre as categoria_nombre
FROM search_history sh
LEFT JOIN paises p ON p.id = sh.pais_id
LEFT JOIN categorias c ON c.id = sh.categoria_id
WHERE sh.user_id = %s
ORDER BY sh.searched_at DESC
LIMIT 10
""", (user['id'],))
recent_searches_with_results = cur.fetchall()
context = dict(
noticias=noticias,
@@ -259,6 +97,7 @@ def home():
q=q,
cat_id=int(categoria_id) if categoria_id else None,
pais_id=int(pais_id) if pais_id else None,
cont_id=int(continente_id) if continente_id else None,
fecha_filtro=fecha_str,
lang=lang,
use_tr=use_tr,
@@ -282,7 +121,6 @@ def delete_search(search_id):
try:
with get_write_conn() as conn:
with conn.cursor() as cur:
# Direct deletion ensuring ownership
cur.execute(
"DELETE FROM search_history WHERE id = %s AND user_id = %s",
(search_id, user["id"])