Update of 2025-06-18 at 17:42:26
This commit is contained in:
parent 78c01fd61b
commit e7c3433f0d
2 changed files with 2 additions and 78 deletions
app.py: 77 changes
@@ -4,7 +4,7 @@ import hashlib
 import csv
 import math
 from io import StringIO, BytesIO
-from datetime import datetime, timedelta
+from datetime import datetime
 import logging
 import atexit
 import zipfile
@@ -91,10 +91,8 @@ def home():
continentes = cursor.fetchall()
cursor.execute("SELECT id, nombre, continente_id FROM paises ORDER BY nombre")
paises = cursor.fetchall()

sql_params, conditions = [], []
sql_base = "SELECT n.fecha, n.titulo, n.resumen, n.url, n.imagen_url, n.fuente_nombre, c.nombre AS categoria, p.nombre AS pais, co.nombre AS continente FROM noticias n LEFT JOIN categorias c ON n.categoria_id = c.id LEFT JOIN paises p ON n.pais_id = p.id LEFT JOIN continentes co ON p.continente_id = co.id"

if q:
    search_query = " & ".join(q.split())
    conditions.append("n.tsv @@ to_tsquery('spanish', %s)")
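The search handling above turns the user's query into a PostgreSQL tsquery by joining the words with " & " and filtering on the precomputed n.tsv column. A minimal sketch of the pattern; the sql_params.append(search_query) that pairs with the placeholder falls outside this hunk, so it is assumed here:

    # Sketch of the full-text-search condition built in the hunk above.
    def build_search_condition(q):
        sql_params, conditions = [], []
        if q:
            search_query = " & ".join(q.split())  # "crisis energia" -> "crisis & energia"
            conditions.append("n.tsv @@ to_tsquery('spanish', %s)")
            sql_params.append(search_query)       # assumed companion line, not shown above
        return conditions, sql_params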
@@ -109,26 +107,20 @@ def home():
        sql_params.append(fecha_obj.date())
    except ValueError:
        flash("Formato de fecha no válido. Use AAAA-MM-DD.", "error")

    if conditions: sql_base += " WHERE " + " AND ".join(conditions)

    order_clause = " ORDER BY n.fecha DESC NULLS LAST"
    if q:
        search_query_ts = " & ".join(q.split())
        order_clause = " ORDER BY ts_rank(n.tsv, to_tsquery('spanish', %s)) DESC, n.fecha DESC"
        sql_params.append(search_query_ts)

    sql_final = sql_base + order_clause + " LIMIT 50"
    cursor.execute(sql_final, tuple(sql_params))
    noticias = cursor.fetchall()

except psycopg2.Error as db_err:
    app.logger.error(f"[DB ERROR] Al leer noticias: {db_err}", exc_info=True)
    flash("Error de base de datos al cargar las noticias.", "error")

if request.headers.get('X-Requested-With') == 'XMLHttpRequest':
    return render_template('_noticias_list.html', noticias=noticias)

return render_template("noticias.html",
    noticias=noticias, categorias=categorias, continentes=continentes, paises=paises,
    cat_id=int(cat_id) if cat_id else None, cont_id=int(cont_id) if cont_id else None,
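When a search term is present, the ORDER BY above switches from plain recency to ts_rank relevance, and the extra tsquery parameter is appended after the WHERE parameters so it lines up with the last %s placeholder. A compact restatement of that assembly logic, as a sketch:

    # Sketch: final query assembly as in the hunk above. Parameter order matters:
    # the ts_rank placeholder comes last, so its value is appended last.
    def assemble_query(sql_base, conditions, sql_params, q):
        if conditions:
            sql_base += " WHERE " + " AND ".join(conditions)
        order_clause = " ORDER BY n.fecha DESC NULLS LAST"
        if q:
            order_clause = (" ORDER BY ts_rank(n.tsv, to_tsquery('spanish', %s)) DESC,"
                            " n.fecha DESC")
            sql_params.append(" & ".join(q.split()))
        return sql_base + order_clause + " LIMIT 50", tuple(sql_params)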
@@ -151,9 +143,6 @@ def dashboard():
flash("Error al conectar con la base de datos.", "error")
return render_template("dashboard.html", stats=stats)


# --- GESTIÓN DE FEEDS ---

@app.route("/feeds/manage")
def manage_feeds():
    page = request.args.get('page', 1, type=int)
@@ -191,7 +180,6 @@ def add_feed():
app.logger.error(f"[DB ERROR] Al agregar feed: {db_err}", exc_info=True)
flash(f"Error al añadir el feed: {db_err}", "error")
return redirect(url_for("manage_feeds"))

categorias, paises = [], []
try:
    with get_conn() as conn:
@@ -221,7 +209,6 @@ def edit_feed(feed_id):
app.logger.error(f"[DB ERROR] Al actualizar feed: {db_err}", exc_info=True)
flash(f"Error al actualizar el feed: {db_err}", "error")
return redirect(url_for("manage_feeds"))

feed, categorias, paises = None, [], []
try:
    with get_conn() as conn:
@@ -260,9 +247,6 @@ def reactivar_feed(feed_id):
flash(f"Error al reactivar feed: {db_err}", "error")
return redirect(url_for("manage_feeds"))


# --- GESTIÓN DE FUENTES URL ---

@app.route("/urls/manage")
def manage_urls():
    fuentes = []
@@ -295,7 +279,6 @@ def add_url_source():
app.logger.error(f"[DB ERROR] Al agregar fuente URL: {db_err}", exc_info=True)
flash(f"Error al añadir la fuente URL: {db_err}", "error")
return redirect(url_for("manage_urls"))

categorias, paises = [], []
try:
    with get_conn() as conn:
@@ -324,7 +307,6 @@ def edit_url_source(url_id):
app.logger.error(f"[DB ERROR] Al actualizar fuente URL: {db_err}", exc_info=True)
flash(f"Error al actualizar la fuente URL: {db_err}", "error")
return redirect(url_for("manage_urls"))

fuente, categorias, paises = None, [], []
try:
    with get_conn() as conn:
@@ -351,23 +333,13 @@ def delete_url_source(url_id):
flash(f"Error al eliminar la fuente URL: {db_err}", "error")
return redirect(url_for("manage_urls"))


# --- TAREA DE FONDO (CORREGIDA Y REFACTORIZADA) ---

def fetch_and_store_all():
    """
    Tarea de fondo única y cohesiva que recolecta noticias tanto de Feeds RSS como de Fuentes URL,
    y luego actualiza la base de datos en una sola transacción.
    """
    with app.app_context():
        logging.info("--- INICIANDO CICLO DE CAPTURA GLOBAL (RSS y URL) ---")

        todas_las_noticias = []
        feeds_fallidos = []
        feeds_exitosos = []
        feeds_para_actualizar_headers = []

        # --- 1. PROCESAR FEEDS RSS ---
        logging.info("=> Parte 1: Procesando Feeds RSS...")
        feeds_to_process = []
        try:
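fetch_and_store_all runs outside any request, so it wraps its body in app.app_context() to make app.logger and Flask configuration available. The scheduler that triggers it is not visible in this diff; given the atexit import in the first hunk, a BackgroundScheduler setup along these lines is plausible, offered only as a hedged sketch:

    # Hypothetical wiring, NOT shown in this diff: schedule the background
    # job with APScheduler and stop it cleanly on process exit.
    from apscheduler.schedulers.background import BackgroundScheduler
    import atexit

    scheduler = BackgroundScheduler()
    scheduler.add_job(fetch_and_store_all, "interval", minutes=30)  # interval is a guess
    scheduler.start()
    atexit.register(lambda: scheduler.shutdown(wait=False))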
@@ -379,7 +351,6 @@ def fetch_and_store_all():
except psycopg2.Error as db_err:
    logging.error(f"Error de BD al obtener feeds RSS: {db_err}")
    return

if feeds_to_process:
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        future_to_feed = {executor.submit(process_single_feed, dict(feed)): feed for feed in feeds_to_process}
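The future_to_feed dictionary maps each submitted future back to its feed row, which is what lets the except branch in the next hunk attribute a failure to a concrete feed id. The standard fan-out/fan-in shape, sketched with an opaque worker since process_single_feed's return value is not visible in this diff:

    # Sketch of the ThreadPoolExecutor pattern used above.
    from concurrent.futures import ThreadPoolExecutor, as_completed

    def fetch_all_feeds(feeds, worker, max_workers=8):
        results, failed = [], []
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_feed = {executor.submit(worker, dict(f)): f for f in feeds}
            for future in as_completed(future_to_feed):
                feed = future_to_feed[future]
                try:
                    results.append(future.result())  # re-raises worker exceptions
                except Exception:
                    failed.append(feed["id"])        # mirrors feeds_fallidos above
        return results, failed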
@@ -400,11 +371,8 @@ def fetch_and_store_all():
except Exception as exc:
    logging.error(f"Excepción en feed {original_feed_data['url']} (ID: {feed_id}): {exc}")
    feeds_fallidos.append(feed_id)

noticias_desde_rss_count = len(todas_las_noticias)
logging.info(f"=> Parte 1 Finalizada. Noticias desde RSS: {noticias_desde_rss_count}. Éxitos: {len(feeds_exitosos)}. Fallos: {len(feeds_fallidos)}.")

# --- 2. PROCESAR FUENTES URL ---
logging.info("=> Parte 2: Procesando Fuentes URL...")
urls_to_process = []
try:
@@ -415,7 +383,6 @@ def fetch_and_store_all():
    logging.info(f"Encontradas {len(urls_to_process)} fuentes URL para scrapear.")
except Exception as e:
    logging.error(f"Error de BD al obtener fuentes URL: {e}")

if urls_to_process:
    for source in tqdm(urls_to_process, desc="Procesando Fuentes URL"):
        try:
@@ -427,17 +394,13 @@ def fetch_and_store_all():
        todas_las_noticias.extend(noticias_encontradas)
    except Exception as e:
        logging.error(f"Fallo al procesar la fuente URL {source['nombre']}: {e}")

noticias_desde_urls_count = len(todas_las_noticias) - noticias_desde_rss_count
logging.info(f"=> Parte 2 Finalizada. Noticias encontradas desde URLs: {noticias_desde_urls_count}.")

# --- 3. ACTUALIZAR BD ---
logging.info("=> Parte 3: Actualizando la base de datos...")
if not any([todas_las_noticias, feeds_fallidos, feeds_exitosos, feeds_para_actualizar_headers]):
    logging.info("No se encontraron nuevas noticias ni cambios en los feeds. Nada que actualizar.")
    logging.info("--- CICLO DE CAPTURA GLOBAL FINALIZADO ---")
    return

try:
    with get_conn() as conn:
        with conn.cursor() as cursor:
@@ -448,7 +411,6 @@ def fetch_and_store_all():
if feeds_exitosos:
    cursor.execute("UPDATE feeds SET fallos = 0 WHERE id IN %s", (tuple(feeds_exitosos),))
    logging.info(f"Reseteado contador de fallos para {len(feeds_exitosos)} feeds.")

if feeds_para_actualizar_headers:
    psycopg2.extras.execute_values(
        cursor,
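Passing (tuple(feeds_exitosos),) works because psycopg2 adapts a Python tuple to a parenthesized SQL list for IN. The if feeds_exitosos: guard is load-bearing: an empty tuple would render as () and produce a syntax error. The pattern in isolation, as a sketch:

    # Sketch: psycopg2 tuple adaptation for IN clauses.
    def reset_feed_failures(cursor, feed_ids):
        ids = tuple(feed_ids)      # e.g. (3, 7, 12) -> IN (3, 7, 12)
        if ids:                    # an empty tuple would render as "()" -> SQL error
            cursor.execute("UPDATE feeds SET fallos = 0 WHERE id IN %s", (ids,))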
@@ -456,7 +418,6 @@ def fetch_and_store_all():
    [(f['id'], f['etag'], f['modified']) for f in feeds_para_actualizar_headers]
)
logging.info(f"Actualizados headers para {len(feeds_para_actualizar_headers)} feeds.")

if todas_las_noticias:
    logging.info(f"Intentando insertar/ignorar {len(todas_las_noticias)} noticias en total.")
    insert_query = """
@@ -466,16 +427,11 @@ def fetch_and_store_all():
        """
        psycopg2.extras.execute_values(cursor, insert_query, todas_las_noticias, page_size=200)
        logging.info(f"Inserción de noticias finalizada. {cursor.rowcount} filas podrían haber sido afectadas.")

    logging.info("=> Parte 3 Finalizada. Base de datos actualizada correctamente.")
except Exception as e:
    logging.error(f"Error de BD en la actualización masiva final: {e}", exc_info=True)

logging.info("--- CICLO DE CAPTURA GLOBAL FINALIZADO ---")


# --- SECCIÓN DE BACKUPS Y RESTAURACIÓN ---

@app.route("/backup_feeds")
def backup_feeds():
    try:
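The execute_values call in the hunk above sends the collected rows in batches of 200 per statement instead of one round-trip per row. The body of insert_query is truncated in this view; the "insertar/ignorar" log message suggests it ends in an ON CONFLICT ... DO NOTHING clause, so the following sketch fills that in as an assumption:

    # Sketch of the batched insert. The column list and conflict target are
    # assumptions; the diff truncates the real insert_query.
    import psycopg2.extras

    INSERT_QUERY = """
        INSERT INTO noticias (fecha, titulo, resumen, url, imagen_url, fuente_nombre)
        VALUES %s
        ON CONFLICT (url) DO NOTHING
    """

    def bulk_insert_noticias(cursor, rows):
        # One round-trip per 200 rows instead of one execute() per row.
        psycopg2.extras.execute_values(cursor, INSERT_QUERY, rows, page_size=200)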
@@ -486,7 +442,6 @@ def backup_feeds():
if not feeds_:
    flash("No hay feeds para exportar.", "warning")
    return redirect(url_for("dashboard"))

fieldnames = list(feeds_[0].keys())
output = StringIO()
writer = csv.DictWriter(output, fieldnames=fieldnames)
@@ -511,17 +466,14 @@ def backup_urls():
    ORDER BY f.id
""")
fuentes = cursor.fetchall()

if not fuentes:
    flash("No hay fuentes URL para exportar.", "warning")
    return redirect(url_for("dashboard"))

fieldnames = list(fuentes[0].keys())
output = StringIO()
writer = csv.DictWriter(output, fieldnames=fieldnames)
writer.writeheader()
writer.writerows([dict(fuente) for fuente in fuentes])

return Response(
    output.getvalue(),
    mimetype="text/csv",
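All three CSV backup routes share the same shape: build the CSV in memory with StringIO and csv.DictWriter, then return it as a download. The Content-Disposition header is cut off in the hunk above, so the sketch below supplies it as the obvious completion (the filename argument is illustrative):

    # Sketch of the shared CSV-download pattern.
    import csv
    from io import StringIO
    from flask import Response

    def csv_response(rows, filename):
        output = StringIO()
        writer = csv.DictWriter(output, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows([dict(r) for r in rows])
        return Response(
            output.getvalue(),
            mimetype="text/csv",
            headers={"Content-Disposition": f"attachment;filename={filename}"},
        )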
@@ -542,7 +494,6 @@ def backup_noticias():
if not noticias:
    flash("No hay noticias para exportar.", "warning")
    return redirect(url_for("dashboard"))

fieldnames_noticias = list(noticias[0].keys())
output = StringIO()
writer = csv.DictWriter(output, fieldnames=fieldnames_noticias)
@@ -569,7 +520,6 @@ def backup_completo():
writer_feeds.writeheader()
writer_feeds.writerows([dict(f) for f in feeds_data])
zipf.writestr("feeds.csv", output_feeds.getvalue())

cursor.execute("SELECT * FROM fuentes_url ORDER BY id")
fuentes_data = cursor.fetchall()
if fuentes_data:
@@ -578,7 +528,6 @@ def backup_completo():
writer_fuentes.writeheader()
writer_fuentes.writerows([dict(f) for f in fuentes_data])
zipf.writestr("fuentes_url.csv", output_fuentes.getvalue())

cursor.execute("SELECT * FROM noticias ORDER BY fecha DESC")
noticias_data = cursor.fetchall()
if noticias_data:
@@ -587,7 +536,6 @@ def backup_completo():
    writer_noticias.writeheader()
    writer_noticias.writerows([dict(n) for n in noticias_data])
    zipf.writestr("noticias.csv", output_noticias.getvalue())

    memory_buffer.seek(0)
    return Response(memory_buffer, mimetype="application/zip", headers={"Content-Disposition": "attachment;filename=rss_backup_completo.zip"})
except Exception as e:
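backup_completo writes each table's CSV into a single in-memory ZIP via zipf.writestr, then rewinds the buffer before handing it to Response. A self-contained sketch; the compression mode is an assumption, since the diff does not show how the ZipFile is opened:

    # Sketch of the in-memory ZIP download used by backup_completo.
    import zipfile
    from io import BytesIO
    from flask import Response

    def zip_response(named_csvs):  # e.g. {"feeds.csv": "...", "noticias.csv": "..."}
        buf = BytesIO()
        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zipf:  # compression assumed
            for name, data in named_csvs.items():
                zipf.writestr(name, data)
        buf.seek(0)  # rewind so Response streams from the start
        return Response(buf, mimetype="application/zip",
                        headers={"Content-Disposition": "attachment;filename=rss_backup_completo.zip"})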
@@ -649,7 +597,6 @@ def restore_urls():
if not file or not file.filename.endswith(".csv"):
    flash("Archivo no válido. Sube un .csv.", "error")
    return redirect(url_for("restore_urls"))

try:
    file_stream = StringIO(file.read().decode("utf-8", errors='ignore'))
    reader = csv.DictReader(file_stream)
@@ -685,37 +632,15 @@ def restore_urls():
            cursor.execute("ROLLBACK TO SAVEPOINT restore_url_row")
            n_err += 1
            app.logger.error(f"Error procesando fila de fuente URL (se omite): {row} - Error: {e}")

        flash(f"Restauración de Fuentes URL completada. Procesadas: {n_ok}. Errores: {n_err}.", "success" if n_err == 0 else "warning")
    except Exception as e:
        app.logger.error(f"Error al restaurar fuentes URL desde CSV: {e}", exc_info=True)
        flash(f"Ocurrió un error general al procesar el archivo: {e}", "error")

    return redirect(url_for("dashboard"))

return render_template("restore_urls.html")


# --- RUTA DE UTILIDAD PARA PRUEBAS ---
# MOVIDA FUERA DEL BLOQUE if __name__ == '__main__' PARA QUE GUNICORN LA RECONOZCA
@app.route("/run-fetch")
def run_fetch_now():
    """Ejecuta la tarea de recolección manualmente para pruebas."""
    try:
        # Idealmente, esto debería correr en un hilo separado para no bloquear la respuesta,
        # pero para una ejecución manual simple, está bien así.
        fetch_and_store_all()
        flash("Tarea de fondo de recolección ejecutada manualmente.", "info")
    except Exception as e:
        flash(f"Error al ejecutar la tarea de fondo: {e}", "error")
        app.logger.error(f"Error en la ejecución manual de la tarea de fondo: {e}", exc_info=True)
    return redirect(url_for('dashboard'))


if __name__ == "__main__":
    if not db_pool:
        app.logger.error("La aplicación no puede arrancar sin una conexión a la base de datos.")
        sys.exit(1)

    # El app.run solo se usa para el desarrollo local. Gunicorn no ejecuta esta parte.
    app.run(host="0.0.0.0", port=8000, debug=True)
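The restore_urls hunk above wraps each imported CSV row in a savepoint: a row that fails is rolled back to restore_url_row and counted as an error, while the surrounding transaction and the remaining rows continue. The pattern in isolation; the RELEASE on success is assumed, since the hunk only shows the failure path:

    # Sketch of the per-row SAVEPOINT pattern from restore_urls.
    def import_rows(cursor, rows, insert_one):
        n_ok = n_err = 0
        for row in rows:
            cursor.execute("SAVEPOINT restore_url_row")
            try:
                insert_one(cursor, row)
                cursor.execute("RELEASE SAVEPOINT restore_url_row")  # assumed on success
                n_ok += 1
            except Exception:
                cursor.execute("ROLLBACK TO SAVEPOINT restore_url_row")
                n_err += 1
        return n_ok, n_err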