Actualización del 2025-06-18 a las 17:08:45

2025-06-18 17:08:45 +02:00 · 2025-06-18 17:08:45 +02:00 · 78c01fd61b
commit 78c01fd61b
parent eb72ec9e56
7 changed files with 368 additions and 239 deletions
--- a/app.py
+++ b/app.py
@@ -93,7 +93,6 @@ def home():
                paises = cursor.fetchall()
                sql_params, conditions = [], []
                # --- CORRECCIÓN: SE AÑADE 'fuente_nombre' AL SELECT ---
                sql_base = "SELECT n.fecha, n.titulo, n.resumen, n.url, n.imagen_url, n.fuente_nombre, c.nombre AS categoria, p.nombre AS pais, co.nombre AS continente FROM noticias n LEFT JOIN categorias c ON n.categoria_id = c.id LEFT JOIN paises p ON n.pais_id = p.id LEFT JOIN continentes co ON p.continente_id = co.id"
                if q:
@@ -352,69 +351,131 @@ def delete_url_source(url_id):
        flash(f"Error al eliminar la fuente URL: {db_err}", "error")
    return redirect(url_for("manage_urls"))
 # --- PROCESAMIENTO DE URLS ---
-@app.route("/scrape_url", methods=['GET', 'POST'])
+# --- TAREA DE FONDO (CORREGIDA Y REFACTORIZADA) ---
 def scrape_url():
    if request.method == 'POST':
        source_id = request.form.get("source_id")
        if not source_id:
            flash("Debes seleccionar una fuente para procesar.", "error")
            return redirect(url_for('scrape_url'))
-        source = None
+def fetch_and_store_all():
    """
    Tarea de fondo única y cohesiva que recolecta noticias tanto de Feeds RSS como de Fuentes URL,
    y luego actualiza la base de datos en una sola transacción.
    """
    with app.app_context():
        logging.info("--- INICIANDO CICLO DE CAPTURA GLOBAL (RSS y URL) ---")
        todas_las_noticias = []
        feeds_fallidos = []
        feeds_exitosos = []
        feeds_para_actualizar_headers = []
        # --- 1. PROCESAR FEEDS RSS ---
        logging.info("=> Parte 1: Procesando Feeds RSS...")
        feeds_to_process = []
        try:
            with get_conn() as conn:
                with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
-                    cursor.execute("SELECT * FROM fuentes_url WHERE id = %s", (source_id,))
+                    cursor.execute("SELECT id, nombre, url, categoria_id, pais_id, last_etag, last_modified FROM feeds WHERE activo = TRUE")
-                    source = cursor.fetchone()
+                    feeds_to_process = cursor.fetchall()
                    logging.info(f"Encontrados {len(feeds_to_process)} feeds RSS activos para procesar.")
        except psycopg2.Error as db_err:
-            app.logger.error(f"[DB ERROR] Al buscar fuente URL: {db_err}", exc_info=True)
+            logging.error(f"Error de BD al obtener feeds RSS: {db_err}")
-            flash("Error de base de datos al buscar la fuente.", "error")
+            return
            return redirect(url_for('scrape_url'))
-        if not source:
+        if feeds_to_process:
-            flash("La fuente seleccionada no existe.", "error")
+            with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
-            return redirect(url_for('scrape_url'))
+                future_to_feed = {executor.submit(process_single_feed, dict(feed)): feed for feed in feeds_to_process}
                for future in tqdm(as_completed(future_to_feed), total=len(feeds_to_process), desc="Procesando Feeds RSS"):
                    original_feed_data = future_to_feed[future]
                    feed_id = original_feed_data['id']
                    try:
                        _, noticias_encontradas, new_etag, new_modified, success = future.result(timeout=SINGLE_FEED_TIMEOUT)
                        if success:
                            feeds_exitosos.append(feed_id)
                            if noticias_encontradas:
                                todas_las_noticias.extend(noticias_encontradas)
                            if (new_etag and new_etag != original_feed_data.get('last_etag')) or \
                               (new_modified and new_modified != original_feed_data.get('last_modified')):
                                feeds_para_actualizar_headers.append({'id': feed_id, 'etag': new_etag, 'modified': new_modified})
                        else:
                            feeds_fallidos.append(feed_id)
                    except Exception as exc:
                        logging.error(f"Excepción en feed {original_feed_data['url']} (ID: {feed_id}): {exc}")
                        feeds_fallidos.append(feed_id)
-        lista_noticias, message = process_newspaper_url(source['nombre'], source['url'], source['categoria_id'], source['pais_id'], source['idioma'])
+        noticias_desde_rss_count = len(todas_las_noticias)
        logging.info(f"=> Parte 1 Finalizada. Noticias desde RSS: {noticias_desde_rss_count}. Éxitos: {len(feeds_exitosos)}. Fallos: {len(feeds_fallidos)}.")
        # --- 2. PROCESAR FUENTES URL ---
        logging.info("=> Parte 2: Procesando Fuentes URL...")
        urls_to_process = []
        try:
            with get_conn() as conn:
                with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
                    cursor.execute("SELECT * FROM fuentes_url")
                    urls_to_process = cursor.fetchall()
                    logging.info(f"Encontradas {len(urls_to_process)} fuentes URL para scrapear.")
        except Exception as e:
            logging.error(f"Error de BD al obtener fuentes URL: {e}")
        if urls_to_process:
            for source in tqdm(urls_to_process, desc="Procesando Fuentes URL"):
                try:
                    noticias_encontradas, _ = process_newspaper_url(
                        source['nombre'], source['url'], source['categoria_id'],
                        source['pais_id'], source['idioma']
                    )
                    if noticias_encontradas:
                        todas_las_noticias.extend(noticias_encontradas)
                except Exception as e:
                    logging.error(f"Fallo al procesar la fuente URL {source['nombre']}: {e}")
            noticias_desde_urls_count = len(todas_las_noticias) - noticias_desde_rss_count
            logging.info(f"=> Parte 2 Finalizada. Noticias encontradas desde URLs: {noticias_desde_urls_count}.")
        # --- 3. ACTUALIZAR BD ---
        logging.info("=> Parte 3: Actualizando la base de datos...")
        if not any([todas_las_noticias, feeds_fallidos, feeds_exitosos, feeds_para_actualizar_headers]):
            logging.info("No se encontraron nuevas noticias ni cambios en los feeds. Nada que actualizar.")
            logging.info("--- CICLO DE CAPTURA GLOBAL FINALIZADO ---")
            return
        if lista_noticias:
        try:
            with get_conn() as conn:
                with conn.cursor() as cursor:
                    if feeds_fallidos:
                        cursor.execute("UPDATE feeds SET fallos = fallos + 1 WHERE id IN %s", (tuple(feeds_fallidos),))
                        cursor.execute("UPDATE feeds SET activo = FALSE WHERE fallos >= %s AND id IN %s", (MAX_FALLOS, tuple(feeds_fallidos)))
                        logging.info(f"Incrementado contador de fallos para {len(feeds_fallidos)} feeds.")
                    if feeds_exitosos:
                        cursor.execute("UPDATE feeds SET fallos = 0 WHERE id IN %s", (tuple(feeds_exitosos),))
                        logging.info(f"Reseteado contador de fallos para {len(feeds_exitosos)} feeds.")
                    if feeds_para_actualizar_headers:
                        psycopg2.extras.execute_values(
                            cursor,
                            "UPDATE feeds SET last_etag = data.etag, last_modified = data.modified FROM (VALUES %s) AS data(id, etag, modified) WHERE feeds.id = data.id",
                            [(f['id'], f['etag'], f['modified']) for f in feeds_para_actualizar_headers]
                        )
                        logging.info(f"Actualizados headers para {len(feeds_para_actualizar_headers)} feeds.")
                    if todas_las_noticias:
                        logging.info(f"Intentando insertar/ignorar {len(todas_las_noticias)} noticias en total.")
                        insert_query = """
                            INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url, fuente_nombre, categoria_id, pais_id)
                            VALUES %s
-                            ON CONFLICT (url) DO UPDATE SET
+                            ON CONFLICT (url) DO NOTHING;
                                titulo = EXCLUDED.titulo,
                                resumen = EXCLUDED.resumen,
                                fecha = EXCLUDED.fecha,
                                imagen_url = EXCLUDED.imagen_url;
                        """
-                        psycopg2.extras.execute_values(cursor, insert_query, lista_noticias)
+                        psycopg2.extras.execute_values(cursor, insert_query, todas_las_noticias, page_size=200)
-                flash(f"Se encontraron y guardaron {len(lista_noticias)} noticias desde '{source['nombre']}'.", "success")
+                        logging.info(f"Inserción de noticias finalizada. {cursor.rowcount} filas podrían haber sido afectadas.")
                return redirect(url_for("home"))
            except psycopg2.Error as db_err:
                app.logger.error(f"[DB ERROR] Al insertar noticias scrapeadas: {db_err}", exc_info=True)
                flash(f"Error de base de datos al guardar las noticias: {db_err}", "error")
        else:
            flash(message, "warning")
-        return redirect(url_for('scrape_url'))
+            logging.info("=> Parte 3 Finalizada. Base de datos actualizada correctamente.")
        except Exception as e:
            logging.error(f"Error de BD en la actualización masiva final: {e}", exc_info=True)
-    fuentes = []
+        logging.info("--- CICLO DE CAPTURA GLOBAL FINALIZADO ---")
    try:
        with get_conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
                cursor.execute("SELECT id, nombre FROM fuentes_url ORDER BY nombre")
                fuentes = cursor.fetchall()
    except psycopg2.Error as db_err:
        flash("Error al cargar las fuentes de URL.", "error")
    return render_template("scrape_url.html", fuentes=fuentes)
-# --- BACKUP Y RESTORE ---
+# --- SECCIÓN DE BACKUPS Y RESTAURACIÓN ---
@app.route("/backup_feeds")
 def backup_feeds():
    try:
@@ -437,6 +498,40 @@ def backup_feeds():
        flash(f"Error interno al generar el backup: {e}", "error")
        return redirect(url_for("dashboard"))
@app.route("/backup_urls")
 def backup_urls():
    """Export the ``fuentes_url`` table as a downloadable CSV file.

    Reads every URL source together with the names of its category and
    country, and returns them as a ``text/csv`` attachment named
    ``fuentes_url_backup.csv``.

    Returns:
        A CSV ``Response`` when at least one source exists. Otherwise (no
        rows, or any exception while querying/serializing) a message is
        flashed and the user is redirected to the ``dashboard`` endpoint.
    """
    try:
        with get_conn() as conn:
            # DictCursor rows expose their column names through keys(), which
            # is what the CSV header below is built from.
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
                # LEFT JOINs keep sources whose category or country is unset
                # or has no matching row; those name columns come back NULL.
                cursor.execute("""
                    SELECT f.id, f.nombre, f.url, f.categoria_id, c.nombre AS categoria, f.pais_id, p.nombre AS pais, f.idioma 
                    FROM fuentes_url f 
                    LEFT JOIN categorias c ON f.categoria_id = c.id 
                    LEFT JOIN paises p ON f.pais_id = p.id 
                    ORDER BY f.id
                """)
                fuentes = cursor.fetchall()
        # Nothing to export: warn and go back instead of sending an empty file.
        if not fuentes:
            flash("No hay fuentes URL para exportar.", "warning")
            return redirect(url_for("dashboard"))
        # The first row's keys define the CSV columns, in SELECT order.
        fieldnames = list(fuentes[0].keys())
        # Build the CSV in memory; the whole payload is sent in one response.
        output = StringIO()
        writer = csv.DictWriter(output, fieldnames=fieldnames)
        writer.writeheader()
        # Each row is converted to a plain dict before being handed to DictWriter.
        writer.writerows([dict(fuente) for fuente in fuentes])
        return Response(
            output.getvalue(), 
            mimetype="text/csv", 
            headers={"Content-Disposition": "attachment;filename=fuentes_url_backup.csv"}
        )
    except Exception as e:
        # Catch-all boundary for the route: log with traceback, tell the user,
        # and fall back to the dashboard.
        # NOTE(review): the raw exception text is shown to the user via flash —
        # confirm that is acceptable for this deployment.
        app.logger.error(f"[ERROR] Al hacer backup de fuentes URL: {e}", exc_info=True)
        flash(f"Error interno al generar el backup de fuentes URL: {e}", "error")
        return redirect(url_for("dashboard"))
@app.route("/backup_noticias")
 def backup_noticias():
    try:
@@ -466,25 +561,33 @@ def backup_completo():
        with zipfile.ZipFile(memory_buffer, 'w', zipfile.ZIP_DEFLATED) as zipf:
            with get_conn() as conn:
                with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
-                    cursor.execute("SELECT f.id, f.nombre, f.descripcion, f.url, f.categoria_id, c.nombre AS categoria, f.pais_id, p.nombre AS pais, f.idioma, f.activo, f.fallos FROM feeds f LEFT JOIN categorias c ON f.categoria_id = c.id LEFT JOIN paises p ON f.pais_id = p.id ORDER BY f.id")
+                    cursor.execute("SELECT * FROM feeds ORDER BY id")
                    feeds_data = cursor.fetchall()
                    if feeds_data:
-                        fieldnames_feeds = list(feeds_data[0].keys())
+                        output_feeds = StringIO()
-                        output = StringIO()
+                        writer_feeds = csv.DictWriter(output_feeds, fieldnames=list(feeds_data[0].keys()))
-                        writer = csv.DictWriter(output, fieldnames=fieldnames_feeds)
+                        writer_feeds.writeheader()
-                        writer.writeheader()
+                        writer_feeds.writerows([dict(f) for f in feeds_data])
-                        writer.writerows([dict(f) for f in feeds_data])
+                        zipf.writestr("feeds.csv", output_feeds.getvalue())
                        zipf.writestr("feeds.csv", output.getvalue())
-                    cursor.execute("SELECT n.id, n.titulo, n.resumen, n.url, n.fecha, n.imagen_url, n.fuente_nombre, c.nombre AS categoria, p.nombre AS pais, co.nombre AS continente FROM noticias n LEFT JOIN categorias c ON n.categoria_id = c.id LEFT JOIN paises p ON n.pais_id = p.id LEFT JOIN continentes co ON p.continente_id = co.id ORDER BY n.fecha DESC")
+                    cursor.execute("SELECT * FROM fuentes_url ORDER BY id")
                    fuentes_data = cursor.fetchall()
                    if fuentes_data:
                        output_fuentes = StringIO()
                        writer_fuentes = csv.DictWriter(output_fuentes, fieldnames=list(fuentes_data[0].keys()))
                        writer_fuentes.writeheader()
                        writer_fuentes.writerows([dict(f) for f in fuentes_data])
                        zipf.writestr("fuentes_url.csv", output_fuentes.getvalue())
                    cursor.execute("SELECT * FROM noticias ORDER BY fecha DESC")
                    noticias_data = cursor.fetchall()
                    if noticias_data:
-                        fieldnames_noticias = list(noticias_data[0].keys())
+                        output_noticias = StringIO()
-                        output = StringIO()
+                        writer_noticias = csv.DictWriter(output_noticias, fieldnames=list(noticias_data[0].keys()))
-                        writer = csv.DictWriter(output, fieldnames=fieldnames_noticias)
+                        writer_noticias.writeheader()
-                        writer.writeheader()
+                        writer_noticias.writerows([dict(n) for n in noticias_data])
-                        writer.writerows([dict(n) for n in noticias_data])
+                        zipf.writestr("noticias.csv", output_noticias.getvalue())
-                        zipf.writestr("noticias.csv", output.getvalue())
+
        memory_buffer.seek(0)
        return Response(memory_buffer, mimetype="application/zip", headers={"Content-Disposition": "attachment;filename=rss_backup_completo.zip"})
    except Exception as e:
@@ -520,9 +623,11 @@ def restore_feeds():
                                    nombre=EXCLUDED.nombre, descripcion=EXCLUDED.descripcion, url=EXCLUDED.url, categoria_id=EXCLUDED.categoria_id,
                                    pais_id=EXCLUDED.pais_id, idioma=EXCLUDED.idioma, activo=EXCLUDED.activo, fallos=EXCLUDED.fallos;
                                """,
-                                {"id": int(row["id"]), "nombre": row.get("nombre"), "descripcion": row.get("descripcion") or "", "url": row.get("url"),
+                                {
                                    "id": int(row["id"]), "nombre": row.get("nombre"), "descripcion": row.get("descripcion") or "", "url": row.get("url"),
                                    "categoria_id": cat_id, "pais_id": pais_id, "idioma": row.get("idioma") or None, "activo": activo,
-                                 "fallos": int(row.get("fallos", 0) or 0)}
+                                    "fallos": int(row.get("fallos", 0) or 0)
                                }
                            )
                            n_ok += 1
                            cursor.execute("RELEASE SAVEPOINT restore_feed_row")
@@ -537,100 +642,80 @@ def restore_feeds():
        return redirect(url_for("dashboard"))
    return render_template("restore_feeds.html")
@app.route("/restore_urls", methods=["GET", "POST"])
 def restore_urls():
    if request.method == "POST":
        file = request.files.get("file")
        if not file or not file.filename.endswith(".csv"):
            flash("Archivo no válido. Sube un .csv.", "error")
            return redirect(url_for("restore_urls"))
 # --- TAREA DE FONDO ---
 def fetch_and_store():
    with app.app_context():
        logging.info("--- INICIANDO CICLO DE CAPTURA ---")
        feeds_to_process = []
        try:
            file_stream = StringIO(file.read().decode("utf-8", errors='ignore'))
            reader = csv.DictReader(file_stream)
            rows = list(reader)
            n_ok, n_err = 0, 0
            with get_conn() as conn:
-                with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
+                for row in rows:
-                    logging.info("Paso 1: Obteniendo lista de feeds...")
+                    with conn.cursor() as cursor:
                    # --- CORRECCIÓN: Se añade 'nombre' al SELECT ---
                    cursor.execute("SELECT id, nombre, url, categoria_id, pais_id, last_etag, last_modified FROM feeds WHERE activo = TRUE")
                    feeds_to_process = cursor.fetchall()
                    logging.info(f"Paso 2: {len(feeds_to_process)} feeds para procesar.")
        except psycopg2.Error as db_err:
            logging.error(f"Error de BD al obtener feeds: {db_err}")
            return
        if not feeds_to_process:
            logging.info("No hay feeds activos para procesar.")
            return
        feeds_fallidos, feeds_exitosos, todas_las_noticias, feeds_para_actualizar_headers = [], [], [], []
        logging.info(f"Paso 3: Iniciando procesamiento paralelo ({MAX_WORKERS} workers)...")
        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            future_to_feed = {executor.submit(process_single_feed, dict(feed)): feed for feed in feeds_to_process}
            progress_bar = tqdm(as_completed(future_to_feed), total=len(feeds_to_process), desc="Procesando Feeds")
            for future in progress_bar:
                original_feed_data = future_to_feed[future]
                feed_id = original_feed_data['id']
                        try:
-                    _, noticias_encontradas, new_etag, new_modified, success = future.result(timeout=SINGLE_FEED_TIMEOUT)
+                            cursor.execute("SAVEPOINT restore_url_row")
-                    if success:
+                            cat_id = int(row["categoria_id"]) if row.get("categoria_id") and row["categoria_id"].strip() else None
-                        feeds_exitosos.append(feed_id)
+                            pais_id = int(row["pais_id"]) if row.get("pais_id") and row["pais_id"].strip() else None
-                        if noticias_encontradas: todas_las_noticias.extend(noticias_encontradas)
+                            cursor.execute(
-                        if (new_etag is not None and new_etag != original_feed_data.get('last_etag')) or \
+                                """
-                           (new_modified is not None and new_modified != original_feed_data.get('last_modified')):
+                                INSERT INTO fuentes_url (id, nombre, url, categoria_id, pais_id, idioma)
-                            feeds_para_actualizar_headers.append({'id': feed_id, 'etag': new_etag, 'modified': new_modified})
+                                VALUES (%(id)s, %(nombre)s, %(url)s, %(categoria_id)s, %(pais_id)s, %(idioma)s)
-                    else:
+                                ON CONFLICT (id) DO UPDATE SET
-                        feeds_fallidos.append(feed_id)
+                                    nombre=EXCLUDED.nombre, url=EXCLUDED.url, categoria_id=EXCLUDED.categoria_id,
-                except TimeoutError:
+                                    pais_id=EXCLUDED.pais_id, idioma=EXCLUDED.idioma;
-                    logging.error(f"!!! TIMEOUT en feed {original_feed_data['url']} (ID: {feed_id})")
+                                """,
-                    feeds_fallidos.append(feed_id)
+                                {
-                except Exception as exc:
+                                    "id": int(row["id"]),
-                    logging.error(f"Excepción en feed {original_feed_data['url']} (ID: {feed_id}): {exc}", exc_info=True)
+                                    "nombre": row.get("nombre"),
-                    feeds_fallidos.append(feed_id)
+                                    "url": row.get("url"),
                                    "categoria_id": cat_id,
                                    "pais_id": pais_id,
                                    "idioma": row.get("idioma") or None
                                }
                            )
                            n_ok += 1
                            cursor.execute("RELEASE SAVEPOINT restore_url_row")
                        except Exception as e:
                            cursor.execute("ROLLBACK TO SAVEPOINT restore_url_row")
                            n_err += 1
                            app.logger.error(f"Error procesando fila de fuente URL (se omite): {row} - Error: {e}")
-        logging.info(f"Paso 4: Procesamiento finalizado. Noticias nuevas: {len(todas_las_noticias)}, Feeds fallidos: {len(feeds_fallidos)}, Feeds actualizados: {len(feeds_para_actualizar_headers)}.")
+            flash(f"Restauración de Fuentes URL completada. Procesadas: {n_ok}. Errores: {n_err}.", "success" if n_err == 0 else "warning")
-        if not any([todas_las_noticias, feeds_fallidos, feeds_exitosos, feeds_para_actualizar_headers]):
+        except Exception as e:
-            logging.info("Sin cambios que aplicar en la base de datos.")
+            app.logger.error(f"Error al restaurar fuentes URL desde CSV: {e}", exc_info=True)
-            return
+            flash(f"Ocurrió un error general al procesar el archivo: {e}", "error")
        return redirect(url_for("dashboard"))
    return render_template("restore_urls.html")
 # --- RUTA DE UTILIDAD PARA PRUEBAS ---
 # MOVIDA FUERA DEL BLOQUE if __name__ == '__main__' PARA QUE GUNICORN LA RECONOZCA
@app.route("/run-fetch")
 def run_fetch_now():
    """Ejecuta la tarea de recolección manualmente para pruebas."""
    try:
-            with get_conn() as conn:
+        # Idealmente, esto debería correr en un hilo separado para no bloquear la respuesta
-                logging.info("Paso 5: Actualizando BD...")
+        # pero para una ejecución manual simple, está bien así.
-
+        fetch_and_store_all()
-                if feeds_fallidos or feeds_exitosos:
+        flash("Tarea de fondo de recolección ejecutada manualmente.", "info")
-                    with conn.cursor() as cursor_feeds_status:
+    except Exception as e:
-                        if feeds_fallidos:
+        flash(f"Error al ejecutar la tarea de fondo: {e}", "error")
-                            cursor_feeds_status.execute("UPDATE feeds SET fallos = fallos + 1 WHERE id IN %s", (tuple(feeds_fallidos),))
+        app.logger.error(f"Error en la ejecución manual de la tarea de fondo: {e}", exc_info=True)
-                            cursor_feeds_status.execute("UPDATE feeds SET activo = FALSE WHERE fallos >= %s AND id IN %s", (MAX_FALLOS, tuple(feeds_fallidos)))
+    return redirect(url_for('dashboard'))
                        if feeds_exitosos:
                            cursor_feeds_status.execute("UPDATE feeds SET fallos = 0 WHERE id IN %s", (tuple(feeds_exitosos),))
                if feeds_para_actualizar_headers:
                    with conn.cursor() as cursor_headers:
                        psycopg2.extras.execute_values(
                            cursor_headers,
                            "UPDATE feeds SET last_etag = data.etag, last_modified = data.modified FROM (VALUES %s) AS data(id, etag, modified) WHERE feeds.id = data.id",
                            [(f['id'], f['etag'], f['modified']) for f in feeds_para_actualizar_headers]
                        )
                if todas_las_noticias:
                    logging.info(f"Intentando insertar {len(todas_las_noticias)} noticias en la base de datos.")
                    with conn.cursor() as cursor_news_insert:
                        # --- CORRECCIÓN: Se añade 'fuente_nombre' a la consulta INSERT ---
                        psycopg2.extras.execute_values(
                            cursor_news_insert,
                            "INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url, fuente_nombre, categoria_id, pais_id) VALUES %s ON CONFLICT (url) DO NOTHING",
                            todas_las_noticias
                        )
                        rows_inserted = cursor_news_insert.rowcount
                        logging.info(f"Se insertaron/omitieron {rows_inserted} noticias (ON CONFLICT DO NOTHING).")
                logging.info("--- CICLO DE CAPTURA FINALIZADO ---")
        except psycopg2.Error as db_err:
            logging.error(f"Error de BD en actualización masiva: {db_err}", exc_info=True)
 if __name__ == "__main__":
    if not db_pool:
        app.logger.error("La aplicación no puede arrancar sin una conexión a la base de datos.")
        sys.exit(1)
    app.run(host="0.0.0.0", port=5000, debug=True)
    # El app.run solo se usa para el desarrollo local. Gunicorn no ejecuta esta parte.
    app.run(host="0.0.0.0", port=8000, debug=True)
--- a/templates/base.html
+++ b/templates/base.html
@@ -160,15 +160,12 @@
            </a>
            <p class="subtitle">Tu centro de información personalizado</p>
            <!-- NAVEGACIÓN SIMPLIFICADA -->
            <nav class="main-nav">
                <a href="{{ url_for('home') }}" class="nav-link">Noticias</a>
                <a href="{{ url_for('dashboard') }}" class="nav-link">Dashboard</a>
                <a href="{{ url_for('manage_feeds') }}" class="nav-link">Gestionar Feeds</a>
                <a href="{{ url_for('manage_urls') }}" class="nav-link">Gestionar URLs</a>
                <div class="nav-actions">
                    <a href="{{ url_for('scrape_url') }}" class="btn btn-small btn-info">Procesar URL</a>
                </div>
            </nav>
        </header>
--- a/templates/dashboard.html
+++ b/templates/dashboard.html
@@ -1,44 +1,59 @@
 {% extends "base.html" %}
-{% block title %}Dashboard de Feeds{% endblock %}
+
 {% block title %}Dashboard{% endblock %}
 {% block content %}
 <div class="dashboard-grid">
    <div class="stat-card">
        <div class="stat-number">{{ stats.feeds_totales }}</div>
-        <div class="stat-label">Feeds RSS Totales</div>
+        <div class="stat-label">Feeds Totales</div>
    </div>
    <div class="stat-card">
        <div class="stat-number">{{ stats.noticias_totales }}</div>
-        <div class="stat-label">Noticias Recopiladas</div>
+        <div class="stat-label">Noticias Totales</div>
    </div>
    <div class="stat-card">
-        <div class="stat-number" style="background: linear-gradient(135deg, #f72585 0%, #7209b7 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">
+        <div class="stat-number">{{ stats.feeds_caidos }}</div>
-            {{ stats.feeds_caidos }}
+        <div class="stat-label">Feeds Caídos</div>
    </div>
 </div>
 <div class="row">
    <div class="col-md-6 mb-4">
        <div class="card">
            <div class="card-header">
                <h3>Gestión de Feeds RSS</h3>
            </div>
            <div class="card-body">
                <p>Exporta tu lista de feeds RSS o restaura/importa desde un archivo CSV.</p>
                <a href="{{ url_for('backup_feeds') }}" class="btn"><i class="fas fa-download"></i> Exportar Feeds</a>
                <a href="{{ url_for('restore_feeds') }}" class="btn btn-info"><i class="fas fa-upload"></i> Importar Feeds</a>
            </div>
        </div>
    </div>
    <div class="col-md-6 mb-4">
        <div class="card">
            <div class="card-header">
                <h3>Gestión de Fuentes URL</h3>
            </div>
            <div class="card-body">
                <p>Exporta tu lista de fuentes URL o restaura/importa desde un archivo CSV.</p>
                <a href="{{ url_for('backup_urls') }}" class="btn"><i class="fas fa-download"></i> Exportar URLs</a>
                <a href="{{ url_for('restore_urls') }}" class="btn btn-info"><i class="fas fa-upload"></i> Importar URLs</a>
            </div>
        </div>
        <div class="stat-label">Feeds Caídos / Inactivos</div>
    </div>
 </div>
 <div class="card">
-    <h2 style="text-align: center; margin-bottom: 20px;">Opciones de Backup y Restauración</h2>
+     <div class="card-header">
-    <p style="text-align: center; color: var(--text-color-light); margin-bottom: 25px;">
+        <h3>Operaciones del Sistema</h3>
-        Exporta tus datos para mantener copias de seguridad o restaura desde un archivo.
+    </div>
-    </p>
+    <div class="card-body">
-    
+        <p>Genera una copia de seguridad completa o ejecuta la tarea de recolección manualmente para pruebas.</p>
-    <div style="display: flex; justify-content: center; gap: 15px; flex-wrap: wrap;">
+        <a href="{{ url_for('backup_completo') }}" class="btn btn-secondary"><i class="fas fa-archive"></i> Backup Completo (.zip)</a>
-        <a href="{{ url_for('backup_feeds') }}" class="btn" style="display: inline-flex; align-items: center; gap: 8px;">
+        <a href="{{ url_for('run_fetch_now') }}" class="btn btn-danger" onclick="return confirm('Esto puede tardar un momento. ¿Estás seguro?')"><i class="fas fa-cogs"></i> Ejecutar Recolección Manual</a>
            <i class="fas fa-file-csv"></i> Feeds (CSV)
        </a>
        <a href="{{ url_for('backup_noticias') }}" class="btn" style="display: inline-flex; align-items: center; gap: 8px;">
            <i class="fas fa-file-csv"></i> Noticias (CSV)
        </a>
        <a href="{{ url_for('backup_completo') }}" class="btn" style="background: linear-gradient(135deg, #00b894 0%, #00cec9 100%); display: inline-flex; align-items: center; gap: 8px;">
            <i class="fas fa-file-archive"></i> Completo (ZIP)
        </a>
        <a href="{{ url_for('restore_feeds') }}" class="btn btn-secondary" style="display: inline-flex; align-items: center; gap: 8px;">
            <i class="fas fa-file-import"></i> Importar Feeds
        </a>
    </div>
 </div>
 {% endblock %}
--- a/templates/noticias.html
+++ b/templates/noticias.html
@@ -3,14 +3,6 @@
 {% block title %}Últimas Noticias RSS{% endblock %}
 {% block content %}
 <header>
    <h1>Agregador de Noticias</h1>
    <p class="subtitle">Tus fuentes de información, en un solo lugar.</p>
    <a href="{{ url_for('dashboard') }}" class="top-link" style="margin-top:15px;">
        <i class="fas fa-cogs"></i> Gestionar Feeds
    </a>
 </header>
 <div class="card">
    <h2><i class="fas fa-filter" style="color: var(--secondary-color); margin-right: 10px;"></i>Filtrar Noticias</h2>
    <form method="get" action="{{ url_for('home') }}" id="filter-form">
@@ -116,3 +108,4 @@ document.addEventListener('DOMContentLoaded', function() {
 });
 </script>
 {% endblock %}
--- a/templates/restore_urls.html
+++ b/templates/restore_urls.html
@@ -0,0 +1,33 @@
 {% extends "base.html" %}
 {% block title %}Importar Fuentes URL desde CSV{% endblock %}
 {% block content %}
 <div class="card">
    <div class="card-header">
        <h2>Añadir/Restaurar Fuentes URL desde archivo CSV</h2>
    </div>
    <div class="card-body">
        <p>
            Sube un archivo CSV para añadir en bloque nuevas fuentes URL o para actualizar las existentes.
        </p>
        <p>
            El archivo debe contener, como mínimo, las columnas: <code>id</code>, <code>nombre</code>, <code>url</code>, <code>categoria_id</code>, <code>pais_id</code>, e <code>idioma</code>.
            Para añadir nuevas fuentes, asegúrate de que los <code>id</code> no existan en la base de datos (puedes usar números altos).
        </p>
        <hr>
        <form method="POST" enctype="multipart/form-data" action="{{ url_for('restore_urls') }}">
            <div class="mb-3">
                <label for="file" class="form-label">Selecciona el archivo CSV:</label>
                <input class="form-control" type="file" id="file" name="file" accept=".csv" required>
            </div>
            <button type="submit" class="btn btn-primary">
                <i class="fas fa-upload"></i> Importar Fuentes URL
            </button>
            <a href="{{ url_for('dashboard') }}" class="btn btn-secondary">
                <i class="fas fa-times"></i> Cancelar
            </a>
        </form>
    </div>
 </div>
 {% endblock %}
--- a/templates/urls_list.html
+++ b/templates/urls_list.html
@@ -1,44 +1,45 @@
 {% extends "base.html" %}
 {% block title %}Gestionar Fuentes URL{% endblock %}
 {% block content %}
-<div class="feed-detail-card">
+<div class="card feed-detail-card">
    <div class="feed-header">
-        <h2>Fuentes de Noticias URL</h2>
+        <h2>Lista de Fuentes URL ({{ fuentes|length }})</h2>
-        <a href="{{ url_for('add_url_source') }}" class="btn btn-small">Añadir Nueva Fuente URL</a>
+        <div class="nav-actions">
            <a href="{{ url_for('add_url_source') }}" class="btn btn-small"><i class="fas fa-plus"></i> Añadir URL</a>
        </div>
-    <div class="feed-body">
+    </div>
-        {% if fuentes %}
+    <div class="feed-body" style="padding: 0;">
-            <table class="table table-hover">
+        <table style="width:100%; border-collapse: collapse;">
            <thead>
-                    <tr>
+                <tr style="background-color: rgba(0,0,0,0.05);">
-                        <th>Nombre</th>
+                    <th style="padding: 12px 15px; text-align: left;">Nombre</th>
-                        <th>URL</th>
+                    <th style="padding: 12px 15px; text-align: left;">Categoría</th>
-                        <th>Categoría</th>
+                    <th style="padding: 12px 15px; text-align: left;">País</th>
-                        <th>País</th>
+                    <th style="padding: 12px 15px; text-align: right;">Acciones</th>
                        <th>Idioma</th>
                        <th>Acciones</th>
                </tr>
            </thead>
            <tbody>
                {% for fuente in fuentes %}
                <tr>
-                        <td>{{ fuente.nombre }}</td>
+                    <td style="padding: 12px 15px; border-top: 1px solid var(--border-color);">
-                        <td><a href="{{ fuente.url }}" target="_blank">{{ fuente.url[:50] }}...</a></td>
+                        <a href="{{ fuente.url }}" target="_blank" title="{{ fuente.url }}">{{ fuente.nombre }}</a>
                        <td>{{ fuente.categoria or 'N/A' }}</td>
                        <td>{{ fuente.pais or 'N/A' }}</td>
                        <td>{{ fuente.idioma }}</td>
                        <td>
                            <a href="{{ url_for('edit_url_source', url_id=fuente.id) }}" class="btn btn-small btn-secondary">Editar</a>
                            <a href="{{ url_for('delete_url_source', url_id=fuente.id) }}" class="btn btn-small btn-danger" onclick="return confirm('¿Estás seguro de que quieres eliminar esta fuente?');">Eliminar</a>
                    </td>
                    <td style="padding: 12px 15px; border-top: 1px solid var(--border-color);">{{ fuente.categoria or 'N/A' }}</td>
                    <td style="padding: 12px 15px; border-top: 1px solid var(--border-color);">{{ fuente.pais or 'Global' }}</td>
                    <td style="padding: 12px 15px; text-align: right; border-top: 1px solid var(--border-color);">
                        <a href="{{ url_for('edit_url_source', url_id=fuente.id) }}" class="btn btn-small btn-info"><i class="fas fa-edit"></i></a>
                        <a href="{{ url_for('delete_url_source', url_id=fuente.id) }}" class="btn btn-small btn-danger" onclick="return confirm('¿Estás seguro?')"><i class="fas fa-trash"></i></a>
                    </td>
                </tr>
                {% else %}
                <tr>
                    <td colspan="4" style="padding: 20px; text-align: center;">No hay fuentes URL para mostrar.</td>
                </tr>
                {% endfor %}
            </tbody>
        </table>
        {% else %}
            <p class="text-center text-muted">No hay fuentes URL guardadas. ¡Añade la primera!</p>
        {% endif %}
    </div>
 </div>
 {% endblock %}
--- a/worker.py
+++ b/worker.py
@@ -1,13 +1,18 @@
 import sys
-import os
+import logging # <--- ¡LA LÍNEA QUE FALTABA!
-import logging
+from app import fetch_and_store_all, app
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
 # Añadimos un logger para ver la salida en el journal de systemd
 handler = logging.StreamHandler(sys.stdout)
 handler.setLevel(logging.INFO)
 handler.setFormatter(logging.Formatter('[%(asctime)s] %(levelname)s in %(module)s: %(message)s'))
 app.logger.addHandler(handler)
 if __name__ == '__main__':
    app.logger.info("Iniciando tarea de recolección desde worker.py...")
    try:
-    from app import app, fetch_and_store
+        # Llamamos a la función con el nombre correcto
-except ImportError as e:
+        fetch_and_store_all()
-    logging.basicConfig()
+        app.logger.info("Tarea de recolección finalizada exitosamente.")
-    logging.critical(f"No se pudo importar la aplicación Flask. Error: {e}")
+    except Exception as e:
-    sys.exit(1)
+        app.logger.error(f"La tarea de recolección falló con una excepción: {e}", exc_info=True)
 if __name__ == "__main__":
    with app.app_context():
        fetch_and_store()