From 603149d47afc311a9400a7d3c444b3ef36af1b54 Mon Sep 17 00:00:00 2001 From: jlimolina Date: Sun, 15 Jun 2025 19:26:12 +0200 Subject: [PATCH] =?UTF-8?q?Actualizaci=C3=B3n=20del=202025-06-15=20a=20las?= =?UTF-8?q?=2019:26:12?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app.py | 246 +++++++++++++++++++++++---------- install.sh | 3 +- templates/add_url_source.html | 49 +++++++ templates/base.html | 11 +- templates/dashboard.html | 30 +--- templates/edit_url_source.html | 37 +++++ templates/scrape_url.html | 30 ++++ templates/urls_list.html | 44 ++++++ url_processor.py | 22 +-- 9 files changed, 350 insertions(+), 122 deletions(-) create mode 100644 templates/add_url_source.html create mode 100644 templates/edit_url_source.html create mode 100644 templates/scrape_url.html create mode 100644 templates/urls_list.html diff --git a/app.py b/app.py index 07a027e..cc2ec7e 100644 --- a/app.py +++ b/app.py @@ -19,7 +19,6 @@ import psycopg2.pool import bleach from feed_processor import process_single_feed -# --- IMPORTACIÓN CORREGIDA --- from url_processor import process_newspaper_url logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s') @@ -71,6 +70,13 @@ def safe_html(text): if not text: return "" return bleach.clean(text, tags={'a', 'b', 'strong', 'i', 'em', 'p', 'br'}, attributes={'a': ['href', 'title']}, strip=True) +def _get_form_dependencies(cursor): + cursor.execute("SELECT id, nombre FROM categorias ORDER BY nombre") + categorias = cursor.fetchall() + cursor.execute("SELECT id, nombre, continente_id FROM paises ORDER BY nombre") + paises = cursor.fetchall() + return categorias, paises + @app.route("/") def home(): cat_id, cont_id, pais_id, fecha_filtro = request.args.get("categoria_id"), request.args.get("continente_id"), request.args.get("pais_id"), request.args.get("fecha") @@ -128,7 +134,7 @@ def home(): cat_id=int(cat_id) if cat_id else None, cont_id=int(cont_id) if cont_id else None, pais_id=int(pais_id) if pais_id else None, fecha_filtro=fecha_filtro, q=q) -@app.route("/feeds") +@app.route("/dashboard") def dashboard(): stats = {'feeds_totales': 0, 'noticias_totales': 0, 'feeds_caidos': 0} try: @@ -145,6 +151,9 @@ def dashboard(): flash("Error al conectar con la base de datos.", "error") return render_template("dashboard.html", stats=stats) + +# --- GESTIÓN DE FEEDS --- + @app.route("/feeds/manage") def manage_feeds(): page = request.args.get('page', 1, type=int) @@ -156,7 +165,7 @@ def manage_feeds(): with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: cursor.execute("SELECT COUNT(*) FROM feeds") total_feeds = cursor.fetchone()[0] - cursor.execute("SELECT * FROM feeds ORDER BY nombre LIMIT %s OFFSET %s", (per_page, offset)) + cursor.execute("SELECT f.id, f.nombre, f.url, c.nombre as categoria, p.nombre as pais, f.idioma, f.activo FROM feeds f LEFT JOIN categorias c ON f.categoria_id = c.id LEFT JOIN paises p ON f.pais_id = p.id ORDER BY f.nombre LIMIT %s OFFSET %s", (per_page, offset)) feeds_list = cursor.fetchall() except psycopg2.Error as db_err: app.logger.error(f"[DB ERROR] Al obtener lista de feeds: {db_err}") @@ -164,13 +173,6 @@ def manage_feeds(): total_pages = math.ceil(total_feeds / per_page) if total_feeds > 0 else 0 return render_template("feeds_list.html", feeds=feeds_list, page=page, total_pages=total_pages, total_feeds=total_feeds) -def _get_form_dependencies(cursor): - cursor.execute("SELECT id, nombre FROM categorias ORDER BY nombre") - categorias = cursor.fetchall() - cursor.execute("SELECT id, nombre, continente_id FROM paises ORDER BY nombre") - paises = cursor.fetchall() - return categorias, paises - @app.route("/feeds/add", methods=['GET', 'POST']) def add_feed(): if request.method == 'POST': @@ -188,7 +190,7 @@ def add_feed(): except psycopg2.Error as db_err: app.logger.error(f"[DB ERROR] Al agregar feed: {db_err}", exc_info=True) flash(f"Error al añadir el feed: {db_err}", "error") - return redirect(url_for("dashboard")) + return redirect(url_for("manage_feeds")) categorias, paises = [], [] try: @@ -200,67 +202,7 @@ def add_feed(): flash("No se pudieron cargar las categorías o países.", "error") return render_template("add_feed.html", categorias=categorias, paises=paises) - -@app.route("/add_url", methods=['GET', 'POST']) -def add_url(): - if request.method == 'POST': - url_to_scrape = request.form.get("url") - if not url_to_scrape: - flash("La URL es obligatoria.", "error") - return redirect(url_for('add_url')) - - categoria_id = int(request.form.get("categoria_id")) if request.form.get("categoria_id") else None - pais_id = int(request.form.get("pais_id")) if request.form.get("pais_id") else None - - if not categoria_id or not pais_id: - flash("Debes seleccionar una categoría y un país.", "error") - return redirect(url_for('add_url')) - - # Llama a la nueva función que devuelve una lista de noticias - lista_noticias, message = process_newspaper_url(url_to_scrape, categoria_id, pais_id) - - if lista_noticias: - try: - with get_conn() as conn: - with conn.cursor() as cursor: - # Usamos execute_values para insertar todas las noticias de una vez - insert_query = """ - INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url, categoria_id, pais_id) - VALUES %s - ON CONFLICT (url) DO UPDATE SET - titulo = EXCLUDED.titulo, - resumen = EXCLUDED.resumen, - fecha = EXCLUDED.fecha, - imagen_url = EXCLUDED.imagen_url; - """ - psycopg2.extras.execute_values(cursor, insert_query, lista_noticias) - - # Mensaje de éxito mejorado que indica cuántas noticias se guardaron - flash(f"Se encontraron y guardaron {len(lista_noticias)} noticias desde la URL.", "success") - return redirect(url_for("home")) - except psycopg2.Error as db_err: - app.logger.error(f"[DB ERROR] Al insertar noticias scrapeadas: {db_err}", exc_info=True) - flash(f"Error de base de datos al guardar las noticias: {db_err}", "error") - else: - # Muestra el mensaje de error o de "no se encontraron artículos" - flash(message, "warning") - - return redirect(url_for('add_url')) - - # Petición GET: Muestra el formulario - categorias, paises = [], [] - try: - with get_conn() as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: - categorias, paises = _get_form_dependencies(cursor) - except psycopg2.Error as db_err: - app.logger.error(f"[DB ERROR] Al cargar formulario de URL: {db_err}") - flash("No se pudieron cargar las categorías o países para el formulario.", "error") - - return render_template("add_url.html", categorias=categorias, paises=paises) - - -@app.route("/edit/", methods=["GET", "POST"]) +@app.route("/feeds/edit/", methods=["GET", "POST"]) def edit_feed(feed_id): if request.method == "POST": try: @@ -280,6 +222,7 @@ def edit_feed(feed_id): flash(f"Error al actualizar el feed: {db_err}", "error") return redirect(url_for("manage_feeds")) + feed, categorias, paises = None, [], [] try: with get_conn() as conn: with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: @@ -290,12 +233,11 @@ def edit_feed(feed_id): return redirect(url_for("manage_feeds")) categorias, paises = _get_form_dependencies(cursor) except psycopg2.Error as db_err: - app.logger.error(f"[DB ERROR] Al cargar feed para editar: {db_err}", exc_info=True) flash("Error al cargar el feed para editar.", "error") return redirect(url_for("manage_feeds")) return render_template("edit_feed.html", feed=feed, categorias=categorias, paises=paises) -@app.route("/delete/") +@app.route("/feeds/delete/") def delete_feed(feed_id): try: with get_conn() as conn: @@ -307,7 +249,7 @@ def delete_feed(feed_id): flash(f"Error al eliminar el feed: {db_err}", "error") return redirect(url_for("manage_feeds")) -@app.route("/reactivar_feed/") +@app.route("/feeds/reactivar/") def reactivar_feed(feed_id): try: with get_conn() as conn: @@ -318,6 +260,156 @@ def reactivar_feed(feed_id): flash(f"Error al reactivar feed: {db_err}", "error") return redirect(url_for("manage_feeds")) + +# --- GESTIÓN DE FUENTES URL --- + +@app.route("/urls/manage") +def manage_urls(): + fuentes = [] + try: + with get_conn() as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + cursor.execute("SELECT f.id, f.nombre, f.url, c.nombre as categoria, p.nombre as pais, f.idioma FROM fuentes_url f LEFT JOIN categorias c ON f.categoria_id = c.id LEFT JOIN paises p ON f.pais_id = p.id ORDER BY f.nombre") + fuentes = cursor.fetchall() + except psycopg2.Error as db_err: + app.logger.error(f"[DB ERROR] Al obtener lista de fuentes URL: {db_err}") + flash("Error al obtener la lista de fuentes URL.", "error") + return render_template("urls_list.html", fuentes=fuentes) + +@app.route("/urls/add", methods=['GET', 'POST']) +def add_url_source(): + if request.method == 'POST': + nombre = request.form.get("nombre") + try: + with get_conn() as conn: + with conn.cursor() as cursor: + categoria_id = int(request.form.get("categoria_id")) if request.form.get("categoria_id") else None + pais_id = int(request.form.get("pais_id")) if request.form.get("pais_id") else None + idioma = request.form.get("idioma", "es").strip().lower() + cursor.execute( + "INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma) VALUES (%s, %s, %s, %s, %s)", + (nombre, request.form.get("url"), categoria_id, pais_id, idioma) + ) + flash(f"Fuente URL '{nombre}' añadida correctamente.", "success") + except psycopg2.Error as db_err: + app.logger.error(f"[DB ERROR] Al agregar fuente URL: {db_err}", exc_info=True) + flash(f"Error al añadir la fuente URL: {db_err}", "error") + return redirect(url_for("manage_urls")) + + categorias, paises = [], [] + try: + with get_conn() as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + categorias, paises = _get_form_dependencies(cursor) + except psycopg2.Error as db_err: + app.logger.error(f"[DB ERROR] Al cargar formulario: {db_err}") + flash("No se pudieron cargar las categorías o países.", "error") + return render_template("add_url_source.html", categorias=categorias, paises=paises) + +@app.route("/urls/edit/", methods=["GET", "POST"]) +def edit_url_source(url_id): + if request.method == "POST": + try: + with get_conn() as conn: + with conn.cursor() as cursor: + categoria_id = int(request.form.get("categoria_id")) if request.form.get("categoria_id") else None + pais_id = int(request.form.get("pais_id")) if request.form.get("pais_id") else None + idioma = request.form.get("idioma", "es").strip().lower() + cursor.execute( + "UPDATE fuentes_url SET nombre=%s, url=%s, categoria_id=%s, pais_id=%s, idioma=%s WHERE id=%s", + (request.form.get("nombre"), request.form.get("url"), categoria_id, pais_id, idioma, url_id) + ) + flash("Fuente URL actualizada correctamente.", "success") + except psycopg2.Error as db_err: + app.logger.error(f"[DB ERROR] Al actualizar fuente URL: {db_err}", exc_info=True) + flash(f"Error al actualizar la fuente URL: {db_err}", "error") + return redirect(url_for("manage_urls")) + + fuente, categorias, paises = None, [], [] + try: + with get_conn() as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + cursor.execute("SELECT * FROM fuentes_url WHERE id = %s", (url_id,)) + fuente = cursor.fetchone() + if not fuente: + flash("No se encontró la fuente URL solicitada.", "error") + return redirect(url_for("manage_urls")) + categorias, paises = _get_form_dependencies(cursor) + except psycopg2.Error as db_err: + flash("Error al cargar la fuente URL para editar.", "error") + return redirect(url_for("manage_urls")) + return render_template("edit_url_source.html", fuente=fuente, categorias=categorias, paises=paises) + +@app.route("/urls/delete/") +def delete_url_source(url_id): + try: + with get_conn() as conn: + with conn.cursor() as cursor: + cursor.execute("DELETE FROM fuentes_url WHERE id=%s", (url_id,)) + flash("Fuente URL eliminada correctamente.", "success") + except psycopg2.Error as db_err: + flash(f"Error al eliminar la fuente URL: {db_err}", "error") + return redirect(url_for("manage_urls")) + +# --- PROCESAMIENTO DE URLS --- + +@app.route("/scrape_url", methods=['GET', 'POST']) +def scrape_url(): + if request.method == 'POST': + source_id = request.form.get("source_id") + if not source_id: + flash("Debes seleccionar una fuente para procesar.", "error") + return redirect(url_for('scrape_url')) + + try: + with get_conn() as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + cursor.execute("SELECT * FROM fuentes_url WHERE id = %s", (source_id,)) + source = cursor.fetchone() + + if not source: + flash("La fuente seleccionada no existe.", "error") + return redirect(url_for('scrape_url')) + + lista_noticias, message = process_newspaper_url(source['url'], source['categoria_id'], source['pais_id'], source['idioma']) + + if lista_noticias: + # Se necesita una nueva conexión/cursor para la inserción + with get_conn() as insert_conn: + with insert_conn.cursor() as insert_cursor: + insert_query = """ + INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url, categoria_id, pais_id) + VALUES %s + ON CONFLICT (url) DO UPDATE SET + titulo = EXCLUDED.titulo, + resumen = EXCLUDED.resumen, + fecha = EXCLUDED.fecha, + imagen_url = EXCLUDED.imagen_url; + """ + psycopg2.extras.execute_values(insert_cursor, insert_query, lista_noticias) + flash(f"Se encontraron y guardaron {len(lista_noticias)} noticias desde '{source['nombre']}'.", "success") + return redirect(url_for("home")) + else: + flash(message, "warning") + + except psycopg2.Error as db_err: + app.logger.error(f"[DB ERROR] Al procesar fuente URL: {db_err}", exc_info=True) + flash(f"Error de base de datos al procesar la fuente: {db_err}", "error") + + return redirect(url_for('scrape_url')) + + fuentes = [] + try: + with get_conn() as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + cursor.execute("SELECT id, nombre FROM fuentes_url ORDER BY nombre") + fuentes = cursor.fetchall() + except psycopg2.Error as db_err: + flash("Error al cargar las fuentes de URL.", "error") + + return render_template("scrape_url.html", fuentes=fuentes) + +# --- BACKUP Y RESTORE --- @app.route("/backup_feeds") def backup_feeds(): try: @@ -440,6 +532,9 @@ def restore_feeds(): return redirect(url_for("dashboard")) return render_template("restore_feeds.html") + +# --- TAREA DE FONDO --- + def fetch_and_store(): with app.app_context(): logging.info("--- INICIANDO CICLO DE CAPTURA ---") @@ -525,6 +620,7 @@ def fetch_and_store(): except psycopg2.Error as db_err: logging.error(f"Error de BD en actualización masiva: {db_err}", exc_info=True) + if __name__ == "__main__": if not db_pool: app.logger.error("La aplicación no puede arrancar sin una conexión a la base de datos.") diff --git a/install.sh b/install.sh index 9c91eba..da650fa 100644 --- a/install.sh +++ b/install.sh @@ -13,7 +13,7 @@ WEB_PORT=8000 echo "🟢 Paso 0: Verificaciones y confirmación de seguridad" if [[ $EUID -ne 0 ]]; then - echo "❌ Este script debe ser ejecutado como root (usa sudo)." + echo "❌ Este script debe ser ejecutado como root (usa sudo)." exit 1 fi @@ -99,6 +99,7 @@ CREATE TABLE IF NOT EXISTS continentes (id SERIAL PRIMARY KEY, nombre VARCHAR(50 CREATE TABLE IF NOT EXISTS categorias (id SERIAL PRIMARY KEY, nombre VARCHAR(100) NOT NULL UNIQUE); CREATE TABLE IF NOT EXISTS paises (id SERIAL PRIMARY KEY, nombre VARCHAR(100) NOT NULL UNIQUE, continente_id INTEGER REFERENCES continentes(id) ON DELETE SET NULL); CREATE TABLE IF NOT EXISTS feeds (id SERIAL PRIMARY KEY, nombre VARCHAR(255), descripcion TEXT, url TEXT NOT NULL UNIQUE, categoria_id INTEGER REFERENCES categorias(id) ON DELETE SET NULL, pais_id INTEGER REFERENCES paises(id) ON DELETE SET NULL, idioma CHAR(2), activo BOOLEAN DEFAULT TRUE, fallos INTEGER DEFAULT 0, last_etag TEXT, last_modified TEXT); +CREATE TABLE IF NOT EXISTS fuentes_url (id SERIAL PRIMARY KEY, nombre VARCHAR(255) NOT NULL, url TEXT NOT NULL UNIQUE, categoria_id INTEGER REFERENCES categorias(id) ON DELETE SET NULL, pais_id INTEGER REFERENCES paises(id) ON DELETE SET NULL, idioma CHAR(2) DEFAULT 'es'); CREATE TABLE IF NOT EXISTS noticias (id VARCHAR(32) PRIMARY KEY, titulo TEXT, resumen TEXT, url TEXT NOT NULL UNIQUE, fecha TIMESTAMP, imagen_url TEXT, categoria_id INTEGER REFERENCES categorias(id) ON DELETE SET NULL, pais_id INTEGER REFERENCES paises(id) ON DELETE SET NULL, tsv tsvector); ALTER TABLE noticias ADD COLUMN IF NOT EXISTS tsv tsvector; CREATE OR REPLACE FUNCTION noticias_tsv_trigger() RETURNS trigger AS \$\$ BEGIN new.tsv := setweight(to_tsvector('spanish', coalesce(new.titulo,'')), 'A') || setweight(to_tsvector('spanish', coalesce(new.resumen,'')), 'B'); return new; END \$\$ LANGUAGE plpgsql; diff --git a/templates/add_url_source.html b/templates/add_url_source.html new file mode 100644 index 0000000..bf294ed --- /dev/null +++ b/templates/add_url_source.html @@ -0,0 +1,49 @@ +{% extends "base.html" %} +{% block title %}Añadir Fuente URL{% endblock %} +{% block content %} +
+
+
+
+

Añadir Nueva Fuente de Noticias URL

+
+
+

Añade un periódico o sitio de noticias para poder procesar sus artículos más tarde.

+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ Cancelar + +
+
+
+
+
+
+{% endblock %} + diff --git a/templates/base.html b/templates/base.html index cc45e8d..2151478 100644 --- a/templates/base.html +++ b/templates/base.html @@ -9,7 +9,7 @@ - +