Actualización del 2025-06-15 a las 19:26:12

This commit is contained in:
jlimolina 2025-06-15 19:26:12 +02:00
parent d23754d3b8
commit 603149d47a
9 changed files with 350 additions and 122 deletions

246
app.py
View file

@ -19,7 +19,6 @@ import psycopg2.pool
import bleach
from feed_processor import process_single_feed
# --- IMPORTACIÓN CORREGIDA ---
from url_processor import process_newspaper_url
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s')
@ -71,6 +70,13 @@ def safe_html(text):
if not text: return ""
return bleach.clean(text, tags={'a', 'b', 'strong', 'i', 'em', 'p', 'br'}, attributes={'a': ['href', 'title']}, strip=True)
def _get_form_dependencies(cursor):
    """Load the category and country rows used to populate the form selectors.

    Two queries are run on ``cursor``, in this order:

    1. ``categorias`` -> columns ``id, nombre`` ordered by ``nombre``.
    2. ``paises`` -> columns ``id, nombre, continente_id`` ordered by ``nombre``.

    Args:
        cursor: An open DB-API cursor; only ``execute`` and ``fetchall`` are used.

    Returns:
        A ``(categorias, paises)`` tuple holding the ``fetchall()`` result of
        each query.
    """
    lookups = []
    for sql in (
        "SELECT id, nombre FROM categorias ORDER BY nombre",
        "SELECT id, nombre, continente_id FROM paises ORDER BY nombre",
    ):
        cursor.execute(sql)
        lookups.append(cursor.fetchall())
    categorias, paises = lookups
    return categorias, paises
@app.route("/")
def home():
cat_id, cont_id, pais_id, fecha_filtro = request.args.get("categoria_id"), request.args.get("continente_id"), request.args.get("pais_id"), request.args.get("fecha")
@ -128,7 +134,7 @@ def home():
cat_id=int(cat_id) if cat_id else None, cont_id=int(cont_id) if cont_id else None,
pais_id=int(pais_id) if pais_id else None, fecha_filtro=fecha_filtro, q=q)
@app.route("/feeds")
@app.route("/dashboard")
def dashboard():
stats = {'feeds_totales': 0, 'noticias_totales': 0, 'feeds_caidos': 0}
try:
@ -145,6 +151,9 @@ def dashboard():
flash("Error al conectar con la base de datos.", "error")
return render_template("dashboard.html", stats=stats)
# --- GESTIÓN DE FEEDS ---
@app.route("/feeds/manage")
def manage_feeds():
page = request.args.get('page', 1, type=int)
@ -156,7 +165,7 @@ def manage_feeds():
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
cursor.execute("SELECT COUNT(*) FROM feeds")
total_feeds = cursor.fetchone()[0]
cursor.execute("SELECT * FROM feeds ORDER BY nombre LIMIT %s OFFSET %s", (per_page, offset))
cursor.execute("SELECT f.id, f.nombre, f.url, c.nombre as categoria, p.nombre as pais, f.idioma, f.activo FROM feeds f LEFT JOIN categorias c ON f.categoria_id = c.id LEFT JOIN paises p ON f.pais_id = p.id ORDER BY f.nombre LIMIT %s OFFSET %s", (per_page, offset))
feeds_list = cursor.fetchall()
except psycopg2.Error as db_err:
app.logger.error(f"[DB ERROR] Al obtener lista de feeds: {db_err}")
@ -164,13 +173,6 @@ def manage_feeds():
total_pages = math.ceil(total_feeds / per_page) if total_feeds > 0 else 0
return render_template("feeds_list.html", feeds=feeds_list, page=page, total_pages=total_pages, total_feeds=total_feeds)
def _get_form_dependencies(cursor):
    """Return the ``(categorias, paises)`` rows needed by the add/edit forms.

    Args:
        cursor: An open DB-API cursor; only ``execute`` and ``fetchall`` are used.

    Returns:
        A 2-tuple: all ``categorias`` rows (``id, nombre``) followed by all
        ``paises`` rows (``id, nombre, continente_id``), each ordered by
        ``nombre``.
    """
    def _all_rows(sql):
        # Run one statement and hand back every row it produced.
        cursor.execute(sql)
        return cursor.fetchall()

    categorias = _all_rows("SELECT id, nombre FROM categorias ORDER BY nombre")
    paises = _all_rows("SELECT id, nombre, continente_id FROM paises ORDER BY nombre")
    return categorias, paises
@app.route("/feeds/add", methods=['GET', 'POST'])
def add_feed():
if request.method == 'POST':
@ -188,7 +190,7 @@ def add_feed():
except psycopg2.Error as db_err:
app.logger.error(f"[DB ERROR] Al agregar feed: {db_err}", exc_info=True)
flash(f"Error al añadir el feed: {db_err}", "error")
return redirect(url_for("dashboard"))
return redirect(url_for("manage_feeds"))
categorias, paises = [], []
try:
@ -200,67 +202,7 @@ def add_feed():
flash("No se pudieron cargar las categorías o países.", "error")
return render_template("add_feed.html", categorias=categorias, paises=paises)
@app.route("/add_url", methods=['GET', 'POST'])
def add_url():
    """Scrape a newspaper URL submitted through a form and store its articles.

    GET renders ``add_url.html`` with the category and country selectors.

    POST validates the form, calls ``process_newspaper_url`` and upserts the
    returned rows into ``noticias`` (conflicts on ``url`` update the existing
    row). It redirects to ``home`` on success and back to this form on any
    validation failure, empty scrape result or database error.
    """
    if request.method == 'POST':
        url_to_scrape = request.form.get("url")
        if not url_to_scrape:
            flash("La URL es obligatoria.", "error")
            return redirect(url_for('add_url'))

        # type=int yields None for a missing, empty or non-numeric value
        # instead of raising ValueError (which previously escaped as a 500).
        categoria_id = request.form.get("categoria_id", type=int)
        pais_id = request.form.get("pais_id", type=int)
        if not categoria_id or not pais_id:
            flash("Debes seleccionar una categoría y un país.", "error")
            return redirect(url_for('add_url'))

        # The helper returns a list of news rows plus a status message.
        lista_noticias, message = process_newspaper_url(url_to_scrape, categoria_id, pais_id)
        if not lista_noticias:
            # Either an error message or "no articles found".
            flash(message, "warning")
            return redirect(url_for('add_url'))

        try:
            with get_conn() as conn:
                with conn.cursor() as cursor:
                    # execute_values inserts every scraped row in one statement.
                    insert_query = """
                        INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url, categoria_id, pais_id)
                        VALUES %s
                        ON CONFLICT (url) DO UPDATE SET
                            titulo = EXCLUDED.titulo,
                            resumen = EXCLUDED.resumen,
                            fecha = EXCLUDED.fecha,
                            imagen_url = EXCLUDED.imagen_url;
                    """
                    psycopg2.extras.execute_values(cursor, insert_query, lista_noticias)
            flash(f"Se encontraron y guardaron {len(lista_noticias)} noticias desde la URL.", "success")
            return redirect(url_for("home"))
        except psycopg2.Error as db_err:
            app.logger.error(f"[DB ERROR] Al insertar noticias scrapeadas: {db_err}", exc_info=True)
            flash(f"Error de base de datos al guardar las noticias: {db_err}", "error")
        return redirect(url_for('add_url'))

    # GET request: show the form.
    categorias, paises = [], []
    try:
        with get_conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
                categorias, paises = _get_form_dependencies(cursor)
    except psycopg2.Error as db_err:
        app.logger.error(f"[DB ERROR] Al cargar formulario de URL: {db_err}")
        flash("No se pudieron cargar las categorías o países para el formulario.", "error")
    return render_template("add_url.html", categorias=categorias, paises=paises)
@app.route("/edit/<int:feed_id>", methods=["GET", "POST"])
@app.route("/feeds/edit/<int:feed_id>", methods=["GET", "POST"])
def edit_feed(feed_id):
if request.method == "POST":
try:
@ -280,6 +222,7 @@ def edit_feed(feed_id):
flash(f"Error al actualizar el feed: {db_err}", "error")
return redirect(url_for("manage_feeds"))
feed, categorias, paises = None, [], []
try:
with get_conn() as conn:
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
@ -290,12 +233,11 @@ def edit_feed(feed_id):
return redirect(url_for("manage_feeds"))
categorias, paises = _get_form_dependencies(cursor)
except psycopg2.Error as db_err:
app.logger.error(f"[DB ERROR] Al cargar feed para editar: {db_err}", exc_info=True)
flash("Error al cargar el feed para editar.", "error")
return redirect(url_for("manage_feeds"))
return render_template("edit_feed.html", feed=feed, categorias=categorias, paises=paises)
@app.route("/delete/<int:feed_id>")
@app.route("/feeds/delete/<int:feed_id>")
def delete_feed(feed_id):
try:
with get_conn() as conn:
@ -307,7 +249,7 @@ def delete_feed(feed_id):
flash(f"Error al eliminar el feed: {db_err}", "error")
return redirect(url_for("manage_feeds"))
@app.route("/reactivar_feed/<int:feed_id>")
@app.route("/feeds/reactivar/<int:feed_id>")
def reactivar_feed(feed_id):
try:
with get_conn() as conn:
@ -318,6 +260,156 @@ def reactivar_feed(feed_id):
flash(f"Error al reactivar feed: {db_err}", "error")
return redirect(url_for("manage_feeds"))
# --- GESTIÓN DE FUENTES URL ---
@app.route("/urls/manage")
def manage_urls():
    """Render ``urls_list.html`` with every URL source.

    Each row of ``fuentes_url`` is LEFT JOINed with ``categorias`` and
    ``paises`` so the category and country names come along, ordered by the
    source name. On a database error the error is logged, an error message is
    flashed and the template is rendered with an empty list.
    """
    listado_sql = "SELECT f.id, f.nombre, f.url, c.nombre as categoria, p.nombre as pais, f.idioma FROM fuentes_url f LEFT JOIN categorias c ON f.categoria_id = c.id LEFT JOIN paises p ON f.pais_id = p.id ORDER BY f.nombre"
    fuentes = []
    try:
        with get_conn() as conn, conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
            cursor.execute(listado_sql)
            fuentes = cursor.fetchall()
    except psycopg2.Error as db_err:
        app.logger.error(f"[DB ERROR] Al obtener lista de fuentes URL: {db_err}")
        flash("Error al obtener la lista de fuentes URL.", "error")
    return render_template("urls_list.html", fuentes=fuentes)
@app.route("/urls/add", methods=['GET', 'POST'])
def add_url_source():
    """Create a new row in ``fuentes_url`` from the submitted form.

    GET renders ``add_url_source.html`` with the category and country
    selectors.

    POST requires a non-blank ``nombre`` and ``url``; ``categoria_id`` and
    ``pais_id`` are optional integers and ``idioma`` defaults to ``"es"``.
    After the insert attempt, successful or not, the user is redirected to
    ``manage_urls``; a validation failure redirects back to this form.
    """
    if request.method == 'POST':
        nombre = (request.form.get("nombre") or "").strip()
        url = (request.form.get("url") or "").strip()
        if not nombre or not url:
            # Without this check a missing field reached the INSERT and the
            # success flash could read "Fuente URL 'None' ...".
            flash("El nombre y la URL son obligatorios.", "error")
            return redirect(url_for("add_url_source"))

        # type=int yields None for a missing, empty or non-numeric value
        # instead of raising a ValueError that nothing here catches.
        categoria_id = request.form.get("categoria_id", type=int)
        pais_id = request.form.get("pais_id", type=int)
        idioma = request.form.get("idioma", "es").strip().lower()
        try:
            with get_conn() as conn:
                with conn.cursor() as cursor:
                    cursor.execute(
                        "INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma) VALUES (%s, %s, %s, %s, %s)",
                        (nombre, url, categoria_id, pais_id, idioma)
                    )
            flash(f"Fuente URL '{nombre}' añadida correctamente.", "success")
        except psycopg2.Error as db_err:
            app.logger.error(f"[DB ERROR] Al agregar fuente URL: {db_err}", exc_info=True)
            flash(f"Error al añadir la fuente URL: {db_err}", "error")
        return redirect(url_for("manage_urls"))

    # GET request: load the selector contents and show the form.
    categorias, paises = [], []
    try:
        with get_conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
                categorias, paises = _get_form_dependencies(cursor)
    except psycopg2.Error as db_err:
        app.logger.error(f"[DB ERROR] Al cargar formulario: {db_err}")
        flash("No se pudieron cargar las categorías o países.", "error")
    return render_template("add_url_source.html", categorias=categorias, paises=paises)
@app.route("/urls/edit/<int:url_id>", methods=["GET", "POST"])
def edit_url_source(url_id):
    """Edit the ``fuentes_url`` row identified by ``url_id``.

    GET loads the row plus the category and country selectors and renders
    ``edit_url_source.html``; if the row does not exist or the load fails,
    an error is flashed and the user is redirected to ``manage_urls``.

    POST updates the row from the form fields and redirects to
    ``manage_urls`` whether or not the update succeeded.

    Args:
        url_id: Primary key of the URL source, taken from the route.
    """
    if request.method == "POST":
        # type=int yields None for a missing, empty or non-numeric value
        # instead of raising a ValueError that nothing here catches.
        categoria_id = request.form.get("categoria_id", type=int)
        pais_id = request.form.get("pais_id", type=int)
        idioma = request.form.get("idioma", "es").strip().lower()
        try:
            with get_conn() as conn:
                with conn.cursor() as cursor:
                    cursor.execute(
                        "UPDATE fuentes_url SET nombre=%s, url=%s, categoria_id=%s, pais_id=%s, idioma=%s WHERE id=%s",
                        (request.form.get("nombre"), request.form.get("url"), categoria_id, pais_id, idioma, url_id)
                    )
            flash("Fuente URL actualizada correctamente.", "success")
        except psycopg2.Error as db_err:
            app.logger.error(f"[DB ERROR] Al actualizar fuente URL: {db_err}", exc_info=True)
            flash(f"Error al actualizar la fuente URL: {db_err}", "error")
        return redirect(url_for("manage_urls"))

    fuente, categorias, paises = None, [], []
    try:
        with get_conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
                cursor.execute("SELECT * FROM fuentes_url WHERE id = %s", (url_id,))
                fuente = cursor.fetchone()
                if not fuente:
                    flash("No se encontró la fuente URL solicitada.", "error")
                    return redirect(url_for("manage_urls"))
                categorias, paises = _get_form_dependencies(cursor)
    except psycopg2.Error as db_err:
        # Log the failure as the feed edit view does; it used to be dropped.
        app.logger.error(f"[DB ERROR] Al cargar fuente URL para editar: {db_err}", exc_info=True)
        flash("Error al cargar la fuente URL para editar.", "error")
        return redirect(url_for("manage_urls"))
    return render_template("edit_url_source.html", fuente=fuente, categorias=categorias, paises=paises)
@app.route("/urls/delete/<int:url_id>")
def delete_url_source(url_id):
    """Delete the ``fuentes_url`` row identified by ``url_id``.

    Flashes a success or error message and always redirects to
    ``manage_urls``.

    Args:
        url_id: Primary key of the URL source, taken from the route.
    """
    # NOTE(review): this destructive action is reachable via GET; consider
    # restricting it to POST once the templates submit a form.
    try:
        with get_conn() as conn:
            with conn.cursor() as cursor:
                cursor.execute("DELETE FROM fuentes_url WHERE id=%s", (url_id,))
        flash("Fuente URL eliminada correctamente.", "success")
    except psycopg2.Error as db_err:
        # Log the failure like the other URL-source views do, so it is not
        # visible only in the user's flash message.
        app.logger.error(f"[DB ERROR] Al eliminar fuente URL: {db_err}", exc_info=True)
        flash(f"Error al eliminar la fuente URL: {db_err}", "error")
    return redirect(url_for("manage_urls"))
# --- PROCESAMIENTO DE URLS ---
@app.route("/scrape_url", methods=['GET', 'POST'])
def scrape_url():
    """Scrape one stored URL source on demand and save the resulting news.

    GET renders ``scrape_url.html`` with the ``id``/``nombre`` of every row in
    ``fuentes_url``.

    POST looks up the source chosen in ``source_id``, passes its ``url``,
    ``categoria_id``, ``pais_id`` and ``idioma`` to ``process_newspaper_url``
    and upserts the returned rows into ``noticias`` (conflicts on ``url``
    update the existing row). It redirects to ``home`` on success and back to
    this page otherwise.
    """
    if request.method == 'POST':
        # type=int turns a missing, empty or non-numeric id into None, so it
        # is rejected here rather than surfacing as a database error.
        source_id = request.form.get("source_id", type=int)
        if source_id is None:
            flash("Debes seleccionar una fuente para procesar.", "error")
            return redirect(url_for('scrape_url'))

        try:
            with get_conn() as conn:
                with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
                    cursor.execute("SELECT * FROM fuentes_url WHERE id = %s", (source_id,))
                    source = cursor.fetchone()
            if not source:
                flash("La fuente seleccionada no existe.", "error")
                return redirect(url_for('scrape_url'))

            # The lookup connection is released above, before the scrape, so
            # it is not held while process_newspaper_url runs or while the
            # second connection below is acquired.
            lista_noticias, message = process_newspaper_url(source['url'], source['categoria_id'], source['pais_id'], source['idioma'])
            if not lista_noticias:
                flash(message, "warning")
                return redirect(url_for('scrape_url'))

            # A fresh connection/cursor is used for the insert.
            with get_conn() as insert_conn:
                with insert_conn.cursor() as insert_cursor:
                    insert_query = """
                        INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url, categoria_id, pais_id)
                        VALUES %s
                        ON CONFLICT (url) DO UPDATE SET
                            titulo = EXCLUDED.titulo,
                            resumen = EXCLUDED.resumen,
                            fecha = EXCLUDED.fecha,
                            imagen_url = EXCLUDED.imagen_url;
                    """
                    psycopg2.extras.execute_values(insert_cursor, insert_query, lista_noticias)
            flash(f"Se encontraron y guardaron {len(lista_noticias)} noticias desde '{source['nombre']}'.", "success")
            return redirect(url_for("home"))
        except psycopg2.Error as db_err:
            app.logger.error(f"[DB ERROR] Al procesar fuente URL: {db_err}", exc_info=True)
            flash(f"Error de base de datos al procesar la fuente: {db_err}", "error")
        return redirect(url_for('scrape_url'))

    # GET request: list the available sources for the selector.
    fuentes = []
    try:
        with get_conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
                cursor.execute("SELECT id, nombre FROM fuentes_url ORDER BY nombre")
                fuentes = cursor.fetchall()
    except psycopg2.Error as db_err:
        # Log the failure; it used to be dropped after the flash.
        app.logger.error(f"[DB ERROR] Al cargar las fuentes de URL: {db_err}", exc_info=True)
        flash("Error al cargar las fuentes de URL.", "error")
    return render_template("scrape_url.html", fuentes=fuentes)
# --- BACKUP Y RESTORE ---
@app.route("/backup_feeds")
def backup_feeds():
try:
@ -440,6 +532,9 @@ def restore_feeds():
return redirect(url_for("dashboard"))
return render_template("restore_feeds.html")
# --- TAREA DE FONDO ---
def fetch_and_store():
with app.app_context():
logging.info("--- INICIANDO CICLO DE CAPTURA ---")
@ -525,6 +620,7 @@ def fetch_and_store():
except psycopg2.Error as db_err:
logging.error(f"Error de BD en actualización masiva: {db_err}", exc_info=True)
if __name__ == "__main__":
if not db_pool:
app.logger.error("La aplicación no puede arrancar sin una conexión a la base de datos.")