# -*- coding: utf-8 -*-
"""Flask RSS aggregator (PostgreSQL version)

(Your copyright 😉, robust CSV import support)
"""

from flask import Flask, render_template, request, redirect, url_for, Response
from apscheduler.schedulers.background import BackgroundScheduler
from datetime import datetime
import feedparser
import hashlib
import re
import psycopg2
import psycopg2.extras
import csv
from io import StringIO

app = Flask(__name__)

# ---------------------------------------------------------------------------
# PostgreSQL database configuration
# ---------------------------------------------------------------------------
DB_CONFIG = {
    "host": "localhost",
    "port": 5432,
    "dbname": "rss",
    "user": "rss",
    "password": "x",
}
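
# Optional, a minimal sketch (assumption): allow the database password to be
# overridden from an environment variable so it need not be hard-coded above.
# The variable name RSS_DB_PASSWORD is hypothetical; adapt or remove as needed.
import os  # used only by the override below

DB_CONFIG["password"] = os.environ.get("RSS_DB_PASSWORD", DB_CONFIG["password"])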


def get_conn():
    """Return a new psycopg2 connection built from the DB_CONFIG dictionary."""
    return psycopg2.connect(**DB_CONFIG)


MAX_FALLOS = 5  # Maximum number of failures before a feed is deactivated


# ======================================
# Home page: latest news
# ======================================
@app.route("/")
def home():
    conn = None
    noticias = []
    categorias = []
    continentes = []
    paises = []
    cat_id = request.args.get("categoria_id")
    cont_id = request.args.get("continente_id")
    pais_id = request.args.get("pais_id")
    try:
        conn = get_conn()
        cursor = conn.cursor()
        cursor.execute("SELECT id, nombre FROM categorias ORDER BY nombre")
        categorias = cursor.fetchall()
        cursor.execute("SELECT id, nombre FROM continentes ORDER BY nombre")
        continentes = cursor.fetchall()
        # Country list, optionally narrowed to the selected continent
        if cont_id:
            cursor.execute(
                "SELECT id, nombre, continente_id FROM paises WHERE continente_id = %s ORDER BY nombre",
                (cont_id,),
            )
        else:
            cursor.execute("SELECT id, nombre, continente_id FROM paises ORDER BY nombre")
        paises = cursor.fetchall()

        # Base query; filters are appended below as parameterized conditions
        sql = (
            """
            SELECT n.fecha, n.titulo, n.resumen, n.url, n.imagen_url,
                   c.nombre AS categoria, p.nombre AS pais, co.nombre AS continente
            FROM noticias n
            LEFT JOIN categorias c ON n.categoria_id = c.id
            LEFT JOIN paises p ON n.pais_id = p.id
            LEFT JOIN continentes co ON p.continente_id = co.id
            WHERE 1=1
            """
        )
        params = []
        if cat_id:
            sql += " AND n.categoria_id = %s"
            params.append(cat_id)
        if pais_id:
            sql += " AND n.pais_id = %s"
            params.append(pais_id)
        elif cont_id:
            sql += " AND p.continente_id = %s"
            params.append(cont_id)
        sql += " ORDER BY n.fecha DESC LIMIT 50"
        cursor.execute(sql, params)
        noticias = cursor.fetchall()
    except psycopg2.Error as db_err:
        app.logger.error(f"[DB ERROR] Al leer noticias: {db_err}", exc_info=True)
    finally:
        if conn:
            conn.close()
    return render_template(
        "noticias.html",
        noticias=noticias,
        categorias=categorias,
        continentes=continentes,
        paises=paises,
        cat_id=int(cat_id) if cat_id else None,
        cont_id=int(cont_id) if cont_id else None,
        pais_id=int(pais_id) if pais_id else None,
    )


# ======================================
# Feed management at /feeds
# ======================================
@app.route("/feeds")
def feeds():
    conn = None
    try:
        conn = get_conn()
        cursor = conn.cursor()
        # Feeds with their description and failure count
        cursor.execute(
            """
            SELECT f.id, f.nombre, f.descripcion, f.url, f.categoria_id, f.pais_id,
                   f.activo, f.fallos, c.nombre, p.nombre
            FROM feeds f
            LEFT JOIN categorias c ON f.categoria_id = c.id
            LEFT JOIN paises p ON f.pais_id = p.id
            """
        )
        feeds_ = cursor.fetchall()
        cursor.execute("SELECT id, nombre FROM categorias ORDER BY nombre")
        categorias = cursor.fetchall()
        cursor.execute("SELECT id, nombre FROM continentes ORDER BY nombre")
        continentes = cursor.fetchall()
        cursor.execute("SELECT id, nombre, continente_id FROM paises ORDER BY nombre")
        paises = cursor.fetchall()
    except psycopg2.Error as db_err:
        app.logger.error(
            f"[DB ERROR] Al leer feeds/categorías/países: {db_err}", exc_info=True
        )
        feeds_, categorias, continentes, paises = [], [], [], []
    finally:
        if conn:
            conn.close()
    return render_template(
        "index.html",
        feeds=feeds_,
        categorias=categorias,
        continentes=continentes,
        paises=paises,
    )


# Add a feed
@app.route("/add", methods=["POST"])
def add_feed():
    nombre = request.form.get("nombre")
    descripcion = request.form.get("descripcion")
    url = request.form.get("url")
    # Empty <select> values arrive as "", which would break the integer FK columns
    categoria_id = request.form.get("categoria_id") or None
    pais_id = request.form.get("pais_id") or None
    idioma = request.form.get("idioma") or None
    conn = None  # ensure the name exists even if get_conn() fails
    try:
        conn = get_conn()
        cursor = conn.cursor()
        cursor.execute(
            """
            INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma)
            VALUES (%s, %s, %s, %s, %s, %s)
            """,
            (nombre, descripcion, url, categoria_id, pais_id, idioma),
        )
        conn.commit()
    except psycopg2.Error as db_err:
        app.logger.error(f"[DB ERROR] Al agregar feed: {db_err}", exc_info=True)
    finally:
        if conn:
            conn.close()
    return redirect(url_for("feeds"))


# Edit a feed
@app.route("/edit/<int:feed_id>", methods=["GET", "POST"])
def edit_feed(feed_id):
    conn = None
    try:
        conn = get_conn()
        cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        if request.method == "POST":
            nombre = request.form.get("nombre")
            descripcion = request.form.get("descripcion")
            url_feed = request.form.get("url")
            categoria_id = request.form.get("categoria_id") or None  # "" -> NULL, as in add_feed
            pais_id = request.form.get("pais_id") or None
            idioma = request.form.get("idioma") or None
            activo = request.form.get("activo") == "on"
            cursor.execute(
                """
                UPDATE feeds
                SET nombre=%s, descripcion=%s, url=%s, categoria_id=%s,
                    pais_id=%s, idioma=%s, activo=%s
                WHERE id=%s
                """,
                (
                    nombre,
                    descripcion,
                    url_feed,
                    categoria_id,
                    pais_id,
                    idioma,
                    activo,
                    feed_id,
                ),
            )
            conn.commit()
            return redirect(url_for("feeds"))
        cursor.execute("SELECT * FROM feeds WHERE id = %s", (feed_id,))
        feed = cursor.fetchone()
        cursor.execute("SELECT id, nombre FROM categorias ORDER BY nombre")
        categorias = cursor.fetchall()
        cursor.execute("SELECT id, nombre FROM paises ORDER BY nombre")
        paises = cursor.fetchall()
    except psycopg2.Error as db_err:
        app.logger.error(f"[DB ERROR] Al editar feed: {db_err}", exc_info=True)
        feed, categorias, paises = {}, [], []
    finally:
        if conn:
            conn.close()
    return render_template("edit_feed.html", feed=feed, categorias=categorias, paises=paises)


# Delete a feed
@app.route("/delete/<int:feed_id>")
def delete_feed(feed_id):
    conn = None
    try:
        conn = get_conn()
        cursor = conn.cursor()
        cursor.execute("DELETE FROM feeds WHERE id=%s", (feed_id,))
        conn.commit()
    except psycopg2.Error as db_err:
        app.logger.error(f"[DB ERROR] Al eliminar feed: {db_err}", exc_info=True)
    finally:
        if conn:
            conn.close()
    return redirect(url_for("feeds"))


# Back up feeds to CSV
@app.route("/backup_feeds")
def backup_feeds():
    conn = None
    try:
        conn = get_conn()
        cursor = conn.cursor()
        cursor.execute(
            """
            SELECT f.id, f.nombre, f.descripcion, f.url, f.categoria_id, c.nombre AS categoria,
                   f.pais_id, p.nombre AS pais, f.idioma, f.activo, f.fallos
            FROM feeds f
            LEFT JOIN categorias c ON f.categoria_id = c.id
            LEFT JOIN paises p ON f.pais_id = p.id
            """
        )
        feeds_ = cursor.fetchall()
        header = [desc[0] for desc in cursor.description]
    except psycopg2.Error as db_err:
        app.logger.error(f"[DB ERROR] Al hacer backup de feeds: {db_err}", exc_info=True)
        return "Error generando backup.", 500
    finally:
        if conn:
            conn.close()

    si = StringIO()
    cw = csv.writer(si)
    cw.writerow(header)
    cw.writerows(feeds_)
    output = si.getvalue()
    si.close()
    return Response(
        output,
        mimetype="text/csv",
        headers={"Content-Disposition": "attachment;filename=feeds_backup.csv"},
    )


# Restore feeds from CSV (robust: bool/int/None/codes)
@app.route("/restore_feeds", methods=["GET", "POST"])
def restore_feeds():
    msg = ""
    if request.method == "POST":
        file = request.files.get("file")
        if not file or not file.filename.endswith(".csv"):
            msg = "Archivo no válido."
        else:
            file_stream = StringIO(file.read().decode("utf-8"))
            reader = csv.DictReader(file_stream)
            rows = list(reader)
            conn = get_conn()
            cursor = conn.cursor()
            n_ok = 0
            msg_lines = []
            for i, row in enumerate(rows, 1):
                try:
                    # Per-row savepoint so one bad row does not abort the whole transaction
                    cursor.execute("SAVEPOINT restore_row")

                    # Robust parsing of "activo" (accepts True/False/1/0/t/f/yes/no/empty)
                    activo_val = str(row.get("activo", "")).strip().lower()
                    if activo_val in ["1", "true", "t", "yes"]:
                        activo = True
                    elif activo_val in ["0", "false", "f", "no"]:
                        activo = False
                    else:
                        activo = True  # default value

                    idioma = row.get("idioma", None)
                    idioma = idioma.strip() if idioma else None
                    if idioma == "":
                        idioma = None

                    cursor.execute(
                        """
                        INSERT INTO feeds (
                            id, nombre, descripcion, url, categoria_id, pais_id, idioma, activo, fallos
                        ) VALUES (%(id)s, %(nombre)s, %(descripcion)s, %(url)s, %(categoria_id)s,
                                  %(pais_id)s, %(idioma)s, %(activo)s, %(fallos)s)
                        ON CONFLICT (id) DO UPDATE SET
                            nombre = EXCLUDED.nombre,
                            descripcion = EXCLUDED.descripcion,
                            url = EXCLUDED.url,
                            categoria_id = EXCLUDED.categoria_id,
                            pais_id = EXCLUDED.pais_id,
                            idioma = EXCLUDED.idioma,
                            activo = EXCLUDED.activo,
                            fallos = EXCLUDED.fallos;
                        """,
                        {
                            "id": int(row.get("id")),
                            "nombre": row["nombre"],
                            "descripcion": row.get("descripcion") or "",
                            "url": row["url"],
                            "categoria_id": int(row["categoria_id"]) if row["categoria_id"] else None,
                            "pais_id": int(row["pais_id"]) if row["pais_id"] else None,
                            "idioma": idioma,
                            "activo": activo,
                            "fallos": int(row.get("fallos") or 0),  # "" or missing -> 0
                        },
                    )
                    cursor.execute("RELEASE SAVEPOINT restore_row")
                    n_ok += 1
                except Exception as e:
                    cursor.execute("ROLLBACK TO SAVEPOINT restore_row")
                    app.logger.error(f"Error insertando feed fila {i}: {e}")
                    msg_lines.append(f"Error en fila {i}: {e}")
            conn.commit()
            conn.close()
            msg = f"Feeds restaurados correctamente: {n_ok}"
            if msg_lines:
                msg += "<br>" + "<br>".join(msg_lines)
    return render_template("restore_feeds.html", msg=msg)
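

# Optional sketch (assumption: feeds.id is a serial/identity column). After a restore
# that inserts explicit ids, the id sequence may lag behind MAX(id); calling this once
# after conn.commit() in restore_feeds would realign it so new inserts do not collide.
# The helper name is hypothetical and it is not wired into any route by default.
def realign_feeds_id_sequence(cursor):
    cursor.execute(
        "SELECT setval(pg_get_serial_sequence('feeds', 'id'), "
        "COALESCE(MAX(id), 1)) FROM feeds"
    )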


@app.route("/noticias")
def show_noticias():
    return home()


# ================================
# Feed processing logic with failure control
# ================================
def sumar_fallo_feed(cursor, feed_id):
    """Increment the feed's failure counter and deactivate it once MAX_FALLOS is reached."""
    cursor.execute("UPDATE feeds SET fallos = fallos + 1 WHERE id = %s", (feed_id,))
    cursor.execute("SELECT fallos FROM feeds WHERE id = %s", (feed_id,))
    fallos = cursor.fetchone()[0]
    if fallos >= MAX_FALLOS:
        cursor.execute("UPDATE feeds SET activo = FALSE WHERE id = %s", (feed_id,))
    return fallos


def resetear_fallos_feed(cursor, feed_id):
    """Reset the feed's failure counter after a successful fetch."""
    cursor.execute("UPDATE feeds SET fallos = 0 WHERE id = %s", (feed_id,))
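

# Optional sketch (assumption, not an original route): feeds disabled after reaching
# MAX_FALLOS stay inactive until someone re-enables them. A small maintenance endpoint
# like this one (hypothetical URL /reactivate/<id>) could reset the counter and flag.
@app.route("/reactivate/<int:feed_id>")
def reactivate_feed(feed_id):
    conn = None
    try:
        conn = get_conn()
        cursor = conn.cursor()
        cursor.execute("UPDATE feeds SET activo = TRUE, fallos = 0 WHERE id = %s", (feed_id,))
        conn.commit()
    except psycopg2.Error as db_err:
        app.logger.error(f"[DB ERROR] Reactivating feed {feed_id}: {db_err}", exc_info=True)
    finally:
        if conn:
            conn.close()
    return redirect(url_for("feeds"))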


def fetch_and_store():
    conn = None
    try:
        conn = get_conn()
        cursor = conn.cursor()
        cursor.execute("SELECT id, url, categoria_id, pais_id FROM feeds WHERE activo = TRUE")
        feeds_ = cursor.fetchall()
    except psycopg2.Error as db_err:
        app.logger.error(f"[DB ERROR] No se pudo conectar o leer feeds: {db_err}", exc_info=True)
        return

    for feed_id, rss_url, categoria_id, pais_id in feeds_:
        try:
            app.logger.info(f"Procesando feed: {rss_url} [{categoria_id}] [{pais_id}]")
            parsed = feedparser.parse(rss_url)
        except Exception as e:
            app.logger.error(f"[PARSE ERROR] Al parsear {rss_url}: {e}", exc_info=True)
            sumar_fallo_feed(cursor, feed_id)
            continue

        # feedparser sets "bozo" on malformed feeds; count it as a failure and skip
        if getattr(parsed, "bozo", False):
            bozo_exc = getattr(parsed, "bozo_exception", "Unknown")
            app.logger.warning(f"[BOZO] Feed mal formado: {rss_url} - {bozo_exc}")
            sumar_fallo_feed(cursor, feed_id)
            continue
        else:
            resetear_fallos_feed(cursor, feed_id)

        for entry in parsed.entries:
            link = entry.get("link") or entry.get("id")
            if not link:
                links_list = entry.get("links", [])
                if isinstance(links_list, list) and links_list:
                    href = next((l.get("href") for l in links_list if l.get("href")), None)
                    link = href
            if not link:
                app.logger.error(
                    f"[ENTRY ERROR] Entrada sin link en feed {rss_url}, salto entrada."
                )
                continue

            try:
                # The MD5 of the link doubles as a stable primary key for deduplication
                noticia_id = hashlib.md5(link.encode()).hexdigest()
                titulo = entry.get("title", "")
                resumen = entry.get("summary", "")
                imagen_url = ""
                fecha = None

                if "media_content" in entry:
                    imagen_url = entry.media_content[0].get("url", "")
                else:
                    img = re.search(r"<img.+?src=\"(.+?)\"", resumen)
                    if img:
                        imagen_url = img.group(1)

                published = entry.get("published_parsed") or entry.get("updated_parsed")
                if published:
                    try:
                        fecha = datetime(*published[:6])
                    except Exception:
                        fecha = None

                cursor.execute(
                    """
                    INSERT INTO noticias (
                        id, titulo, resumen, url, fecha, imagen_url, categoria_id, pais_id
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
                    ON CONFLICT (id) DO NOTHING
                    """,
                    (
                        noticia_id,
                        titulo,
                        resumen,
                        link,
                        fecha,
                        imagen_url,
                        categoria_id,
                        pais_id,
                    ),
                )
            except Exception as entry_err:
                app.logger.error(
                    f"[ENTRY ERROR] Falló procesar entrada {link}: {entry_err}", exc_info=True
                )
                continue

    try:
        conn.commit()
    except Exception as commit_err:
        app.logger.error(
            f"[DB ERROR] Al confirmar transacción: {commit_err}", exc_info=True
        )
    finally:
        if conn:
            conn.close()
    app.logger.info(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Feeds procesados.")


# ---------------------------------------------------------------------------
# Application launcher + scheduler
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    scheduler = BackgroundScheduler()
    scheduler.add_job(fetch_and_store, "interval", minutes=2, id="rss_job")
    scheduler.start()
    try:
        # use_reloader=False: the debug reloader would re-import this module and
        # start a second scheduler, duplicating the fetch job
        app.run(host="0.0.0.0", port=5000, debug=True, use_reloader=False)
    finally:
        scheduler.shutdown()