# rss/app.py

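"""Flask front end and ingestion layer for an RSS news aggregator.

Pulls articles from configured feeds in parallel, stores them in PostgreSQL
(with URL-based deduplication), and serves views for browsing news with
optional translations, managing feeds and scraped URL sources, per-country
event clusters, and CSV/ZIP backup and restore.
"""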
import os
import csv
import io
import socket
import zipfile
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from dotenv import load_dotenv
from flask import (
Flask, render_template, request, redirect, url_for,
flash, send_file
)
from markupsafe import Markup
import psycopg2
import psycopg2.extras as extras
load_dotenv()
DB_CONFIG = {
"dbname": os.getenv("DB_NAME", "rss"),
"user": os.getenv("DB_USER", "rss"),
"password": os.getenv("DB_PASS", ""),
"host": os.getenv("DB_HOST", "localhost"),
"port": int(os.getenv("DB_PORT", "5432")),
}
DEFAULT_LANG = os.getenv("DEFAULT_LANG", "es")
DEFAULT_TRANSLATION_LANG = os.getenv("DEFAULT_TRANSLATION_LANG", "es")
WEB_TRANSLATED_DEFAULT = os.getenv("WEB_TRANSLATED_DEFAULT", "1") == "1"
NEWS_PER_PAGE_DEFAULT = int(os.getenv("NEWS_PER_PAGE", "20"))
SECRET_KEY = os.getenv("SECRET_KEY", "cambia_esta_clave_insegura")
RSS_MAX_WORKERS = int(os.getenv("RSS_MAX_WORKERS", "10"))
RSS_FEED_TIMEOUT = int(os.getenv("RSS_FEED_TIMEOUT", "30"))
RSS_MAX_FAILURES = int(os.getenv("RSS_MAX_FAILURES", "5"))
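# Example .env for local development (placeholder values, not taken from the
# original repo; only the variable names above are authoritative):
#
#   DB_NAME=rss
#   DB_USER=rss
#   DB_PASS=secret
#   DB_HOST=localhost
#   SECRET_KEY=<long random string>
#   RSS_MAX_WORKERS=10
#   RSS_FEED_TIMEOUT=30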
app = Flask(__name__)
app.config["SECRET_KEY"] = SECRET_KEY
def get_conn():
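    """Open a new PostgreSQL connection from DB_CONFIG (one per call)."""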
return psycopg2.connect(**DB_CONFIG)
def safe_html(texto):
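    """Mark stored HTML as safe for Jinja rendering. Note: no sanitization
    happens here; this assumes feed content was cleaned upstream."""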
if not texto:
return ""
return Markup(texto)
app.jinja_env.filters["safe_html"] = safe_html
def get_categorias(conn):
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT id, nombre FROM categorias ORDER BY nombre;")
return cur.fetchall()
def get_continentes(conn):
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT id, nombre FROM continentes ORDER BY nombre;")
return cur.fetchall()
def get_paises(conn):
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
"""
SELECT p.id, p.nombre, p.continente_id
FROM paises p
ORDER BY p.nombre;
"""
)
return cur.fetchall()
def normalize_url_py(u: str | None) -> str | None:
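    """Normalize a URL for deduplication: force a scheme, lowercase the host,
    strip "www.", default ports, fragments, and tracking parameters (utm_*,
    gclid, fbclid, ...), and collapse/trim redundant slashes in the path.

    Example:
        normalize_url_py("https://www.Example.com/a//b/?utm_source=x&id=1#f")
        -> "https://example.com/a/b?id=1"
    """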
if not u:
return None
u = u.strip()
if not u:
return None
if "://" not in u:
u = "http://" + u
u = u.split("#", 1)[0]
    from urllib.parse import urlsplit, urlunsplit, parse_qsl, urlencode
sp = urlsplit(u)
scheme = sp.scheme.lower()
netloc = sp.netloc.lower()
if netloc.startswith("www."):
netloc = netloc[4:]
if scheme == "http" and netloc.endswith(":80"):
netloc = netloc[:-3]
if scheme == "https" and netloc.endswith(":443"):
netloc = netloc[:-4]
qs_pairs = []
for k, v in parse_qsl(sp.query, keep_blank_values=True):
kl = k.lower()
if kl.startswith("utm_"):
continue
if kl in ("gclid", "fbclid", "mc_cid", "mc_eid", "ref", "ref_src", "yclid", "igshid"):
continue
qs_pairs.append((k, v))
new_query = urlencode(qs_pairs, doseq=True)
path = sp.path or "/"
while "//" in path:
path = path.replace("//", "/")
if path != "/":
path = path.rstrip("/")
return urlunsplit((scheme, netloc, path, new_query, ""))
def _parse_entry_date(entry) -> datetime | None:
    """Best-effort publication datetime (naive UTC) for a feedparser entry."""
    for attr in ("published_parsed", "updated_parsed"):
        parsed = getattr(entry, attr, None)
        if parsed:
            try:
                # feedparser returns a struct_time in UTC; build the datetime
                # directly rather than via time.mktime(), which would
                # misinterpret the struct as local time.
                return datetime(*parsed[:6])
            except (TypeError, ValueError):
                continue
    return None
def _process_feed(feed_row):
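    """Fetch one feed, insert any new articles (deduplicated by URL), and
    reset its failure counter; on error, increment fallos and deactivate the
    feed once RSS_MAX_FAILURES is reached. Runs inside a worker thread."""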
import feedparser
feed_id = feed_row["id"]
feed_url = feed_row["url"]
feed_nombre = feed_row["nombre"]
categoria_id = feed_row["categoria_id"]
pais_id = feed_row["pais_id"]
app.logger.info(f"[ingesta] Procesando feed {feed_id} '{feed_nombre}' ({feed_url})")
try:
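        # feedparser.parse() takes no timeout parameter, so the global socket
        # timeout is swapped in around the fetch; note this setting is
        # process-wide and shared by concurrent worker threads.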
old_timeout = socket.getdefaulttimeout()
socket.setdefaulttimeout(RSS_FEED_TIMEOUT)
try:
parsed = feedparser.parse(feed_url)
finally:
socket.setdefaulttimeout(old_timeout)
if parsed.bozo and parsed.bozo_exception:
app.logger.warning(f"[ingesta] Feed {feed_id} bozo={parsed.bozo}: {parsed.bozo_exception}")
entries = parsed.entries or []
nuevos = 0
with get_conn() as conn:
conn.autocommit = True
with conn.cursor() as cur:
for entry in entries:
link = getattr(entry, "link", None) or getattr(entry, "id", None)
if not link:
continue
url_norm = normalize_url_py(link)
if not url_norm:
continue
titulo = getattr(entry, "title", None) or url_norm
resumen = getattr(entry, "summary", None) or getattr(entry, "description", None)
if resumen:
resumen = resumen[:4000]
fecha = _parse_entry_date(entry) or datetime.utcnow()
imagen_url = None
try:
if hasattr(entry, "media_content") and entry.media_content:
imagen_url = entry.media_content[0].get("url")
except Exception:
imagen_url = None
if not imagen_url:
try:
if hasattr(entry, "links"):
                                for enlace in entry.links:
                                    if enlace.get("rel") == "enclosure" and enlace.get("type", "").startswith("image/"):
                                        imagen_url = enlace.get("href")
                                        break
except Exception:
imagen_url = None
try:
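                        # id = md5 of the normalized URL (deterministic), and
                        # ON CONFLICT (url) skips already-seen articles.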
cur.execute(
"""
INSERT INTO noticias
(id, titulo, resumen, url, fecha, imagen_url,
fuente_nombre, categoria_id, pais_id)
VALUES (md5(%s), %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (url) DO NOTHING;
""",
(
url_norm,
titulo,
resumen,
url_norm,
fecha,
imagen_url,
feed_nombre,
categoria_id,
pais_id,
),
)
if cur.rowcount > 0:
nuevos += 1
except psycopg2.Error as e:
app.logger.warning(f"[ingesta] Error insertando noticia de {feed_url}: {e}")
with get_conn() as conn, conn.cursor() as cur:
cur.execute(
"UPDATE feeds SET fallos = 0 WHERE id = %s;",
(feed_id,),
)
app.logger.info(f"[ingesta] Feed {feed_id} OK. Nuevas noticias: {nuevos}")
except Exception as e:
app.logger.exception(f"[ingesta] Error procesando feed {feed_id} ({feed_url}): {e}")
try:
with get_conn() as conn, conn.cursor() as cur:
cur.execute(
"""
UPDATE feeds
SET fallos = COALESCE(fallos, 0) + 1,
activo = CASE
WHEN COALESCE(fallos, 0) + 1 >= %s THEN FALSE
ELSE activo
END
WHERE id = %s;
""",
(RSS_MAX_FAILURES, feed_id),
)
except Exception as e2:
app.logger.warning(f"[ingesta] No se pudo actualizar fallos de feed {feed_id}: {e2}")
def fetch_and_store_all():
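    """Process every active feed concurrently in a thread pool. No route in
    this file calls it; presumably it is triggered externally (cron or a
    scheduler)."""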
app.logger.info("[ingesta] fetch_and_store_all() iniciado")
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
"""
SELECT id, nombre, url, categoria_id, pais_id, fallos, activo
FROM feeds
WHERE activo = TRUE
AND (fallos IS NULL OR fallos < %s)
ORDER BY id;
""",
(RSS_MAX_FAILURES,),
)
feeds = cur.fetchall()
if not feeds:
app.logger.info("[ingesta] No hay feeds activos para procesar.")
return
app.logger.info(f"[ingesta] Procesando {len(feeds)} feeds (max workers = {RSS_MAX_WORKERS})")
max_workers = max(1, RSS_MAX_WORKERS)
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = {executor.submit(_process_feed, f): f for f in feeds}
for future in as_completed(futures):
feed_row = futures[future]
try:
future.result()
except Exception as e:
app.logger.exception(f"[ingesta] Excepción no controlada en feed {feed_row['id']}: {e}")
app.logger.info("[ingesta] fetch_and_store_all() terminado")
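# Minimal sketch (an assumption, not part of the original wiring): expose the
# ingestion pass as a Flask CLI command, so cron can run `flask --app app ingest`.
@app.cli.command("ingest")
def ingest_command():
    """Run one ingestion pass over all active feeds."""
    fetch_and_store_all()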
@app.route("/")
@app.route("/home")
def home():
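    """Paginated news listing with filters (full-text q, category, continent,
    country, date). Shows translations for ?lang= when available; any ?orig=
    value forces the original titles/summaries. XHR requests receive only the
    partial list template."""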
    try:
        page = max(int(request.args.get("page", 1) or 1), 1)
        per_page = int(request.args.get("per_page", NEWS_PER_PAGE_DEFAULT) or NEWS_PER_PAGE_DEFAULT)
    except ValueError:
        page, per_page = 1, NEWS_PER_PAGE_DEFAULT
    per_page = min(max(per_page, 10), 100)
q = (request.args.get("q") or "").strip()
categoria_id = request.args.get("categoria_id") or None
continente_id = request.args.get("continente_id") or None
pais_id = request.args.get("pais_id") or None
fecha_str = request.args.get("fecha") or ""
lang = (request.args.get("lang") or DEFAULT_TRANSLATION_LANG or DEFAULT_LANG).lower()[:5]
orig_flag = request.args.get("orig")
use_tr = not bool(orig_flag)
fecha_filtro = None
if fecha_str:
try:
fecha_filtro = datetime.strptime(fecha_str, "%Y-%m-%d").date()
except ValueError:
fecha_filtro = None
offset = (page - 1) * per_page
with get_conn() as conn:
conn.autocommit = True
categorias = get_categorias(conn)
continentes = get_continentes(conn)
paises = get_paises(conn)
params = []
where = ["1=1"]
if fecha_filtro:
where.append("n.fecha::date = %s")
params.append(fecha_filtro)
if categoria_id:
where.append("n.categoria_id = %s")
params.append(int(categoria_id))
if pais_id:
where.append("n.pais_id = %s")
params.append(int(pais_id))
elif continente_id:
where.append("p.continente_id = %s")
params.append(int(continente_id))
if q:
search_like = f"%{q}%"
if use_tr:
where.append(
"""
(
n.tsv @@ websearch_to_tsquery('spanish', %s)
OR t.titulo_trad ILIKE %s
OR t.resumen_trad ILIKE %s
OR n.titulo ILIKE %s
OR n.resumen ILIKE %s
)
"""
)
params.extend([q, search_like, search_like, search_like, search_like])
else:
where.append(
"""
(
n.tsv @@ websearch_to_tsquery('spanish', %s)
OR n.titulo ILIKE %s
OR n.resumen ILIKE %s
)
"""
)
params.extend([q, search_like, search_like])
where_sql = " AND ".join(where)
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
f"""
SELECT COUNT(DISTINCT n.id)
FROM noticias n
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
LEFT JOIN traducciones t
ON t.noticia_id = n.id
AND t.lang_to = %s
AND t.status = 'done'
WHERE {where_sql}
""",
[lang] + params,
)
total_results = cur.fetchone()[0] if cur.rowcount else 0
total_pages = (total_results // per_page) + (1 if total_results % per_page else 0)
cur.execute(
f"""
SELECT
n.id,
n.titulo,
n.resumen,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
c.nombre AS categoria,
p.nombre AS pais,
t.id AS traduccion_id,
t.titulo_trad AS titulo_traducido,
t.resumen_trad AS resumen_traducido,
CASE WHEN t.id IS NOT NULL THEN TRUE ELSE FALSE END AS tiene_traduccion,
n.titulo AS titulo_original,
n.resumen AS resumen_original
FROM noticias n
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
LEFT JOIN traducciones t
ON t.noticia_id = n.id
AND t.lang_to = %s
AND t.status = 'done'
WHERE {where_sql}
ORDER BY n.fecha DESC NULLS LAST, n.id DESC
LIMIT %s OFFSET %s
""",
[lang] + params + [per_page, offset],
)
noticias = cur.fetchall()
tags_por_tr = {}
tr_ids = [n["traduccion_id"] for n in noticias if n["traduccion_id"]]
if tr_ids:
cur.execute(
"""
SELECT tn.traduccion_id, tg.valor, tg.tipo
FROM tags_noticia tn
JOIN tags tg ON tg.id = tn.tag_id
WHERE tn.traduccion_id = ANY(%s);
""",
(tr_ids,),
)
for tr_id, valor, tipo in cur.fetchall():
tags_por_tr.setdefault(tr_id, []).append((valor, tipo))
context = dict(
noticias=noticias,
total_results=total_results,
total_pages=total_pages,
page=page,
per_page=per_page,
categorias=categorias,
continentes=continentes,
paises=paises,
q=q,
cat_id=int(categoria_id) if categoria_id else None,
cont_id=int(continente_id) if continente_id else None,
pais_id=int(pais_id) if pais_id else None,
fecha_filtro=fecha_str,
use_tr=use_tr,
lang=lang,
tags_por_tr=tags_por_tr,
)
if request.headers.get("X-Requested-With") == "XMLHttpRequest":
return render_template("_noticias_list.html", **context)
return render_template("noticias.html", **context)
@app.route("/noticia")
def noticia():
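    """Article detail, addressed by translation (?tr_id=) or raw article
    (?id=). When a translation exists, its tags and related articles are
    loaded as well."""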
tr_id = request.args.get("tr_id")
noticia_id = request.args.get("id")
if not tr_id and not noticia_id:
flash("No se ha indicado ninguna noticia.", "warning")
return redirect(url_for("home"))
with get_conn() as conn:
conn.autocommit = True
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
dato = None
if tr_id:
cur.execute(
"""
SELECT
t.id AS traduccion_id,
t.lang_from,
t.lang_to,
t.titulo_trad,
t.resumen_trad,
                    n.id AS noticia_id,
n.titulo AS titulo_orig,
n.resumen AS resumen_orig,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
c.nombre AS categoria,
p.nombre AS pais
FROM traducciones t
JOIN noticias n ON n.id = t.noticia_id
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
WHERE t.id = %s
""",
(int(tr_id),),
)
dato = cur.fetchone()
else:
cur.execute(
"""
SELECT
NULL AS traduccion_id,
NULL AS lang_from,
NULL AS lang_to,
NULL AS titulo_trad,
NULL AS resumen_trad,
n.id AS noticia_id,
n.titulo AS titulo_orig,
n.resumen AS resumen_orig,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
c.nombre AS categoria,
p.nombre AS pais
FROM noticias n
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
WHERE n.id = %s
""",
(noticia_id,),
)
dato = cur.fetchone()
tags = []
relacionadas = []
if dato and dato["traduccion_id"]:
cur.execute(
"""
SELECT tg.valor, tg.tipo
FROM tags_noticia tn
JOIN tags tg ON tg.id = tn.tag_id
WHERE tn.traduccion_id = %s
ORDER BY tg.tipo, tg.valor;
""",
(dato["traduccion_id"],),
)
tags = cur.fetchall()
cur.execute(
"""
SELECT
n2.url,
n2.titulo,
n2.fecha,
n2.imagen_url,
n2.fuente_nombre,
rn.score
FROM related_noticias rn
JOIN traducciones t2 ON t2.id = rn.related_traduccion_id
JOIN noticias n2 ON n2.id = t2.noticia_id
WHERE rn.traduccion_id = %s
ORDER BY rn.score DESC
LIMIT 8;
""",
(dato["traduccion_id"],),
)
relacionadas = cur.fetchall()
return render_template(
"noticia.html",
dato=dato,
tags=tags,
relacionadas=relacionadas,
)
@app.route("/dashboard")
def dashboard():
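    """Global counters plus trending tags from the last 24 h (read from the
    v_tag_counts_24h view; silently empty if the view is missing)."""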
with get_conn() as conn:
conn.autocommit = True
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT COUNT(*) FROM feeds;")
feeds_totales = cur.fetchone()[0]
cur.execute("SELECT COUNT(*) FROM noticias;")
noticias_totales = cur.fetchone()[0]
cur.execute("SELECT COUNT(*) FROM feeds WHERE activo = FALSE;")
feeds_caidos = cur.fetchone()[0]
stats = {
"feeds_totales": feeds_totales,
"noticias_totales": noticias_totales,
"feeds_caidos": feeds_caidos,
}
top_tags = []
try:
cur.execute(
"SELECT id, valor, tipo, apariciones FROM v_tag_counts_24h ORDER BY apariciones DESC LIMIT 100;"
)
top_tags = cur.fetchall()
except psycopg2.Error:
top_tags = []
return render_template("dashboard.html", stats=stats, top_tags=top_tags)
@app.route("/feeds")
def manage_feeds():
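    """Paginated feed management table (50 per page), with category and
    country lookups for the forms."""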
    try:
        page = max(int(request.args.get("page", 1) or 1), 1)
    except ValueError:
        page = 1
    per_page = 50
    offset = (page - 1) * per_page
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT COUNT(*) FROM feeds;")
total_feeds = cur.fetchone()[0] if cur.rowcount else 0
total_pages = (total_feeds // per_page) + (1 if total_feeds % per_page else 0)
cur.execute(
"""
SELECT
f.id,
f.nombre,
f.descripcion,
f.url,
f.activo,
f.fallos,
c.nombre AS categoria,
p.nombre AS pais
FROM feeds f
LEFT JOIN categorias c ON c.id = f.categoria_id
LEFT JOIN paises p ON p.id = f.pais_id
ORDER BY f.nombre
LIMIT %s OFFSET %s;
""",
(per_page, offset),
)
feeds = cur.fetchall()
cur.execute("SELECT id, nombre FROM categorias ORDER BY nombre;")
categorias = cur.fetchall()
cur.execute("SELECT id, nombre FROM paises ORDER BY nombre;")
paises = cur.fetchall()
return render_template(
"feeds_list.html",
feeds=feeds,
total_feeds=total_feeds,
total_pages=total_pages,
page=page,
categorias=categorias,
paises=paises,
)
@app.route("/feeds/add", methods=["GET", "POST"])
def add_feed():
with get_conn() as conn:
conn.autocommit = True
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
nombre = request.form.get("nombre")
descripcion = request.form.get("descripcion") or None
url = request.form.get("url")
categoria_id = request.form.get("categoria_id") or None
pais_id = request.form.get("pais_id") or None
idioma = (request.form.get("idioma") or "").strip().lower()[:2] or None
try:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma)
VALUES (%s, %s, %s, %s, %s, %s);
""",
(
nombre,
descripcion,
url,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
idioma,
),
)
flash(f"Feed '{nombre}' añadido correctamente.", "success")
return redirect(url_for("manage_feeds"))
except psycopg2.Error as e:
flash(f"Error al añadir feed: {e}", "error")
return render_template("add_feed.html", categorias=categorias, paises=paises)
@app.route("/feeds/<int:feed_id>/edit", methods=["GET", "POST"])
def edit_feed(feed_id):
with get_conn() as conn:
conn.autocommit = True
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT * FROM feeds WHERE id = %s;", (feed_id,))
feed = cur.fetchone()
if not feed:
flash("Feed no encontrado.", "error")
return redirect(url_for("manage_feeds"))
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
nombre = request.form.get("nombre")
descripcion = request.form.get("descripcion") or None
url = request.form.get("url")
categoria_id = request.form.get("categoria_id") or None
pais_id = request.form.get("pais_id") or None
idioma = (request.form.get("idioma") or "").strip().lower()[:2] or None
activo = bool(request.form.get("activo"))
try:
cur.execute(
"""
UPDATE feeds
SET nombre = %s,
descripcion = %s,
url = %s,
categoria_id = %s,
pais_id = %s,
idioma = %s,
activo = %s
WHERE id = %s;
""",
(
nombre,
descripcion,
url,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
idioma,
activo,
feed_id,
),
)
flash("Feed actualizado correctamente.", "success")
return redirect(url_for("manage_feeds"))
except psycopg2.Error as e:
flash(f"Error al actualizar feed: {e}", "error")
return render_template("edit_feed.html", feed=feed, categorias=categorias, paises=paises)
@app.route("/feeds/<int:feed_id>/delete")
def delete_feed(feed_id):
with get_conn() as conn, conn.cursor() as cur:
try:
cur.execute("DELETE FROM feeds WHERE id = %s;", (feed_id,))
flash("Feed eliminado.", "success")
except psycopg2.Error as e:
flash(f"No se pudo eliminar el feed: {e}", "error")
return redirect(url_for("manage_feeds"))
@app.route("/feeds/<int:feed_id>/reactivar")
def reactivar_feed(feed_id):
with get_conn() as conn, conn.cursor() as cur:
try:
cur.execute(
"UPDATE feeds SET activo = TRUE, fallos = 0 WHERE id = %s;",
(feed_id,),
)
flash("Feed reactivado.", "success")
except psycopg2.Error as e:
flash(f"No se pudo reactivar el feed: {e}", "error")
return redirect(url_for("manage_feeds"))
@app.route("/add", methods=["POST"])
def legacy_add_feed():
return add_feed()
@app.route("/backup_feeds")
def backup_feeds():
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
"""
SELECT
f.id,
f.nombre,
f.descripcion,
f.url,
f.categoria_id,
c.nombre AS categoria,
f.pais_id,
p.nombre AS pais,
f.idioma,
f.activo,
f.fallos
FROM feeds f
LEFT JOIN categorias c ON c.id = f.categoria_id
LEFT JOIN paises p ON p.id = f.pais_id
ORDER BY f.id;
"""
)
rows = cur.fetchall()
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(["id", "nombre", "descripcion", "url", "categoria_id", "categoria",
"pais_id", "pais", "idioma", "activo", "fallos"])
for r in rows:
writer.writerow([
r["id"],
r["nombre"],
r["descripcion"] or "",
r["url"],
r["categoria_id"] or "",
r["categoria"] or "",
r["pais_id"] or "",
r["pais"] or "",
r["idioma"] or "",
r["activo"],
r["fallos"],
])
output.seek(0)
return send_file(
io.BytesIO(output.getvalue().encode("utf-8")),
mimetype="text/csv",
as_attachment=True,
download_name="feeds_backup.csv",
)
@app.route("/restore_feeds", methods=["GET", "POST"])
def restore_feeds():
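    """Upsert feeds from a CSV in the /backup_feeds format; rows are matched
    on url (ON CONFLICT DO UPDATE) and bad rows are skipped."""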
if request.method == "GET":
return render_template("restore_feeds.html")
file = request.files.get("file")
if not file:
flash("No se ha subido ningún archivo.", "error")
return redirect(url_for("restore_feeds"))
try:
content = file.stream.read().decode("utf-8", errors="ignore")
reader = csv.DictReader(io.StringIO(content))
except Exception as e:
flash(f"Error leyendo el CSV: {e}", "error")
return redirect(url_for("restore_feeds"))
def parse_int_field(row, key):
val = row.get(key)
if val is None or str(val).strip() == "":
return None
try:
return int(val)
except (ValueError, TypeError):
app.logger.warning(
f"[restore_feeds] Valor no numérico '{val}' en columna {key}, se usará NULL."
)
return None
conn = get_conn()
try:
with conn.cursor() as cur:
for row in reader:
try:
categoria_id = parse_int_field(row, "categoria_id")
pais_id = parse_int_field(row, "pais_id")
raw_fallos = (row.get("fallos") or "").strip()
if raw_fallos == "":
fallos = 0
else:
try:
fallos = int(raw_fallos)
except (ValueError, TypeError):
app.logger.warning(
f"[restore_feeds] Valor no numérico '{raw_fallos}' en columna fallos, se usará 0."
)
fallos = 0
cur.execute(
"""
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma, activo, fallos)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
ON CONFLICT (url) DO UPDATE
SET nombre = EXCLUDED.nombre,
descripcion = EXCLUDED.descripcion,
categoria_id = EXCLUDED.categoria_id,
pais_id = EXCLUDED.pais_id,
idioma = EXCLUDED.idioma,
activo = EXCLUDED.activo,
fallos = EXCLUDED.fallos;
""",
(
row["nombre"],
row.get("descripcion") or None,
row["url"],
categoria_id,
pais_id,
(row.get("idioma") or "").strip().lower()[:2] or None,
row.get("activo") in ("1", "True", "true", "t", "on"),
fallos,
),
)
conn.commit()
except psycopg2.Error as e:
                    app.logger.warning(f"[restore_feeds] Error restaurando feed: {e}")
conn.rollback()
finally:
conn.close()
flash("Restauración de feeds completada (con posibles errores en algunos registros).", "success")
return redirect(url_for("dashboard"))
@app.route("/urls")
def manage_urls():
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
"""
SELECT
fu.id,
fu.nombre,
fu.url,
c.nombre AS categoria,
p.nombre AS pais,
fu.idioma
FROM fuentes_url fu
LEFT JOIN categorias c ON c.id = fu.categoria_id
LEFT JOIN paises p ON p.id = fu.pais_id
ORDER BY fu.nombre;
"""
)
fuentes = cur.fetchall()
return render_template("urls_list.html", fuentes=fuentes)
@app.route("/urls/add_source", methods=["GET", "POST"])
def add_url_source():
with get_conn() as conn:
conn.autocommit = True
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
nombre = request.form.get("nombre")
url = request.form.get("url")
categoria_id = request.form.get("categoria_id") or None
pais_id = request.form.get("pais_id") or None
idioma = (request.form.get("idioma", "es") or "es").strip().lower()[:2]
try:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma)
VALUES (%s, %s, %s, %s, %s)
ON CONFLICT (url) DO UPDATE
SET nombre = EXCLUDED.nombre,
categoria_id = EXCLUDED.categoria_id,
pais_id = EXCLUDED.pais_id,
idioma = EXCLUDED.idioma;
""",
(
nombre,
url,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
idioma,
),
)
flash(f"Fuente URL '{nombre}' añadida/actualizada correctamente.", "success")
return redirect(url_for("manage_urls"))
except psycopg2.Error as e:
flash(f"Error al guardar fuente URL: {e}", "error")
return render_template("add_url_source.html", categorias=categorias, paises=paises)
@app.route("/urls/<int:url_id>/edit", methods=["GET", "POST"])
def edit_url_source(url_id):
with get_conn() as conn:
conn.autocommit = True
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT * FROM fuentes_url WHERE id = %s;", (url_id,))
fuente = cur.fetchone()
if not fuente:
flash("Fuente URL no encontrada.", "error")
return redirect(url_for("manage_urls"))
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
nombre = request.form.get("nombre")
url = request.form.get("url")
categoria_id = request.form.get("categoria_id") or None
pais_id = request.form.get("pais_id") or None
idioma = (request.form.get("idioma") or "").strip().lower()[:2] or "es"
try:
cur.execute(
"""
UPDATE fuentes_url
SET nombre = %s,
url = %s,
categoria_id = %s,
pais_id = %s,
idioma = %s
WHERE id = %s;
""",
(
nombre,
url,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
idioma,
url_id,
),
)
flash("Fuente URL actualizada.", "success")
return redirect(url_for("manage_urls"))
except psycopg2.Error as e:
flash(f"Error al actualizar fuente: {e}", "error")
return render_template("edit_url_source.html", fuente=fuente, categorias=categorias, paises=paises)
@app.route("/urls/<int:url_id>/delete")
def delete_url_source(url_id):
with get_conn() as conn, conn.cursor() as cur:
try:
cur.execute("DELETE FROM fuentes_url WHERE id = %s;", (url_id,))
flash("Fuente URL eliminada.", "success")
except psycopg2.Error as e:
flash(f"No se pudo eliminar la fuente URL: {e}", "error")
return redirect(url_for("manage_urls"))
@app.route("/backup_urls")
def backup_urls():
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
"""
SELECT id, nombre, url, categoria_id, pais_id, idioma
FROM fuentes_url
ORDER BY id;
"""
)
rows = cur.fetchall()
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(["id", "nombre", "url", "categoria_id", "pais_id", "idioma"])
for r in rows:
writer.writerow([
r["id"],
r["nombre"],
r["url"],
r["categoria_id"] or "",
r["pais_id"] or "",
r["idioma"] or "",
])
output.seek(0)
return send_file(
io.BytesIO(output.getvalue().encode("utf-8")),
mimetype="text/csv",
as_attachment=True,
download_name="fuentes_url_backup.csv",
)
@app.route("/restore_urls", methods=["GET", "POST"])
def restore_urls():
if request.method == "GET":
return render_template("restore_urls.html")
file = request.files.get("file")
if not file:
flash("No se ha subido ningún archivo.", "error")
return redirect(url_for("restore_urls"))
try:
content = file.stream.read().decode("utf-8", errors="ignore")
reader = csv.DictReader(io.StringIO(content))
except Exception as e:
flash(f"Error leyendo el CSV: {e}", "error")
return redirect(url_for("restore_urls"))
conn = get_conn()
try:
with conn.cursor() as cur:
for row in reader:
try:
cur.execute(
"""
INSERT INTO fuentes_url (id, nombre, url, categoria_id, pais_id, idioma)
VALUES (%s,%s,%s,%s,%s,%s)
ON CONFLICT (id) DO UPDATE
SET nombre = EXCLUDED.nombre,
url = EXCLUDED.url,
categoria_id = EXCLUDED.categoria_id,
pais_id = EXCLUDED.pais_id,
idioma = EXCLUDED.idioma;
""",
(
int(row["id"]),
row["nombre"],
row["url"],
int(row["categoria_id"]) if row.get("categoria_id") else None,
int(row["pais_id"]) if row.get("pais_id") else None,
(row.get("idioma") or "es").strip().lower()[:2],
),
)
conn.commit()
except psycopg2.Error as e:
                    app.logger.warning(f"[restore_urls] Error restaurando fuente_url: {e}")
conn.rollback()
finally:
conn.close()
flash("Importación de fuentes URL completada (con posibles errores en algunas filas).", "success")
return redirect(url_for("dashboard"))
@app.route("/urls/add", methods=["GET", "POST"])
def add_url():
with get_conn() as conn:
conn.autocommit = True
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
url = request.form.get("url")
categoria_id = request.form.get("categoria_id") or None
pais_id = request.form.get("pais_id") or None
if not url:
flash("Debes indicar una URL.", "error")
return redirect(url_for("add_url"))
try:
from newspaper import Article
except ImportError:
flash("La librería newspaper3k no está instalada en este entorno.", "error")
return redirect(url_for("add_url"))
try:
art = Article(url)
art.download()
art.parse()
titulo = art.title or url
            # Article.summary is only filled in after art.nlp(); fall back to
            # the page's meta description so the stored summary is not empty.
            resumen = ((art.summary or art.meta_description or "")[:2000]) or None
imagen_url = art.top_image or None
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url,
fuente_nombre, categoria_id, pais_id)
VALUES (md5(%s), %s, %s, %s, NOW(), %s, %s, %s, %s)
ON CONFLICT (url) DO NOTHING;
""",
(
url,
titulo,
resumen,
url,
imagen_url,
None,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
),
)
flash("Noticia añadida desde URL.", "success")
return redirect(url_for("home"))
except Exception as e:
flash(f"Error al scrapear la URL: {e}", "error")
return redirect(url_for("add_url"))
return render_template("add_url.html", categorias=categorias, paises=paises)
@app.route("/urls/scrape", methods=["GET", "POST"])
def scrape_url():
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT id, nombre FROM fuentes_url ORDER BY nombre;")
fuentes = cur.fetchall()
if request.method == "POST":
source_id = request.form.get("source_id")
if not source_id:
flash("Debes seleccionar una fuente.", "error")
return redirect(url_for("scrape_url"))
flash("Scrapeo desde fuente aún no implementado (stub).", "warning")
return redirect(url_for("dashboard"))
return render_template("scrape_url.html", fuentes=fuentes)
@app.route("/backup_completo")
def backup_completo():
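    """Bundle full CSV dumps of feeds and fuentes_url into a downloadable ZIP."""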
mem_file = io.BytesIO()
with zipfile.ZipFile(mem_file, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT * FROM feeds ORDER BY id;")
rows = cur.fetchall()
buf = io.StringIO()
writer = csv.writer(buf)
if rows:
writer.writerow(rows[0].keys())
for r in rows:
writer.writerow(list(r.values()))
zf.writestr("feeds.csv", buf.getvalue())
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT * FROM fuentes_url ORDER BY id;")
rows = cur.fetchall()
buf2 = io.StringIO()
writer2 = csv.writer(buf2)
if rows:
writer2.writerow(rows[0].keys())
for r in rows:
writer2.writerow(list(r.values()))
zf.writestr("fuentes_url.csv", buf2.getvalue())
mem_file.seek(0)
return send_file(
mem_file,
mimetype="application/zip",
as_attachment=True,
download_name="backup_completo_rss.zip",
)
@app.route("/restore_completo", methods=["GET", "POST"])
def restore_completo():
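    """Restore feeds/fuentes_url from a /backup_completo ZIP. Destructive:
    each recognized table is TRUNCATEd and re-loaded with COPY, so the CSV
    column order must match the current table definitions."""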
if request.method == "GET":
return render_template("restore_completo.html")
file = request.files.get("backup_file")
if not file or file.filename == "":
flash("No se ha seleccionado ningún archivo.", "error")
return redirect(url_for("restore_completo"))
filename = file.filename.lower()
if not filename.endswith(".zip"):
flash("El archivo debe ser un .zip.", "error")
return redirect(url_for("restore_completo"))
raw = file.read()
try:
zf = zipfile.ZipFile(io.BytesIO(raw))
except zipfile.BadZipFile:
flash("El archivo no es un .zip válido.", "error")
return redirect(url_for("restore_completo"))
restored_counts = {}
conn = get_conn()
try:
with conn:
with conn.cursor() as cur:
if "feeds.csv" in zf.namelist():
cur.execute("TRUNCATE TABLE feeds RESTART IDENTITY;")
with zf.open("feeds.csv") as f:
text_f = io.TextIOWrapper(f, encoding="utf-8")
cur.copy_expert("COPY feeds FROM STDIN CSV HEADER", text_f)
restored_counts["feeds"] = cur.rowcount if cur.rowcount is not None else 0
if "fuentes_url.csv" in zf.namelist():
cur.execute("TRUNCATE TABLE fuentes_url RESTART IDENTITY;")
with zf.open("fuentes_url.csv") as f2:
text_f2 = io.TextIOWrapper(f2, encoding="utf-8")
cur.copy_expert("COPY fuentes_url FROM STDIN CSV HEADER", text_f2)
restored_counts["fuentes_url"] = cur.rowcount if cur.rowcount is not None else 0
except Exception as e:
conn.rollback()
conn.close()
flash(f"Error al restaurar el backup: {e}", "error")
return redirect(url_for("restore_completo"))
conn.close()
if restored_counts:
partes = [f"{tabla}: {n} filas" for tabla, n in restored_counts.items()]
flash("Restauración completada: " + ", ".join(partes), "success")
else:
flash("Backup procesado pero no se encontraron ficheros reconocidos (feeds.csv, fuentes_url.csv).", "warning")
return redirect(url_for("dashboard"))
@app.route("/eventos_pais")
def eventos_pais():
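    """Events for a country: events linked through translations to articles
    with that pais_id, each listed with its translated articles."""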
pais_id = request.args.get("pais_id") or None
    try:
        page = max(int(request.args.get("page", 1) or 1), 1)
    except ValueError:
        page = 1
    per_page = 30
    offset = (page - 1) * per_page
lang = (request.args.get("lang") or DEFAULT_TRANSLATION_LANG or DEFAULT_LANG).lower()[:5]
with get_conn() as conn:
conn.autocommit = True
paises = get_paises(conn)
eventos = []
total_eventos = 0
noticias_por_evento = {}
pais_nombre = None
if pais_id:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
                # 1) Events with at least one translation whose article belongs to that country
cur.execute(
"""
SELECT
e.id,
e.titulo,
e.fecha_inicio,
e.fecha_fin,
e.n_noticias,
MAX(p.nombre) AS pais_nombre
FROM eventos e
JOIN traducciones t ON t.evento_id = e.id
JOIN noticias n ON n.id = t.noticia_id
JOIN paises p ON p.id = n.pais_id
WHERE n.pais_id = %s
GROUP BY e.id, e.titulo, e.fecha_inicio, e.fecha_fin, e.n_noticias
ORDER BY e.fecha_inicio DESC NULLS LAST, e.id DESC
LIMIT %s OFFSET %s;
""",
(int(pais_id), per_page, offset),
)
eventos = cur.fetchall()
                # 2) Total distinct events for that country
cur.execute(
"""
SELECT COUNT(DISTINCT e.id)
FROM eventos e
JOIN traducciones t ON t.evento_id = e.id
JOIN noticias n ON n.id = t.noticia_id
WHERE n.pais_id = %s;
""",
(int(pais_id),),
)
total_eventos = cur.fetchone()[0] if cur.rowcount else 0
                # 3) Load the articles attached to those events (via traducciones + noticias)
if eventos:
evento_ids = [e["id"] for e in eventos]
cur.execute(
"""
SELECT
t.evento_id,
n.id AS noticia_id,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
n.titulo AS titulo_orig,
n.resumen AS resumen_orig,
t.id AS traduccion_id,
t.titulo_trad AS titulo_trad,
t.resumen_trad AS resumen_trad,
p.nombre AS pais_nombre
FROM traducciones t
JOIN noticias n ON n.id = t.noticia_id
LEFT JOIN paises p ON p.id = n.pais_id
WHERE t.evento_id = ANY(%s)
AND t.status = 'done'
AND t.lang_to = %s
ORDER BY t.evento_id, n.fecha DESC;
""",
(evento_ids, lang),
)
rows = cur.fetchall()
noticias_por_evento = {e["id"]: [] for e in eventos}
for r in rows:
noticias_por_evento.setdefault(r["evento_id"], []).append(r)
                    # Country name (every event in this view matches the filtered country)
pais_nombre = eventos[0]["pais_nombre"]
else:
                    # No events: still resolve the country name from the list
for p in paises:
if p["id"] == int(pais_id):
pais_nombre = p["nombre"]
break
total_pages = (total_eventos // per_page) + (1 if total_eventos % per_page else 0)
return render_template(
"eventos_pais.html",
paises=paises,
eventos=eventos,
noticias_por_evento=noticias_por_evento,
pais_id=int(pais_id) if pais_id else None,
pais_nombre=pais_nombre,
total_eventos=total_eventos,
total_pages=total_pages,
page=page,
lang=lang,
)
if __name__ == "__main__":
app.run(host="0.0.0.0", port=8001, debug=True)