import os
import csv
import io
import time
import socket
from datetime import datetime, date
from concurrent.futures import ThreadPoolExecutor, as_completed

from dotenv import load_dotenv
from flask import (
    Flask, render_template, request, redirect, url_for, flash, send_file
)
from markupsafe import Markup
import psycopg2
import psycopg2.extras as extras

load_dotenv()

# PostgreSQL connection settings, overridable via environment variables.
DB_CONFIG = {
    "dbname": os.getenv("DB_NAME", "rss"),
    "user": os.getenv("DB_USER", "rss"),
    "password": os.getenv("DB_PASS", ""),
    "host": os.getenv("DB_HOST", "localhost"),
    "port": int(os.getenv("DB_PORT", "5432")),
}

DEFAULT_LANG = os.getenv("DEFAULT_LANG", "es")
DEFAULT_TRANSLATION_LANG = os.getenv("DEFAULT_TRANSLATION_LANG", "es")
WEB_TRANSLATED_DEFAULT = os.getenv("WEB_TRANSLATED_DEFAULT", "1") == "1"
NEWS_PER_PAGE_DEFAULT = int(os.getenv("NEWS_PER_PAGE", "20"))
SECRET_KEY = os.getenv("SECRET_KEY", "cambia_esta_clave_insegura")

# RSS ingestion tuning.
RSS_MAX_WORKERS = int(os.getenv("RSS_MAX_WORKERS", "10"))
RSS_FEED_TIMEOUT = int(os.getenv("RSS_FEED_TIMEOUT", "30"))
RSS_MAX_FAILURES = int(os.getenv("RSS_MAX_FAILURES", "5"))

app = Flask(__name__)
app.config["SECRET_KEY"] = SECRET_KEY


def get_conn():
    """Open a new PostgreSQL connection using DB_CONFIG."""
    return psycopg2.connect(**DB_CONFIG)


def safe_html(texto):
    """Jinja filter: mark *texto* as safe HTML (empty string for falsy input).

    NOTE(review): this bypasses autoescaping — only apply it to content that
    is already trusted or sanitized upstream.
    """
    if not texto:
        return ""
    return Markup(texto)


app.jinja_env.filters["safe_html"] = safe_html


def get_categorias(conn):
    """Return all (id, nombre) category rows ordered by name."""
    with conn.cursor(cursor_factory=extras.DictCursor) as cur:
        cur.execute("SELECT id, nombre FROM categorias ORDER BY nombre;")
        return cur.fetchall()


def get_continentes(conn):
    """Return all (id, nombre) continent rows ordered by name."""
    with conn.cursor(cursor_factory=extras.DictCursor) as cur:
        cur.execute("SELECT id, nombre FROM continentes ORDER BY nombre;")
        return cur.fetchall()


def get_paises(conn):
    """Return all (id, nombre, continente_id) country rows ordered by name."""
    with conn.cursor(cursor_factory=extras.DictCursor) as cur:
        cur.execute(
            """
            SELECT p.id, p.nombre, p.continente_id
            FROM paises p
            ORDER BY p.nombre;
            """
        )
        return cur.fetchall()


def normalize_url_py(u: str | None) -> str | None:
    """Canonicalize a URL for dedup: lowercase host, strip www./default ports,
    drop the fragment, remove tracking query params (utm_*, gclid, ...),
    collapse duplicate slashes and trailing slash in the path.

    Returns None for empty input; returns the partially-cleaned URL if
    urllib.parse is unavailable.
    """
    if not u:
        return None
    u = u.strip()
    if not u:
        return None
    if "://" not in u:
        u = "http://" + u
    # Fragment never reaches the server; discard before parsing.
    u = u.split("#", 1)[0]
    try:
        from urllib.parse import urlsplit, urlunsplit, parse_qsl, urlencode
    except ImportError:
        return u
    sp = urlsplit(u)
    scheme = sp.scheme.lower()
    netloc = sp.netloc.lower()
    if netloc.startswith("www."):
        netloc = netloc[4:]
    # Default ports carry no information.
    if scheme == "http" and netloc.endswith(":80"):
        netloc = netloc[:-3]
    if scheme == "https" and netloc.endswith(":443"):
        netloc = netloc[:-4]
    qs_pairs = []
    for k, v in parse_qsl(sp.query, keep_blank_values=True):
        kl = k.lower()
        if kl.startswith("utm_"):
            continue
        if kl in ("gclid", "fbclid", "mc_cid", "mc_eid", "ref", "ref_src", "yclid", "igshid"):
            continue
        qs_pairs.append((k, v))
    new_query = urlencode(qs_pairs, doseq=True)
    path = sp.path or "/"
    while "//" in path:
        path = path.replace("//", "/")
    if path != "/":
        path = path.rstrip("/")
    return urlunsplit((scheme, netloc, path, new_query, ""))


def _parse_entry_date(entry) -> datetime | None:
    """Best-effort publication datetime from a feedparser entry, else None."""
    dt = None
    try:
        if getattr(entry, "published_parsed", None):
            import time as _time
            dt = datetime.fromtimestamp(_time.mktime(entry.published_parsed))
        elif getattr(entry, "updated_parsed", None):
            import time as _time
            dt = datetime.fromtimestamp(_time.mktime(entry.updated_parsed))
    except Exception:
        dt = None
    return dt


def _process_feed(feed_row):
    """Fetch one RSS feed, insert its new entries into `noticias`, and
    maintain the feed's failure counter.

    On success the feed's `fallos` is reset to 0; on any error it is
    incremented and the feed is deactivated once it reaches RSS_MAX_FAILURES.
    Runs inside a worker thread (see fetch_and_store_all).
    """
    import feedparser

    feed_id = feed_row["id"]
    feed_url = feed_row["url"]
    feed_nombre = feed_row["nombre"]
    categoria_id = feed_row["categoria_id"]
    pais_id = feed_row["pais_id"]
    app.logger.info(f"[ingesta] Procesando feed {feed_id} '{feed_nombre}' ({feed_url})")
    try:
        # feedparser has no timeout parameter; bound it via the global
        # socket default timeout, restoring the previous value afterwards.
        old_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(RSS_FEED_TIMEOUT)
        try:
            parsed = feedparser.parse(feed_url)
        finally:
            socket.setdefaulttimeout(old_timeout)

        if parsed.bozo and parsed.bozo_exception:
            app.logger.warning(f"[ingesta] Feed {feed_id} bozo={parsed.bozo}: {parsed.bozo_exception}")

        entries = parsed.entries or []
        nuevos = 0
        with get_conn() as conn:
            conn.autocommit = True
            with conn.cursor() as cur:
                for entry in entries:
                    link = getattr(entry, "link", None) or getattr(entry, "id", None)
                    if not link:
                        continue
                    url_norm = normalize_url_py(link)
                    if not url_norm:
                        continue
                    titulo = getattr(entry, "title", None) or url_norm
                    resumen = getattr(entry, "summary", None) or getattr(entry, "description", None)
                    if resumen:
                        resumen = resumen[:4000]
                    fecha = _parse_entry_date(entry) or datetime.utcnow()

                    # Try <media:content>, then enclosure links, for an image.
                    imagen_url = None
                    try:
                        if hasattr(entry, "media_content") and entry.media_content:
                            imagen_url = entry.media_content[0].get("url")
                    except Exception:
                        imagen_url = None
                    if not imagen_url:
                        try:
                            if hasattr(entry, "links"):
                                for l in entry.links:
                                    if l.get("rel") == "enclosure" and l.get("type", "").startswith("image/"):
                                        imagen_url = l.get("href")
                                        break
                        except Exception:
                            imagen_url = None

                    try:
                        cur.execute(
                            """
                            INSERT INTO noticias
                                (id, titulo, resumen, url, fecha, imagen_url,
                                 fuente_nombre, categoria_id, pais_id)
                            VALUES (md5(%s), %s, %s, %s, %s, %s, %s, %s, %s)
                            ON CONFLICT (url) DO NOTHING;
                            """,
                            (
                                url_norm,
                                titulo,
                                resumen,
                                url_norm,
                                fecha,
                                imagen_url,
                                feed_nombre,
                                categoria_id,
                                pais_id,
                            ),
                        )
                        if cur.rowcount > 0:
                            nuevos += 1
                    except psycopg2.Error as e:
                        app.logger.warning(f"[ingesta] Error insertando noticia de {feed_url}: {e}")

        # Feed processed without raising: clear the failure counter.
        with get_conn() as conn, conn.cursor() as cur:
            cur.execute(
                "UPDATE feeds SET fallos = 0 WHERE id = %s;",
                (feed_id,),
            )
        app.logger.info(f"[ingesta] Feed {feed_id} OK. Nuevas noticias: {nuevos}")
    except Exception as e:
        app.logger.exception(f"[ingesta] Error procesando feed {feed_id} ({feed_url}): {e}")
        try:
            with get_conn() as conn, conn.cursor() as cur:
                cur.execute(
                    """
                    UPDATE feeds
                    SET fallos = COALESCE(fallos, 0) + 1,
                        activo = CASE
                                     WHEN COALESCE(fallos, 0) + 1 >= %s THEN FALSE
                                     ELSE activo
                                 END
                    WHERE id = %s;
                    """,
                    (RSS_MAX_FAILURES, feed_id),
                )
        except Exception as e2:
            app.logger.warning(f"[ingesta] No se pudo actualizar fallos de feed {feed_id}: {e2}")


def fetch_and_store_all():
    """Process every active, not-yet-failed feed concurrently with a
    thread pool of RSS_MAX_WORKERS workers."""
    app.logger.info("[ingesta] fetch_and_store_all() iniciado")
    with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
        cur.execute(
            """
            SELECT id, nombre, url, categoria_id, pais_id, fallos, activo
            FROM feeds
            WHERE activo = TRUE
              AND (fallos IS NULL OR fallos < %s)
            ORDER BY id;
            """,
            (RSS_MAX_FAILURES,),
        )
        feeds = cur.fetchall()

    if not feeds:
        app.logger.info("[ingesta] No hay feeds activos para procesar.")
        return

    app.logger.info(f"[ingesta] Procesando {len(feeds)} feeds (max workers = {RSS_MAX_WORKERS})")
    max_workers = max(1, RSS_MAX_WORKERS)
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(_process_feed, f): f for f in feeds}
        for future in as_completed(futures):
            feed_row = futures[future]
            try:
                future.result()
            except Exception as e:
                app.logger.exception(f"[ingesta] Excepción no controlada en feed {feed_row['id']}: {e}")
    app.logger.info("[ingesta] fetch_and_store_all() terminado")


@app.route("/")
@app.route("/home")
def home():
    """Paginated news listing with filters (text, category, continent,
    country, date) and optional translated titles/summaries.

    Returns only the list partial for XMLHttpRequest callers.
    """
    page = max(int(request.args.get("page", 1) or 1), 1)
    per_page = int(request.args.get("per_page", NEWS_PER_PAGE_DEFAULT) or NEWS_PER_PAGE_DEFAULT)
    per_page = min(max(per_page, 10), 100)

    q = (request.args.get("q") or "").strip()
    categoria_id = request.args.get("categoria_id") or None
    continente_id = request.args.get("continente_id") or None
    pais_id = request.args.get("pais_id") or None
    fecha_str = request.args.get("fecha") or ""
    lang = (request.args.get("lang") or DEFAULT_TRANSLATION_LANG or DEFAULT_LANG).lower()[:5]
    # ?orig=1 forces original-language display instead of the translation.
    orig_flag = request.args.get("orig")
    use_tr = not bool(orig_flag)

    fecha_filtro = None
    if fecha_str:
        try:
            fecha_filtro = datetime.strptime(fecha_str, "%Y-%m-%d").date()
        except ValueError:
            fecha_filtro = None

    offset = (page - 1) * per_page

    with get_conn() as conn:
        conn.autocommit = True
        categorias = get_categorias(conn)
        continentes = get_continentes(conn)
        paises = get_paises(conn)

        # Build the WHERE clause dynamically; values always go through
        # placeholders, never string interpolation.
        params = []
        where = ["1=1"]
        if fecha_filtro:
            where.append("n.fecha::date = %s")
            params.append(fecha_filtro)
        if categoria_id:
            where.append("n.categoria_id = %s")
            params.append(int(categoria_id))
        if pais_id:
            where.append("n.pais_id = %s")
            params.append(int(pais_id))
        elif continente_id:
            # Continent filter only applies when no specific country chosen.
            where.append("p.continente_id = %s")
            params.append(int(continente_id))
        if q:
            where.append("n.tsv @@ plainto_tsquery('spanish', %s)")
            params.append(q)
        where_sql = " AND ".join(where)

        with conn.cursor(cursor_factory=extras.DictCursor) as cur:
            cur.execute(
                f"""
                SELECT COUNT(*)
                FROM noticias n
                LEFT JOIN categorias c ON c.id = n.categoria_id
                LEFT JOIN paises p ON p.id = n.pais_id
                WHERE {where_sql}
                """,
                params,
            )
            total_results = cur.fetchone()[0] if cur.rowcount else 0
            total_pages = (total_results // per_page) + (1 if total_results % per_page else 0)

            cur.execute(
                f"""
                SELECT n.id, n.titulo, n.resumen, n.url, n.fecha, n.imagen_url,
                       n.fuente_nombre,
                       c.nombre AS categoria,
                       p.nombre AS pais,
                       t.id AS traduccion_id,
                       t.titulo_trad AS titulo_traducido,
                       t.resumen_trad AS resumen_traducido,
                       CASE WHEN t.id IS NOT NULL THEN TRUE ELSE FALSE END AS tiene_traduccion,
                       n.titulo AS titulo_original,
                       n.resumen AS resumen_original
                FROM noticias n
                LEFT JOIN categorias c ON c.id = n.categoria_id
                LEFT JOIN paises p ON p.id = n.pais_id
                LEFT JOIN traducciones t
                       ON t.noticia_id = n.id
                      AND t.lang_to = %s
                      AND t.status = 'done'
                WHERE {where_sql}
                ORDER BY n.fecha DESC NULLS LAST, n.id DESC
                LIMIT %s OFFSET %s
                """,
                [lang] + params + [per_page, offset],
            )
            noticias = cur.fetchall()

            # Collect tags for all translations shown on this page in one query.
            tags_por_tr = {}
            tr_ids = [n["traduccion_id"] for n in noticias if n["traduccion_id"]]
            if tr_ids:
                cur.execute(
                    """
                    SELECT tn.traduccion_id, tg.valor, tg.tipo
                    FROM tags_noticia tn
                    JOIN tags tg ON tg.id = tn.tag_id
                    WHERE tn.traduccion_id = ANY(%s);
                    """,
                    (tr_ids,),
                )
                for tr_id, valor, tipo in cur.fetchall():
                    tags_por_tr.setdefault(tr_id, []).append((valor, tipo))

    context = dict(
        noticias=noticias,
        total_results=total_results,
        total_pages=total_pages,
        page=page,
        per_page=per_page,
        categorias=categorias,
        continentes=continentes,
        paises=paises,
        q=q,
        cat_id=int(categoria_id) if categoria_id else None,
        cont_id=int(continente_id) if continente_id else None,
        pais_id=int(pais_id) if pais_id else None,
        fecha_filtro=fecha_str,
        use_tr=use_tr,
        lang=lang,
        tags_por_tr=tags_por_tr,
    )

    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return render_template("_noticias_list.html", **context)
    return render_template("noticias.html", **context)


@app.route("/noticia")
def noticia():
    """Detail view: by translation id (?tr_id=) or by news id (?id=),
    including tags and related news when a translation exists."""
    tr_id = request.args.get("tr_id")
    noticia_id = request.args.get("id")
    if not tr_id and not noticia_id:
        flash("No se ha indicado ninguna noticia.", "warning")
        return redirect(url_for("home"))

    with get_conn() as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=extras.DictCursor) as cur:
            dato = None
            if tr_id:
                cur.execute(
                    """
                    SELECT t.id AS traduccion_id,
                           t.lang_from, t.lang_to,
                           t.titulo_trad, t.resumen_trad,
                           n.id As noticia_id,
                           n.titulo AS titulo_orig,
                           n.resumen AS resumen_orig,
                           n.url, n.fecha, n.imagen_url, n.fuente_nombre,
                           c.nombre AS categoria,
                           p.nombre AS pais
                    FROM traducciones t
                    JOIN noticias n ON n.id = t.noticia_id
                    LEFT JOIN categorias c ON c.id = n.categoria_id
                    LEFT JOIN paises p ON p.id = n.pais_id
                    WHERE t.id = %s
                    """,
                    (int(tr_id),),
                )
                dato = cur.fetchone()
            else:
                cur.execute(
                    """
                    SELECT NULL AS traduccion_id,
                           NULL AS lang_from, NULL AS lang_to,
                           NULL AS titulo_trad, NULL AS resumen_trad,
                           n.id AS noticia_id,
                           n.titulo AS titulo_orig,
                           n.resumen AS resumen_orig,
                           n.url, n.fecha, n.imagen_url, n.fuente_nombre,
                           c.nombre AS categoria,
                           p.nombre AS pais
                    FROM noticias n
                    LEFT JOIN categorias c ON c.id = n.categoria_id
                    LEFT JOIN paises p ON p.id = n.pais_id
                    WHERE n.id = %s
                    """,
                    (noticia_id,),
                )
                dato = cur.fetchone()

            # Tags and related news are attached to the translation, so they
            # are only available when a translation row was found.
            tags = []
            relacionadas = []
            if dato and dato["traduccion_id"]:
                cur.execute(
                    """
                    SELECT tg.valor, tg.tipo
                    FROM tags_noticia tn
                    JOIN tags tg ON tg.id = tn.tag_id
                    WHERE tn.traduccion_id = %s
                    ORDER BY tg.tipo, tg.valor;
                    """,
                    (dato["traduccion_id"],),
                )
                tags = cur.fetchall()

                cur.execute(
                    """
                    SELECT n2.url, n2.titulo, n2.fecha, n2.imagen_url,
                           n2.fuente_nombre, rn.score
                    FROM related_noticias rn
                    JOIN traducciones t2 ON t2.id = rn.related_traduccion_id
                    JOIN noticias n2 ON n2.id = t2.noticia_id
                    WHERE rn.traduccion_id = %s
                    ORDER BY rn.score DESC
                    LIMIT 8;
                    """,
                    (dato["traduccion_id"],),
                )
                relacionadas = cur.fetchall()

    return render_template(
        "noticia.html",
        dato=dato,
        tags=tags,
        relacionadas=relacionadas,
    )


@app.route("/dashboard")
def dashboard():
    """Aggregate stats (feed/news counts, failed feeds) plus the 24h top
    tags view when it exists."""
    with get_conn() as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=extras.DictCursor) as cur:
            cur.execute("SELECT COUNT(*) FROM feeds;")
            feeds_totales = cur.fetchone()[0]
            cur.execute("SELECT COUNT(*) FROM noticias;")
            noticias_totales = cur.fetchone()[0]
            cur.execute("SELECT COUNT(*) FROM feeds WHERE activo = FALSE;")
            feeds_caidos = cur.fetchone()[0]
            stats = {
                "feeds_totales": feeds_totales,
                "noticias_totales": noticias_totales,
                "feeds_caidos": feeds_caidos,
            }
            # The view may not exist in every deployment; degrade gracefully.
            top_tags = []
            try:
                cur.execute(
                    "SELECT id, valor, tipo, apariciones FROM v_tag_counts_24h ORDER BY apariciones DESC LIMIT 100;"
                )
                top_tags = cur.fetchall()
            except psycopg2.Error:
                top_tags = []
    return render_template("dashboard.html", stats=stats, top_tags=top_tags)


@app.route("/feeds")
def manage_feeds():
    """Paginated feed administration listing (50 per page)."""
    page = max(int(request.args.get("page", 1) or 1), 1)
    per_page = 50
    offset = (page - 1) * per_page

    with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
        cur.execute("SELECT COUNT(*) FROM feeds;")
        total_feeds = cur.fetchone()[0] if cur.rowcount else 0
        total_pages = (total_feeds // per_page) + (1 if total_feeds % per_page else 0)

        cur.execute(
            """
            SELECT f.id, f.nombre, f.descripcion, f.url, f.activo, f.fallos,
                   c.nombre AS categoria,
                   p.nombre AS pais
            FROM feeds f
            LEFT JOIN categorias c ON c.id = f.categoria_id
            LEFT JOIN paises p ON p.id = f.pais_id
            ORDER BY f.nombre
            LIMIT %s OFFSET %s;
            """,
            (per_page, offset),
        )
        feeds = cur.fetchall()
        cur.execute("SELECT id, nombre FROM categorias ORDER BY nombre;")
        categorias = cur.fetchall()
        cur.execute("SELECT id, nombre FROM paises ORDER BY nombre;")
        paises = cur.fetchall()

    return render_template(
        "feeds_list.html",
        feeds=feeds,
        total_feeds=total_feeds,
        total_pages=total_pages,
        page=page,
        categorias=categorias,
        paises=paises,
    )


@app.route("/feeds/add", methods=["GET", "POST"])
def add_feed():
    """Create a new feed (form on GET, insert on POST)."""
    with get_conn() as conn:
        conn.autocommit = True
        categorias = get_categorias(conn)
        paises = get_paises(conn)
        if request.method == "POST":
            nombre = request.form.get("nombre")
            descripcion = request.form.get("descripcion") or None
            url = request.form.get("url")
            categoria_id = request.form.get("categoria_id") or None
            pais_id = request.form.get("pais_id") or None
            idioma = (request.form.get("idioma") or "").strip().lower()[:2] or None
            try:
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma)
                        VALUES (%s, %s, %s, %s, %s, %s);
                        """,
                        (
                            nombre,
                            descripcion,
                            url,
                            int(categoria_id) if categoria_id else None,
                            int(pais_id) if pais_id else None,
                            idioma,
                        ),
                    )
                flash(f"Feed '{nombre}' añadido correctamente.", "success")
                return redirect(url_for("manage_feeds"))
            except psycopg2.Error as e:
                flash(f"Error al añadir feed: {e}", "error")
    return render_template("add_feed.html", categorias=categorias, paises=paises)


# FIX: the route pattern was "/feeds//edit" (no converter), so feed_id was
# never supplied and every request raised a TypeError.
@app.route("/feeds/<int:feed_id>/edit", methods=["GET", "POST"])
def edit_feed(feed_id):
    """Edit an existing feed (form on GET, update on POST)."""
    with get_conn() as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=extras.DictCursor) as cur:
            cur.execute("SELECT * FROM feeds WHERE id = %s;", (feed_id,))
            feed = cur.fetchone()
            if not feed:
                flash("Feed no encontrado.", "error")
                return redirect(url_for("manage_feeds"))

            categorias = get_categorias(conn)
            paises = get_paises(conn)

            if request.method == "POST":
                nombre = request.form.get("nombre")
                descripcion = request.form.get("descripcion") or None
                url = request.form.get("url")
                categoria_id = request.form.get("categoria_id") or None
                pais_id = request.form.get("pais_id") or None
                idioma = (request.form.get("idioma") or "").strip().lower()[:2] or None
                activo = bool(request.form.get("activo"))
                try:
                    cur.execute(
                        """
                        UPDATE feeds
                        SET nombre = %s,
                            descripcion = %s,
                            url = %s,
                            categoria_id = %s,
                            pais_id = %s,
                            idioma = %s,
                            activo = %s
                        WHERE id = %s;
                        """,
                        (
                            nombre,
                            descripcion,
                            url,
                            int(categoria_id) if categoria_id else None,
                            int(pais_id) if pais_id else None,
                            idioma,
                            activo,
                            feed_id,
                        ),
                    )
                    flash("Feed actualizado correctamente.", "success")
                    return redirect(url_for("manage_feeds"))
                except psycopg2.Error as e:
                    flash(f"Error al actualizar feed: {e}", "error")

    return render_template("edit_feed.html", feed=feed, categorias=categorias, paises=paises)


# FIX: route pattern was "/feeds//delete" — missing <int:feed_id> converter.
@app.route("/feeds/<int:feed_id>/delete")
def delete_feed(feed_id):
    """Delete a feed by id and redirect back to the feed list."""
    with get_conn() as conn, conn.cursor() as cur:
        try:
            cur.execute("DELETE FROM feeds WHERE id = %s;", (feed_id,))
            flash("Feed eliminado.", "success")
        except psycopg2.Error as e:
            flash(f"No se pudo eliminar el feed: {e}", "error")
    return redirect(url_for("manage_feeds"))


# FIX: route pattern was "/feeds//reactivar" — missing <int:feed_id> converter.
@app.route("/feeds/<int:feed_id>/reactivar")
def reactivar_feed(feed_id):
    """Reactivate a failed feed and reset its failure counter."""
    with get_conn() as conn, conn.cursor() as cur:
        try:
            cur.execute(
                "UPDATE feeds SET activo = TRUE, fallos = 0 WHERE id = %s;",
                (feed_id,),
            )
            flash("Feed reactivado.", "success")
        except psycopg2.Error as e:
            flash(f"No se pudo reactivar el feed: {e}", "error")
    return redirect(url_for("manage_feeds"))


@app.route("/add", methods=["POST"])
def legacy_add_feed():
    """Legacy alias for add_feed kept for old clients posting to /add."""
    return add_feed()


@app.route("/backup_feeds")
def backup_feeds():
    """Download all feeds (with category/country names) as a CSV file."""
    with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
        cur.execute(
            """
            SELECT f.id, f.nombre, f.descripcion, f.url,
                   f.categoria_id, c.nombre AS categoria,
                   f.pais_id, p.nombre AS pais,
                   f.idioma, f.activo, f.fallos
            FROM feeds f
            LEFT JOIN categorias c ON c.id = f.categoria_id
            LEFT JOIN paises p ON p.id = f.pais_id
            ORDER BY f.id;
            """
        )
        rows = cur.fetchall()

    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(["id", "nombre", "descripcion", "url", "categoria_id", "categoria",
                     "pais_id", "pais", "idioma", "activo", "fallos"])
    for r in rows:
        writer.writerow([
            r["id"],
            r["nombre"],
            r["descripcion"] or "",
            r["url"],
            r["categoria_id"] or "",
            r["categoria"] or "",
            r["pais_id"] or "",
            r["pais"] or "",
            r["idioma"] or "",
            r["activo"],
            r["fallos"],
        ])
    output.seek(0)
    return send_file(
        io.BytesIO(output.getvalue().encode("utf-8")),
        mimetype="text/csv",
        as_attachment=True,
        download_name="feeds_backup.csv",
    )


@app.route("/restore_feeds", methods=["GET", "POST"])
def restore_feeds():
    """Restore/upsert feeds from an uploaded CSV (by url); row errors are
    logged and skipped, committing row by row."""
    if request.method == "GET":
        return render_template("restore_feeds.html")

    file = request.files.get("file")
    if not file:
        flash("No se ha subido ningún archivo.", "error")
        return redirect(url_for("restore_feeds"))

    try:
        content = file.stream.read().decode("utf-8", errors="ignore")
        reader = csv.DictReader(io.StringIO(content))
    except Exception as e:
        flash(f"Error leyendo el CSV: {e}", "error")
        return redirect(url_for("restore_feeds"))

    def parse_int_field(row, key):
        # Empty or non-numeric cells become NULL rather than aborting the row.
        val = row.get(key)
        if val is None or str(val).strip() == "":
            return None
        try:
            return int(val)
        except (ValueError, TypeError):
            app.logger.warning(
                f"[restore_feeds] Valor no numérico '{val}' en columna {key}, se usará NULL."
            )
            return None

    conn = get_conn()
    try:
        with conn.cursor() as cur:
            for row in reader:
                try:
                    categoria_id = parse_int_field(row, "categoria_id")
                    pais_id = parse_int_field(row, "pais_id")

                    raw_fallos = (row.get("fallos") or "").strip()
                    if raw_fallos == "":
                        fallos = 0
                    else:
                        try:
                            fallos = int(raw_fallos)
                        except (ValueError, TypeError):
                            app.logger.warning(
                                f"[restore_feeds] Valor no numérico '{raw_fallos}' en columna fallos, se usará 0."
                            )
                            fallos = 0

                    cur.execute(
                        """
                        INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma, activo, fallos)
                        VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
                        ON CONFLICT (url) DO UPDATE SET
                            nombre = EXCLUDED.nombre,
                            descripcion = EXCLUDED.descripcion,
                            categoria_id = EXCLUDED.categoria_id,
                            pais_id = EXCLUDED.pais_id,
                            idioma = EXCLUDED.idioma,
                            activo = EXCLUDED.activo,
                            fallos = EXCLUDED.fallos;
                        """,
                        (
                            row["nombre"],
                            row.get("descripcion") or None,
                            row["url"],
                            categoria_id,
                            pais_id,
                            (row.get("idioma") or "").strip().lower()[:2] or None,
                            row.get("activo") in ("1", "True", "true", "t", "on"),
                            fallos,
                        ),
                    )
                    conn.commit()
                except psycopg2.Error as e:
                    # FIX: was print(); use the app logger like the rest of the module.
                    app.logger.warning(f"[restore_feeds] Error restaurando feed: {e}")
                    conn.rollback()
    finally:
        conn.close()

    flash("Restauración de feeds completada (con posibles errores en algunos registros).", "success")
    return redirect(url_for("dashboard"))


@app.route("/urls")
def manage_urls():
    """List all scraping URL sources."""
    with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
        cur.execute(
            """
            SELECT fu.id, fu.nombre, fu.url,
                   c.nombre AS categoria,
                   p.nombre AS pais,
                   fu.idioma
            FROM fuentes_url fu
            LEFT JOIN categorias c ON c.id = fu.categoria_id
            LEFT JOIN paises p ON p.id = fu.pais_id
            ORDER BY fu.nombre;
            """
        )
        fuentes = cur.fetchall()
    return render_template("urls_list.html", fuentes=fuentes)


@app.route("/urls/add_source", methods=["GET", "POST"])
def add_url_source():
    """Create or upsert (by normalized url) a scraping URL source."""
    with get_conn() as conn:
        conn.autocommit = True
        categorias = get_categorias(conn)
        paises = get_paises(conn)
        if request.method == "POST":
            nombre = request.form.get("nombre")
            url = request.form.get("url")
            categoria_id = request.form.get("categoria_id") or None
            pais_id = request.form.get("pais_id") or None
            idioma = (request.form.get("idioma", "es") or "es").strip().lower()[:2]
            try:
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma)
                        VALUES (%s, %s, %s, %s, %s)
                        ON CONFLICT (url_norm) DO UPDATE SET
                            nombre = EXCLUDED.nombre,
                            categoria_id = EXCLUDED.categoria_id,
                            pais_id = EXCLUDED.pais_id,
                            idioma = EXCLUDED.idioma;
                        """,
                        (
                            nombre,
                            url,
                            int(categoria_id) if categoria_id else None,
                            int(pais_id) if pais_id else None,
                            idioma,
                        ),
                    )
                flash(f"Fuente URL '{nombre}' añadida/actualizada correctamente.", "success")
                return redirect(url_for("manage_urls"))
            except psycopg2.Error as e:
                flash(f"Error al guardar fuente URL: {e}", "error")
    return render_template("add_url_source.html", categorias=categorias, paises=paises)


# FIX: route pattern was "/urls//edit" — missing <int:url_id> converter.
@app.route("/urls/<int:url_id>/edit", methods=["GET", "POST"])
def edit_url_source(url_id):
    """Edit an existing scraping URL source."""
    with get_conn() as conn:
        conn.autocommit = True
        with conn.cursor(cursor_factory=extras.DictCursor) as cur:
            cur.execute("SELECT * FROM fuentes_url WHERE id = %s;", (url_id,))
            fuente = cur.fetchone()
            if not fuente:
                flash("Fuente URL no encontrada.", "error")
                return redirect(url_for("manage_urls"))

            categorias = get_categorias(conn)
            paises = get_paises(conn)

            if request.method == "POST":
                nombre = request.form.get("nombre")
                url = request.form.get("url")
                categoria_id = request.form.get("categoria_id") or None
                pais_id = request.form.get("pais_id") or None
                idioma = (request.form.get("idioma") or "").strip().lower()[:2] or "es"
                try:
                    cur.execute(
                        """
                        UPDATE fuentes_url
                        SET nombre = %s,
                            url = %s,
                            categoria_id = %s,
                            pais_id = %s,
                            idioma = %s
                        WHERE id = %s;
                        """,
                        (
                            nombre,
                            url,
                            int(categoria_id) if categoria_id else None,
                            int(pais_id) if pais_id else None,
                            idioma,
                            url_id,
                        ),
                    )
                    flash("Fuente URL actualizada.", "success")
                    return redirect(url_for("manage_urls"))
                except psycopg2.Error as e:
                    flash(f"Error al actualizar fuente: {e}", "error")

    return render_template("edit_url_source.html", fuente=fuente, categorias=categorias, paises=paises)


# FIX: route pattern was "/urls//delete" — missing <int:url_id> converter.
@app.route("/urls/<int:url_id>/delete")
def delete_url_source(url_id):
    """Delete a scraping URL source by id."""
    with get_conn() as conn, conn.cursor() as cur:
        try:
            cur.execute("DELETE FROM fuentes_url WHERE id = %s;", (url_id,))
            flash("Fuente URL eliminada.", "success")
        except psycopg2.Error as e:
            flash(f"No se pudo eliminar la fuente URL: {e}", "error")
    return redirect(url_for("manage_urls"))


@app.route("/backup_urls")
def backup_urls():
    """Download all URL sources as a CSV file."""
    with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
        cur.execute(
            """
            SELECT id, nombre, url, categoria_id, pais_id, idioma
            FROM fuentes_url
            ORDER BY id;
            """
        )
        rows = cur.fetchall()

    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(["id", "nombre", "url", "categoria_id", "pais_id", "idioma"])
    for r in rows:
        writer.writerow([
            r["id"],
            r["nombre"],
            r["url"],
            r["categoria_id"] or "",
            r["pais_id"] or "",
            r["idioma"] or "",
        ])
    output.seek(0)
    return send_file(
        io.BytesIO(output.getvalue().encode("utf-8")),
        mimetype="text/csv",
        as_attachment=True,
        download_name="fuentes_url_backup.csv",
    )


@app.route("/restore_urls", methods=["GET", "POST"])
def restore_urls():
    """Restore/upsert URL sources from an uploaded CSV (by id); row errors
    are logged and skipped, committing row by row."""
    if request.method == "GET":
        return render_template("restore_urls.html")

    file = request.files.get("file")
    if not file:
        flash("No se ha subido ningún archivo.", "error")
        return redirect(url_for("restore_urls"))

    try:
        content = file.stream.read().decode("utf-8", errors="ignore")
        reader = csv.DictReader(io.StringIO(content))
    except Exception as e:
        flash(f"Error leyendo el CSV: {e}", "error")
        return redirect(url_for("restore_urls"))

    conn = get_conn()
    try:
        with conn.cursor() as cur:
            for row in reader:
                try:
                    cur.execute(
                        """
                        INSERT INTO fuentes_url (id, nombre, url, categoria_id, pais_id, idioma)
                        VALUES (%s,%s,%s,%s,%s,%s)
                        ON CONFLICT (id) DO UPDATE SET
                            nombre = EXCLUDED.nombre,
                            url = EXCLUDED.url,
                            categoria_id = EXCLUDED.categoria_id,
                            pais_id = EXCLUDED.pais_id,
                            idioma = EXCLUDED.idioma;
                        """,
                        (
                            int(row["id"]),
                            row["nombre"],
                            row["url"],
                            int(row["categoria_id"]) if row.get("categoria_id") else None,
                            int(row["pais_id"]) if row.get("pais_id") else None,
                            (row.get("idioma") or "es").strip().lower()[:2],
                        ),
                    )
                    conn.commit()
                except psycopg2.Error as e:
                    # FIX: was print(); use the app logger like the rest of the module.
                    app.logger.warning(f"[restore_urls] Error restaurando fuente_url: {e}")
                    conn.rollback()
    finally:
        conn.close()

    flash("Importación de fuentes URL completada (con posibles errores en algunas filas).", "success")
    return redirect(url_for("dashboard"))


@app.route("/urls/add", methods=["GET", "POST"])
def add_url():
    """Add a single news item by scraping one URL with newspaper3k."""
    with get_conn() as conn:
        conn.autocommit = True
        categorias = get_categorias(conn)
        paises = get_paises(conn)
        if request.method == "POST":
            url = request.form.get("url")
            categoria_id = request.form.get("categoria_id") or None
            pais_id = request.form.get("pais_id") or None
            if not url:
                flash("Debes indicar una URL.", "error")
                return redirect(url_for("add_url"))

            # newspaper3k is an optional dependency; fail with a flash, not a 500.
            try:
                from newspaper import Article
            except ImportError:
                flash("La librería newspaper3k no está instalada en este entorno.", "error")
                return redirect(url_for("add_url"))

            try:
                art = Article(url)
                art.download()
                art.parse()
                titulo = art.title or url
                resumen = (art.summary or "")[:2000] if hasattr(art, "summary") else None
                imagen_url = art.top_image or None
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        INSERT INTO noticias
                            (id, titulo, resumen, url, fecha, imagen_url,
                             fuente_nombre, categoria_id, pais_id)
                        VALUES (md5(%s), %s, %s, %s, NOW(), %s, %s, %s, %s)
                        ON CONFLICT (url) DO NOTHING;
                        """,
                        (
                            url,
                            titulo,
                            resumen,
                            url,
                            imagen_url,
                            None,
                            int(categoria_id) if categoria_id else None,
                            int(pais_id) if pais_id else None,
                        ),
                    )
                flash("Noticia añadida desde URL.", "success")
                return redirect(url_for("home"))
            except Exception as e:
                flash(f"Error al scrapear la URL: {e}", "error")
                return redirect(url_for("add_url"))
    return render_template("add_url.html", categorias=categorias, paises=paises)


@app.route("/urls/scrape", methods=["GET", "POST"])
def scrape_url():
    """Scrape from a configured URL source (currently a stub)."""
    with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
        cur.execute("SELECT id, nombre FROM fuentes_url ORDER BY nombre;")
        fuentes = cur.fetchall()
    if request.method == "POST":
        source_id = request.form.get("source_id")
        if not source_id:
            flash("Debes seleccionar una fuente.", "error")
            return redirect(url_for("scrape_url"))
        flash("Scrapeo desde fuente aún no implementado (stub).", "warning")
        return redirect(url_for("dashboard"))
    return render_template("scrape_url.html", fuentes=fuentes)


@app.route("/backup_completo")
def backup_completo():
    """Download a zip containing CSV dumps of feeds and fuentes_url."""
    import zipfile

    mem_file = io.BytesIO()
    with zipfile.ZipFile(mem_file, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
        with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
            cur.execute("SELECT * FROM feeds ORDER BY id;")
            rows = cur.fetchall()
            buf = io.StringIO()
            writer = csv.writer(buf)
            if rows:
                writer.writerow(rows[0].keys())
                for r in rows:
                    writer.writerow(list(r.values()))
            zf.writestr("feeds.csv", buf.getvalue())

        with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
            cur.execute("SELECT * FROM fuentes_url ORDER BY id;")
            rows = cur.fetchall()
            buf2 = io.StringIO()
            writer2 = csv.writer(buf2)
            if rows:
                writer2.writerow(rows[0].keys())
                for r in rows:
                    writer2.writerow(list(r.values()))
            zf.writestr("fuentes_url.csv", buf2.getvalue())

    mem_file.seek(0)
    return send_file(
        mem_file,
        mimetype="application/zip",
        as_attachment=True,
        download_name="backup_completo_rss.zip",
    )


if __name__ == "__main__":
    # NOTE(review): debug=True and 0.0.0.0 are development settings — do not
    # run this entry point as-is in production.
    app.run(host="0.0.0.0", port=8001, debug=True)