# rss/app.py

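"""Flask front end and ingestion layer for an RSS news aggregator.

Pulls articles from configured feeds in parallel, stores them in PostgreSQL
(with URL-based deduplication), and serves views for browsing news with
optional translations, managing feeds and scraped URL sources, per-country
event clusters, and CSV/ZIP backup and restore.
"""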
import os
import csv
import io
import socket
import zipfile
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from dotenv import load_dotenv
from flask import (
Flask, render_template, request, redirect, url_for,
flash, send_file
)
from markupsafe import Markup
import psycopg2
import psycopg2.extras as extras
load_dotenv()
DB_CONFIG = {
"dbname": os.getenv("DB_NAME", "rss"),
"user": os.getenv("DB_USER", "rss"),
"password": os.getenv("DB_PASS", ""),
"host": os.getenv("DB_HOST", "localhost"),
"port": int(os.getenv("DB_PORT", "5432")),
}
DEFAULT_LANG = os.getenv("DEFAULT_LANG", "es")
DEFAULT_TRANSLATION_LANG = os.getenv("DEFAULT_TRANSLATION_LANG", "es")
WEB_TRANSLATED_DEFAULT = os.getenv("WEB_TRANSLATED_DEFAULT", "1") == "1"
NEWS_PER_PAGE_DEFAULT = int(os.getenv("NEWS_PER_PAGE", "20"))
SECRET_KEY = os.getenv("SECRET_KEY", "cambia_esta_clave_insegura")
RSS_MAX_WORKERS = int(os.getenv("RSS_MAX_WORKERS", "10"))
RSS_FEED_TIMEOUT = int(os.getenv("RSS_FEED_TIMEOUT", "30"))
RSS_MAX_FAILURES = int(os.getenv("RSS_MAX_FAILURES", "5"))
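# Example .env for local development (placeholder values, not taken from the
# original repo; only the variable names above are authoritative):
#
#   DB_NAME=rss
#   DB_USER=rss
#   DB_PASS=secret
#   DB_HOST=localhost
#   SECRET_KEY=<long random string>
#   RSS_MAX_WORKERS=10
#   RSS_FEED_TIMEOUT=30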
app = Flask(__name__)
app.config["SECRET_KEY"] = SECRET_KEY
def get_conn():
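    """Open a new PostgreSQL connection from DB_CONFIG (one per call)."""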
return psycopg2.connect(**DB_CONFIG)
def safe_html(texto):
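    """Mark stored HTML as safe for Jinja rendering. Note: no sanitization
    happens here; this assumes feed content was cleaned upstream."""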
if not texto:
return ""
return Markup(texto)
app.jinja_env.filters["safe_html"] = safe_html
def get_categorias(conn):
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT id, nombre FROM categorias ORDER BY nombre;")
return cur.fetchall()
def get_continentes(conn):
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT id, nombre FROM continentes ORDER BY nombre;")
return cur.fetchall()
def get_paises(conn):
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
"""
SELECT p.id, p.nombre, p.continente_id
FROM paises p
ORDER BY p.nombre;
"""
)
return cur.fetchall()
def normalize_url_py(u: str | None) -> str | None:
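    """Normalize a URL for deduplication: force a scheme, lowercase the host,
    strip "www.", default ports, fragments, and tracking parameters (utm_*,
    gclid, fbclid, ...), and collapse/trim redundant slashes in the path.

    Example:
        normalize_url_py("https://www.Example.com/a//b/?utm_source=x&id=1#f")
        -> "https://example.com/a/b?id=1"
    """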
if not u:
return None
u = u.strip()
if not u:
return None
if "://" not in u:
u = "http://" + u
u = u.split("#", 1)[0]
    from urllib.parse import urlsplit, urlunsplit, parse_qsl, urlencode
sp = urlsplit(u)
scheme = sp.scheme.lower()
netloc = sp.netloc.lower()
if netloc.startswith("www."):
netloc = netloc[4:]
if scheme == "http" and netloc.endswith(":80"):
netloc = netloc[:-3]
if scheme == "https" and netloc.endswith(":443"):
netloc = netloc[:-4]
qs_pairs = []
for k, v in parse_qsl(sp.query, keep_blank_values=True):
kl = k.lower()
if kl.startswith("utm_"):
continue
if kl in ("gclid", "fbclid", "mc_cid", "mc_eid", "ref", "ref_src", "yclid", "igshid"):
continue
qs_pairs.append((k, v))
new_query = urlencode(qs_pairs, doseq=True)
path = sp.path or "/"
while "//" in path:
path = path.replace("//", "/")
if path != "/":
path = path.rstrip("/")
return urlunsplit((scheme, netloc, path, new_query, ""))
def _parse_entry_date(entry) -> datetime | None:
    """Best-effort publication datetime (naive UTC) for a feedparser entry."""
    for attr in ("published_parsed", "updated_parsed"):
        parsed = getattr(entry, attr, None)
        if parsed:
            try:
                # feedparser returns a struct_time in UTC; build the datetime
                # directly rather than via time.mktime(), which would
                # misinterpret the struct as local time.
                return datetime(*parsed[:6])
            except (TypeError, ValueError):
                continue
    return None
def _process_feed(feed_row):
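    """Fetch one feed, insert any new articles (deduplicated by URL), and
    reset its failure counter; on error, increment fallos and deactivate the
    feed once RSS_MAX_FAILURES is reached. Runs inside a worker thread."""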
import feedparser
feed_id = feed_row["id"]
feed_url = feed_row["url"]
feed_nombre = feed_row["nombre"]
categoria_id = feed_row["categoria_id"]
pais_id = feed_row["pais_id"]
app.logger.info(f"[ingesta] Procesando feed {feed_id} '{feed_nombre}' ({feed_url})")
try:
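        # feedparser.parse() takes no timeout parameter, so the global socket
        # timeout is swapped in around the fetch; note this setting is
        # process-wide and shared by concurrent worker threads.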
old_timeout = socket.getdefaulttimeout()
socket.setdefaulttimeout(RSS_FEED_TIMEOUT)
try:
parsed = feedparser.parse(feed_url)
finally:
socket.setdefaulttimeout(old_timeout)
if parsed.bozo and parsed.bozo_exception:
app.logger.warning(f"[ingesta] Feed {feed_id} bozo={parsed.bozo}: {parsed.bozo_exception}")
entries = parsed.entries or []
nuevos = 0
with get_conn() as conn:
conn.autocommit = True
with conn.cursor() as cur:
for entry in entries:
link = getattr(entry, "link", None) or getattr(entry, "id", None)
if not link:
continue
url_norm = normalize_url_py(link)
if not url_norm:
continue
titulo = getattr(entry, "title", None) or url_norm
resumen = getattr(entry, "summary", None) or getattr(entry, "description", None)
if resumen:
resumen = resumen[:4000]
fecha = _parse_entry_date(entry) or datetime.utcnow()
imagen_url = None
try:
if hasattr(entry, "media_content") and entry.media_content:
imagen_url = entry.media_content[0].get("url")
except Exception:
imagen_url = None
if not imagen_url:
try:
if hasattr(entry, "links"):
                                for enlace in entry.links:
                                    if enlace.get("rel") == "enclosure" and enlace.get("type", "").startswith("image/"):
                                        imagen_url = enlace.get("href")
                                        break
except Exception:
imagen_url = None
try:
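                        # id = md5 of the normalized URL (deterministic), and
                        # ON CONFLICT (url) skips already-seen articles.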
cur.execute(
"""
INSERT INTO noticias
(id, titulo, resumen, url, fecha, imagen_url,
fuente_nombre, categoria_id, pais_id)
VALUES (md5(%s), %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (url) DO NOTHING;
""",
(
url_norm,
titulo,
resumen,
url_norm,
fecha,
imagen_url,
feed_nombre,
categoria_id,
pais_id,
),
)
if cur.rowcount > 0:
nuevos += 1
except psycopg2.Error as e:
app.logger.warning(f"[ingesta] Error insertando noticia de {feed_url}: {e}")
with get_conn() as conn, conn.cursor() as cur:
cur.execute(
"UPDATE feeds SET fallos = 0 WHERE id = %s;",
(feed_id,),
)
app.logger.info(f"[ingesta] Feed {feed_id} OK. Nuevas noticias: {nuevos}")
except Exception as e:
app.logger.exception(f"[ingesta] Error procesando feed {feed_id} ({feed_url}): {e}")
try:
with get_conn() as conn, conn.cursor() as cur:
cur.execute(
"""
UPDATE feeds
SET fallos = COALESCE(fallos, 0) + 1,
activo = CASE
WHEN COALESCE(fallos, 0) + 1 >= %s THEN FALSE
ELSE activo
END
WHERE id = %s;
""",
(RSS_MAX_FAILURES, feed_id),
)
except Exception as e2:
app.logger.warning(f"[ingesta] No se pudo actualizar fallos de feed {feed_id}: {e2}")
def fetch_and_store_all():
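    """Process every active feed concurrently in a thread pool. No route in
    this file calls it; presumably it is triggered externally (cron or a
    scheduler)."""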
app.logger.info("[ingesta] fetch_and_store_all() iniciado")
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
"""
SELECT id, nombre, url, categoria_id, pais_id, fallos, activo
FROM feeds
WHERE activo = TRUE
AND (fallos IS NULL OR fallos < %s)
ORDER BY id;
""",
(RSS_MAX_FAILURES,),
)
feeds = cur.fetchall()
if not feeds:
app.logger.info("[ingesta] No hay feeds activos para procesar.")
return
app.logger.info(f"[ingesta] Procesando {len(feeds)} feeds (max workers = {RSS_MAX_WORKERS})")
max_workers = max(1, RSS_MAX_WORKERS)
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = {executor.submit(_process_feed, f): f for f in feeds}
for future in as_completed(futures):
feed_row = futures[future]
try:
future.result()
except Exception as e:
app.logger.exception(f"[ingesta] Excepción no controlada en feed {feed_row['id']}: {e}")
app.logger.info("[ingesta] fetch_and_store_all() terminado")
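# Minimal sketch (an assumption, not part of the original wiring): expose the
# ingestion pass as a Flask CLI command, so cron can run `flask --app app ingest`.
@app.cli.command("ingest")
def ingest_command():
    """Run one ingestion pass over all active feeds."""
    fetch_and_store_all()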
@app.route("/")
@app.route("/home")
def home():
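    """Paginated news listing with filters (full-text q, category, continent,
    country, date). Shows translations for ?lang= when available; any ?orig=
    value forces the original titles/summaries. XHR requests receive only the
    partial list template."""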
    try:
        page = max(int(request.args.get("page", 1) or 1), 1)
        per_page = int(request.args.get("per_page", NEWS_PER_PAGE_DEFAULT) or NEWS_PER_PAGE_DEFAULT)
    except ValueError:
        page, per_page = 1, NEWS_PER_PAGE_DEFAULT
    per_page = min(max(per_page, 10), 100)
q = (request.args.get("q") or "").strip()
categoria_id = request.args.get("categoria_id") or None
continente_id = request.args.get("continente_id") or None
pais_id = request.args.get("pais_id") or None
fecha_str = request.args.get("fecha") or ""
lang = (request.args.get("lang") or DEFAULT_TRANSLATION_LANG or DEFAULT_LANG).lower()[:5]
orig_flag = request.args.get("orig")
use_tr = not bool(orig_flag)
fecha_filtro = None
if fecha_str:
try:
fecha_filtro = datetime.strptime(fecha_str, "%Y-%m-%d").date()
except ValueError:
fecha_filtro = None
offset = (page - 1) * per_page
with get_conn() as conn:
conn.autocommit = True
categorias = get_categorias(conn)
continentes = get_continentes(conn)
paises = get_paises(conn)
params = []
where = ["1=1"]
if fecha_filtro:
where.append("n.fecha::date = %s")
params.append(fecha_filtro)
if categoria_id:
where.append("n.categoria_id = %s")
params.append(int(categoria_id))
if pais_id:
where.append("n.pais_id = %s")
params.append(int(pais_id))
elif continente_id:
where.append("p.continente_id = %s")
params.append(int(continente_id))
if q:
search_like = f"%{q}%"
if use_tr:
where.append(
"""
(
n.tsv @@ websearch_to_tsquery('spanish', %s)
OR t.titulo_trad ILIKE %s
OR t.resumen_trad ILIKE %s
OR n.titulo ILIKE %s
OR n.resumen ILIKE %s
)
"""
)
params.extend([q, search_like, search_like, search_like, search_like])
else:
where.append(
"""
(
n.tsv @@ websearch_to_tsquery('spanish', %s)
OR n.titulo ILIKE %s
OR n.resumen ILIKE %s
)
"""
)
params.extend([q, search_like, search_like])
where_sql = " AND ".join(where)
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
f"""
SELECT COUNT(DISTINCT n.id)
FROM noticias n
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
LEFT JOIN traducciones t
ON t.noticia_id = n.id
AND t.lang_to = %s
AND t.status = 'done'
WHERE {where_sql}
""",
[lang] + params,
)
total_results = cur.fetchone()[0] if cur.rowcount else 0
total_pages = (total_results // per_page) + (1 if total_results % per_page else 0)
cur.execute(
f"""
SELECT
n.id,
n.titulo,
n.resumen,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
c.nombre AS categoria,
p.nombre AS pais,
t.id AS traduccion_id,
t.titulo_trad AS titulo_traducido,
t.resumen_trad AS resumen_traducido,
CASE WHEN t.id IS NOT NULL THEN TRUE ELSE FALSE END AS tiene_traduccion,
n.titulo AS titulo_original,
n.resumen AS resumen_original
FROM noticias n
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
LEFT JOIN traducciones t
ON t.noticia_id = n.id
AND t.lang_to = %s
AND t.status = 'done'
WHERE {where_sql}
ORDER BY n.fecha DESC NULLS LAST, n.id DESC
LIMIT %s OFFSET %s
""",
[lang] + params + [per_page, offset],
)
noticias = cur.fetchall()
tags_por_tr = {}
tr_ids = [n["traduccion_id"] for n in noticias if n["traduccion_id"]]
if tr_ids:
cur.execute(
"""
SELECT tn.traduccion_id, tg.valor, tg.tipo
FROM tags_noticia tn
JOIN tags tg ON tg.id = tn.tag_id
WHERE tn.traduccion_id = ANY(%s);
""",
(tr_ids,),
)
for tr_id, valor, tipo in cur.fetchall():
tags_por_tr.setdefault(tr_id, []).append((valor, tipo))
context = dict(
noticias=noticias,
total_results=total_results,
total_pages=total_pages,
page=page,
per_page=per_page,
categorias=categorias,
continentes=continentes,
paises=paises,
q=q,
cat_id=int(categoria_id) if categoria_id else None,
cont_id=int(continente_id) if continente_id else None,
pais_id=int(pais_id) if pais_id else None,
fecha_filtro=fecha_str,
use_tr=use_tr,
lang=lang,
tags_por_tr=tags_por_tr,
)
if request.headers.get("X-Requested-With") == "XMLHttpRequest":
return render_template("_noticias_list.html", **context)
return render_template("noticias.html", **context)
@app.route("/noticia")
def noticia():
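    """Article detail, addressed by translation (?tr_id=) or raw article
    (?id=). When a translation exists, its tags and related articles are
    loaded as well."""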
tr_id = request.args.get("tr_id")
noticia_id = request.args.get("id")
if not tr_id and not noticia_id:
flash("No se ha indicado ninguna noticia.", "warning")
return redirect(url_for("home"))
with get_conn() as conn:
conn.autocommit = True
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
dato = None
if tr_id:
cur.execute(
"""
SELECT
t.id AS traduccion_id,
t.lang_from,
t.lang_to,
t.titulo_trad,
t.resumen_trad,
                    n.id AS noticia_id,
n.titulo AS titulo_orig,
n.resumen AS resumen_orig,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
c.nombre AS categoria,
p.nombre AS pais
FROM traducciones t
JOIN noticias n ON n.id = t.noticia_id
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
WHERE t.id = %s
""",
(int(tr_id),),
)
dato = cur.fetchone()
else:
cur.execute(
"""
SELECT
NULL AS traduccion_id,
NULL AS lang_from,
NULL AS lang_to,
NULL AS titulo_trad,
NULL AS resumen_trad,
n.id AS noticia_id,
n.titulo AS titulo_orig,
n.resumen AS resumen_orig,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
c.nombre AS categoria,
p.nombre AS pais
FROM noticias n
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
WHERE n.id = %s
""",
(noticia_id,),
)
dato = cur.fetchone()
tags = []
relacionadas = []
if dato and dato["traduccion_id"]:
cur.execute(
"""
SELECT tg.valor, tg.tipo
FROM tags_noticia tn
JOIN tags tg ON tg.id = tn.tag_id
WHERE tn.traduccion_id = %s
ORDER BY tg.tipo, tg.valor;
""",
(dato["traduccion_id"],),
)
tags = cur.fetchall()
cur.execute(
"""
SELECT
n2.url,
n2.titulo,
n2.fecha,
n2.imagen_url,
n2.fuente_nombre,
rn.score
FROM related_noticias rn
JOIN traducciones t2 ON t2.id = rn.related_traduccion_id
JOIN noticias n2 ON n2.id = t2.noticia_id
WHERE rn.traduccion_id = %s
ORDER BY rn.score DESC
LIMIT 8;
""",
(dato["traduccion_id"],),
)
relacionadas = cur.fetchall()
return render_template(
"noticia.html",
dato=dato,
tags=tags,
relacionadas=relacionadas,
)
@app.route("/dashboard")
def dashboard():
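    """Global counters plus trending tags from the last 24 h (read from the
    v_tag_counts_24h view; silently empty if the view is missing)."""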
with get_conn() as conn:
conn.autocommit = True
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT COUNT(*) FROM feeds;")
feeds_totales = cur.fetchone()[0]
cur.execute("SELECT COUNT(*) FROM noticias;")
noticias_totales = cur.fetchone()[0]
cur.execute("SELECT COUNT(*) FROM feeds WHERE activo = FALSE;")
feeds_caidos = cur.fetchone()[0]
stats = {
"feeds_totales": feeds_totales,
"noticias_totales": noticias_totales,
"feeds_caidos": feeds_caidos,
}
top_tags = []
try:
cur.execute(
"SELECT id, valor, tipo, apariciones FROM v_tag_counts_24h ORDER BY apariciones DESC LIMIT 100;"
)
top_tags = cur.fetchall()
except psycopg2.Error:
top_tags = []
return render_template("dashboard.html", stats=stats, top_tags=top_tags)
@app.route("/feeds")
def manage_feeds():
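    """Paginated feed management table (50 per page), with category and
    country lookups for the forms."""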
    try:
        page = max(int(request.args.get("page", 1) or 1), 1)
    except ValueError:
        page = 1
    per_page = 50
    offset = (page - 1) * per_page
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT COUNT(*) FROM feeds;")
total_feeds = cur.fetchone()[0] if cur.rowcount else 0
total_pages = (total_feeds // per_page) + (1 if total_feeds % per_page else 0)
cur.execute(
"""
SELECT
f.id,
f.nombre,
f.descripcion,
f.url,
f.activo,
f.fallos,
c.nombre AS categoria,
p.nombre AS pais
FROM feeds f
LEFT JOIN categorias c ON c.id = f.categoria_id
LEFT JOIN paises p ON p.id = f.pais_id
ORDER BY f.nombre
LIMIT %s OFFSET %s;
""",
(per_page, offset),
)
feeds = cur.fetchall()
cur.execute("SELECT id, nombre FROM categorias ORDER BY nombre;")
categorias = cur.fetchall()
cur.execute("SELECT id, nombre FROM paises ORDER BY nombre;")
paises = cur.fetchall()
return render_template(
"feeds_list.html",
feeds=feeds,
total_feeds=total_feeds,
total_pages=total_pages,
page=page,
categorias=categorias,
paises=paises,
)
@app.route("/feeds/add", methods=["GET", "POST"])
def add_feed():
with get_conn() as conn:
conn.autocommit = True
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
nombre = request.form.get("nombre")
descripcion = request.form.get("descripcion") or None
url = request.form.get("url")
categoria_id = request.form.get("categoria_id") or None
pais_id = request.form.get("pais_id") or None
idioma = (request.form.get("idioma") or "").strip().lower()[:2] or None
try:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma)
VALUES (%s, %s, %s, %s, %s, %s);
""",
(
nombre,
descripcion,
url,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
idioma,
),
)
flash(f"Feed '{nombre}' añadido correctamente.", "success")
return redirect(url_for("manage_feeds"))
except psycopg2.Error as e:
flash(f"Error al añadir feed: {e}", "error")
return render_template("add_feed.html", categorias=categorias, paises=paises)
@app.route("/feeds/<int:feed_id>/edit", methods=["GET", "POST"])
def edit_feed(feed_id):
with get_conn() as conn:
conn.autocommit = True
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT * FROM feeds WHERE id = %s;", (feed_id,))
feed = cur.fetchone()
if not feed:
flash("Feed no encontrado.", "error")
return redirect(url_for("manage_feeds"))
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
nombre = request.form.get("nombre")
descripcion = request.form.get("descripcion") or None
url = request.form.get("url")
categoria_id = request.form.get("categoria_id") or None
pais_id = request.form.get("pais_id") or None
idioma = (request.form.get("idioma") or "").strip().lower()[:2] or None
activo = bool(request.form.get("activo"))
try:
cur.execute(
"""
UPDATE feeds
SET nombre = %s,
descripcion = %s,
url = %s,
categoria_id = %s,
pais_id = %s,
idioma = %s,
activo = %s
WHERE id = %s;
""",
(
nombre,
descripcion,
url,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
idioma,
activo,
feed_id,
),
)
flash("Feed actualizado correctamente.", "success")
return redirect(url_for("manage_feeds"))
except psycopg2.Error as e:
flash(f"Error al actualizar feed: {e}", "error")
return render_template("edit_feed.html", feed=feed, categorias=categorias, paises=paises)
@app.route("/feeds/<int:feed_id>/delete")
def delete_feed(feed_id):
with get_conn() as conn, conn.cursor() as cur:
try:
cur.execute("DELETE FROM feeds WHERE id = %s;", (feed_id,))
flash("Feed eliminado.", "success")
except psycopg2.Error as e:
flash(f"No se pudo eliminar el feed: {e}", "error")
return redirect(url_for("manage_feeds"))
@app.route("/feeds/<int:feed_id>/reactivar")
def reactivar_feed(feed_id):
with get_conn() as conn, conn.cursor() as cur:
try:
cur.execute(
"UPDATE feeds SET activo = TRUE, fallos = 0 WHERE id = %s;",
(feed_id,),
)
flash("Feed reactivado.", "success")
except psycopg2.Error as e:
flash(f"No se pudo reactivar el feed: {e}", "error")
return redirect(url_for("manage_feeds"))
@app.route("/add", methods=["POST"])
def legacy_add_feed():
return add_feed()
@app.route("/backup_feeds")
def backup_feeds():
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
"""
SELECT
f.id,
f.nombre,
f.descripcion,
f.url,
f.categoria_id,
c.nombre AS categoria,
f.pais_id,
p.nombre AS pais,
f.idioma,
f.activo,
f.fallos
FROM feeds f
LEFT JOIN categorias c ON c.id = f.categoria_id
LEFT JOIN paises p ON p.id = f.pais_id
ORDER BY f.id;
"""
)
rows = cur.fetchall()
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(["id", "nombre", "descripcion", "url", "categoria_id", "categoria",
"pais_id", "pais", "idioma", "activo", "fallos"])
for r in rows:
writer.writerow([
r["id"],
r["nombre"],
r["descripcion"] or "",
r["url"],
r["categoria_id"] or "",
r["categoria"] or "",
r["pais_id"] or "",
r["pais"] or "",
r["idioma"] or "",
r["activo"],
r["fallos"],
])
output.seek(0)
return send_file(
io.BytesIO(output.getvalue().encode("utf-8")),
mimetype="text/csv",
as_attachment=True,
download_name="feeds_backup.csv",
)
@app.route("/restore_feeds", methods=["GET", "POST"])
def restore_feeds():
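    """Upsert feeds from a CSV in the /backup_feeds format; rows are matched
    on url (ON CONFLICT DO UPDATE) and bad rows are skipped."""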
if request.method == "GET":
return render_template("restore_feeds.html")
file = request.files.get("file")
if not file:
flash("No se ha subido ningún archivo.", "error")
return redirect(url_for("restore_feeds"))
try:
content = file.stream.read().decode("utf-8", errors="ignore")
reader = csv.DictReader(io.StringIO(content))
except Exception as e:
flash(f"Error leyendo el CSV: {e}", "error")
return redirect(url_for("restore_feeds"))
def parse_int_field(row, key):
val = row.get(key)
if val is None or str(val).strip() == "":
return None
try:
return int(val)
except (ValueError, TypeError):
app.logger.warning(
f"[restore_feeds] Valor no numérico '{val}' en columna {key}, se usará NULL."
)
return None
conn = get_conn()
try:
with conn.cursor() as cur:
for row in reader:
try:
categoria_id = parse_int_field(row, "categoria_id")
pais_id = parse_int_field(row, "pais_id")
raw_fallos = (row.get("fallos") or "").strip()
if raw_fallos == "":
fallos = 0
else:
try:
fallos = int(raw_fallos)
except (ValueError, TypeError):
app.logger.warning(
f"[restore_feeds] Valor no numérico '{raw_fallos}' en columna fallos, se usará 0."
)
fallos = 0
cur.execute(
"""
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma, activo, fallos)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
ON CONFLICT (url) DO UPDATE
SET nombre = EXCLUDED.nombre,
descripcion = EXCLUDED.descripcion,
categoria_id = EXCLUDED.categoria_id,
pais_id = EXCLUDED.pais_id,
idioma = EXCLUDED.idioma,
activo = EXCLUDED.activo,
fallos = EXCLUDED.fallos;
""",
(
row["nombre"],
row.get("descripcion") or None,
row["url"],
categoria_id,
pais_id,
(row.get("idioma") or "").strip().lower()[:2] or None,
row.get("activo") in ("1", "True", "true", "t", "on"),
fallos,
),
)
conn.commit()
except psycopg2.Error as e:
                    app.logger.warning(f"[restore_feeds] Error restaurando feed: {e}")
conn.rollback()
finally:
conn.close()
flash("Restauración de feeds completada (con posibles errores en algunos registros).", "success")
return redirect(url_for("dashboard"))
@app.route("/urls")
def manage_urls():
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
"""
SELECT
fu.id,
fu.nombre,
fu.url,
c.nombre AS categoria,
p.nombre AS pais,
fu.idioma
FROM fuentes_url fu
LEFT JOIN categorias c ON c.id = fu.categoria_id
LEFT JOIN paises p ON p.id = fu.pais_id
ORDER BY fu.nombre;
"""
)
fuentes = cur.fetchall()
return render_template("urls_list.html", fuentes=fuentes)
@app.route("/urls/add_source", methods=["GET", "POST"])
def add_url_source():
with get_conn() as conn:
conn.autocommit = True
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
nombre = request.form.get("nombre")
url = request.form.get("url")
categoria_id = request.form.get("categoria_id") or None
pais_id = request.form.get("pais_id") or None
idioma = (request.form.get("idioma", "es") or "es").strip().lower()[:2]
try:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma)
VALUES (%s, %s, %s, %s, %s)
ON CONFLICT (url) DO UPDATE
SET nombre = EXCLUDED.nombre,
categoria_id = EXCLUDED.categoria_id,
pais_id = EXCLUDED.pais_id,
idioma = EXCLUDED.idioma;
""",
(
nombre,
url,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
idioma,
),
)
flash(f"Fuente URL '{nombre}' añadida/actualizada correctamente.", "success")
return redirect(url_for("manage_urls"))
except psycopg2.Error as e:
flash(f"Error al guardar fuente URL: {e}", "error")
return render_template("add_url_source.html", categorias=categorias, paises=paises)
@app.route("/urls/<int:url_id>/edit", methods=["GET", "POST"])
def edit_url_source(url_id):
with get_conn() as conn:
conn.autocommit = True
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT * FROM fuentes_url WHERE id = %s;", (url_id,))
fuente = cur.fetchone()
if not fuente:
flash("Fuente URL no encontrada.", "error")
return redirect(url_for("manage_urls"))
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
nombre = request.form.get("nombre")
url = request.form.get("url")
categoria_id = request.form.get("categoria_id") or None
pais_id = request.form.get("pais_id") or None
idioma = (request.form.get("idioma") or "").strip().lower()[:2] or "es"
try:
cur.execute(
"""
UPDATE fuentes_url
SET nombre = %s,
url = %s,
categoria_id = %s,
pais_id = %s,
idioma = %s
WHERE id = %s;
""",
(
nombre,
url,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
idioma,
url_id,
),
)
flash("Fuente URL actualizada.", "success")
return redirect(url_for("manage_urls"))
except psycopg2.Error as e:
flash(f"Error al actualizar fuente: {e}", "error")
return render_template("edit_url_source.html", fuente=fuente, categorias=categorias, paises=paises)
@app.route("/urls/<int:url_id>/delete")
def delete_url_source(url_id):
with get_conn() as conn, conn.cursor() as cur:
try:
cur.execute("DELETE FROM fuentes_url WHERE id = %s;", (url_id,))
flash("Fuente URL eliminada.", "success")
except psycopg2.Error as e:
flash(f"No se pudo eliminar la fuente URL: {e}", "error")
return redirect(url_for("manage_urls"))
@app.route("/backup_urls")
def backup_urls():
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
"""
SELECT id, nombre, url, categoria_id, pais_id, idioma
FROM fuentes_url
ORDER BY id;
"""
)
rows = cur.fetchall()
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(["id", "nombre", "url", "categoria_id", "pais_id", "idioma"])
for r in rows:
writer.writerow([
r["id"],
r["nombre"],
r["url"],
r["categoria_id"] or "",
r["pais_id"] or "",
r["idioma"] or "",
])
output.seek(0)
return send_file(
io.BytesIO(output.getvalue().encode("utf-8")),
mimetype="text/csv",
as_attachment=True,
download_name="fuentes_url_backup.csv",
)
@app.route("/restore_urls", methods=["GET", "POST"])
def restore_urls():
if request.method == "GET":
return render_template("restore_urls.html")
file = request.files.get("file")
if not file:
flash("No se ha subido ningún archivo.", "error")
return redirect(url_for("restore_urls"))
try:
content = file.stream.read().decode("utf-8", errors="ignore")
reader = csv.DictReader(io.StringIO(content))
except Exception as e:
flash(f"Error leyendo el CSV: {e}", "error")
return redirect(url_for("restore_urls"))
conn = get_conn()
try:
with conn.cursor() as cur:
for row in reader:
try:
cur.execute(
"""
INSERT INTO fuentes_url (id, nombre, url, categoria_id, pais_id, idioma)
VALUES (%s,%s,%s,%s,%s,%s)
ON CONFLICT (id) DO UPDATE
SET nombre = EXCLUDED.nombre,
url = EXCLUDED.url,
categoria_id = EXCLUDED.categoria_id,
pais_id = EXCLUDED.pais_id,
idioma = EXCLUDED.idioma;
""",
(
int(row["id"]),
row["nombre"],
row["url"],
int(row["categoria_id"]) if row.get("categoria_id") else None,
int(row["pais_id"]) if row.get("pais_id") else None,
(row.get("idioma") or "es").strip().lower()[:2],
),
)
conn.commit()
except psycopg2.Error as e:
                    app.logger.warning(f"[restore_urls] Error restaurando fuente_url: {e}")
conn.rollback()
finally:
conn.close()
flash("Importación de fuentes URL completada (con posibles errores en algunas filas).", "success")
return redirect(url_for("dashboard"))
@app.route("/urls/add", methods=["GET", "POST"])
def add_url():
with get_conn() as conn:
conn.autocommit = True
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
url = request.form.get("url")
categoria_id = request.form.get("categoria_id") or None
pais_id = request.form.get("pais_id") or None
if not url:
flash("Debes indicar una URL.", "error")
return redirect(url_for("add_url"))
try:
from newspaper import Article
except ImportError:
flash("La librería newspaper3k no está instalada en este entorno.", "error")
return redirect(url_for("add_url"))
try:
art = Article(url)
art.download()
art.parse()
titulo = art.title or url
            # Article.summary is only filled in after art.nlp(); fall back to
            # the page's meta description so the stored summary is not empty.
            resumen = ((art.summary or art.meta_description or "")[:2000]) or None
imagen_url = art.top_image or None
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url,
fuente_nombre, categoria_id, pais_id)
VALUES (md5(%s), %s, %s, %s, NOW(), %s, %s, %s, %s)
ON CONFLICT (url) DO NOTHING;
""",
(
url,
titulo,
resumen,
url,
imagen_url,
None,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
),
)
flash("Noticia añadida desde URL.", "success")
return redirect(url_for("home"))
except Exception as e:
flash(f"Error al scrapear la URL: {e}", "error")
return redirect(url_for("add_url"))
return render_template("add_url.html", categorias=categorias, paises=paises)
@app.route("/urls/scrape", methods=["GET", "POST"])
def scrape_url():
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT id, nombre FROM fuentes_url ORDER BY nombre;")
fuentes = cur.fetchall()
if request.method == "POST":
source_id = request.form.get("source_id")
if not source_id:
flash("Debes seleccionar una fuente.", "error")
return redirect(url_for("scrape_url"))
flash("Scrapeo desde fuente aún no implementado (stub).", "warning")
return redirect(url_for("dashboard"))
return render_template("scrape_url.html", fuentes=fuentes)
@app.route("/backup_completo")
def backup_completo():
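    """Bundle full CSV dumps of feeds and fuentes_url into a downloadable ZIP."""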
mem_file = io.BytesIO()
with zipfile.ZipFile(mem_file, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT * FROM feeds ORDER BY id;")
rows = cur.fetchall()
buf = io.StringIO()
writer = csv.writer(buf)
if rows:
writer.writerow(rows[0].keys())
for r in rows:
writer.writerow(list(r.values()))
zf.writestr("feeds.csv", buf.getvalue())
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT * FROM fuentes_url ORDER BY id;")
rows = cur.fetchall()
buf2 = io.StringIO()
writer2 = csv.writer(buf2)
if rows:
writer2.writerow(rows[0].keys())
for r in rows:
writer2.writerow(list(r.values()))
zf.writestr("fuentes_url.csv", buf2.getvalue())
mem_file.seek(0)
return send_file(
mem_file,
mimetype="application/zip",
as_attachment=True,
download_name="backup_completo_rss.zip",
)
@app.route("/restore_completo", methods=["GET", "POST"])
def restore_completo():
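    """Restore feeds/fuentes_url from a /backup_completo ZIP. Destructive:
    each recognized table is TRUNCATEd and re-loaded with COPY, so the CSV
    column order must match the current table definitions."""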
if request.method == "GET":
return render_template("restore_completo.html")
file = request.files.get("backup_file")
if not file or file.filename == "":
flash("No se ha seleccionado ningún archivo.", "error")
return redirect(url_for("restore_completo"))
filename = file.filename.lower()
if not filename.endswith(".zip"):
flash("El archivo debe ser un .zip.", "error")
return redirect(url_for("restore_completo"))
raw = file.read()
try:
zf = zipfile.ZipFile(io.BytesIO(raw))
except zipfile.BadZipFile:
flash("El archivo no es un .zip válido.", "error")
return redirect(url_for("restore_completo"))
restored_counts = {}
conn = get_conn()
try:
with conn:
with conn.cursor() as cur:
if "feeds.csv" in zf.namelist():
cur.execute("TRUNCATE TABLE feeds RESTART IDENTITY;")
with zf.open("feeds.csv") as f:
text_f = io.TextIOWrapper(f, encoding="utf-8")
cur.copy_expert("COPY feeds FROM STDIN CSV HEADER", text_f)
restored_counts["feeds"] = cur.rowcount if cur.rowcount is not None else 0
if "fuentes_url.csv" in zf.namelist():
cur.execute("TRUNCATE TABLE fuentes_url RESTART IDENTITY;")
with zf.open("fuentes_url.csv") as f2:
text_f2 = io.TextIOWrapper(f2, encoding="utf-8")
cur.copy_expert("COPY fuentes_url FROM STDIN CSV HEADER", text_f2)
restored_counts["fuentes_url"] = cur.rowcount if cur.rowcount is not None else 0
except Exception as e:
conn.rollback()
conn.close()
flash(f"Error al restaurar el backup: {e}", "error")
return redirect(url_for("restore_completo"))
conn.close()
if restored_counts:
partes = [f"{tabla}: {n} filas" for tabla, n in restored_counts.items()]
flash("Restauración completada: " + ", ".join(partes), "success")
else:
flash("Backup procesado pero no se encontraron ficheros reconocidos (feeds.csv, fuentes_url.csv).", "warning")
return redirect(url_for("dashboard"))
@app.route("/eventos_pais")
def eventos_pais():
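    """Events for a country: events linked through translations to articles
    with that pais_id, each listed with its translated articles."""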
pais_id = request.args.get("pais_id") or None
    try:
        page = max(int(request.args.get("page", 1) or 1), 1)
    except ValueError:
        page = 1
    per_page = 30
    offset = (page - 1) * per_page
lang = (request.args.get("lang") or DEFAULT_TRANSLATION_LANG or DEFAULT_LANG).lower()[:5]
with get_conn() as conn:
conn.autocommit = True
paises = get_paises(conn)
eventos = []
total_eventos = 0
noticias_por_evento = {}
pais_nombre = None
if pais_id:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
                # 1) Events with at least one translation whose article belongs to that country
cur.execute(
"""
SELECT
e.id,
e.titulo,
e.fecha_inicio,
e.fecha_fin,
e.n_noticias,
MAX(p.nombre) AS pais_nombre
FROM eventos e
JOIN traducciones t ON t.evento_id = e.id
JOIN noticias n ON n.id = t.noticia_id
JOIN paises p ON p.id = n.pais_id
WHERE n.pais_id = %s
GROUP BY e.id, e.titulo, e.fecha_inicio, e.fecha_fin, e.n_noticias
ORDER BY e.fecha_inicio DESC NULLS LAST, e.id DESC
LIMIT %s OFFSET %s;
""",
(int(pais_id), per_page, offset),
)
eventos = cur.fetchall()
                # 2) Total distinct events for that country
cur.execute(
"""
SELECT COUNT(DISTINCT e.id)
FROM eventos e
JOIN traducciones t ON t.evento_id = e.id
JOIN noticias n ON n.id = t.noticia_id
WHERE n.pais_id = %s;
""",
(int(pais_id),),
)
total_eventos = cur.fetchone()[0] if cur.rowcount else 0
                # 3) Load the articles attached to those events (via traducciones + noticias)
if eventos:
evento_ids = [e["id"] for e in eventos]
cur.execute(
"""
SELECT
t.evento_id,
n.id AS noticia_id,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
n.titulo AS titulo_orig,
n.resumen AS resumen_orig,
t.id AS traduccion_id,
t.titulo_trad AS titulo_trad,
t.resumen_trad AS resumen_trad,
p.nombre AS pais_nombre
FROM traducciones t
JOIN noticias n ON n.id = t.noticia_id
LEFT JOIN paises p ON p.id = n.pais_id
WHERE t.evento_id = ANY(%s)
AND t.status = 'done'
AND t.lang_to = %s
ORDER BY t.evento_id, n.fecha DESC;
""",
(evento_ids, lang),
)
rows = cur.fetchall()
noticias_por_evento = {e["id"]: [] for e in eventos}
for r in rows:
noticias_por_evento.setdefault(r["evento_id"], []).append(r)
                    # Country name (every event in this view matches the filtered country)
pais_nombre = eventos[0]["pais_nombre"]
else:
                    # No events: still resolve the country name from the list
for p in paises:
if p["id"] == int(pais_id):
pais_nombre = p["nombre"]
break
total_pages = (total_eventos // per_page) + (1 if total_eventos % per_page else 0)
return render_template(
"eventos_pais.html",
paises=paises,
eventos=eventos,
noticias_por_evento=noticias_por_evento,
pais_id=int(pais_id) if pais_id else None,
pais_nombre=pais_nombre,
total_eventos=total_eventos,
total_pages=total_pages,
page=page,
lang=lang,
)
if __name__ == "__main__":
app.run(host="0.0.0.0", port=8001, debug=True)