Initial clean commit

jlimolina 2026-01-13 13:39:51 +01:00
commit 6784d81c2c
141 changed files with 25219 additions and 0 deletions

3
routers/__init__.py Normal file

@@ -0,0 +1,3 @@
# routers/__init__.py
# Required so Python treats this directory as a package.

267
routers/account.py Normal file

@@ -0,0 +1,267 @@
"""
Account management router - User profile and account settings.
"""
from flask import Blueprint, render_template, request, redirect, url_for, flash, jsonify
from psycopg2 import extras
from db import get_conn
from utils.auth import get_current_user, login_required, hash_password, verify_password, validate_password
from datetime import datetime
account_bp = Blueprint("account", __name__, url_prefix="/account")
@account_bp.route("/")
@login_required
def index():
"""User account dashboard."""
user = get_current_user()
if not user:
return redirect(url_for('auth.login'))
with get_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Get favorites count
cur.execute("""
SELECT COUNT(*) as count
FROM favoritos
WHERE user_id = %s
""", (user['id'],))
favorites_count = cur.fetchone()['count']
# Get search history count
cur.execute("""
SELECT COUNT(*) as count
FROM search_history
WHERE user_id = %s
""", (user['id'],))
searches_count = cur.fetchone()['count']
# Get recent searches (last 10)
cur.execute("""
SELECT query, results_count, searched_at
FROM search_history
WHERE user_id = %s
ORDER BY searched_at DESC
LIMIT 10
""", (user['id'],))
recent_searches = cur.fetchall()
# Get recent favorites (last 5)
cur.execute("""
SELECT n.id, n.titulo, n.imagen_url, f.created_at,
t.titulo_trad, t.id AS traduccion_id
FROM favoritos f
JOIN noticias n ON n.id = f.noticia_id
LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es' AND t.status = 'done'
WHERE f.user_id = %s
ORDER BY f.created_at DESC
LIMIT 5
""", (user['id'],))
recent_favorites = cur.fetchall()
return render_template("account.html",
user=user,
favorites_count=favorites_count,
searches_count=searches_count,
recent_searches=recent_searches,
recent_favorites=recent_favorites)
@account_bp.route("/search-history")
@login_required
def search_history():
"""Full search history page."""
user = get_current_user()
if not user:
return redirect(url_for('auth.login'))
page = max(1, int(request.args.get('page', 1)))
per_page = 50
offset = (page - 1) * per_page
with get_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Get total count
cur.execute("""
SELECT COUNT(*) as count
FROM search_history
WHERE user_id = %s
""", (user['id'],))
total = cur.fetchone()['count']
# Get paginated results
cur.execute("""
SELECT query, results_count, searched_at
FROM search_history
WHERE user_id = %s
ORDER BY searched_at DESC
LIMIT %s OFFSET %s
""", (user['id'], per_page, offset))
searches = cur.fetchall()
total_pages = (total + per_page - 1) // per_page
return render_template("search_history.html",
user=user,
searches=searches,
page=page,
total_pages=total_pages,
total=total)
@account_bp.route("/change-password", methods=["POST"])
@login_required
def change_password():
"""Change user password."""
user = get_current_user()
if not user:
return redirect(url_for('auth.login'))
current_password = request.form.get("current_password", "")
new_password = request.form.get("new_password", "")
new_password_confirm = request.form.get("new_password_confirm", "")
# Validation
if not current_password or not new_password:
flash("Por favor completa todos los campos", "danger")
return redirect(url_for('account.index'))
valid_password, password_error = validate_password(new_password)
if not valid_password:
flash(password_error, "danger")
return redirect(url_for('account.index'))
if new_password != new_password_confirm:
flash("Las contraseñas nuevas no coinciden", "danger")
return redirect(url_for('account.index'))
try:
with get_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Verify current password
cur.execute("""
SELECT password_hash
FROM usuarios
WHERE id = %s
""", (user['id'],))
result = cur.fetchone()
if not result or not verify_password(current_password, result['password_hash']):
flash("La contraseña actual es incorrecta", "danger")
return redirect(url_for('account.index'))
# Update password
new_hash = hash_password(new_password)
cur.execute("""
UPDATE usuarios
SET password_hash = %s, updated_at = NOW()
WHERE id = %s
""", (new_hash, user['id']))
conn.commit()
flash("Contraseña actualizada exitosamente", "success")
except Exception as e:
flash("Error al actualizar la contraseña", "danger")
return redirect(url_for('account.index'))
@account_bp.route("/upload-avatar", methods=["POST"])
@login_required
def upload_avatar():
"""Upload user avatar."""
import os
import secrets
from werkzeug.utils import secure_filename
from flask import current_app
user = get_current_user()
if not user:
return redirect(url_for('auth.login'))
if 'avatar' not in request.files:
flash("No se seleccionó ningún archivo", "danger")
return redirect(url_for('account.index'))
file = request.files['avatar']
if file.filename == '':
flash("No se seleccionó ningún archivo", "danger")
return redirect(url_for('account.index'))
if file:
# Check extension
allowed_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.webp'}
_, ext = os.path.splitext(file.filename)
if ext.lower() not in allowed_extensions:
flash("Formato de imagen no permitido. Usa JPG, PNG, GIF o WEBP.", "danger")
return redirect(url_for('account.index'))
# Save file
try:
# Build the filename from the user ID plus a random suffix to avoid stale browser caches
random_hex = secrets.token_hex(4)
filename = f"user_{user['id']}_{random_hex}{ext.lower()}"
# Ensure upload folder exists
upload_folder = os.path.join(current_app.root_path, 'static/uploads/avatars')
os.makedirs(upload_folder, exist_ok=True)
# Delete old avatar if exists
if user.get('avatar_url'):
old_path = os.path.join(current_app.root_path, user['avatar_url'].lstrip('/'))
if os.path.exists(old_path) and 'user_' in old_path: # Safety check: only remove files this app named
try:
os.remove(old_path)
except OSError:
pass  # Best effort: ignore failures removing the old avatar
file_path = os.path.join(upload_folder, filename)
file.save(file_path)
# Update DB
relative_path = f"/static/uploads/avatars/{filename}"
with get_conn() as conn:
with conn.cursor() as cur:
cur.execute("""
UPDATE usuarios
SET avatar_url = %s, updated_at = NOW()
WHERE id = %s
""", (relative_path, user['id']))
conn.commit()
# Update session
from flask import session
session['avatar_url'] = relative_path
flash("Foto de perfil actualizada", "success")
except Exception as e:
print(f"Error uploading avatar: {e}")
flash("Error al subir la imagen", "danger")
return redirect(url_for('account.index'))
@account_bp.route("/stats")
@login_required
def stats():
"""Get user statistics as JSON."""
user = get_current_user()
if not user:
return jsonify({"error": "Not authenticated"}), 401
with get_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("""
SELECT
(SELECT COUNT(*) FROM favoritos WHERE user_id = %s) as favorites_count,
(SELECT COUNT(*) FROM search_history WHERE user_id = %s) as searches_count,
(SELECT MAX(searched_at) FROM search_history WHERE user_id = %s) as last_search
""", (user['id'], user['id'], user['id']))
stats = cur.fetchone()
return jsonify({
"favorites_count": stats['favorites_count'],
"searches_count": stats['searches_count'],
"last_search": stats['last_search'].isoformat() if stats['last_search'] else None
})
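A hedged sketch of consuming the /account/stats endpoint above with Flask's test client. The `from app import app` import and the pre-seeded session values are assumptions (adapt to however app.py exposes the application, and to what get_current_user actually reads from the session).
python
# Hypothetical check of the JSON stats endpoint. login_required needs an
# authenticated session, so we pre-seed the cookie the way auth.login does.
from app import app  # assumption: app.py exposes the Flask instance

client = app.test_client()
with client.session_transaction() as sess:
    sess["user_id"] = 1        # assumption: a user with id 1 exists
    sess["username"] = "demo"

resp = client.get("/account/stats")
data = resp.get_json()
print(data["favorites_count"], data["searches_count"], data["last_search"])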

203
routers/auth.py Normal file

@@ -0,0 +1,203 @@
"""
Authentication router - User registration, login, and logout.
"""
from flask import Blueprint, request, render_template, redirect, url_for, session, flash
from psycopg2 import extras, IntegrityError
from db import get_conn
from utils.auth import (
hash_password, verify_password, is_authenticated,
validate_username, validate_password, validate_email
)
from datetime import datetime
auth_bp = Blueprint("auth", __name__, url_prefix="/auth")
def migrate_anonymous_favorites(session_id: str, user_id: int):
"""Migrate anonymous favorites to user account.
Args:
session_id: Anonymous session ID
user_id: User ID to migrate favorites to
"""
if not session_id:
return
with get_conn() as conn:
with conn.cursor() as cur:
# Migrate favorites, avoiding duplicates
cur.execute("""
UPDATE favoritos
SET user_id = %s, session_id = NULL
WHERE session_id = %s
AND noticia_id NOT IN (
SELECT noticia_id FROM favoritos WHERE user_id = %s
)
""", (user_id, session_id, user_id))
# Any rows still carrying the session_id are favorites the user already had; drop them
cur.execute("""
DELETE FROM favoritos
WHERE session_id = %s
""", (session_id,))
conn.commit()
# ============================================================
# Registration
# ============================================================
@auth_bp.route("/register", methods=["GET", "POST"])
def register():
"""User registration page and handler."""
if is_authenticated():
return redirect(url_for('account.index'))
if request.method == "POST":
username = request.form.get("username", "").strip()
email = request.form.get("email", "").strip().lower()
password = request.form.get("password", "")
password_confirm = request.form.get("password_confirm", "")
# Validation
valid_username, username_error = validate_username(username)
if not valid_username:
flash(username_error, "danger")
return render_template("register.html", username=username, email=email)
valid_email, email_error = validate_email(email)
if not valid_email:
flash(email_error, "danger")
return render_template("register.html", username=username, email=email)
valid_password, password_error = validate_password(password)
if not valid_password:
flash(password_error, "danger")
return render_template("register.html", username=username, email=email)
if password != password_confirm:
flash("Las contraseñas no coinciden", "danger")
return render_template("register.html", username=username, email=email)
# Create user
try:
password_hash = hash_password(password)
with get_conn() as conn:
with conn.cursor() as cur:
cur.execute("""
INSERT INTO usuarios (username, email, password_hash, last_login)
VALUES (%s, %s, %s, NOW())
RETURNING id
""", (username, email, password_hash))
user_id = cur.fetchone()[0]
conn.commit()
# Auto-login after registration
old_session_id = session.get('user_session')
session['user_id'] = user_id
session['username'] = username
# Migrate anonymous favorites if any
if old_session_id:
migrate_anonymous_favorites(old_session_id, user_id)
session.pop('user_session', None)
flash(f"¡Bienvenido {username}! Tu cuenta ha sido creada exitosamente.", "success")
return redirect(url_for('account.index'))
except IntegrityError as e:
if 'username' in str(e):
flash("Este nombre de usuario ya está en uso", "danger")
elif 'email' in str(e):
flash("Este email ya está registrado", "danger")
else:
flash("Error al crear la cuenta. Por favor intenta de nuevo.", "danger")
return render_template("register.html", username=username, email=email)
except Exception as e:
flash("Error al crear la cuenta. Por favor intenta de nuevo.", "danger")
return render_template("register.html", username=username, email=email)
return render_template("register.html")
# ============================================================
# Login
# ============================================================
@auth_bp.route("/login", methods=["GET", "POST"])
def login():
"""User login page and handler."""
if is_authenticated():
return redirect(url_for('account.index'))
if request.method == "POST":
username_or_email = request.form.get("username", "").strip()
password = request.form.get("password", "")
if not username_or_email or not password:
flash("Por favor ingresa tu usuario/email y contraseña", "danger")
return render_template("login.html", username=username_or_email)
try:
with get_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Try login with username or email
cur.execute("""
SELECT id, username, email, password_hash, is_active, avatar_url
FROM usuarios
WHERE (username = %s OR email = %s) AND is_active = TRUE
""", (username_or_email, username_or_email.lower()))
user = cur.fetchone()
if not user:
flash("Usuario o contraseña incorrectos", "danger")
return render_template("login.html", username=username_or_email)
if not verify_password(password, user['password_hash']):
flash("Usuario o contraseña incorrectos", "danger")
return render_template("login.html", username=username_or_email)
# Update last login
cur.execute("""
UPDATE usuarios SET last_login = NOW() WHERE id = %s
""", (user['id'],))
conn.commit()
# Create session
old_session_id = session.get('user_session')
session['user_id'] = user['id']
session['username'] = user['username']
session['avatar_url'] = user.get('avatar_url')
# Migrate anonymous favorites
if old_session_id:
migrate_anonymous_favorites(old_session_id, user['id'])
session.pop('user_session', None)
flash(f"¡Bienvenido de vuelta, {user['username']}!", "success")
# Redirect to 'next' parameter if exists
next_page = request.args.get('next')
if next_page and next_page.startswith('/'):
return redirect(next_page)
return redirect(url_for('account.index'))
except Exception as e:
flash("Error al iniciar sesión. Por favor intenta de nuevo.", "danger")
return render_template("login.html", username=username_or_email)
return render_template("login.html")
# ============================================================
# Logout
# ============================================================
@auth_bp.route("/logout", methods=["POST", "GET"])
def logout():
"""Log out the current user."""
username = session.get('username', 'Usuario')
session.clear()
flash(f"Hasta luego, {username}. Has cerrado sesión exitosamente.", "info")
return redirect(url_for('home.index'))

353
routers/backup.py Normal file

@@ -0,0 +1,353 @@
from flask import Blueprint, send_file, render_template, request, flash, redirect, url_for
import csv
import io
from psycopg2 import extras
from db import get_conn
backup_bp = Blueprint("backup", __name__)
# ============================================================
# EXPORT FEEDS → CSV (OK)
# ============================================================
@backup_bp.route("/backup_feeds")
def backup_feeds():
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("""
SELECT f.id, f.nombre, f.descripcion, f.url,
f.categoria_id, c.nombre AS categoria,
f.pais_id, p.nombre AS pais,
f.idioma, f.activo, f.fallos
FROM feeds f
LEFT JOIN categorias c ON c.id=f.categoria_id
LEFT JOIN paises p ON p.id=f.pais_id
ORDER BY f.id;
""")
rows = cur.fetchall()
output = io.StringIO()
writer = csv.writer(output)
writer.writerow([
"id", "nombre", "descripcion", "url",
"categoria_id", "categoria",
"pais_id", "pais",
"idioma", "activo", "fallos"
])
for r in rows:
writer.writerow([
r["id"],
r["nombre"],
r["descripcion"] or "",
r["url"],
r["categoria_id"] or "",
r["categoria"] or "",
r["pais_id"] or "",
r["pais"] or "",
r["idioma"] or "",
r["activo"],
r["fallos"],
])
output.seek(0)
return send_file(
io.BytesIO(output.getvalue().encode("utf-8")),
mimetype="text/csv",
as_attachment=True,
download_name="feeds_backup.csv",
)
# ============================================================
# EXPORT FILTERED FEEDS → CSV
# ============================================================
@backup_bp.route("/export_feeds_filtered")
def export_feeds_filtered():
"""Exportar feeds con filtros opcionales (país, categoría, estado)."""
pais_id = request.args.get("pais_id")
categoria_id = request.args.get("categoria_id")
estado = request.args.get("estado") or ""
# Build the WHERE filters (same logic as list_feeds)
where = []
params = []
if pais_id:
where.append("f.pais_id = %s")
params.append(int(pais_id))
if categoria_id:
where.append("f.categoria_id = %s")
params.append(int(categoria_id))
if estado == "activos":
where.append("f.activo = TRUE")
elif estado == "inactivos":
where.append("f.activo = FALSE")
elif estado == "errores":
where.append("COALESCE(f.fallos, 0) > 0")
where_sql = "WHERE " + " AND ".join(where) if where else ""
# SQL query with the filters applied
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(f"""
SELECT f.id, f.nombre, f.descripcion, f.url,
f.categoria_id, c.nombre AS categoria,
f.pais_id, p.nombre AS pais,
f.idioma, f.activo, f.fallos
FROM feeds f
LEFT JOIN categorias c ON c.id=f.categoria_id
LEFT JOIN paises p ON p.id=f.pais_id
{where_sql}
ORDER BY p.nombre NULLS LAST, c.nombre NULLS LAST, f.nombre;
""", params)
rows = cur.fetchall()
# Fetch display names for the output filename
pais_nombre = None
categoria_nombre = None
if pais_id:
cur.execute("SELECT nombre FROM paises WHERE id = %s", (int(pais_id),))
result = cur.fetchone()
if result:
pais_nombre = result["nombre"]
if categoria_id:
cur.execute("SELECT nombre FROM categorias WHERE id = %s", (int(categoria_id),))
result = cur.fetchone()
if result:
categoria_nombre = result["nombre"]
# Generate the CSV
output = io.StringIO()
writer = csv.writer(output)
writer.writerow([
"id", "nombre", "descripcion", "url",
"categoria_id", "categoria",
"pais_id", "pais",
"idioma", "activo", "fallos"
])
for r in rows:
writer.writerow([
r["id"],
r["nombre"],
r["descripcion"] or "",
r["url"],
r["categoria_id"] or "",
r["categoria"] or "",
r["pais_id"] or "",
r["pais"] or "",
r["idioma"] or "",
r["activo"],
r["fallos"],
])
# Build a dynamic filename
filename_parts = ["feeds"]
if pais_nombre:
# Sanitize the country name for use in a filename
clean_pais = pais_nombre.lower().replace(" ", "_").replace("/", "_")
filename_parts.append(clean_pais)
if categoria_nombre:
clean_cat = categoria_nombre.lower().replace(" ", "_").replace("/", "_")
filename_parts.append(clean_cat)
if estado:
filename_parts.append(estado)
filename = "_".join(filename_parts) + ".csv"
output.seek(0)
return send_file(
io.BytesIO(output.getvalue().encode("utf-8")),
mimetype="text/csv",
as_attachment=True,
download_name=filename,
)
# ============================================================
# RESTORE FEEDS FROM CSV (HARDENED VERSION)
# ============================================================
@backup_bp.route("/restore_feeds", methods=["GET", "POST"])
def restore_feeds():
if request.method == "GET":
return render_template("restore_feeds.html")
file = request.files.get("file")
if not file:
flash("Debes seleccionar un archivo CSV.", "error")
return redirect(url_for("backup.restore_feeds"))
# 1) Read the CSV
try:
raw = file.read().decode("utf-8-sig").replace("\ufeff", "")
reader = csv.DictReader(io.StringIO(raw))
except Exception as e:
flash(f"Error al procesar CSV: {e}", "error")
return redirect(url_for("backup.restore_feeds"))
expected_fields = [
"id", "nombre", "descripcion", "url",
"categoria_id", "categoria",
"pais_id", "pais",
"idioma", "activo", "fallos"
]
if reader.fieldnames != expected_fields:
flash("El CSV no tiene el encabezado correcto.", "error")
return redirect(url_for("backup.restore_feeds"))
# Counters
imported = 0
skipped = 0
failed = 0
with get_conn() as conn:
with conn.cursor() as cur:
# Table truncation removed so existing feeds are not wiped:
# cur.execute("TRUNCATE feeds RESTART IDENTITY CASCADE;")
for row in reader:
# General cleanup
row = {k: (v.strip().rstrip("ç") if isinstance(v, str) else v) for k, v in row.items()}
# Minimal validation
if not row["url"] or not row["nombre"]:
skipped += 1
continue
try:
# Create a savepoint so a failure in this row does not abort the whole transaction
cur.execute("SAVEPOINT row_savepoint")
# Normalize values
categoria_id = int(row["categoria_id"]) if row["categoria_id"] else None
pais_id = int(row["pais_id"]) if row["pais_id"] else None
idioma = (row["idioma"] or "").lower().strip()
idioma = idioma[:2] if idioma else None
activo = str(row["activo"]).lower() in ("true", "1", "t", "yes", "y")
fallos = int(row["fallos"] or 0)
# Check whether a feed with this URL already exists
cur.execute("SELECT id FROM feeds WHERE url = %s", (row["url"],))
existing_feed = cur.fetchone()
if existing_feed:
# URL already exists -> UPDATE the existing feed
cur.execute("""
UPDATE feeds SET
nombre=%s,
descripcion=%s,
categoria_id=%s,
pais_id=%s,
idioma=%s,
activo=%s,
fallos=%s
WHERE id=%s
""", (
row["nombre"],
row["descripcion"] or None,
categoria_id,
pais_id,
idioma,
activo,
fallos,
existing_feed[0]
))
else:
# URL not found -> INSERT a new feed (ignore the CSV id; let the sequence assign one)
cur.execute("""
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma, activo, fallos)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
""", (
row["nombre"],
row["descripcion"] or None,
row["url"],
categoria_id,
pais_id,
idioma,
activo,
fallos
))
cur.execute("RELEASE SAVEPOINT row_savepoint")
imported += 1
except Exception as e:
# On any error, roll back to the savepoint so the main transaction isn't aborted
cur.execute("ROLLBACK TO SAVEPOINT row_savepoint")
failed += 1
continue
# No need to reset sequence - auto-increment handles it
conn.commit()
flash(
f"Restauración completada. "
f"Importados: {imported} | Saltados: {skipped} | Fallidos: {failed}",
"success"
)
return redirect(url_for("feeds.list_feeds"))
# ============================================================
# EXPORT METADATA (COUNTRIES / CATEGORIES)
# ============================================================
@backup_bp.route("/export_paises")
def export_paises():
"""Exportar listado de países a CSV."""
with get_conn() as conn, conn.cursor() as cur:
cur.execute("SELECT id, nombre FROM paises ORDER BY id;")
rows = cur.fetchall()
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(["id", "nombre"])
for r in rows:
writer.writerow([r[0], r[1]])
output.seek(0)
return send_file(
io.BytesIO(output.getvalue().encode("utf-8")),
mimetype="text/csv",
as_attachment=True,
download_name="paises.csv",
)
@backup_bp.route("/export_categorias")
def export_categorias():
"""Exportar listado de categorías a CSV."""
with get_conn() as conn, conn.cursor() as cur:
cur.execute("SELECT id, nombre FROM categorias ORDER BY id;")
rows = cur.fetchall()
output = io.StringIO()
writer = csv.writer(output)
writer.writerow(["id", "nombre"])
for r in rows:
writer.writerow([r[0], r[1]])
output.seek(0)
return send_file(
io.BytesIO(output.getvalue().encode("utf-8")),
mimetype="text/csv",
as_attachment=True,
download_name="categorias.csv",
)

216
routers/config.py Normal file

@@ -0,0 +1,216 @@
from flask import Blueprint, render_template, request, redirect, url_for, flash, Response, stream_with_context
from datetime import datetime
import json
import zipfile
import io
from db import get_conn
from psycopg2 import extras
config_bp = Blueprint("config", __name__, url_prefix="/config")
@config_bp.route("/")
def config_home():
return render_template("config.html")
import tempfile
import os
import shutil
import threading
import uuid
import time
from flask import send_file, jsonify
from cache import cache_set, cache_get
# Global dictionary for temporary file paths. Redis would be safer in a clustered
# environment; in this single-server Docker setup a module-level dict is fine as
# long as the process doesn't restart, so paths are mirrored in Redis as well.
BACKUP_TASKS = {}
@config_bp.route("/backup/start")
def backup_start():
task_id = str(uuid.uuid4())
cache_set(f"backup_status:{task_id}", {"progress": 0, "total": 0, "status": "initializing"})
# Start thread
thread = threading.Thread(target=_backup_worker, args=(task_id,))
thread.daemon = True
thread.start()
return jsonify({"task_id": task_id})
@config_bp.route("/backup/status/<task_id>")
def backup_status(task_id):
status = cache_get(f"backup_status:{task_id}")
if not status:
return jsonify({"error": "Task not found"}), 404
return jsonify(status)
@config_bp.route("/backup/download/<task_id>")
def backup_download(task_id):
status = cache_get(f"backup_status:{task_id}")
if not status or status.get("status") != "completed":
return "Archivo no listo o expirado", 404
file_path = status.get("file_path")
if not file_path or not os.path.exists(file_path):
return "Archivo no encontrado", 404
filename = f"backup_noticias_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
return send_file(file_path, as_attachment=True, download_name=filename)
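The three routes above form a small start/poll/download protocol. A hedged client-side sketch follows (the endpoint paths come from the code; the BASE URL and the use of requests are assumptions):
python
# Hypothetical polling client for the async backup protocol.
import time
import requests  # assumption: available in the client environment

BASE = "http://localhost:5000/config"  # assumption: local deployment

task_id = requests.get(f"{BASE}/backup/start").json()["task_id"]
while True:
    status = requests.get(f"{BASE}/backup/status/{task_id}").json()
    if status.get("status") == "completed":
        break
    if status.get("status") == "error":
        raise RuntimeError(status.get("error"))
    print(f"progress: {status.get('progress')}/{status.get('total')}")
    time.sleep(2)

with open("backup.zip", "wb") as fh:
    fh.write(requests.get(f"{BASE}/backup/download/{task_id}").content)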
def _backup_worker(task_id):
"""Background thread to generate the backup ZIP with direct streaming."""
print(f"[BACKUP {task_id}] Inicia proceso...")
try:
tmp_dir = tempfile.mkdtemp()
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
zip_path = os.path.join(tmp_dir, f"backup_{timestamp}.zip")
from db import get_read_conn # Use replica for large reads
with get_read_conn() as conn:
# 1. Count totals for progress
print(f"[BACKUP {task_id}] Contando registros...")
with conn.cursor() as cur:
cur.execute("SELECT count(*) FROM noticias")
total_n = cur.fetchone()[0]
cur.execute("SELECT count(*) FROM traducciones WHERE status = 'done'")
total_t = cur.fetchone()[0]
total_total = total_n + total_t
print(f"[BACKUP {task_id}] Total registros: {total_total}")
cache_set(f"backup_status:{task_id}", {"progress": 0, "total": total_total, "status": "processing"})
processed = 0
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
# --- NOTICIAS ---
print(f"[BACKUP {task_id}] Exportando noticias...")
with zf.open("noticias.jsonl", "w") as bf:
# Wrap binary file for text writing
with io.TextIOWrapper(bf, encoding='utf-8') as f:
with conn.cursor(name=f'bak_n_{task_id}', cursor_factory=extras.DictCursor) as cur:
cur.itersize = 2000
cur.execute("SELECT id, titulo, resumen, url, fecha, imagen_url, fuente_nombre, categoria_id, pais_id FROM noticias")
for row in cur:
item = dict(row)
if item.get("fecha"): item["fecha"] = item["fecha"].isoformat()
f.write(json.dumps(item, ensure_ascii=False) + "\n")
processed += 1
if processed % 2000 == 0:
cache_set(f"backup_status:{task_id}", {"progress": processed, "total": total_total, "status": "processing"})
# --- TRADUCCIONES ---
print(f"[BACKUP {task_id}] Exportando traducciones...")
with zf.open("traducciones.jsonl", "w") as bf:
with io.TextIOWrapper(bf, encoding='utf-8') as f:
with conn.cursor(name=f'bak_t_{task_id}', cursor_factory=extras.DictCursor) as cur:
cur.itersize = 2000
cur.execute("SELECT id, noticia_id, lang_from, lang_to, titulo_trad, resumen_trad, status, created_at FROM traducciones WHERE status = 'done'")
for row in cur:
item = dict(row)
if item.get("created_at"): item["created_at"] = item["created_at"].isoformat()
f.write(json.dumps(item, ensure_ascii=False) + "\n")
processed += 1
if processed % 2000 == 0:
cache_set(f"backup_status:{task_id}", {"progress": processed, "total": total_total, "status": "processing"})
print(f"[BACKUP {task_id}] Finalizado con éxito: {zip_path}")
cache_set(f"backup_status:{task_id}", {
"progress": total_total,
"total": total_total,
"status": "completed",
"file_path": zip_path
}, ttl_seconds=3600)
except Exception as e:
import traceback
error_msg = traceback.format_exc()
print(f"[BACKUP {task_id}] ERROR: {error_msg}")
cache_set(f"backup_status:{task_id}", {"status": "error", "error": str(e)})
@config_bp.route("/restore/noticias", methods=["GET", "POST"])
def restore_noticias():
# Restore keeps the existing synchronous logic; progress reporting could be added later.
# The immediate priority was fixing the client's backup download flow above.
if request.method == "GET":
return render_template("config_restore.html")
file = request.files.get("file")
if not file:
flash("Debes seleccionar un archivo ZIP.", "error")
return redirect(url_for("config.restore_noticias"))
if not file.filename.endswith(".zip"):
flash("El formato debe ser .zip", "error")
return redirect(url_for("config.restore_noticias"))
imported_n = 0
imported_t = 0
tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
file.save(tmp_zip.name)
tmp_zip.close()
try:
with zipfile.ZipFile(tmp_zip.name, "r") as zf:
if "noticias.jsonl" in zf.namelist():
with zf.open("noticias.jsonl") as f:
chunk = []
for line in f:
chunk.append(json.loads(line.decode("utf-8")))
if len(chunk) >= 500:
_import_noticias_chunk(chunk)
imported_n += len(chunk)
chunk = []
if chunk:
_import_noticias_chunk(chunk)
imported_n += len(chunk)
if "traducciones.jsonl" in zf.namelist():
with zf.open("traducciones.jsonl") as f:
chunk = []
for line in f:
chunk.append(json.loads(line.decode("utf-8")))
if len(chunk) >= 500:
_import_traducciones_chunk(chunk)
imported_t += len(chunk)
chunk = []
if chunk:
_import_traducciones_chunk(chunk)
imported_t += len(chunk)
finally:
if os.path.exists(tmp_zip.name):
os.remove(tmp_zip.name)
flash(f"Restauración completada: {imported_n} noticias, {imported_t} traducciones.", "success")
return redirect(url_for("config.config_home"))
def _import_noticias_chunk(chunk):
with get_conn() as conn:
with conn.cursor() as cur:
cur.executemany("""
INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url, fuente_nombre, categoria_id, pais_id)
VALUES (%(id)s, %(titulo)s, %(resumen)s, %(url)s, %(fecha)s, %(imagen_url)s, %(fuente_nombre)s, %(categoria_id)s, %(pais_id)s)
ON CONFLICT (id) DO UPDATE SET
titulo = EXCLUDED.titulo,
resumen = EXCLUDED.resumen
""", chunk)
conn.commit()
def _import_traducciones_chunk(chunk):
with get_conn() as conn:
with conn.cursor() as cur:
cur.executemany("""
INSERT INTO traducciones (id, noticia_id, lang_from, lang_to, titulo_trad, resumen_trad, status, created_at)
VALUES (%(id)s, %(noticia_id)s, %(lang_from)s, %(lang_to)s, %(titulo_trad)s, %(resumen_trad)s, %(status)s, %(created_at)s)
ON CONFLICT (id) DO UPDATE SET
titulo_trad = EXCLUDED.titulo_trad,
resumen_trad = EXCLUDED.resumen_trad
""", chunk)
conn.commit()
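executemany issues one INSERT per row; if restore throughput matters, psycopg2's extras.execute_values batches a whole chunk into a single statement. A hedged alternative sketch (the function name is hypothetical; columns follow the backup format above):
python
# Optional faster variant of _import_noticias_chunk using execute_values.
from psycopg2 import extras
from db import get_conn

def _import_noticias_chunk_fast(chunk):  # hypothetical name
    cols = ("id", "titulo", "resumen", "url", "fecha",
            "imagen_url", "fuente_nombre", "categoria_id", "pais_id")
    values = [tuple(item.get(c) for c in cols) for item in chunk]
    with get_conn() as conn, conn.cursor() as cur:
        extras.execute_values(cur, """
            INSERT INTO noticias (id, titulo, resumen, url, fecha,
                                  imagen_url, fuente_nombre, categoria_id, pais_id)
            VALUES %s
            ON CONFLICT (id) DO UPDATE SET
                titulo = EXCLUDED.titulo,
                resumen = EXCLUDED.resumen
        """, values)
        conn.commit()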
@config_bp.route("/translator")
def translator_config():
return "Pagina de configuracion del modelo (pendiente de implementar)"

141
routers/conflicts.py Normal file

@@ -0,0 +1,141 @@
from flask import Blueprint, render_template, request, flash, redirect, url_for
from db import get_conn, get_read_conn
import psycopg2.extras
from utils.qdrant_search import search_by_keywords
conflicts_bp = Blueprint("conflicts", __name__, url_prefix="/conflicts")
def ensure_table(conn):
with conn.cursor() as cur:
cur.execute("""
CREATE TABLE IF NOT EXISTS conflicts (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
keywords TEXT,
description TEXT,
created_at TIMESTAMP DEFAULT NOW()
);
""")
conn.commit()
@conflicts_bp.route("/")
def index():
with get_conn() as conn:
ensure_table(conn)
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
cur.execute("SELECT * FROM conflicts ORDER BY id DESC")
conflicts = cur.fetchall()
return render_template("conflicts_list.html", conflicts=conflicts)
@conflicts_bp.route("/create", methods=["POST"])
def create():
name = request.form.get("name")
keywords = request.form.get("keywords")
description = request.form.get("description", "")
with get_conn() as conn:
with conn.cursor() as cur:
cur.execute(
"INSERT INTO conflicts (name, keywords, description) VALUES (%s, %s, %s)",
(name, keywords, description)
)
conn.commit()
flash("Conflicto creado correctamente.", "success")
return redirect(url_for("conflicts.index"))
@conflicts_bp.route("/<int:id>")
def timeline(id):
with get_conn() as conn:
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
cur.execute("SELECT * FROM conflicts WHERE id = %s", (id,))
conflict = cur.fetchone()
if not conflict:
flash("Conflicto no encontrado.", "error")
return redirect(url_for("conflicts.index"))
# Keywords are stored as a comma-separated string
kw_raw = conflict['keywords'] or ""
kw_list = [k.strip() for k in kw_raw.split(',') if k.strip()]
noticias = []
if kw_list:
try:
# Use semantic keyword search (much faster and more relevant)
semantic_results = search_by_keywords(
keywords=kw_list,
limit=200,
score_threshold=0.35
)
# Enrich results with data from PostgreSQL
if semantic_results:
news_ids = [r['news_id'] for r in semantic_results]
with get_read_conn() as conn:
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
cur.execute("""
SELECT
t.id AS tr_id,
t.lang_to,
COALESCE(t.titulo_trad, n.titulo) as titulo,
COALESCE(t.resumen_trad, n.resumen) as resumen,
n.id AS noticia_id,
n.fecha,
n.imagen_url,
n.fuente_nombre,
p.nombre as pais
FROM noticias n
LEFT JOIN traducciones t ON n.id = t.noticia_id AND t.lang_to = 'es'
LEFT JOIN paises p ON p.id = n.pais_id
WHERE n.id = ANY(%s)
ORDER BY n.fecha DESC
""", (news_ids,))
noticias = cur.fetchall()
except Exception as e:
print(f"⚠️ Error en búsqueda semántica de conflictos, usando fallback: {e}")
# Fall back to traditional ILIKE search
patterns = [f"%{k}%" for k in kw_list]
with get_read_conn() as conn:
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
cur.execute("""
SELECT
t.id AS tr_id,
t.lang_to,
COALESCE(t.titulo_trad, n.titulo) as titulo,
COALESCE(t.resumen_trad, n.resumen) as resumen,
n.id AS noticia_id,
n.fecha,
n.imagen_url,
n.fuente_nombre,
p.nombre as pais
FROM noticias n
LEFT JOIN traducciones t ON n.id = t.noticia_id AND t.lang_to = 'es'
LEFT JOIN paises p ON p.id = n.pais_id
WHERE
(t.titulo_trad ILIKE ANY(%s) OR n.titulo ILIKE ANY(%s))
OR
(t.resumen_trad ILIKE ANY(%s) OR n.resumen ILIKE ANY(%s))
ORDER BY n.fecha DESC
LIMIT 200
""", (patterns, patterns, patterns, patterns))
noticias = cur.fetchall()
return render_template("conflict_timeline.html", conflict=conflict, noticias=noticias)
@conflicts_bp.route("/delete/<int:id>", methods=["POST"])
def delete(id):
with get_conn() as conn:
with conn.cursor() as cur:
cur.execute("DELETE FROM conflicts WHERE id = %s", (id,))
conn.commit()
flash("Conflicto eliminado.", "success")
return redirect(url_for("conflicts.index"))

494
routers/describe.txt Normal file

@@ -0,0 +1,494 @@
routers/
├── __init__.py    # Python package marker (empty)
├── home.py        # Home page and news search
├── feeds.py       # RSS feed management
├── urls.py        # URL source management
├── noticia.py     # News detail page
├── eventos.py     # Events view by country
└── backup.py      # Feed import/export
__init__.py
Purpose: file required so Python recognizes this directory as a package.
Content: empty or an explanatory comment.
Usage: allows importing blueprints from routers:
python
from routers.home import home_bp
home.py
Purpose: blueprint for the home page and news search.
Base route: / and /home
Blueprints defined:
home_bp = Blueprint("home", __name__)
Routes:
@home_bp.route("/") and @home_bp.route("/home")
Method: GET
Description: home page with the advanced search system.
Supported query parameters (exercised in the sketch after this section):
page: page number (default: 1)
per_page: results per page (default: 20, range: 10-100)
q: search term
categoria_id: filter by category
continente_id: filter by continent
pais_id: filter by country
fecha: filter by date (YYYY-MM-DD)
lang: display language (default: "es")
orig: if present, show only originals without translations
Features:
Pagination: robust system with bounds
Advanced search: uses models.noticias.buscar_noticias()
AJAX support: with X-Requested-With: XMLHttpRequest, returns only _noticias_list.html
Combined filters: all filters can be used simultaneously
Date handling: safe conversion from strings to date
Template context variables:
noticias: list of news items with full data
total_results: total number of results
total_pages: total number of pages
categorias, paises: for the filter dropdowns
tags_por_tr: dictionary of tags per translation
Templates used:
noticias.html: full page (HTML)
_noticias_list.html: fragment for AJAX (news list only)
Special characteristics:
use_tr = not bool(request.args.get("orig")): controls whether translations are shown
lang = (request.args.get("lang") or DEFAULT_TRANSLATION_LANG or DEFAULT_LANG).lower()[:5]: safe language handling
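A hedged usage sketch of the route described above, using Flask's test client. The `from app import app` import is an assumption (adapt to however app.py exposes the application); the parameters follow the list above.
python
# Hypothetical exercise of the home search route with combined filters and
# the AJAX fragment path, via Flask's built-in test client.
from app import app  # assumption: app.py exposes the Flask instance

client = app.test_client()

# Full-page request: search "elecciones" in country 5, page 2, 50 per page.
resp = client.get("/?q=elecciones&pais_id=5&page=2&per_page=50&lang=es")
assert resp.status_code == 200  # renders noticias.html

# AJAX request: same filters, but only the _noticias_list.html fragment.
resp = client.get(
    "/?q=elecciones&pais_id=5",
    headers={"X-Requested-With": "XMLHttpRequest"},
)
assert resp.status_code == 200  # returns the list fragment only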
feeds.py
Purpose: blueprint for full RSS feed management.
Base route: /feeds
Blueprints defined:
feeds_bp = Blueprint("feeds", __name__, url_prefix="/feeds")
Routes:
@feeds_bp.route("/") - list_feeds()
Method: GET
Description: paginated feed listing with advanced filters.
Filter parameters:
pais_id: filter by country
categoria_id: filter by category
estado: "activos", "inactivos", "errores", or empty for all
Characteristics:
Pagination (50 feeds per page)
Totals counter
Ordering: country → category → name
@feeds_bp.route("/add", methods=["GET", "POST"]) - add_feed()
Method: GET and POST
Description: form to add a new feed.
Form fields:
nombre: feed name (required)
descripcion: optional description
url: RSS feed URL (required)
categoria_id: category (select dropdown)
pais_id: country (select dropdown)
idioma: language code (2 letters, optional)
Validations:
idioma is normalized to lowercase and at most 2 characters
Optional fields are converted to None when empty
@feeds_bp.route("/<int:feed_id>/edit", methods=["GET", "POST"]) - edit_feed(feed_id)
Method: GET and POST
Description: edit an existing feed.
Features:
Preloads the feed's current data
Same form as add_feed, but with existing data
Additional field: activo (checkbox)
@feeds_bp.route("/<int:feed_id>/delete") - delete_feed(feed_id)
Method: GET
Description: delete a feed by ID.
Note: plain DELETE with no front-end confirmation (depends on the template).
@feeds_bp.route("/<int:feed_id>/reactivar") - reactivar_feed(feed_id)
Method: GET
Description: reactivate a feed that has accumulated failures.
Action: sets activo=TRUE and fallos=0.
Templates used:
feeds_list.html: main listing
add_feed.html: add form
edit_feed.html: edit form
urls.py
Purpose: blueprint for managing URL sources (not RSS feeds).
Base route: /urls
Blueprints defined:
urls_bp = Blueprint("urls", __name__, url_prefix="/urls")
Routes:
@urls_bp.route("/") - manage_urls()
Method: GET
Description: lists all registered URL sources.
Data shown: ID, name, URL, category, country, language.
@urls_bp.route("/add_source", methods=["GET", "POST"]) - add_url_source()
Method: GET and POST
Description: add or update a URL source.
Unique characteristics:
Uses ON CONFLICT (url) DO UPDATE: if the URL already exists, it is updated (see the sketch after this section)
idioma defaults to "es" when not specified
Same fields as feeds, but for individual URLs
Templates used:
urls_list.html: listing
add_url_source.html: form
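A sketch of the ON CONFLICT upsert mentioned above. The exact table and columns used by urls.py are not shown in this commit, so the statement below illustrates the pattern against the feeds table (whose UNIQUE url constraint is confirmed elsewhere in this commit); treat the helper name as hypothetical.
python
# Illustration of the INSERT ... ON CONFLICT (url) DO UPDATE pattern.
from db import get_conn

def upsert_source(nombre, url, idioma="es"):  # hypothetical helper
    with get_conn() as conn, conn.cursor() as cur:
        cur.execute(
            """
            INSERT INTO feeds (nombre, url, idioma)
            VALUES (%s, %s, %s)
            ON CONFLICT (url) DO UPDATE
            SET nombre = EXCLUDED.nombre,
                idioma = EXCLUDED.idioma
            """,
            (nombre, url, idioma),
        )
        conn.commit()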
noticia.py
Purpose: blueprint for the single-news detail page.
Base route: /noticia
Blueprints defined:
noticia_bp = Blueprint("noticia", __name__)
Routes:
@noticia_bp.route("/noticia") - noticia()
Method: GET
Description: shows the full detail of one news item.
Query parameters:
tr_id: translation ID (takes priority)
id: original news ID (used when there is no tr_id)
Data flow:
With tr_id: fetches combined data from the translation and the original news item
With only id: fetches the original data only
With neither: redirects to home with an error message
Data fetched:
Basic info: title, summary, URL, date, image, source
Translation data (when applicable): languages, translated titles/summaries
Metadata: category, country
Tags: labels associated with the translation
Related news: up to 8, ordered by similarity score
Additional queries (only when a translation exists):
Tags: SELECT tg.valor, tg.tipo FROM tags_noticia...
Related news: SELECT n2.url, n2.titulo... FROM related_noticias...
Templates used:
noticia.html: full detail page
eventos.py
Purpose: blueprint for viewing events grouped by country.
Base route: /eventos_pais
Blueprints defined:
eventos_bp = Blueprint("eventos", __name__, url_prefix="/eventos_pais")
Routes:
@eventos_bp.route("/") - eventos_pais()
Method: GET
Description: lists events (news clusters) filtered by country.
Query parameters:
pais_id: country ID (required to see events)
page: page number (default: 1)
lang: translation language (default: "es")
Features:
Country list: always visible for selection
Paginated events: 30 per page
News per event: grouped under each event
Full data: each news item with original and translated fields
Data structures:
Countries: full list for the dropdown
Events: paginated, with title, dates, and news count
News per event: dictionary {evento_id: [noticias...]}
Complex queries:
Grouping with GROUP BY and MAX(p.nombre)
Multiple JOINs: eventos ↔ traducciones ↔ noticias ↔ paises
Subquery for news per event using ANY(%s)
Context variables:
paises, eventos, noticias_por_evento
pais_nombre: name of the selected country
total_eventos, total_pages, page, lang
Templates used:
eventos_pais.html: main page
backup.py
Purpose: blueprint for importing and exporting feeds as CSV.
Base routes: /backup_feeds and /restore_feeds
Blueprints defined:
backup_bp = Blueprint("backup", __name__)
Routes:
@backup_bp.route("/backup_feeds") - backup_feeds()
Method: GET
Description: exports all feeds to CSV.
Characteristics:
Joins with categorias and paises for readable names
UTF-8 encoding (the importer also tolerates a BOM on read)
Filename: feeds_backup.csv
Uses io.StringIO and io.BytesIO to avoid temporary files (see the sketch after this section)
Exported fields:
All feed fields plus the category and country names
@backup_bp.route("/restore_feeds", methods=["GET", "POST"]) - restore_feeds()
Method: GET and POST
Description: restores feeds from CSV, merging by URL: existing feeds are updated and new ones inserted (the earlier TRUNCATE-based full replacement was removed).
Restore flow:
GET: shows the upload form
POST:
Validates the file and the CSV headers
Processes each row with validation inside a per-row SAVEPOINT, so one bad row does not abort the batch
Statistics: imported, skipped, failed
Validations:
Exact expected headers
URL and name must be non-empty
Safe type conversion (int, bool)
Language normalization (2 lowercase characters)
Data cleanup:
python
row = {k: (v.strip().rstrip("ç") if isinstance(v, str) else v) for k, v in row.items()}
Boolean handling:
python
activo = str(row["activo"]).lower() in ("true", "1", "t", "yes", "y")
Templates used:
restore_feeds.html: upload form
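The in-memory CSV technique noted above, as a standalone hedged sketch (a generic condensation of the export routes, not a verbatim copy; the helper name is illustrative):
python
# Build the CSV as text in StringIO, then hand UTF-8 bytes to send_file via
# BytesIO, so no temporary file ever touches disk.
import csv
import io
from flask import send_file

def csv_response(rows, header, filename):  # hypothetical helper
    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(header)
    writer.writerows(rows)
    output.seek(0)
    return send_file(
        io.BytesIO(output.getvalue().encode("utf-8")),
        mimetype="text/csv",
        as_attachment=True,
        download_name=filename,
    )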
Common Design Patterns
1. Blueprint structure
python
# Standard definition
bp = Blueprint("nombre", __name__, url_prefix="/ruta")
# Registered in app.py
app.register_blueprint(bp)
2. Database connection handling
python
with get_conn() as conn:
    # Use conn for multiple operations
    # conn.autocommit = True if needed
3. Consistent pagination
python
page = max(int(request.args.get("page", 1)), 1)
per_page = 50  # or a variable
offset = (page - 1) * per_page
4. Filter parameter handling
python
where = []
params = []
if pais_id:
    where.append("f.pais_id = %s")
    params.append(int(pais_id))
where_sql = "WHERE " + " AND ".join(where) if where else ""
5. Flash messages
python
flash("Operación exitosa", "success")
flash("Error: algo salió mal", "error")
6. Redirects
python
return redirect(url_for("blueprint.funcion"))
7. Form handling
python
if request.method == "POST":
    # Process data
    return redirect(...)
# GET: show the form
return render_template("form.html", datos=...)
Security and Validation
1. SQL injection
All parameters are passed as %s placeholders through psycopg2
No user input is concatenated into SQL; the only f-string interpolation (where_sql) is built from fixed fragments
2. Input validation (collected in the sketch after this section)
Safe int conversion: int(valor) if valor else None
String cleanup: .strip(), normalization
Ranges: min(max(per_page, 10), 100)
3. File handling
Content-type validation
UTF-8 decoding with BOM handling
io used to avoid temporary files
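The input-validation idioms from point 2, gathered into one hedged sketch (the helper names are illustrative, not from the codebase; only the idioms themselves come from the routers):
python
# Illustrative helpers for the validation idioms listed above.
def to_int_or_none(value):  # hypothetical name
    return int(value) if value else None  # safe int conversion

def clamp_per_page(value, lo=10, hi=100):  # hypothetical name
    return min(max(value, lo), hi)  # bound results-per-page

def norm_lang(value):  # hypothetical name
    # Normalize a language code: lowercase, max 2 chars, None when empty.
    return (value or "").strip().lower()[:2] or None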
Optimizations
1. Efficient JOINs
LEFT JOIN for optional data
GROUP BY where needed
Implicit index usage via ORDER BY
2. Batch operations
Per-row SAVEPOINTs during CSV restore isolate bad rows without aborting the batch
Row-by-row insertion with validation
3. Memory handling
io.StringIO for in-memory CSV
Cursors with DictCursor for access by column name
Blueprint Dependencies
text
home.py
└── uses: models.noticias.buscar_noticias()
└── uses: _extraer_tags_por_traduccion()
feeds.py
└── uses: models.categorias.get_categorias()
└── uses: models.paises.get_paises()
urls.py
└── uses: models.categorias.get_categorias()
└── uses: models.paises.get_paises()
noticia.py
└── direct queries (does not use models/)
eventos.py
└── direct queries (does not use models/)
backup.py
└── direct queries (does not use models/)

203
routers/favoritos.py Normal file

@@ -0,0 +1,203 @@
"""
Favorites router - Save and manage favorite news.
"""
from flask import Blueprint, request, jsonify, session, render_template
from psycopg2 import extras
from db import get_read_conn, get_write_conn
from utils.auth import get_current_user, is_authenticated
import secrets
favoritos_bp = Blueprint("favoritos", __name__, url_prefix="/favoritos")
def get_user_or_session_id():
"""Get user ID if authenticated, otherwise session ID.
Returns:
Tuple of (user_id, session_id)
"""
user = get_current_user()
if user:
return (user['id'], None)
# Anonymous user - use session_id
if "user_session" not in session:
session["user_session"] = secrets.token_hex(16)
return (None, session["user_session"])
def ensure_favoritos_table(conn):
"""Create/update favoritos table to support both users and sessions."""
with conn.cursor() as cur:
# Table is created by init-db scripts, just ensure it exists
cur.execute("""
CREATE TABLE IF NOT EXISTS favoritos (
id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES usuarios(id) ON DELETE CASCADE,
session_id VARCHAR(64),
noticia_id VARCHAR(32) REFERENCES noticias(id) ON DELETE CASCADE,
created_at TIMESTAMP DEFAULT NOW()
);
""")
cur.execute("CREATE INDEX IF NOT EXISTS idx_favoritos_session ON favoritos(session_id);")
cur.execute("CREATE INDEX IF NOT EXISTS idx_favoritos_user_id ON favoritos(user_id);")
# Ensure session_id can be null (for logged in users)
try:
cur.execute("ALTER TABLE favoritos ALTER COLUMN session_id DROP NOT NULL;")
except Exception:
conn.rollback()
else:
conn.commit()
conn.commit()
# ============================================================
# API: Toggle Favorite
# ============================================================
@favoritos_bp.route("/toggle/<noticia_id>", methods=["POST"])
def toggle_favorite(noticia_id):
"""Toggle favorite status for a news item."""
user_id, session_id = get_user_or_session_id()
with get_write_conn() as conn:
ensure_favoritos_table(conn)
with conn.cursor() as cur:
# Check if already favorited (by user_id OR session_id)
if user_id:
cur.execute(
"SELECT id FROM favoritos WHERE user_id = %s AND noticia_id = %s",
(user_id, noticia_id)
)
else:
cur.execute(
"SELECT id FROM favoritos WHERE session_id = %s AND noticia_id = %s",
(session_id, noticia_id)
)
existing = cur.fetchone()
if existing:
# Remove favorite
if user_id:
cur.execute(
"DELETE FROM favoritos WHERE user_id = %s AND noticia_id = %s",
(user_id, noticia_id)
)
else:
cur.execute(
"DELETE FROM favoritos WHERE session_id = %s AND noticia_id = %s",
(session_id, noticia_id)
)
is_favorite = False
else:
# Add favorite
cur.execute(
"INSERT INTO favoritos (user_id, session_id, noticia_id) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING",
(user_id, session_id, noticia_id)
)
is_favorite = True
conn.commit()
return jsonify({"success": True, "is_favorite": is_favorite})
# ============================================================
# API: Check if Favorite
# ============================================================
@favoritos_bp.route("/check/<noticia_id>")
def check_favorite(noticia_id):
"""Check if a news item is favorited."""
user_id, session_id = get_user_or_session_id()
with get_read_conn() as conn:
with conn.cursor() as cur:
if user_id:
cur.execute(
"SELECT id FROM favoritos WHERE user_id = %s AND noticia_id = %s",
(user_id, noticia_id)
)
else:
cur.execute(
"SELECT id FROM favoritos WHERE session_id = %s AND noticia_id = %s",
(session_id, noticia_id)
)
is_favorite = cur.fetchone() is not None
return jsonify({"is_favorite": is_favorite})
# ============================================================
# API: Get User's Favorites IDs
# ============================================================
@favoritos_bp.route("/ids")
def get_favorite_ids():
"""Get list of favorite noticia IDs for current user."""
user_id, session_id = get_user_or_session_id()
with get_read_conn() as conn:
with conn.cursor() as cur:
if user_id:
cur.execute(
"SELECT noticia_id FROM favoritos WHERE user_id = %s",
(user_id,)
)
else:
cur.execute(
"SELECT noticia_id FROM favoritos WHERE session_id = %s",
(session_id,)
)
ids = [row[0] for row in cur.fetchall()]
return jsonify({"ids": ids})
# ============================================================
# Page: View Favorites
# ============================================================
@favoritos_bp.route("/")
def view_favorites():
"""View all favorited news items."""
user_id, session_id = get_user_or_session_id()
user = get_current_user()
with get_read_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
if user_id:
cur.execute("""
SELECT n.id, n.titulo, n.resumen, n.url, n.fecha, n.imagen_url,
n.fuente_nombre, c.nombre AS categoria, p.nombre AS pais,
t.titulo_trad, t.resumen_trad, t.lang_to,
f.created_at AS favorito_at
FROM favoritos f
JOIN noticias n ON n.id = f.noticia_id
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es' AND t.status = 'done'
WHERE f.user_id = %s
ORDER BY f.created_at DESC
LIMIT 100;
""", (user_id,))
else:
cur.execute("""
SELECT n.id, n.titulo, n.resumen, n.url, n.fecha, n.imagen_url,
n.fuente_nombre, c.nombre AS categoria, p.nombre AS pais,
t.titulo_trad, t.resumen_trad, t.lang_to,
f.created_at AS favorito_at
FROM favoritos f
JOIN noticias n ON n.id = f.noticia_id
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es' AND t.status = 'done'
WHERE f.session_id = %s
ORDER BY f.created_at DESC
LIMIT 100;
""", (session_id,))
noticias = cur.fetchall()
return render_template("favoritos.html", noticias=noticias, user=user)

428
routers/feeds.py Normal file

@@ -0,0 +1,428 @@
from flask import Blueprint, render_template, request, redirect, flash, url_for, jsonify
from db import get_conn
from psycopg2 import extras
from models.categorias import get_categorias
from models.paises import get_paises
from utils.feed_discovery import discover_feeds, validate_feed, get_feed_metadata
# Blueprint definition
feeds_bp = Blueprint("feeds", __name__, url_prefix="/feeds")
@feeds_bp.route("/")
def list_feeds():
"""Listado con filtros"""
page = max(int(request.args.get("page", 1)), 1)
per_page = 50
offset = (page - 1) * per_page
pais_id = request.args.get("pais_id")
categoria_id = request.args.get("categoria_id")
estado = request.args.get("estado") or ""
where = []
params = []
if pais_id:
where.append("f.pais_id = %s")
params.append(int(pais_id))
if categoria_id:
where.append("f.categoria_id = %s")
params.append(int(categoria_id))
if estado == "activos":
where.append("f.activo = TRUE")
elif estado == "inactivos":
where.append("f.activo = FALSE")
elif estado == "errores":
where.append("COALESCE(f.fallos, 0) > 0")
where_sql = "WHERE " + " AND ".join(where) if where else ""
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Total count
cur.execute(f"SELECT COUNT(*) FROM feeds f {where_sql}", params)
total_feeds = cur.fetchone()[0]
# Fallen feeds (inactive or at the failure threshold), counted within the same filter context
caidos_condition = "(f.activo = FALSE OR f.fallos >= 5)"
if where_sql:
# where_sql already includes "WHERE ..."
caidos_sql = f"SELECT COUNT(*) FROM feeds f {where_sql} AND {caidos_condition}"
else:
caidos_sql = f"SELECT COUNT(*) FROM feeds f WHERE {caidos_condition}"
cur.execute(caidos_sql, params)
feeds_caidos = cur.fetchone()[0]
total_pages = (total_feeds // per_page) + (1 if total_feeds % per_page else 0)
# Paginated list
cur.execute(
f"""
SELECT
f.id, f.nombre, f.descripcion, f.url,
f.activo, f.fallos, f.last_error,
c.nombre AS categoria,
p.nombre AS pais,
(SELECT COUNT(*) FROM noticias n WHERE n.fuente_nombre = f.nombre) as noticias_count
FROM feeds f
LEFT JOIN categorias c ON c.id = f.categoria_id
LEFT JOIN paises p ON p.id = f.pais_id
{where_sql}
ORDER BY p.nombre NULLS LAST, f.activo DESC, f.fallos ASC, c.nombre NULLS LAST, f.nombre
LIMIT %s OFFSET %s
""",
params + [per_page, offset],
)
feeds = cur.fetchall()
# Data for the filter dropdowns
cur.execute("SELECT id, nombre FROM categorias ORDER BY nombre;")
categorias = cur.fetchall()
cur.execute("SELECT id, nombre FROM paises ORDER BY nombre;")
paises = cur.fetchall()
if request.headers.get("X-Requested-With") == "XMLHttpRequest":
return render_template(
"_feeds_table.html",
feeds=feeds,
total_feeds=total_feeds,
feeds_caidos=feeds_caidos,
total_pages=total_pages,
page=page,
filtro_pais_id=pais_id,
filtro_categoria_id=categoria_id,
filtro_estado=estado,
)
return render_template(
"feeds_list.html",
feeds=feeds,
total_feeds=total_feeds,
feeds_caidos=feeds_caidos,
total_pages=total_pages,
page=page,
categorias=categorias,
paises=paises,
filtro_pais_id=pais_id,
filtro_categoria_id=categoria_id,
filtro_estado=estado,
)
@feeds_bp.route("/add", methods=["GET", "POST"])
def add_feed():
"""Añadir feed"""
with get_conn() as conn:
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
nombre = request.form.get("nombre")
descripcion = request.form.get("descripcion") or None
url = request.form.get("url")
categoria_id = request.form.get("categoria_id")
pais_id = request.form.get("pais_id")
idioma = (request.form.get("idioma") or "").strip().lower()[:2] or None
try:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma)
VALUES (%s, %s, %s, %s, %s, %s)
""",
(
nombre,
descripcion,
url,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
idioma,
),
)
conn.commit()
flash("Feed añadido correctamente.", "success")
return redirect(url_for("feeds.list_feeds"))
except Exception as e:
flash(f"Error al añadir feed: {e}", "error")
return render_template("add_feed.html", categorias=categorias, paises=paises)
@feeds_bp.route("/<int:feed_id>/edit", methods=["GET", "POST"])
def edit_feed(feed_id):
"""Editar feed"""
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT * FROM feeds WHERE id = %s;", (feed_id,))
feed = cur.fetchone()
if not feed:
flash("Feed no encontrado.", "error")
return redirect(url_for("feeds.list_feeds"))
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
nombre = request.form.get("nombre")
descripcion = request.form.get("descripcion") or None
url = request.form.get("url")
categoria_id = request.form.get("categoria_id")
pais_id = request.form.get("pais_id")
idioma = (request.form.get("idioma") or "").strip().lower()[:2] or None
activo = bool(request.form.get("activo"))
try:
cur.execute(
"""
UPDATE feeds
SET nombre=%s, descripcion=%s, url=%s,
categoria_id=%s, pais_id=%s, idioma=%s, activo=%s
WHERE id=%s;
""",
(
nombre,
descripcion,
url,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
idioma,
activo,
feed_id,
),
)
conn.commit()
flash("Feed actualizado.", "success")
return redirect(url_for("feeds.list_feeds"))
except Exception as e:
flash(f"Error al actualizar: {e}", "error")
return render_template("edit_feed.html", feed=feed, categorias=categorias, paises=paises)
@feeds_bp.route("/<int:feed_id>/delete")
def delete_feed(feed_id):
"""Eliminar feed"""
with get_conn() as conn, conn.cursor() as cur:
try:
cur.execute("DELETE FROM feeds WHERE id=%s;", (feed_id,))
conn.commit()
flash("Feed eliminado.", "success")
except Exception as e:
flash(f"No se pudo eliminar: {e}", "error")
return redirect(url_for("feeds.list_feeds"))
@feeds_bp.route("/<int:feed_id>/reactivar")
def reactivar_feed(feed_id):
"""Reactivar feed KO"""
with get_conn() as conn, conn.cursor() as cur:
try:
cur.execute(
"UPDATE feeds SET activo=TRUE, fallos=0 WHERE id=%s;",
(feed_id,),
)
conn.commit()
flash("Feed reactivado.", "success")
except Exception as e:
flash(f"No se pudo reactivar: {e}", "error")
return redirect(url_for("feeds.list_feeds"))
@feeds_bp.route("/discover", methods=["GET", "POST"])
def discover_feed():
"""Descubrir feeds RSS desde una URL"""
discovered_feeds = []
source_url = ""
with get_conn() as conn:
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
source_url = request.form.get("source_url", "").strip()
if not source_url:
flash("Por favor, ingresa una URL válida.", "error")
else:
try:
# Discover feeds from the URL
discovered_feeds = discover_feeds(source_url, timeout=15)
if not discovered_feeds:
flash(f"No se encontraron feeds RSS en la URL: {source_url}", "warning")
else:
# Check which feeds already exist in DB
found_urls = [f['url'] for f in discovered_feeds]
existing_urls = set()
try:
with conn.cursor() as cur:
cur.execute("SELECT url FROM feeds WHERE url = ANY(%s)", (found_urls,))
rows = cur.fetchall()
existing_urls = {r[0] for r in rows}
except Exception as db_e:
# Fallback if DB fails, though unlikely
print(f"Error checking existing feeds: {db_e}")
for feed in discovered_feeds:
feed['exists'] = feed['url'] in existing_urls
new_count = len(discovered_feeds) - len(existing_urls)
flash(f"Feeds disponibles: {new_count} de {len(discovered_feeds)} encontrados.", "success")
except Exception as e:
flash(f"Error al descubrir feeds: {e}", "error")
return render_template(
"discover_feeds.html",
discovered_feeds=discovered_feeds,
source_url=source_url,
categorias=categorias,
paises=paises
)
@feeds_bp.route("/discover_and_add", methods=["POST"])
def discover_and_add():
"""Añadir múltiples feeds descubiertos"""
selected_feeds = request.form.getlist("selected_feeds")
categoria_id = request.form.get("categoria_id")
pais_id = request.form.get("pais_id")
idioma = (request.form.get("idioma") or "").strip().lower()[:2] or None
if not selected_feeds:
flash("No se seleccionó ningún feed.", "warning")
return redirect(url_for("feeds.discover_feed"))
added_count = 0
errors = []
with get_conn() as conn:
for feed_url in selected_feeds:
try:
# Get individual settings for this feed
# The form uses the feed URL as part of the field name
item_cat_id = request.form.get(f"cat_{feed_url}")
item_country_id = request.form.get(f"country_{feed_url}")
item_lang = request.form.get(f"lang_{feed_url}")
# Get feed metadata
metadata = get_feed_metadata(feed_url, timeout=10)
if not metadata:
errors.append(f"No se pudo obtener metadata del feed: {feed_url}")
continue
# Use context title from discovery if available, otherwise use metadata title
context_title = request.form.get(f"context_{feed_url}")
nombre = context_title if context_title else metadata.get('title', 'Feed sin título')
descripcion = metadata.get('description', '')
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma)
VALUES (%s, %s, %s, %s, %s, %s)
ON CONFLICT (url) DO NOTHING
""",
(
nombre,
descripcion[:500] if descripcion else None,
feed_url,
int(item_cat_id) if item_cat_id else None,
int(item_country_id) if item_country_id else None,
(item_lang or "").strip().lower()[:2] or None,
),
)
if cur.rowcount > 0:
added_count += 1
conn.commit()
except Exception as e:
errors.append(f"Error al añadir {feed_url}: {e}")
is_ajax = request.headers.get("X-Requested-With") == "XMLHttpRequest"
if added_count > 0:
msg = f"Se añadieron {added_count} feeds correctamente."
if not is_ajax:
flash(msg, "success")
else:
msg = "No se añadieron feeds nuevos."
if not is_ajax and not errors:
# Show a generic warning only for non-AJAX requests with no specific errors
flash(msg, "warning")
if errors:
for error in errors[:5]:  # show only the first 5 errors
if not is_ajax:
flash(error, "error")
if is_ajax:
return jsonify({
"success": added_count > 0,
"added_count": added_count,
"message": msg,
"errors": errors
})
return redirect(url_for("feeds.list_feeds"))
@feeds_bp.route("/api/validate", methods=["POST"])
def api_validate_feed():
"""API endpoint para validar una URL de feed"""
data = request.get_json()
feed_url = data.get("url", "").strip()
if not feed_url:
return jsonify({"error": "URL no proporcionada"}), 400
try:
feed_info = validate_feed(feed_url, timeout=10)
if not feed_info:
return jsonify({"error": "No se pudo validar el feed"}), 400
return jsonify(feed_info), 200
except Exception as e:
return jsonify({"error": str(e)}), 500
@feeds_bp.route("/api/discover", methods=["POST"])
def api_discover_feeds():
"""API endpoint para descubrir feeds desde una URL"""
data = request.get_json()
source_url = data.get("url", "").strip()
if not source_url:
return jsonify({"error": "URL no proporcionada"}), 400
try:
discovered = discover_feeds(source_url, timeout=15)
return jsonify({"feeds": discovered, "count": len(discovered)}), 200
except Exception as e:
return jsonify({"error": str(e)}), 500

294
routers/home.py Normal file

@@ -0,0 +1,294 @@
from flask import Blueprint, render_template, request
from datetime import datetime
from psycopg2 import extras
from db import get_read_conn, get_write_conn
from utils.auth import get_current_user
from config import DEFAULT_TRANSLATION_LANG, DEFAULT_LANG, NEWS_PER_PAGE_DEFAULT
from models.categorias import get_categorias
from models.paises import get_paises
from models.noticias import buscar_noticias, buscar_noticias_semantica
from cache import cached
home_bp = Blueprint("home", __name__)
@home_bp.route("/")
@home_bp.route("/home")
def home():
page = max(int(request.args.get("page", 1)), 1)
per_page = int(request.args.get("per_page", NEWS_PER_PAGE_DEFAULT))
per_page = min(max(per_page, 10), 100)
q = (request.args.get("q") or "").strip()
categoria_id = request.args.get("categoria_id")
continente_id = request.args.get("continente_id")
pais_id = request.args.get("pais_id")
fecha_str = request.args.get("fecha") or ""
lang = (request.args.get("lang") or DEFAULT_TRANSLATION_LANG or DEFAULT_LANG).lower()[:5]
use_tr = not bool(request.args.get("orig"))
fecha_filtro = None
if fecha_str:
try:
fecha_filtro = datetime.strptime(fecha_str, "%Y-%m-%d").date()
except ValueError:
fecha_filtro = None
from utils.qdrant_search import semantic_search
# Semantic search defaults to ON when a query is present.
# HTML checkboxes send nothing when unchecked and "on" when checked, so a
# missing "semantic" param means "use the default", while an empty string
# (e.g. from a hidden input) or an explicit "false"/"0"/"off" disables it.
raw_semantic = request.args.get("semantic")
if raw_semantic is None:
use_semantic = True # Default to semantic if not specified
elif raw_semantic == "" or raw_semantic.lower() in ["false", "0", "off"]:
use_semantic = False
else:
use_semantic = True
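# Worked examples of the rule above:
# /home                -> use_semantic = True   (param absent, default)
# /home?semantic=      -> use_semantic = False  (empty hidden input)
# /home?semantic=0     -> use_semantic = False  (explicit off)
# /home?semantic=on    -> use_semantic = True   (checked checkbox)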
with get_read_conn() as conn:
conn.autocommit = True
categorias = get_categorias(conn)
paises = get_paises(conn)
noticias = []
total_results = 0
total_pages = 0
tags_por_tr = {}
# 1. Try semantic search if there is a query and it is enabled
semantic_success = False
if use_semantic and q:
try:
# Fetch extra results to "fill" the page in case some IDs can't be resolved
limit_fetch = per_page * 2
sem_results = semantic_search(
query=q,
limit=limit_fetch,  # over-fetch to be safe
score_threshold=0.30
)
if sem_results:
# Extract the news IDs
news_ids = [r['news_id'] for r in sem_results]
# Fetch the full rows from PostgreSQL (same approach as search.py)
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
query_sql = """
SELECT
n.id,
n.titulo,
n.resumen,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
c.nombre AS categoria,
p.nombre AS pais,
-- translations
t.id AS traduccion_id,
t.titulo_trad AS titulo_traducido,
t.resumen_trad AS resumen_traducido,
CASE WHEN t.id IS NOT NULL THEN TRUE ELSE FALSE END AS tiene_traduccion,
-- originals
n.titulo AS titulo_original,
n.resumen AS resumen_original
FROM noticias n
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
LEFT JOIN traducciones t
ON t.noticia_id = n.id
AND t.lang_to = %s
AND t.status = 'done'
WHERE n.id = ANY(%s)
"""
cur.execute(query_sql, (lang, news_ids))
rows = cur.fetchall()
# Convert to a list so we can sort by date
rows_list = list(rows)
# Sort chronologically (most recent first)
sorted_rows = sorted(
rows_list,
key=lambda x: x['fecha'] if x['fecha'] else datetime.min,
reverse=True
)
# Manual pagination over the sorted results.
# Caveat: semantic_search already returned (roughly) the global top hits,
# and utils/qdrant_search.py has no offset parameter, so deep pagination
# is not possible here -- only page 1 is accurate. Real page > 1 support
# would need a Qdrant-side offset (see the sketch after this route). For
# now we serve the first batch of semantic results as a single page.
if len(sorted_rows) > 0:
noticias = sorted_rows
total_results = len(noticias)  # approximate
total_pages = 1  # simple Qdrant search does not paginate properly yet
# Extract tags
tr_ids = [n["traduccion_id"] for n in noticias if n["traduccion_id"]]
from models.noticias import _extraer_tags_por_traduccion
tags_por_tr = _extraer_tags_por_traduccion(cur, tr_ids)
semantic_success = True
except Exception as e:
print(f"⚠️ Error en semántica home, fallback: {e}")
semantic_success = False
# 2. If semantic search did not run (failed, no query, or disabled), use the traditional search
if not semantic_success:
noticias, total_results, total_pages, tags_por_tr = buscar_noticias(
conn=conn,
page=page,
per_page=per_page,
q=q,
categoria_id=categoria_id,
continente_id=continente_id,
pais_id=pais_id,
fecha=fecha_filtro,
lang=lang,
use_tr=use_tr,
)
# Record search history for logged-in users (only on first page to avoid dupes)
if (q or categoria_id or pais_id) and page == 1:
user = get_current_user()
if user:
try:
with get_write_conn() as w_conn:
with w_conn.cursor() as w_cur:
# Check if it's the same as the last search to avoid immediate duplicates
w_cur.execute("""
SELECT query, pais_id, categoria_id
FROM search_history
WHERE user_id = %s
ORDER BY searched_at DESC LIMIT 1
""", (user['id'],))
last_search = w_cur.fetchone()
current_search = (q or None, int(pais_id) if pais_id else None, int(categoria_id) if categoria_id else None)
if not last_search or (last_search[0], last_search[1], last_search[2]) != current_search:
w_cur.execute("""
INSERT INTO search_history (user_id, query, pais_id, categoria_id, results_count)
VALUES (%s, %s, %s, %s, %s)
""", (user['id'], current_search[0], current_search[1], current_search[2], total_results))
w_conn.commit()
except Exception as e:
# Log error but don't break the page load
print(f"Error saving search history: {e}")
pass
user = get_current_user()
recent_searches_with_results = []
if user and not q and not categoria_id and not pais_id and page == 1:
with get_read_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Fetch unique latest searches using DISTINCT ON
cur.execute("""
SELECT sub.id, query, pais_id, categoria_id, results_count, searched_at,
p.nombre as pais_nombre, c.nombre as categoria_nombre
FROM (
SELECT DISTINCT ON (COALESCE(query, ''), COALESCE(pais_id, 0), COALESCE(categoria_id, 0))
id, query, pais_id, categoria_id, results_count, searched_at
FROM search_history
WHERE user_id = %s
ORDER BY COALESCE(query, ''), COALESCE(pais_id, 0), COALESCE(categoria_id, 0), searched_at DESC
) sub
LEFT JOIN paises p ON p.id = sub.pais_id
LEFT JOIN categorias c ON c.id = sub.categoria_id
ORDER BY searched_at DESC
LIMIT 6
""", (user['id'],))
recent_searches = cur.fetchall()
for s in recent_searches:
# Fetch top 6 news for this search
news_items, _, _, _ = buscar_noticias(
conn=conn,
page=1,
per_page=6,
q=s['query'] or "",
pais_id=s['pais_id'],
categoria_id=s['categoria_id'],
lang=lang,
use_tr=use_tr,
skip_count=True
)
recent_searches_with_results.append({
'id': s['id'],
'query': s['query'],
'pais_id': s['pais_id'],
'pais_nombre': s['pais_nombre'],
'categoria_id': s['categoria_id'],
'categoria_nombre': s['categoria_nombre'],
'results_count': s['results_count'],
'searched_at': s['searched_at'],
'noticias': news_items
})
context = dict(
noticias=noticias,
total_results=total_results,
total_pages=total_pages,
page=page,
per_page=per_page,
categorias=categorias,
paises=paises,
q=q,
cat_id=int(categoria_id) if categoria_id else None,
pais_id=int(pais_id) if pais_id else None,
fecha_filtro=fecha_str,
lang=lang,
use_tr=use_tr,
use_semantic=use_semantic,
tags_por_tr=tags_por_tr,
recent_searches_with_results=recent_searches_with_results,
)
if request.headers.get("X-Requested-With") == "XMLHttpRequest":
return render_template("_noticias_list.html", **context)
return render_template("noticias.html", **context)
@home_bp.route("/delete_search/<int:search_id>", methods=["POST"])
def delete_search(search_id):
user = get_current_user()
if not user:
return {"error": "No autenticado"}, 401
try:
with get_write_conn() as conn:
with conn.cursor() as cur:
# Direct deletion ensuring ownership
cur.execute(
"DELETE FROM search_history WHERE id = %s AND user_id = %s",
(search_id, user["id"])
)
conn.commit()
return {"success": True}
except Exception as e:
print(f"Error deleting search {search_id}: {e}")
return {"error": str(e)}, 500

116
routers/noticia.py Normal file

@@ -0,0 +1,116 @@
from flask import Blueprint, render_template, request, redirect, flash, url_for
from db import get_read_conn
from psycopg2 import extras
noticia_bp = Blueprint("noticia", __name__)
@noticia_bp.route("/noticia")
def noticia():
tr_id = request.args.get("tr_id")
noticia_id = request.args.get("id")
if not tr_id and not noticia_id:
flash("No se ha indicado ninguna noticia.", "warning")
return redirect(url_for("home.home"))
with get_read_conn() as conn:
conn.autocommit = True
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
dato = None
if tr_id:
cur.execute(
"""
SELECT
t.id AS traduccion_id,
t.lang_from,
t.lang_to,
t.titulo_trad,
t.resumen_trad,
n.id AS noticia_id,
n.titulo AS titulo_orig,
n.resumen AS resumen_orig,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
c.nombre AS categoria,
p.nombre AS pais
FROM traducciones t
JOIN noticias n ON n.id = t.noticia_id
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
WHERE t.id = %s
""",
(int(tr_id),),
)
dato = cur.fetchone()
else:
cur.execute(
"""
SELECT
NULL AS traduccion_id,
NULL AS lang_from,
NULL AS lang_to,
NULL AS titulo_trad,
NULL AS resumen_trad,
n.id AS noticia_id,
n.titulo AS titulo_orig,
n.resumen AS resumen_orig,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
c.nombre AS categoria,
p.nombre AS pais
FROM noticias n
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
WHERE n.id = %s
""",
(noticia_id,),
)
dato = cur.fetchone()
tags = []
relacionadas = []
if dato and dato["traduccion_id"]:
cur.execute(
"""
SELECT tg.valor, tg.tipo
FROM tags_noticia tn
JOIN tags tg ON tg.id = tn.tag_id
WHERE tn.traduccion_id = %s
ORDER BY tg.tipo, tg.valor;
""",
(dato["traduccion_id"],),
)
tags = cur.fetchall()
cur.execute(
"""
SELECT
n2.url,
n2.titulo,
n2.fecha,
n2.imagen_url,
n2.fuente_nombre,
rn.score,
t2.titulo_trad,
t2.id AS related_tr_id
FROM related_noticias rn
JOIN traducciones t2 ON t2.id = rn.related_traduccion_id
JOIN noticias n2 ON n2.id = t2.noticia_id
WHERE rn.traduccion_id = %s
ORDER BY rn.score DESC
LIMIT 8;
""",
(dato["traduccion_id"],),
)
relacionadas = cur.fetchall()
return render_template("noticia.html", dato=dato, tags=tags, relacionadas=relacionadas)

44
routers/notifications.py Normal file

@@ -0,0 +1,44 @@
"""
Notifications router - Check for new important news.
"""
from flask import Blueprint, jsonify, request
from db import get_conn
from datetime import datetime
notifications_bp = Blueprint("notifications", __name__, url_prefix="/api/notifications")
@notifications_bp.route("/check")
def check_notifications():
"""Check for new news since a given timestamp."""
last_check = request.args.get("last_check")
if not last_check:
return jsonify({"has_news": False, "timestamp": datetime.utcnow().isoformat()})
try:
# Check for news created after last_check
# We define "important" as having translation or high score (if score existed)
# For now, just any new news to demonstrate functionality
with get_conn() as conn:
with conn.cursor() as cur:
cur.execute("""
SELECT COUNT(*), MAX(fecha)
FROM noticias
WHERE fecha > %s
""", (last_check,))
row = cur.fetchone()
count = row[0]
latest = row[1]
if count > 0:
return jsonify({
"has_news": True,
"count": count,
"timestamp": latest.isoformat() if latest else datetime.utcnow().isoformat(),
"message": f"¡{count} noticias nuevas encontradas!"
})
except Exception as e:
print(f"Error checking notifications: {e}")
return jsonify({"has_news": False, "timestamp": datetime.utcnow().isoformat()})

325
routers/parrillas.py Normal file

@@ -0,0 +1,325 @@
"""
Router para gestionar parrill
as de videos de noticias.
"""
from flask import Blueprint, render_template, request, jsonify, redirect, url_for, flash
from db import get_conn
from psycopg2 import extras
from datetime import datetime, timedelta
import logging
logger = logging.getLogger(__name__)
parrillas_bp = Blueprint("parrillas", __name__, url_prefix="/parrillas")
@parrillas_bp.route("/")
def index():
"""Dashboard principal de parrillas."""
with get_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Obtener todas las parrillas
cur.execute("""
SELECT
p.*,
pa.nombre as pais_nombre,
c.nombre as categoria_nombre,
(SELECT COUNT(*) FROM video_generados WHERE parrilla_id = p.id) as total_videos
FROM video_parrillas p
LEFT JOIN paises pa ON pa.id = p.pais_id
LEFT JOIN categorias c ON c.id = p.categoria_id
ORDER BY p.created_at DESC
""")
parrillas = cur.fetchall()
return render_template("parrillas/index.html", parrillas=parrillas)
@parrillas_bp.route("/nueva", methods=["GET", "POST"])
def nueva():
"""Crear una nueva parrilla."""
if request.method == "GET":
# Cargar datos para el formulario
with get_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("SELECT id, nombre FROM paises ORDER BY nombre")
paises = cur.fetchall()
cur.execute("SELECT id, nombre FROM categorias ORDER BY nombre")
categorias = cur.fetchall()
return render_template("parrillas/form.html",
paises=paises,
categorias=categorias)
# POST: create the parrilla
try:
data = request.form
with get_conn() as conn:
with conn.cursor() as cur:
cur.execute("""
INSERT INTO video_parrillas (
nombre, descripcion, tipo_filtro,
pais_id, categoria_id, entidad_nombre, entidad_tipo,
max_noticias, duracion_maxima, idioma_voz,
template, include_images, include_subtitles,
frecuencia, activo
) VALUES (
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
) RETURNING id
""", (
data.get('nombre'),
data.get('descripcion'),
data.get('tipo_filtro'),
data.get('pais_id') or None,
data.get('categoria_id') or None,
data.get('entidad_nombre') or None,
data.get('entidad_tipo') or None,
int(data.get('max_noticias', 5)),
int(data.get('duracion_maxima', 180)),
data.get('idioma_voz', 'es'),
data.get('template', 'standard'),
data.get('include_images') == 'on',
data.get('include_subtitles') == 'on',
data.get('frecuencia', 'manual'),
data.get('activo') == 'on'
))
parrilla_id = cur.fetchone()[0]
conn.commit()
flash(f"Parrilla '{data.get('nombre')}' creada exitosamente", "success")
return redirect(url_for('parrillas.ver', id=parrilla_id))
except Exception as e:
logger.error(f"Error creating parrilla: {e}", exc_info=True)
flash(f"Error al crear parrilla: {str(e)}", "error")
return redirect(url_for('parrillas.nueva'))
@parrillas_bp.route("/<int:id>")
def ver(id):
"""Ver detalles de una parrilla específica."""
with get_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Obtener parrilla
cur.execute("""
SELECT
p.*,
pa.nombre as pais_nombre,
c.nombre as categoria_nombre
FROM video_parrillas p
LEFT JOIN paises pa ON pa.id = p.pais_id
LEFT JOIN categorias c ON c.id = p.categoria_id
WHERE p.id = %s
""", (id,))
parrilla = cur.fetchone()
if not parrilla:
flash("Parrilla no encontrada", "error")
return redirect(url_for('parrillas.index'))
# Fetch the generated videos
cur.execute("""
SELECT * FROM video_generados
WHERE parrilla_id = %s
ORDER BY fecha_generacion DESC
LIMIT 50
""", (id,))
videos = cur.fetchall()
return render_template("parrillas/detail.html", parrilla=parrilla, videos=videos)
@parrillas_bp.route("/api/<int:id>/preview")
def preview_noticias(id):
"""Preview de noticias que se incluirían en el siguiente video."""
with get_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Obtener configuración de parrilla
cur.execute("SELECT * FROM video_parrillas WHERE id = %s", (id,))
parrilla = cur.fetchone()
if not parrilla:
return jsonify({"error": "Parrilla no encontrada"}), 404
# Build the query from the configured filters
where_clauses = []
params = []
if parrilla['pais_id']:
where_clauses.append("n.pais_id = %s")
params.append(parrilla['pais_id'])
if parrilla['categoria_id']:
where_clauses.append("n.categoria_id = %s")
params.append(parrilla['categoria_id'])
if parrilla['entidad_nombre']:
# Filter by entity
where_clauses.append("""
EXISTS (
SELECT 1 FROM tags_noticia tn
JOIN tags t ON t.id = tn.tag_id
WHERE tn.traduccion_id = tr.id
AND t.tipo = %s
AND t.valor ILIKE %s
)
""")
params.append(parrilla['entidad_tipo'])
params.append(f"%{parrilla['entidad_nombre']}%")
# Only news from today or yesterday
where_clauses.append("n.fecha >= NOW() - INTERVAL '1 day'")
where_sql = " AND ".join(where_clauses) if where_clauses else "1=1"
# Fetch the news items
cur.execute(f"""
SELECT
n.id,
n.titulo,
n.imagen_url,
n.fecha,
tr.titulo_trad,
tr.resumen_trad,
LENGTH(tr.resumen_trad) as longitud_texto
FROM noticias n
LEFT JOIN traducciones tr ON tr.noticia_id = n.id AND tr.lang_to = %s AND tr.status = 'done'
WHERE {where_sql}
AND tr.id IS NOT NULL
ORDER BY n.fecha DESC
LIMIT %s
""", [parrilla['idioma_voz']] + params + [parrilla['max_noticias']])
noticias = cur.fetchall()
return jsonify({
"noticias": [dict(n) for n in noticias],
"total": len(noticias),
"config": {
"max_noticias": parrilla['max_noticias'],
"duracion_maxima": parrilla['duracion_maxima']
}
})
@parrillas_bp.route("/api/<int:id>/generar", methods=["POST"])
def generar_video(id):
"""Iniciar generación de video para una parrilla."""
try:
with get_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Verificar que la parrilla existe
cur.execute("SELECT * FROM video_parrillas WHERE id = %s", (id,))
parrilla = cur.fetchone()
if not parrilla:
return jsonify({"error": "Parrilla no encontrada"}), 404
# Create the video record
cur.execute("""
INSERT INTO video_generados (
parrilla_id, titulo, descripcion, status
) VALUES (
%s, %s, %s, 'pending'
) RETURNING id
""", (
id,
f"{parrilla['nombre']} - {datetime.now().strftime('%Y-%m-%d %H:%M')}",
f"Video generado automáticamente para {parrilla['nombre']}"
))
video_id = cur.fetchone()[0]
# Update the last-generation timestamp
cur.execute("""
UPDATE video_parrillas
SET ultima_generacion = NOW()
WHERE id = %s
""", (id,))
conn.commit()
# Launch the generation process in the background
import subprocess
import sys
# Run the generator script with the parrilla ID as argument;
# Popen avoids blocking the HTTP response (fire and forget)
cmd = [sys.executable, "generar_videos_noticias.py", str(id)]
subprocess.Popen(cmd, cwd="/app")
return jsonify({
"success": True,
"video_id": video_id,
"message": "Generación de video iniciada en segundo plano"
})
except Exception as e:
logger.error(f"Error queuing video: {e}", exc_info=True)
return jsonify({"error": str(e)}), 500
@parrillas_bp.route("/api/<int:id>", methods=["DELETE"])
def eliminar(id):
"""Eliminar una parrilla."""
try:
with get_conn() as conn:
with conn.cursor() as cur:
cur.execute("DELETE FROM video_parrillas WHERE id = %s", (id,))
conn.commit()
return jsonify({"success": True})
except Exception as e:
logger.error(f"Error deleting parrilla: {e}", exc_info=True)
return jsonify({"error": str(e)}), 500
@parrillas_bp.route("/api/<int:id>/toggle", methods=["POST"])
def toggle_activo(id):
"""Activar/desactivar una parrilla."""
try:
with get_conn() as conn:
with conn.cursor() as cur:
cur.execute("""
UPDATE video_parrillas
SET activo = NOT activo
WHERE id = %s
RETURNING activo
""", (id,))
nuevo_estado = cur.fetchone()[0]
conn.commit()
return jsonify({"success": True, "activo": nuevo_estado})
except Exception as e:
logger.error(f"Error toggling parrilla: {e}", exc_info=True)
return jsonify({"error": str(e)}), 500
@parrillas_bp.route("/files/<int:video_id>/<filename>")
def serve_file(video_id, filename):
"""Servir archivos generados (audio, script, srt)."""
from flask import send_from_directory
import os
# Directorio base de videos
base_dir = "/app/data/videos"
video_dir = os.path.join(base_dir, str(video_id))
# Validar que sea un archivo permitido para evitar Path Traversal
allowed_files = ['audio.wav', 'script.txt', 'subtitles.srt', 'generation.log']
if filename not in allowed_files:
logger.warning(f"File download attempt blocked: {filename}")
return "File not allowed", 403
full_path = os.path.join(video_dir, filename)
if not os.path.exists(full_path):
logger.error(f"File not found: {full_path}")
return "File not found", 404
try:
return send_from_directory(video_dir, filename)
except Exception as e:
logger.error(f"Error serving file {full_path}: {e}")
return f"Error serving file: {e}", 500

88
routers/pdf.py Normal file

@@ -0,0 +1,88 @@
"""
PDF Export router.
"""
from flask import Blueprint, make_response, render_template, url_for
from db import get_conn
from psycopg2 import extras
from weasyprint import HTML
import logging
import re
from io import BytesIO
logger = logging.getLogger(__name__)
pdf_bp = Blueprint("pdf", __name__, url_prefix="/pdf")
def clean_text(text):
"""Clean text from problematic characters for PDF generation."""
if not text:
return ""
# Remove <unk> tokens
text = text.replace('<unk>', '')
text = text.replace('<EFBFBD>', '')
# Remove other problematic Unicode characters
text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F-\x9F]', '', text)
return text.strip()
@pdf_bp.route("/noticia/<noticia_id>")
def export_noticia(noticia_id):
"""Exportar noticia a PDF."""
try:
with get_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("""
SELECT
n.*,
t.titulo_trad, t.resumen_trad, t.lang_to,
c.nombre as categoria_nombre,
p.nombre as pais_nombre
FROM noticias n
LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.status = 'done' AND t.lang_to = 'es'
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
WHERE n.id = %s
""", (noticia_id,))
noticia = cur.fetchone()
if not noticia:
return "Noticia no encontrada", 404
# Prepare data for template
d = dict(noticia)
# Use translated content if available and clean it
titulo = clean_text(d.get('titulo_trad') or d.get('titulo', ''))
resumen = clean_text(d.get('resumen_trad') or d.get('resumen', ''))
# Don't include external images to avoid SSL/network errors
# imagen_url = d.get('imagen_url') if d.get('imagen_url', '').startswith('http') else None
html_content = render_template(
"pdf_template.html",
titulo=titulo,
resumen=resumen,
fecha=d.get('fecha', ''),
fuente=d.get('fuente_nombre', ''),  # this column lives directly on noticias
categoria=d.get('categoria_nombre', ''),
url=d.get('url', ''),
imagen_url=None # Disable images for now to avoid errors
)
# Convert to PDF using WeasyPrint
logger.info(f"Generating PDF for noticia {noticia_id}")
# Create PDF in memory
pdf_file = BytesIO()
HTML(string=html_content).write_pdf(pdf_file)
pdf_bytes = pdf_file.getvalue()
response = make_response(pdf_bytes)
response.headers['Content-Type'] = 'application/pdf'
response.headers['Content-Disposition'] = f'attachment; filename=noticia_{noticia_id}.pdf'
logger.info(f"PDF generated successfully for noticia {noticia_id}")
return response
except Exception as e:
logger.error(f"Error generando PDF para noticia {noticia_id}: {str(e)}", exc_info=True)
return f"Error generando PDF: {str(e)}", 500

76
routers/resumen.py Normal file

@@ -0,0 +1,76 @@
"""
Resumen router - Daily summary of news.
"""
from flask import Blueprint, render_template, request
from psycopg2 import extras
from db import get_conn
from datetime import datetime, timedelta
resumen_bp = Blueprint("resumen", __name__, url_prefix="/resumen")
@resumen_bp.route("/")
def diario():
"""Daily summary page."""
# Default to today
date_str = request.args.get("date")
if date_str:
try:
target_date = datetime.strptime(date_str, "%Y-%m-%d").date()
except ValueError:
target_date = datetime.utcnow().date()
else:
target_date = datetime.utcnow().date()
prev_date = target_date - timedelta(days=1)
next_date = target_date + timedelta(days=1)
if next_date > datetime.utcnow().date():
next_date = None
with get_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Fetch top news for the day grouped by category
# We'll limit to 5 per category to keep it concise
cur.execute("""
WITH ranked_news AS (
SELECT
n.id, n.titulo, n.resumen, n.url, n.fecha, n.imagen_url, n.fuente_nombre,
c.id as cat_id, c.nombre as categoria,
t.titulo_trad, t.resumen_trad,
ROW_NUMBER() OVER (PARTITION BY n.categoria_id ORDER BY n.fecha DESC) as rn
FROM noticias n
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN traducciones t ON t.noticia_id = n.id
AND t.lang_to = 'es' AND t.status = 'done'
WHERE n.fecha >= %s AND n.fecha < %s + INTERVAL '1 day'
)
SELECT * FROM ranked_news WHERE rn <= 5 ORDER BY categoria, rn
""", (target_date, target_date))
rows = cur.fetchall()
# Group by category
noticias_by_cat = {}
for r in rows:
cat = r["categoria"] or "Sin Categoría"
if cat not in noticias_by_cat:
noticias_by_cat[cat] = []
noticias_by_cat[cat].append({
"id": r["id"],
"titulo": r["titulo_trad"] or r["titulo"],
"resumen": r["resumen_trad"] or r["resumen"],
"url": r["url"],
"fecha": r["fecha"],
"imagen_url": r["imagen_url"],
"fuente": r["fuente_nombre"]
})
return render_template(
"resumen.html",
noticias_by_cat=noticias_by_cat,
current_date=target_date,
prev_date=prev_date,
next_date=next_date
)

186
routers/rss.py Normal file

@@ -0,0 +1,186 @@
"""
RSS Feed router - Generate custom RSS feeds with filters.
"""
from flask import Blueprint, request, Response
from psycopg2 import extras
from db import get_read_conn
from datetime import datetime
import html
rss_bp = Blueprint("rss", __name__, url_prefix="/rss")
def escape_xml(text):
"""Escape text for XML."""
if not text:
return ""
return html.escape(str(text))
def build_rss_xml(title, description, link, items):
"""Build RSS 2.0 XML feed."""
now = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")
xml = f'''<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>{escape_xml(title)}</title>
<description>{escape_xml(description)}</description>
<link>{escape_xml(link)}</link>
<lastBuildDate>{now}</lastBuildDate>
<language>es</language>
'''
for item in items:
pub_date = ""
if item.get("fecha"):
try:
pub_date = item["fecha"].strftime("%a, %d %b %Y %H:%M:%S +0000")
except Exception:
pass
xml += f''' <item>
<title>{escape_xml(item.get("titulo", ""))}</title>
<description><![CDATA[{item.get("resumen", "")}]]></description>
<link>{escape_xml(item.get("url", ""))}</link>
<guid isPermaLink="false">{escape_xml(item.get("id", ""))}</guid>
<pubDate>{pub_date}</pubDate>
</item>
'''
xml += '''</channel>
</rss>'''
return xml
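# Example (illustrative): building a minimal one-item feed by hand.
#
# from datetime import datetime
# items = [{"id": 1, "titulo": "Titular", "resumen": "Texto del resumen",
# "url": "https://example.com/n/1", "fecha": datetime(2026, 1, 13)}]
# xml = build_rss_xml("Demo", "Feed de prueba", "https://example.com", items)
# # xml is a complete RSS 2.0 document with a single <item>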
@rss_bp.route("/custom")
def custom_feed():
"""
Generate a custom RSS feed with filters.
Query params:
- pais_id: Filter by country ID
- categoria_id: Filter by category ID
- lang: Translation language (default: es)
- limit: Number of items (default: 50, max: 100)
"""
pais_id = request.args.get("pais_id")
categoria_id = request.args.get("categoria_id")
lang = (request.args.get("lang") or "es").lower()[:5]
limit = min(int(request.args.get("limit", 50)), 100)
# Build description based on filters
filters_desc = []
with get_read_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Get filter names for description
if pais_id:
cur.execute("SELECT nombre FROM paises WHERE id = %s", (pais_id,))
row = cur.fetchone()
if row:
filters_desc.append(f"País: {row['nombre']}")
if categoria_id:
cur.execute("SELECT nombre FROM categorias WHERE id = %s", (categoria_id,))
row = cur.fetchone()
if row:
filters_desc.append(f"Categoría: {row['nombre']}")
# Build query
query = """
SELECT
n.id, n.titulo, n.resumen, n.url, n.fecha,
n.imagen_url, n.fuente_nombre,
t.titulo_trad, t.resumen_trad
FROM noticias n
LEFT JOIN traducciones t ON t.noticia_id = n.id
AND t.lang_to = %s AND t.status = 'done'
WHERE 1=1
"""
params = [lang]
if pais_id:
query += " AND n.pais_id = %s"
params.append(pais_id)
if categoria_id:
query += " AND n.categoria_id = %s"
params.append(categoria_id)
query += " ORDER BY n.fecha DESC LIMIT %s"
params.append(limit)
cur.execute(query, tuple(params))
rows = cur.fetchall()
# Build items
items = []
for r in rows:
items.append({
"id": r["id"],
"titulo": r["titulo_trad"] or r["titulo"],
"resumen": r["resumen_trad"] or r["resumen"] or "",
"url": r["url"],
"fecha": r["fecha"],
})
# Build feed metadata
title = "The Daily Feed"
if filters_desc:
title += " - " + ", ".join(filters_desc)
description = "Noticias personalizadas"
if filters_desc:
description = "Feed personalizado: " + ", ".join(filters_desc)
link = request.host_url.rstrip("/")
xml = build_rss_xml(title, description, link, items)
return Response(xml, mimetype="application/rss+xml")
@rss_bp.route("/favoritos")
def favoritos_feed():
"""Generate RSS feed of user's favorites."""
from routers.favoritos import get_session_id, ensure_favoritos_table
session_id = get_session_id()
with get_read_conn() as conn:
ensure_favoritos_table(conn)
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute("""
SELECT n.id, n.titulo, n.resumen, n.url, n.fecha,
t.titulo_trad, t.resumen_trad
FROM favoritos f
JOIN noticias n ON n.id = f.noticia_id
LEFT JOIN traducciones t ON t.noticia_id = n.id
AND t.lang_to = 'es' AND t.status = 'done'
WHERE f.session_id = %s
ORDER BY f.created_at DESC
LIMIT 50;
""", (session_id,))
rows = cur.fetchall()
items = []
for r in rows:
items.append({
"id": r["id"],
"titulo": r["titulo_trad"] or r["titulo"],
"resumen": r["resumen_trad"] or r["resumen"] or "",
"url": r["url"],
"fecha": r["fecha"],
})
xml = build_rss_xml(
"The Daily Feed - Mis Favoritos",
"Noticias guardadas en favoritos",
request.host_url.rstrip("/"),
items
)
return Response(xml, mimetype="application/rss+xml")

257
routers/search.py Normal file

@@ -0,0 +1,257 @@
"""
Search API router - Real-time search with semantic search (Qdrant) and autocomplete.
"""
from flask import Blueprint, request, jsonify
from psycopg2 import extras
from datetime import datetime
from db import get_read_conn, get_write_conn
from utils.auth import get_current_user
from utils.qdrant_search import semantic_search
search_bp = Blueprint("search", __name__, url_prefix="/api/search")
@search_bp.route("/")
def search():
"""Search noticias using semantic search (Qdrant) with PostgreSQL fallback."""
q = (request.args.get("q") or "").strip()
limit = min(int(request.args.get("limit", 10)), 50)
page = max(int(request.args.get("page", 1)), 1) # Página actual (1-indexed)
offset = (page - 1) * limit # Calcular offset
lang = (request.args.get("lang") or "es").lower()[:5]
use_semantic = request.args.get("semantic", "true").lower() == "true"
if not q or len(q) < 2:
return jsonify({
"results": [],
"total": 0,
"page": page,
"limit": limit,
"total_pages": 0
})
results = []
total = 0
# Try semantic search first (faster and higher quality)
if use_semantic:
try:
# For pagination we over-fetch from Qdrant; it is fast enough that
# grabbing several pages' worth of results at once is cheap
max_qdrant_results = min(offset + limit * 3, 200)  # up to 3 pages ahead
semantic_results = semantic_search(
query=q,
limit=max_qdrant_results,
score_threshold=0.3  # lower threshold to capture more results
)
if semantic_results:
# Total found (capped by the fetch limit)
total = len(semantic_results)
# Keep only the current page of results
page_results = semantic_results[offset : offset + limit]
if page_results:
# Enrich this page with extra data from PostgreSQL
news_ids = [r['news_id'] for r in page_results]
with get_read_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
# Fetch additional data (category, country)
cur.execute("""
SELECT
n.id,
n.titulo,
n.resumen,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
c.nombre AS categoria,
p.nombre AS pais,
t.titulo_trad,
t.resumen_trad,
t.id AS traduccion_id
FROM noticias n
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
LEFT JOIN traducciones t ON t.noticia_id = n.id
AND t.lang_to = %s AND t.status = 'done'
WHERE n.id = ANY(%s)
""", (lang, news_ids))
db_rows = {row['id']: row for row in cur.fetchall()}
# Merge semantic hits with the PostgreSQL rows
for sem_result in page_results:
news_id = sem_result['news_id']
db_row = db_rows.get(news_id)
if db_row:
results.append({
"id": db_row["id"],
"titulo": db_row["titulo_trad"] or db_row["titulo"],
"resumen": (db_row["resumen_trad"] or db_row["resumen"] or "")[:150],
"url": db_row["url"],
"fecha": db_row["fecha"].isoformat() if db_row["fecha"] else None,
"imagen_url": db_row["imagen_url"],
"fuente": db_row["fuente_nombre"],
"categoria": db_row["categoria"],
"pais": db_row["pais"],
"traduccion_id": db_row["traduccion_id"],
"semantic_score": sem_result['score'],
"fecha_raw": db_row["fecha"] # Para ordenación
})
# Sort chronologically (most recent first); datetime.min as the fallback
# avoids a str/datetime comparison error when fecha is missing
results.sort(key=lambda x: x.get("fecha_raw") or datetime.min, reverse=True)
# Drop the temporary field used for sorting
for r in results:
r.pop("fecha_raw", None)
except Exception as e:
print(f"⚠️ Error en búsqueda semántica, usando fallback: {e}")
import traceback
traceback.print_exc()
# Continue with the traditional search
# Fall back to the traditional search when semantic search produced nothing
if not results and total == 0:
with get_read_conn() as conn:
with conn.cursor(cursor_factory=extras.DictCursor) as cur:
print(f"⚠️ Usando fallback PostgreSQL para búsqueda: '{q}'")
# Búsqueda tradicional optimizada usando Full Text Search
# Nota: Esta query es más lenta que Qdrant pero necesaria como fallback
cur.execute("""
WITH ranked_news AS (
-- Search the original news
SELECT
n.id,
ts_rank(n.search_vector_es, websearch_to_tsquery('spanish', %s)) as rank
FROM noticias n
WHERE n.search_vector_es @@ websearch_to_tsquery('spanish', %s)
UNION ALL
-- Search the translations
SELECT
t.noticia_id as id,
ts_rank(t.search_vector_es, websearch_to_tsquery('spanish', %s)) as rank
FROM traducciones t
WHERE t.search_vector_es @@ websearch_to_tsquery('spanish', %s)
AND t.lang_to = 'es'
AND t.status = 'done'
),
best_ranks AS (
SELECT id, MAX(rank) as max_rank
FROM ranked_news
GROUP BY id
)
SELECT
n.id,
n.titulo,
n.resumen,
n.url,
n.fecha,
n.imagen_url,
n.fuente_nombre,
c.nombre AS categoria,
p.nombre AS pais,
t.titulo_trad,
t.resumen_trad,
t.id AS traduccion_id,
br.max_rank AS rank
FROM best_ranks br
JOIN noticias n ON n.id = br.id
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
LEFT JOIN traducciones t ON t.noticia_id = n.id
AND t.lang_to = %s AND t.status = 'done'
ORDER BY n.fecha DESC, br.max_rank DESC
LIMIT %s OFFSET %s
""", (q, q, q, q, lang, limit, offset))
rows = cur.fetchall()
print(f"✅ PostgreSQL retornó {len(rows)} resultados")
# Count total - Query simplificada
cur.execute("""
SELECT COUNT(DISTINCT id) FROM (
SELECT id FROM noticias
WHERE search_vector_es @@ websearch_to_tsquery('spanish', %s)
UNION
SELECT noticia_id as id FROM traducciones
WHERE search_vector_es @@ websearch_to_tsquery('spanish', %s)
AND lang_to = 'es' AND status = 'done'
) as all_hits
""", (q, q))
total_row = cur.fetchone()
total = total_row[0] if total_row else 0
for r in rows:
results.append({
"id": r["id"],
"titulo": r["titulo_trad"] or r["titulo"],
"resumen": (r["resumen_trad"] or r["resumen"] or "")[:150],
"url": r["url"],
"fecha": r["fecha"].isoformat() if r["fecha"] else None,
"imagen_url": r["imagen_url"],
"fuente": r["fuente_nombre"],
"categoria": r["categoria"],
"pais": r["pais"],
"traduccion_id": r["traduccion_id"],
})
# Save search history for authenticated users
user = get_current_user()
if user and q and page == 1:  # only record on page 1
try:
with get_write_conn() as conn:
with conn.cursor() as cur:
cur.execute("""
INSERT INTO search_history (user_id, query, results_count)
VALUES (%s, %s, %s)
""", (user['id'], q, total))
conn.commit()
except Exception as e:
print(f"ERROR SAVING SEARCH HISTORY: {e}")
pass
total_pages = (total + limit - 1) // limit if limit > 0 else 0
return jsonify({
"results": results,
"total": total,
"query": q,
"page": page,
"limit": limit,
"total_pages": total_pages
})
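# Worked paging example for the semantic branch above: with limit=10 and
# page=3, offset = (3 - 1) * 10 = 20 and max_qdrant_results = min(20 + 30, 200)
# = 50, so Qdrant returns its top 50 hits and this route serves hits 21-30.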
@search_bp.route("/suggestions")
def suggestions():
"""Get search suggestions based on recent/popular searches and tags."""
q = (request.args.get("q") or "").strip()
limit = min(int(request.args.get("limit", 5)), 10)
if not q or len(q) < 2:
return jsonify({"suggestions": []})
with get_read_conn() as conn:
with conn.cursor() as cur:
# Get matching tags as suggestions
cur.execute("""
SELECT DISTINCT valor
FROM tags
WHERE valor ILIKE %s
ORDER BY valor
LIMIT %s;
""", (f"%{q}%", limit))
suggestions = [row[0] for row in cur.fetchall()]
return jsonify({"suggestions": suggestions, "query": q})

911
routers/stats.py Normal file

@@ -0,0 +1,911 @@
from flask import Blueprint, render_template, jsonify
from db import get_read_conn
from datetime import datetime, timedelta
import os
import subprocess
import time
from cache import cached
stats_bp = Blueprint("stats", __name__, url_prefix="/stats")
# ==================================================================================
# ENTITY NORMALIZATION SYSTEM
# ==================================================================================
# Dictionary to map entity name variations to canonical names
import json
CONFIG_FILE = "entity_config.json"
_config_cache = {"data": None, "mtime": 0}
def load_entity_config():
"""Load entity config from JSON file with simple modification time caching."""
global _config_cache
try:
# Check if file exists
if not os.path.exists(CONFIG_FILE):
return {"blacklist": [], "synonyms": {}}
# Check modification time
mtime = os.path.getmtime(CONFIG_FILE)
if _config_cache["data"] is not None and mtime <= _config_cache["mtime"]:
return _config_cache["data"]
# Load fresh config
with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
data = json.load(f)
# Normalize structure
if "blacklist" not in data: data["blacklist"] = []
if "synonyms" not in data: data["synonyms"] = {}
# Pre-process synonyms for reverse lookup (variation -> canonical)
lookup = {}
for canonical, variations in data["synonyms"].items():
lookup[canonical.lower()] = canonical # Map canonical to itself
for var in variations:
lookup[var.lower()] = canonical
data["_lookup"] = lookup
data["_blacklist_set"] = {x.lower() for x in data["blacklist"]}
_config_cache = {"data": data, "mtime": mtime}
return data
except Exception as e:
print(f"Error loading entity config: {e}")
# Return fallback or previous cache if available
return _config_cache["data"] if _config_cache["data"] else {"blacklist": [], "synonyms": {}}
def normalize_entity_name(name: str, config=None) -> str:
"""Normalize entity name to its canonical form."""
if config is None:
config = load_entity_config()
lookup = config.get("_lookup", {})
return lookup.get(name.lower(), name)
def aggregate_normalized_entities(rows, entity_type='persona'):
"""Aggregate entity counts by normalized names and filter blacklisted items.
Args:
rows: List of (name, count) tuples from database
entity_type: Type of entity for normalization (kept for compatibility but config is global now)
Returns:
List of (normalized_name, total_count) tuples sorted by count
"""
aggregated = {}
config = load_entity_config()
blacklist = config.get("_blacklist_set", set())
for name, count in rows:
# 1. Check blacklist (exact or lower match)
if name.lower() in blacklist:
continue
# 2. Normalize
normalized = normalize_entity_name(name, config)
# 3. Check blacklist again (in case canonical name is blacklisted)
if normalized.lower() in blacklist:
continue
aggregated[normalized] = aggregated.get(normalized, 0) + count
# Sort by count descending
sorted_items = sorted(aggregated.items(), key=lambda x: x[1], reverse=True)
return sorted_items
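# Example config shape (illustrative entity_config.json, values made up):
#
# {
# "blacklist": ["Reuters"],
# "synonyms": {"Joe Biden": ["Biden", "J. Biden"]}
# }
#
# With that config, rows like [("Biden", 10), ("J. Biden", 3), ("Reuters", 7)]
# aggregate to [("Joe Biden", 13)]: variations collapse onto the canonical
# name, and blacklisted values are dropped before and after normalization.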
# ==================================================================================
@stats_bp.route("/")
def index():
"""Stats dashboard page."""
# Calculate translation stats for the banner
with get_read_conn() as conn:
with conn.cursor() as cur:
# Translations per minute (last 5 minutes)
cur.execute("""
SELECT COUNT(*) FROM traducciones
WHERE status = 'done'
AND created_at > NOW() - INTERVAL '5 minutes'
""")
recent_5min = cur.fetchone()[0]
translations_per_min = round(recent_5min / 5, 1) if recent_5min else 0
# Status counts
cur.execute("SELECT COUNT(*) FROM traducciones WHERE status = 'done'")
traducciones_count = cur.fetchone()[0]
cur.execute("SELECT COUNT(*) FROM traducciones WHERE status = 'pending'")
pending_count = cur.fetchone()[0]
cur.execute("SELECT COUNT(*) FROM traducciones WHERE status = 'processing'")
processing_count = cur.fetchone()[0]
cur.execute("SELECT COUNT(*) FROM traducciones WHERE status = 'error'")
error_count = cur.fetchone()[0]
# Total noticias (exact count - cached for 5 min in view)
cur.execute("SELECT COUNT(*) FROM noticias")
noticias_count = cur.fetchone()[0] or 0
# News ingested today
cur.execute("""
SELECT COUNT(*) FROM noticias
WHERE DATE(fecha) = CURRENT_DATE
""")
noticias_hoy = cur.fetchone()[0] or 0
# News ingested in the last hour
cur.execute("""
SELECT COUNT(*) FROM noticias
WHERE fecha >= NOW() - INTERVAL '1 hour'
""")
noticias_ultima_hora = cur.fetchone()[0] or 0
return render_template("stats.html",
translations_per_min=translations_per_min,
noticias_count=noticias_count,
traducciones_count=traducciones_count,
pending_count=pending_count,
processing_count=processing_count,
error_count=error_count,
noticias_hoy=noticias_hoy,
noticias_ultima_hora=noticias_ultima_hora)
@stats_bp.route("/api/activity")
@cached(ttl_seconds=300, prefix="stats")
def activity_data():
"""Get activity data (news count) for the specified range."""
from flask import request
range_param = request.args.get("range", "30d")
# Default: 30d -> group by day
days = 30
minutes = 0
interval_sql = "day" # For date_trunc or casting
timedelta_step = timedelta(days=1)
date_format = "%Y-%m-%d"
if range_param == "1h":
minutes = 60
interval_sql = "minute"
timedelta_step = timedelta(minutes=1)
date_format = "%H:%M"
elif range_param == "8h":
minutes = 480
interval_sql = "minute"
timedelta_step = timedelta(minutes=1)
date_format = "%H:%M"
elif range_param == "1d": # Alias for 24h
minutes = 1440
interval_sql = "hour"
timedelta_step = timedelta(hours=1)
date_format = "%H:%M"
elif range_param == "24h":
minutes = 1440
interval_sql = "hour"
timedelta_step = timedelta(hours=1)
date_format = "%H:%M"
elif range_param == "7d":
minutes = 10080
interval_sql = "hour"
timedelta_step = timedelta(hours=1)
# Include Month-Day for 7d context
date_format = "%d %H:%M"
elif range_param == "30d":
# Specific existing logic uses date casting, we can adapt
minutes = 0
days = 30
interval_sql = "day"
timedelta_step = timedelta(days=1)
date_format = "%Y-%m-%d"
# Calculate the start time
if minutes > 0:
start_time = datetime.utcnow() - timedelta(minutes=minutes)
else:
# For day-level ranges, start at midnight
start_time = datetime.utcnow() - timedelta(days=days)
start_time = start_time.replace(hour=0, minute=0, second=0, microsecond=0)
with get_read_conn() as conn:
with conn.cursor() as cur:
# Construct query based on interval
if interval_sql == "day":
# Original logic style for 30d, but generalized
cur.execute("""
SELECT
fecha::date as time_slot,
COUNT(*) as count
FROM noticias
WHERE fecha >= %s
GROUP BY time_slot
ORDER BY time_slot
""", (start_time,))
else:
# Granular logic
cur.execute(f"""
SELECT
date_trunc('{interval_sql}', fecha) as time_slot,
COUNT(*) as count
FROM noticias
WHERE fecha >= %s
GROUP BY time_slot
ORDER BY time_slot
""", (start_time,))
rows = cur.fetchall()
# Fill gaps
data_map = {row[0]: row[1] for row in rows}
labels = []
data = []
# Iterate with step
if minutes > 0:
# Granular start alignment
current = start_time.replace(second=0, microsecond=0)
if interval_sql == "hour":
current = current.replace(minute=0)
end = datetime.utcnow().replace(second=0, microsecond=0)
if interval_sql == "hour":
end = end.replace(minute=0) + timedelta(hours=1)
else:
# Daily start alignment
current = start_time.date() if isinstance(start_time, datetime) else start_time
end = datetime.utcnow().date()
while current <= end:
# Format label
labels.append(current.strftime(date_format))
# The lookup key type depends on the query: ::date returns date objects
# and date_trunc returns datetimes, so handle both safely
lookup_key = current
val = data_map.get(lookup_key, 0)
# Guard against a datetime/date mismatch (unlikely if the logic above is
# consistent, but cheap to check)
if val == 0 and isinstance(lookup_key, datetime) and interval_sql == 'day':
val = data_map.get(lookup_key.date(), 0)
data.append(val)
current += timedelta_step
return jsonify({
"labels": labels,
"data": data
})
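# The gap-filling loop above could be factored into a small helper. A sketch
# (illustrative refactor only; the routes in this file keep their inline loops):
def _fill_time_series_sketch(data_map, start, end, step, fmt):
"""Emit one label per time slot from start to end, with 0 for missing slots."""
labels, data = [], []
current = start
while current <= end:
labels.append(current.strftime(fmt))
data.append(data_map.get(current, 0))
current += step
return labels, data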
@stats_bp.route("/api/categories")
@cached(ttl_seconds=300, prefix="stats")
def categories_data():
"""Get news count per category (Top 8 + Others)."""
with get_read_conn() as conn:
with conn.cursor() as cur:
cur.execute("""
SELECT
c.nombre,
COUNT(n.id) as count
FROM noticias n
JOIN categorias c ON c.id = n.categoria_id
GROUP BY c.nombre
ORDER BY count DESC
""")
rows = cur.fetchall()
# Process Top 8 + Others
labels = []
data = []
others_count = 0
top_limit = 8
for i, row in enumerate(rows):
if i < top_limit:
labels.append(row[0])
data.append(row[1])
else:
others_count += row[1]
if others_count > 0:
labels.append("Otros")
data.append(others_count)
return jsonify({
"labels": labels,
"data": data
})
@stats_bp.route("/api/countries")
@cached(ttl_seconds=300, prefix="stats")
def countries_data():
"""Get news count per country (Top 10 + Others)."""
with get_read_conn() as conn:
with conn.cursor() as cur:
cur.execute("""
SELECT
p.nombre,
COUNT(n.id) as count
FROM noticias n
JOIN paises p ON p.id = n.pais_id
GROUP BY p.nombre
ORDER BY count DESC
""")
rows = cur.fetchall()
# Process Top 10 + Others
labels = []
data = []
others_count = 0
top_limit = 10
for i, row in enumerate(rows):
if i < top_limit:
labels.append(row[0])
data.append(row[1])
else:
others_count += row[1]
return jsonify({
"labels": labels,
"data": data
})
@stats_bp.route("/api/countries/list")
def countries_list():
"""Get alphabetical list of all countries with flags."""
from utils import country_flag
with get_read_conn() as conn:
with conn.cursor() as cur:
cur.execute("SELECT nombre FROM paises ORDER BY nombre ASC")
rows = cur.fetchall()
return jsonify([
{"name": row[0], "flag": country_flag(row[0])}
for row in rows
])
@stats_bp.route("/api/translations/activity")
def translations_activity_data():
"""Get translation count per day for the last 30 days."""
days = 30
start_date = (datetime.utcnow() - timedelta(days=days)).date()
with get_read_conn() as conn:
with conn.cursor() as cur:
cur.execute("""
SELECT
created_at::date as day,
COUNT(*) as count
FROM traducciones
WHERE created_at >= %s
GROUP BY day
ORDER BY day
""", (start_date,))
rows = cur.fetchall()
# Fill gaps
data_map = {row[0]: row[1] for row in rows}
labels = []
data = []
current = start_date
end = datetime.utcnow().date()
while current <= end:
labels.append(current.strftime("%Y-%m-%d"))
data.append(data_map.get(current, 0))
current += timedelta(days=1)
return jsonify({
"labels": labels,
"data": data
})
@stats_bp.route("/api/translations/languages")
@cached(ttl_seconds=60, prefix="stats")
def translations_languages_data():
"""Get translation count per source language."""
# Friendly names for common languages
LANG_NAMES = {
'en': 'Inglés',
'es': 'Español',
'fr': 'Francés',
'de': 'Alemán',
'it': 'Italiano',
'pt': 'Portugués',
'ru': 'Ruso',
'zh': 'Chino',
'ja': 'Japonés',
'ar': 'Árabe'
}
with get_read_conn() as conn:
with conn.cursor() as cur:
cur.execute("""
SELECT
lang_from,
COUNT(*) as count
FROM translation_stats
WHERE lang_from IS NOT NULL
GROUP BY lang_from
ORDER BY count DESC
""")
rows = cur.fetchall()
labels = []
data = []
for code, count in rows:
code = code.strip().lower()
labels.append(LANG_NAMES.get(code, code.upper()))
data.append(count)
return jsonify({
"labels": labels,
"data": data
})
def get_system_uptime():
try:
with open('/proc/uptime', 'r') as f:
uptime_seconds = float(f.readline().split()[0])
days = int(uptime_seconds // (24 * 3600))
hours = int((uptime_seconds % (24 * 3600)) // 3600)
minutes = int((uptime_seconds % 3600) // 60)
if days > 0:
return f"{days}d {hours}h {minutes}m"
return f"{hours}h {minutes}m"
except Exception:
return "N/A"
def get_gpu_info():
try:
cmd = "nvidia-smi --query-gpu=name,temperature.gpu,utilization.gpu,memory.used,memory.total --format=csv,noheader,nounits"
with open(os.devnull, 'w') as devnull:
res = subprocess.check_output(cmd, shell=True, stderr=devnull).decode().strip()
parts = [p.strip() for p in res.split(',')]
if len(parts) >= 5:
return {
"name": parts[0],
"temp": f"{parts[1]}°C",
"util": f"{parts[2]}%",
"mem": f"{parts[3]} MB / {parts[4]} MB"
}
except Exception:
pass
return None
def get_cpu_info():
try:
load = os.getloadavg()
cores = os.cpu_count()
return {
"load": f"{load[0]:.2f}, {load[1]:.2f}, {load[2]:.2f}",
"cores": cores
}
except Exception:
return None
@stats_bp.route("/api/system/info")
def system_info_api():
"""Endpoint for real-time system monitoring."""
return jsonify({
"uptime": get_system_uptime(),
"gpu": get_gpu_info(),
"cpu": get_cpu_info(),
"timestamp": datetime.now().strftime("%H:%M:%S")
})
@stats_bp.route("/api/translations/rate")
@cached(ttl_seconds=60, prefix="stats")
def translations_rate_data():
"""Get translation count for the specified range (1h, 8h, 24h, 7d)."""
# Parameters
from flask import request
range_param = request.args.get("range", "1h")
# Default: 1h -> group by minute
minutes = 60
interval_sql = "minute"
timedelta_step = timedelta(minutes=1)
date_format = "%H:%M"
if range_param == "8h":
minutes = 8 * 60
interval_sql = "minute" # Still group by minute for detailed graph? Or 5 mins?
# Let's simple group by minute but it might be dense. 480 points. Fine.
timedelta_step = timedelta(minutes=1)
date_format = "%H:%M"
elif range_param == "24h":
minutes = 24 * 60
# Group by 15 minutes? Postgres: date_trunc('hour', ...) or extract?
# Let's use custom grouping? Or simple 'hour' is too granular? 1440 mins.
# Let's group by hour for 24h to be safe/clean
interval_sql = "hour"
timedelta_step = timedelta(hours=1)
date_format = "%H:%M"
elif range_param == "7d":
minutes = 7 * 24 * 60
interval_sql = "hour" # 7 * 24 = 168 points
timedelta_step = timedelta(hours=1)
date_format = "%Y-%m-%d %H:%M"
start_time = datetime.utcnow() - timedelta(minutes=minutes)
with get_read_conn() as conn:
with conn.cursor() as cur:
# Query translation_stats instead of traducciones
cur.execute(f"""
SELECT
date_trunc('{interval_sql}', created_at) as time_slot,
COUNT(*) as count
FROM translation_stats
WHERE created_at >= %s
GROUP BY time_slot
ORDER BY time_slot
""", (start_time,))
rows = cur.fetchall()
# Fill gaps
data_map = {row[0]: row[1] for row in rows}
labels = []
data = []
# Iterate by step
# Align start_time to step if possible (lazy alignment)
current = start_time.replace(second=0, microsecond=0)
if interval_sql == "hour":
current = current.replace(minute=0)
end = datetime.utcnow().replace(second=0, microsecond=0)
if interval_sql == "hour":
end = end.replace(minute=0) + timedelta(hours=1) # Ensure we cover current partial hour
while current <= end:
labels.append(current.strftime(date_format))
data.append(data_map.get(current, 0))
current += timedelta_step
return jsonify({
"labels": labels,
"data": data
})
@stats_bp.route("/entities")
def entities_dashboard():
"""Dashboard for Named Entities statistics."""
return render_template("stats_entities.html")
@stats_bp.route("/api/entities/people")
def entities_people():
"""Top 25 mentioned people, optionally filtered by country and/or date."""
from flask import request
from datetime import datetime
from cache import cache_get, cache_set
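    # Response: {"labels": [...], "data": [...], "images": [...], "summaries": [...]}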
# 1. Check config mtime for cache invalidation
try:
config_mtime = os.path.getmtime(CONFIG_FILE)
except OSError:
config_mtime = 0
country_filter = request.args.get("country")
date_filter = request.args.get("date")
# 2. Build cache key with mtime
cache_key = f"entities:people:{country_filter}:{date_filter}:{config_mtime}"
# 3. Try cache
cached_data = cache_get(cache_key)
if cached_data:
return jsonify(cached_data)
# Determine time range
if date_filter:
# Single day query
try:
target_date = datetime.strptime(date_filter, "%Y-%m-%d").date()
time_condition = "DATE(tr.created_at) = %s"
time_params = [target_date]
        except ValueError:
            # Invalid date format: fall back to the default 30-day window
time_condition = "tr.created_at >= NOW() - INTERVAL '30 days'"
time_params = []
else:
# Default: last 30 days
time_condition = "tr.created_at >= NOW() - INTERVAL '30 days'"
time_params = []
if country_filter and country_filter != 'global':
# Filtered by country
query = f"""
SELECT t.valor, COUNT(*) as menciones
FROM tags t
JOIN tags_noticia tn ON tn.tag_id = t.id
JOIN traducciones tr ON tn.traduccion_id = tr.id
JOIN noticias n ON tr.noticia_id = n.id
WHERE t.tipo = 'persona'
AND {time_condition}
AND n.pais_id = (SELECT id FROM paises WHERE nombre ILIKE %s LIMIT 1)
GROUP BY t.valor
ORDER BY menciones DESC
"""
params = tuple(time_params + [country_filter])
else:
# Global view
query = f"""
SELECT t.valor, COUNT(*) as menciones
FROM tags t
JOIN tags_noticia tn ON tn.tag_id = t.id
JOIN traducciones tr ON tn.traduccion_id = tr.id
WHERE t.tipo = 'persona'
AND {time_condition}
GROUP BY t.valor
ORDER BY menciones DESC
"""
params = tuple(time_params)
with get_read_conn() as conn:
with conn.cursor() as cur:
cur.execute(query, params)
rows = cur.fetchall()
# Normalize and aggregate
normalized_rows = aggregate_normalized_entities(rows, entity_type='persona')
# Take top 50
top_50 = normalized_rows[:50]
# Enrich with Wikipedia Images (Parallel Execution)
from concurrent.futures import ThreadPoolExecutor
from utils.wiki import fetch_wiki_data
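    # Wikipedia lookups are network-bound, so a small thread pool fetches them
    # concurrently instead of serially.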
images = []
summaries = []
def get_image_safe(name):
try:
return fetch_wiki_data(name)
except Exception:
return None, None
if top_50:
names = [row[0] for row in top_50]
with ThreadPoolExecutor(max_workers=10) as executor:
try:
results = list(executor.map(get_image_safe, names))
# Unpack results
for img, smry in results:
images.append(img)
summaries.append(smry)
except Exception as e:
import logging
logging.error(f"Error fetching wiki data: {e}")
                # Fall back to None placeholders if the thread pool fails
images = [None] * len(names)
summaries = [None] * len(names)
else:
images = []
summaries = []
result = {
"labels": [row[0] for row in top_50],
"data": [row[1] for row in top_50],
"images": images,
"summaries": summaries
}
# 4. Set cache
cache_set(cache_key, result, ttl_seconds=600)
return jsonify(result)
@stats_bp.route("/api/entities/orgs")
def entities_orgs():
"""Top mentioned organizations, optionally filtered by country."""
from flask import request
from cache import cache_get, cache_set
country_filter = request.args.get("country")
try:
config_mtime = os.path.getmtime(CONFIG_FILE)
except OSError:
config_mtime = 0
cache_key = f"entities:orgs:{country_filter}:{config_mtime}"
cached_data = cache_get(cache_key)
if cached_data:
return jsonify(cached_data)
if country_filter and country_filter != 'global':
query = """
SELECT t.valor, COUNT(*) as menciones
FROM tags t
JOIN tags_noticia tn ON tn.tag_id = t.id
JOIN traducciones tr ON tn.traduccion_id = tr.id
JOIN noticias n ON tr.noticia_id = n.id
WHERE t.tipo = 'organizacion'
AND tr.created_at >= NOW() - INTERVAL '30 days'
AND n.pais_id = (SELECT id FROM paises WHERE nombre ILIKE %s LIMIT 1)
GROUP BY t.valor
ORDER BY menciones DESC
LIMIT 50
"""
params = (country_filter,)
else:
query = """
SELECT t.valor, COUNT(*) as menciones
FROM tags t
JOIN tags_noticia tn ON tn.tag_id = t.id
JOIN traducciones tr ON tn.traduccion_id = tr.id
WHERE t.tipo = 'organizacion'
AND tr.created_at >= NOW() - INTERVAL '30 days'
GROUP BY t.valor
ORDER BY menciones DESC
LIMIT 50
"""
params = ()
with get_read_conn() as conn:
with conn.cursor() as cur:
cur.execute(query, params)
rows = cur.fetchall()
normalized_rows = aggregate_normalized_entities(rows, entity_type='organizacion')
# Enrich with Wikipedia Images
from concurrent.futures import ThreadPoolExecutor
from utils.wiki import fetch_wiki_data
images = []
summaries = []
def get_info_safe(name):
try:
return fetch_wiki_data(name)
except Exception:
return None, None
if normalized_rows:
names = [row[0] for row in normalized_rows]
with ThreadPoolExecutor(max_workers=10) as executor:
results = list(executor.map(get_info_safe, names))
for img, smry in results:
images.append(img)
summaries.append(smry)
result = {
"labels": [row[0] for row in normalized_rows],
"data": [row[1] for row in normalized_rows],
"images": images,
"summaries": summaries
}
cache_set(cache_key, result, ttl_seconds=600)
return jsonify(result)
@stats_bp.route("/api/entities/places")
def entities_places():
"""Top mentioned places, optionally filtered by country."""
from flask import request
from cache import cache_get, cache_set
country_filter = request.args.get("country")
try:
config_mtime = os.path.getmtime(CONFIG_FILE)
except OSError:
config_mtime = 0
cache_key = f"entities:places:{country_filter}:{config_mtime}"
cached_data = cache_get(cache_key)
if cached_data:
return jsonify(cached_data)
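    # Both branches below exclude noticias whose country is 'España', so the
    # ranking reflects places mentioned in coverage from abroad only.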
if country_filter and country_filter != 'global':
query = """
SELECT t.valor, COUNT(*) as menciones
FROM tags t
JOIN tags_noticia tn ON tn.tag_id = t.id
JOIN traducciones tr ON tn.traduccion_id = tr.id
JOIN noticias n ON tr.noticia_id = n.id
WHERE t.tipo = 'lugar'
AND tr.created_at >= NOW() - INTERVAL '30 days'
AND n.pais_id = (SELECT id FROM paises WHERE nombre ILIKE %s LIMIT 1)
AND n.pais_id != (SELECT id FROM paises WHERE nombre = 'España')
GROUP BY t.valor
ORDER BY menciones DESC
LIMIT 50
"""
params = (country_filter,)
else:
query = """
SELECT t.valor, COUNT(*) as menciones
FROM tags t
JOIN tags_noticia tn ON tn.tag_id = t.id
JOIN traducciones tr ON tn.traduccion_id = tr.id
JOIN noticias n ON tr.noticia_id = n.id
WHERE t.tipo = 'lugar'
AND tr.created_at >= NOW() - INTERVAL '30 days'
AND n.pais_id != (SELECT id FROM paises WHERE nombre = 'España')
GROUP BY t.valor
ORDER BY menciones DESC
LIMIT 50
"""
params = ()
with get_read_conn() as conn:
with conn.cursor() as cur:
cur.execute(query, params)
rows = cur.fetchall()
# Normalize
normalized_rows = aggregate_normalized_entities(rows, entity_type='lugar')
# Enrich with Wikipedia Images
from concurrent.futures import ThreadPoolExecutor
from utils.wiki import fetch_wiki_data
images = []
summaries = []
def get_info_safe(name):
try:
return fetch_wiki_data(name)
except Exception:
return None, None
if normalized_rows:
names = [row[0] for row in normalized_rows]
with ThreadPoolExecutor(max_workers=10) as executor:
results = list(executor.map(get_info_safe, names))
for img, smry in results:
images.append(img)
summaries.append(smry)
result = {
"labels": [row[0] for row in normalized_rows],
"data": [row[1] for row in normalized_rows],
"images": images,
"summaries": summaries
}
cache_set(cache_key, result, ttl_seconds=600)
return jsonify(result)

81
routers/topics.py Normal file
View file

@ -0,0 +1,81 @@
from flask import Blueprint, render_template, request
from db import get_read_conn
from psycopg2 import extras
import datetime
topics_bp = Blueprint("topics", __name__, url_prefix="/topics")
@topics_bp.route("/")
def monitor():
    # Country impact monitor
days = int(request.args.get("days", 3))
with get_read_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
        # Country ranking by "heat" (sum of topic scores over recent news)
cur.execute("""
SELECT p.id, p.nombre,
COUNT(DISTINCT n.id) as news_count,
SUM(nt.score) as total_impact
FROM paises p
JOIN noticias n ON n.pais_id = p.id
JOIN news_topics nt ON nt.noticia_id = n.id
            WHERE n.fecha > NOW() - (%s * INTERVAL '1 day')
GROUP BY p.id, p.nombre
HAVING SUM(nt.score) > 0
ORDER BY total_impact DESC
LIMIT 50;
""", (days,))
countries = cur.fetchall()
return render_template("monitor_list.html", countries=countries, days=days)
@topics_bp.route("/country/<int:pais_id>")
def country_detail(pais_id):
days = int(request.args.get("days", 3))
with get_read_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
        # Country info
cur.execute("SELECT * FROM paises WHERE id = %s", (pais_id,))
pais = cur.fetchone()
if not pais:
return "País no encontrado", 404
        # Top impact news (last few days): each item ranked by the sum of its topic scores
cur.execute("""
SELECT n.id,
COALESCE(t.titulo_trad, n.titulo) as titulo,
COALESCE(t.resumen_trad, n.resumen) as resumen,
n.fecha, n.imagen_url, n.fuente_nombre, n.url,
SUM(nt.score) as impact_score
FROM noticias n
JOIN news_topics nt ON nt.noticia_id = n.id
LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es' AND t.status = 'done'
WHERE n.pais_id = %s
            AND n.fecha > NOW() - (%s * INTERVAL '1 day')
GROUP BY n.id, n.titulo, n.resumen, n.fecha, n.imagen_url, n.fuente_nombre, n.url, t.titulo_trad, t.resumen_trad
ORDER BY impact_score DESC
LIMIT 20;
""", (pais_id, days))
top_news = cur.fetchall()
        # Active topics in this country
cur.execute("""
SELECT t.name, SUM(nt.score) as topic_volume
FROM topics t
JOIN news_topics nt ON nt.topic_id = t.id
JOIN noticias n ON n.id = nt.noticia_id
WHERE n.pais_id = %s
            AND n.fecha > NOW() - (%s * INTERVAL '1 day')
GROUP BY t.id, t.name
ORDER BY topic_volume DESC
LIMIT 10;
""", (pais_id, days))
active_topics = cur.fetchall()
return render_template("monitor_detail.html",
pais=pais,
news=top_news,
active_topics=active_topics,
days=days)

59
routers/traducciones.py Normal file
View file

@ -0,0 +1,59 @@
from flask import Blueprint, render_template, request
from db import get_read_conn
traducciones_bp = Blueprint("traducciones", __name__)
@traducciones_bp.route("/traducciones")
def ultimas_traducciones():
"""Muestra las últimas noticias traducidas."""
page = max(int(request.args.get("page", 1)), 1)
per_page = min(max(int(request.args.get("per_page", 20)), 10), 100)
offset = (page - 1) * per_page
with get_read_conn() as conn:
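        # Read-only listing: autocommit avoids keeping an idle transaction open.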
conn.autocommit = True
with conn.cursor() as cur:
# Total count
cur.execute("""
SELECT COUNT(*) FROM traducciones WHERE status = 'done'
""")
total = cur.fetchone()[0]
# Fetch latest translations
cur.execute("""
SELECT
t.id,
t.noticia_id,
t.titulo_trad,
t.resumen_trad,
t.lang_from,
t.lang_to,
t.created_at AS updated_at,
n.url AS link,
n.imagen_url AS imagen,
n.fuente_nombre AS feed_nombre,
c.nombre AS categoria_nombre,
p.nombre AS pais_nombre
FROM traducciones t
JOIN noticias n ON n.id = t.noticia_id
LEFT JOIN categorias c ON c.id = n.categoria_id
LEFT JOIN paises p ON p.id = n.pais_id
WHERE t.status = 'done'
ORDER BY t.created_at DESC
LIMIT %s OFFSET %s
""", (per_page, offset))
columns = [desc[0] for desc in cur.description]
traducciones = [dict(zip(columns, row)) for row in cur.fetchall()]
total_pages = (total + per_page - 1) // per_page
return render_template(
"traducciones.html",
traducciones=traducciones,
page=page,
per_page=per_page,
total=total,
total_pages=total_pages,
)

81
routers/urls.py Normal file
View file

@ -0,0 +1,81 @@
from flask import Blueprint, render_template, request, redirect, flash, url_for
from psycopg2 import extras
from db import get_conn
from models.categorias import get_categorias
from models.paises import get_paises
urls_bp = Blueprint("urls", __name__, url_prefix="/urls")
@urls_bp.route("/")
def manage_urls():
with get_conn() as conn, conn.cursor(cursor_factory=extras.DictCursor) as cur:
cur.execute(
"""
SELECT fu.id, fu.nombre, fu.url,
c.nombre AS categoria,
p.nombre AS pais,
fu.idioma,
fu.last_check,
fu.last_status,
fu.status_message,
fu.last_http_code,
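                   -- Correlated subquery: number of stored articles whose feed
                   -- points back at this source URL.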
COALESCE((
SELECT COUNT(*)
FROM noticias n
JOIN feeds f ON n.fuente_nombre = f.nombre
WHERE f.fuente_url_id = fu.id
), 0) as noticias_count
FROM fuentes_url fu
LEFT JOIN categorias c ON c.id=fu.categoria_id
LEFT JOIN paises p ON p.id=fu.pais_id
ORDER BY fu.nombre;
"""
)
fuentes = cur.fetchall()
return render_template("urls_list.html", fuentes=fuentes)
@urls_bp.route("/add_source", methods=["GET", "POST"])
def add_url_source():
with get_conn() as conn:
categorias = get_categorias(conn)
paises = get_paises(conn)
if request.method == "POST":
nombre = request.form.get("nombre")
url = request.form.get("url")
categoria_id = request.form.get("categoria_id")
pais_id = request.form.get("pais_id")
idioma = (request.form.get("idioma", "es") or "es").strip().lower()[:2]
try:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma)
VALUES (%s, %s, %s, %s, %s)
ON CONFLICT (url) DO UPDATE
SET nombre=EXCLUDED.nombre,
categoria_id=EXCLUDED.categoria_id,
pais_id=EXCLUDED.pais_id,
idioma=EXCLUDED.idioma;
""",
(
nombre,
url,
int(categoria_id) if categoria_id else None,
int(pais_id) if pais_id else None,
idioma,
),
)
conn.commit()
flash("Fuente añadida/actualizada.", "success")
return redirect(url_for("urls.manage_urls"))
        except Exception as e:
            conn.rollback()  # leave the connection usable after the failed insert
            flash(f"Error: {e}", "error")
return render_template("add_url_source.html", categorias=categorias, paises=paises)