Migración a PostgreSQL, fix idioma y campos feeds

This commit is contained in:
jlimolina 2025-05-30 16:13:58 +02:00
parent 34f60011a3
commit 72dd972352
4 changed files with 242 additions and 141 deletions

341
app.py
View file

@ -1,59 +1,85 @@
# -*- coding: utf-8 -*-
"""Flask RSS aggregator — versión PostgreSQL
Cambios principales respecto al original (MySQL):
- mysql.connector → psycopg2
- DB_CONFIG con claves PostgreSQL
- Función auxiliar get_conn() para abrir conexiones
- Reemplazo de INSERT IGNORE / ON DUPLICATE KEY UPDATE por ON CONFLICT
"""
from flask import Flask, render_template, request, redirect, url_for, Response
from apscheduler.schedulers.background import BackgroundScheduler
from datetime import datetime
import feedparser
import hashlib
import re
import mysql.connector
import psycopg2
import psycopg2.extras
import csv
from io import StringIO
app = Flask(__name__)
# ---------------------------------------------------------------------------
# Configuración de la base de datos PostgreSQL
# ---------------------------------------------------------------------------
DB_CONFIG = {
'host': 'localhost',
'user': 'x',
'password': 'x',
'database': 'noticiasrss'
"host": "localhost",
"port": 5432,
"dbname": "rss",
"user": "rss",
"password": "x",
}
def get_conn():
    """Open and return a brand-new PostgreSQL connection.

    The connection parameters come from the module-level ``DB_CONFIG``
    dictionary, which is expanded into keyword arguments for
    ``psycopg2.connect``. Nothing is cached or pooled here: every call
    opens a fresh connection, and the callers in this module close it
    themselves in a ``finally`` clause.
    """
    connection = psycopg2.connect(**DB_CONFIG)
    return connection
MAX_FALLOS = 5 # Número máximo de fallos antes de desactivar el feed
# ======================================
# Página principal: últimas noticias
# ======================================
@app.route('/')
@app.route("/")
def home():
conn = None
noticias = []
categorias = []
continentes = []
paises = []
cat_id = request.args.get('categoria_id')
cont_id = request.args.get('continente_id')
pais_id = request.args.get('pais_id')
cat_id = request.args.get("categoria_id")
cont_id = request.args.get("continente_id")
pais_id = request.args.get("pais_id")
try:
conn = mysql.connector.connect(**DB_CONFIG)
conn = get_conn()
cursor = conn.cursor()
cursor.execute("SELECT id, nombre FROM categorias_estandar ORDER BY nombre")
cursor.execute("SELECT id, nombre FROM categorias ORDER BY nombre")
categorias = cursor.fetchall()
cursor.execute("SELECT id, nombre FROM continentes ORDER BY nombre")
continentes = cursor.fetchall()
if cont_id:
cursor.execute("SELECT id, nombre, continente_id FROM paises WHERE continente_id = %s ORDER BY nombre", (cont_id,))
cursor.execute(
"SELECT id, nombre, continente_id FROM paises WHERE continente_id = %s ORDER BY nombre",
(cont_id,),
)
else:
cursor.execute("SELECT id, nombre, continente_id FROM paises ORDER BY nombre")
paises = cursor.fetchall()
sql = """
sql = (
"""
SELECT n.fecha, n.titulo, n.resumen, n.url, n.imagen_url,
c.nombre AS categoria, p.nombre AS pais, co.nombre AS continente
FROM noticias n
LEFT JOIN categorias_estandar c ON n.categoria_id = c.id
LEFT JOIN categorias c ON n.categoria_id = c.id
LEFT JOIN paises p ON n.pais_id = p.id
LEFT JOIN continentes co ON p.continente_id = co.id
WHERE 1=1
"""
)
params = []
if cat_id:
sql += " AND n.categoria_id = %s"
@ -67,142 +93,180 @@ def home():
sql += " ORDER BY n.fecha DESC LIMIT 50"
cursor.execute(sql, params)
noticias = cursor.fetchall()
except mysql.connector.Error as db_err:
except psycopg2.Error as db_err:
app.logger.error(f"[DB ERROR] Al leer noticias: {db_err}", exc_info=True)
finally:
if conn:
conn.close()
return render_template(
'noticias.html',
"noticias.html",
noticias=noticias,
categorias=categorias,
continentes=continentes,
paises=paises,
cat_id=int(cat_id) if cat_id else None,
cont_id=int(cont_id) if cont_id else None,
pais_id=int(pais_id) if pais_id else None
pais_id=int(pais_id) if pais_id else None,
)
# ======================================
# Gestión de feeds en /feeds
# ======================================
@app.route("/feeds")
def feeds():
    """Render the feed-management page (``index.html``).

    Runs four queries on a single connection: all feeds joined with their
    category and country names, then the category, continent and country
    lists. If any of them raises ``psycopg2.Error`` the error is logged and
    the template receives four empty lists instead of partial data.
    """
    queries = (
        # Feeds with description and failure counter
        """
        SELECT f.id, f.nombre, f.descripcion, f.url, f.categoria_id, f.pais_id,
               f.activo, f.fallos, c.nombre, p.nombre
        FROM feeds f
        LEFT JOIN categorias c ON f.categoria_id = c.id
        LEFT JOIN paises p ON f.pais_id = p.id
        """,
        "SELECT id, nombre FROM categorias ORDER BY nombre",
        "SELECT id, nombre FROM continentes ORDER BY nombre",
        "SELECT id, nombre, continente_id FROM paises ORDER BY nombre",
    )

    # Defaults double as the fallback used when the database fails.
    feed_rows, categorias, continentes, paises = [], [], [], []
    conn = None
    try:
        conn = get_conn()
        cur = conn.cursor()
        results = []
        for sql in queries:
            cur.execute(sql)
            results.append(cur.fetchall())
        # Only publish the results once every query has succeeded.
        feed_rows, categorias, continentes, paises = results
    except psycopg2.Error as exc:
        app.logger.error(
            f"[DB ERROR] Al leer feeds/categorías/países: {exc}", exc_info=True
        )
    finally:
        if conn:
            conn.close()

    return render_template(
        "index.html",
        feeds=feed_rows,
        categorias=categorias,
        continentes=continentes,
        paises=paises,
    )
# Añadir feed
@app.route("/add", methods=["POST"])
def add_feed():
    """Insert a new feed from the submitted form, then redirect to ``/feeds``.

    Reads ``nombre``, ``descripcion``, ``url``, ``categoria_id``, ``pais_id``
    and ``idioma`` from the POSTed form. An empty ``idioma`` is stored as
    NULL. A ``psycopg2.Error`` is logged and otherwise ignored, so the user
    is redirected to the feed list whether or not the insert succeeded.
    """
    nombre = request.form.get("nombre")
    descripcion = request.form.get("descripcion")
    url = request.form.get("url")
    categoria_id = request.form.get("categoria_id")
    pais_id = request.form.get("pais_id")
    idioma = request.form.get("idioma") or None

    # Bind conn before the try block: if get_conn() itself raises, the
    # finally clause below would otherwise hit an unbound local and replace
    # the logged database error with an UnboundLocalError.
    conn = None
    try:
        conn = get_conn()
        cursor = conn.cursor()
        cursor.execute(
            """
            INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma)
            VALUES (%s, %s, %s, %s, %s, %s)
            """,
            (nombre, descripcion, url, categoria_id, pais_id, idioma),
        )
        conn.commit()
    except psycopg2.Error as db_err:
        app.logger.error(f"[DB ERROR] Al agregar feed: {db_err}", exc_info=True)
    finally:
        if conn:
            conn.close()
    return redirect(url_for("feeds"))
# Editar feed
@app.route("/edit/<int:feed_id>", methods=["GET", "POST"])
def edit_feed(feed_id):
    """Show the edit form for one feed (GET) or save its new values (POST).

    On POST the form fields are written to the ``feeds`` row whose id is
    ``feed_id`` and the user is redirected to the feed list. On GET the feed
    row plus the category and country lists are loaded and passed to
    ``edit_feed.html``. If the database raises ``psycopg2.Error`` the error
    is logged and the form is rendered with an empty feed and empty lists.
    """
    conn = None
    try:
        conn = get_conn()
        # DictCursor rows can be read by column name.
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

        if request.method == "POST":
            form = request.form
            values = (
                form.get("nombre"),
                form.get("descripcion"),
                form.get("url"),
                form.get("categoria_id"),
                form.get("pais_id"),
                form.get("idioma") or None,  # blank language -> NULL
                form.get("activo") == "on",  # checkbox -> boolean
                feed_id,
            )
            cur.execute(
                """
                UPDATE feeds
                SET nombre=%s, descripcion=%s, url=%s, categoria_id=%s,
                    pais_id=%s, idioma=%s, activo=%s
                WHERE id=%s
                """,
                values,
            )
            conn.commit()
            return redirect(url_for("feeds"))

        cur.execute("SELECT * FROM feeds WHERE id = %s", (feed_id,))
        feed = cur.fetchone()

        cur.execute("SELECT id, nombre FROM categorias ORDER BY nombre")
        categorias = cur.fetchall()

        cur.execute("SELECT id, nombre FROM paises ORDER BY nombre")
        paises = cur.fetchall()
    except psycopg2.Error as exc:
        app.logger.error(f"[DB ERROR] Al editar feed: {exc}", exc_info=True)
        feed, categorias, paises = {}, [], []
    finally:
        if conn:
            conn.close()

    return render_template(
        "edit_feed.html",
        feed=feed,
        categorias=categorias,
        paises=paises,
    )
# Eliminar feed
@app.route("/delete/<int:feed_id>")
def delete_feed(feed_id):
    """Delete the feed whose id is ``feed_id`` and redirect to ``/feeds``.

    A ``psycopg2.Error`` is logged and otherwise ignored; the redirect is
    issued in every case.
    """
    conn = None
    try:
        conn = get_conn()
        cur = conn.cursor()
        params = (feed_id,)
        cur.execute("DELETE FROM feeds WHERE id=%s", params)
        conn.commit()
    except psycopg2.Error as exc:
        app.logger.error(f"[DB ERROR] Al eliminar feed: {exc}", exc_info=True)
    finally:
        if conn:
            conn.close()

    target = url_for("feeds")
    return redirect(target)
# Backup de feeds a CSV
@app.route('/backup_feeds')
@app.route("/backup_feeds")
def backup_feeds():
conn = None
try:
conn = mysql.connector.connect(**DB_CONFIG)
conn = get_conn()
cursor = conn.cursor()
cursor.execute("""
SELECT f.id, f.nombre, f.descripcion, f.url, f.categoria_id, c.nombre AS categoria, f.pais_id, p.nombre AS pais, f.activo, f.fallos
cursor.execute(
"""
SELECT f.id, f.nombre, f.descripcion, f.url, f.categoria_id, c.nombre AS categoria,
f.pais_id, p.nombre AS pais, f.idioma, f.activo, f.fallos
FROM feeds f
LEFT JOIN categorias_estandar c ON f.categoria_id = c.id
LEFT JOIN categorias c ON f.categoria_id = c.id
LEFT JOIN paises p ON f.pais_id = p.id
""")
feeds = cursor.fetchall()
"""
)
feeds_ = cursor.fetchall()
header = [desc[0] for desc in cursor.description]
except mysql.connector.Error as db_err:
except psycopg2.Error as db_err:
app.logger.error(f"[DB ERROR] Al hacer backup de feeds: {db_err}", exc_info=True)
return "Error generando backup.", 500
finally:
@ -212,53 +276,61 @@ def backup_feeds():
si = StringIO()
cw = csv.writer(si)
cw.writerow(header)
cw.writerows(feeds)
cw.writerows(feeds_)
output = si.getvalue()
si.close()
return Response(
output,
mimetype="text/csv",
headers={"Content-Disposition": "attachment;filename=feeds_backup.csv"}
headers={"Content-Disposition": "attachment;filename=feeds_backup.csv"},
)
# Restaurar feeds desde CSV
@app.route('/restore_feeds', methods=['GET', 'POST'])
@app.route("/restore_feeds", methods=["GET", "POST"])
def restore_feeds():
msg = ""
if request.method == 'POST':
file = request.files.get('file')
if not file or not file.filename.endswith('.csv'):
if request.method == "POST":
file = request.files.get("file")
if not file or not file.filename.endswith(".csv"):
msg = "Archivo no válido."
else:
file_stream = StringIO(file.read().decode('utf-8'))
file_stream = StringIO(file.read().decode("utf-8"))
reader = csv.DictReader(file_stream)
rows = list(reader)
conn = mysql.connector.connect(**DB_CONFIG)
conn = get_conn()
cursor = conn.cursor()
n_ok = 0
for row in rows:
try:
descripcion = row.get('descripcion') or ""
cursor.execute("""
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, activo, fallos)
VALUES (%s, %s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
nombre=VALUES(nombre),
descripcion=VALUES(descripcion),
url=VALUES(url),
categoria_id=VALUES(categoria_id),
pais_id=VALUES(pais_id),
activo=VALUES(activo),
fallos=VALUES(fallos)
""", (
row['nombre'],
descripcion,
row['url'],
row['categoria_id'],
row['pais_id'],
int(row['activo']),
int(row.get('fallos', 0))
))
cursor.execute(
"""
INSERT INTO feeds (
id, nombre, descripcion, url, categoria_id, pais_id, idioma, activo, fallos
) VALUES (%(id)s, %(nombre)s, %(descripcion)s, %(url)s, %(categoria_id)s,
%(pais_id)s, %(idioma)s, %(activo)s, %(fallos)s)
ON CONFLICT (id) DO UPDATE SET
nombre = EXCLUDED.nombre,
descripcion = EXCLUDED.descripcion,
url = EXCLUDED.url,
categoria_id = EXCLUDED.categoria_id,
pais_id = EXCLUDED.pais_id,
idioma = EXCLUDED.idioma,
activo = EXCLUDED.activo,
fallos = EXCLUDED.fallos;
""",
{
"id": row.get("id"),
"nombre": row["nombre"],
"descripcion": row.get("descripcion") or "",
"url": row["url"],
"categoria_id": row["categoria_id"],
"pais_id": row["pais_id"],
"idioma": row.get("idioma"),
"activo": bool(int(row["activo"])),
"fallos": int(row.get("fallos", 0)),
},
)
n_ok += 1
except Exception as e:
app.logger.error(f"Error insertando feed {row}: {e}")
@ -267,36 +339,41 @@ def restore_feeds():
msg = f"Feeds restaurados correctamente: {n_ok}"
return render_template("restore_feeds.html", msg=msg)
@app.route("/noticias")
def show_noticias():
    """Serve ``/noticias`` by delegating to the ``home`` view."""
    response = home()
    return response
# ================================
# Lógica de procesado de feeds con control de fallos
# ================================
def sumar_fallo_feed(cursor, feed_id):
    """Record one more failure for a feed and deactivate it at the limit.

    Increments ``feeds.fallos`` for ``feed_id``, reads the new value back,
    and sets ``activo = FALSE`` once that value reaches ``MAX_FALLOS``.
    Nothing is committed here; the statements run on the caller's cursor.

    Returns the failure count read back after the increment.
    """
    params = (feed_id,)
    cursor.execute("UPDATE feeds SET fallos = fallos + 1 WHERE id = %s", params)
    cursor.execute("SELECT fallos FROM feeds WHERE id = %s", params)
    row = cursor.fetchone()
    total = row[0]

    # Still under the limit: leave the feed active.
    if total < MAX_FALLOS:
        return total

    cursor.execute("UPDATE feeds SET activo = FALSE WHERE id = %s", params)
    return total
def resetear_fallos_feed(cursor, feed_id):
    """Set the failure counter of the feed ``feed_id`` back to zero.

    Issues a single UPDATE on the caller's cursor and does not commit.
    """
    query = "UPDATE feeds SET fallos = 0 WHERE id = %s"
    params = (feed_id,)
    cursor.execute(query, params)
def fetch_and_store():
conn = None
try:
conn = mysql.connector.connect(**DB_CONFIG)
conn = get_conn()
cursor = conn.cursor()
cursor.execute("SELECT id, url, categoria_id, pais_id FROM feeds WHERE activo = TRUE")
feeds = cursor.fetchall()
except mysql.connector.Error as db_err:
feeds_ = cursor.fetchall()
except psycopg2.Error as db_err:
app.logger.error(f"[DB ERROR] No se pudo conectar o leer feeds: {db_err}", exc_info=True)
return
for feed_id, rss_url, categoria_id, pais_id in feeds:
for feed_id, rss_url, categoria_id, pais_id in feeds_:
try:
app.logger.info(f"Procesando feed: {rss_url} [{categoria_id}] [{pais_id}]")
parsed = feedparser.parse(rss_url)
@ -305,8 +382,8 @@ def fetch_and_store():
sumar_fallo_feed(cursor, feed_id)
continue
if getattr(parsed, 'bozo', False):
bozo_exc = getattr(parsed, 'bozo_exception', 'Unknown')
if getattr(parsed, "bozo", False):
bozo_exc = getattr(parsed, "bozo_exception", "Unknown")
app.logger.warning(f"[BOZO] Feed mal formado: {rss_url} - {bozo_exc}")
sumar_fallo_feed(cursor, feed_id)
continue
@ -314,31 +391,33 @@ def fetch_and_store():
resetear_fallos_feed(cursor, feed_id)
for entry in parsed.entries:
link = entry.get('link') or entry.get('id')
link = entry.get("link") or entry.get("id")
if not link:
links_list = entry.get('links', [])
links_list = entry.get("links", [])
if isinstance(links_list, list) and links_list:
href = next((l.get('href') for l in links_list if l.get('href')), None)
href = next((l.get("href") for l in links_list if l.get("href")), None)
link = href
if not link:
app.logger.error(f"[ENTRY ERROR] Entrada sin link en feed {rss_url}, salto entrada.")
app.logger.error(
f"[ENTRY ERROR] Entrada sin link en feed {rss_url}, salto entrada."
)
continue
try:
noticia_id = hashlib.md5(link.encode()).hexdigest()
titulo = entry.get('title', '')
resumen = entry.get('summary', '')
imagen_url = ''
titulo = entry.get("title", "")
resumen = entry.get("summary", "")
imagen_url = ""
fecha = None
if 'media_content' in entry:
imagen_url = entry.media_content[0].get('url', '')
if "media_content" in entry:
imagen_url = entry.media_content[0].get("url", "")
else:
img = re.search(r'<img.+?src="(.+?)"', resumen)
img = re.search(r"<img.+?src=\"(.+?)\"", resumen)
if img:
imagen_url = img.group(1)
published = entry.get('published_parsed') or entry.get('updated_parsed')
published = entry.get("published_parsed") or entry.get("updated_parsed")
if published:
try:
fecha = datetime(*published[:6])
@ -347,11 +426,21 @@ def fetch_and_store():
cursor.execute(
"""
INSERT IGNORE INTO noticias
(id, titulo, resumen, url, fecha, imagen_url, categoria_id, pais_id)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
INSERT INTO noticias (
id, titulo, resumen, url, fecha, imagen_url, categoria_id, pais_id
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (id) DO NOTHING
""",
(noticia_id, titulo, resumen, link, fecha, imagen_url, categoria_id, pais_id)
(
noticia_id,
titulo,
resumen,
link,
fecha,
imagen_url,
categoria_id,
pais_id,
),
)
except Exception as entry_err:
app.logger.error(
@ -362,15 +451,21 @@ def fetch_and_store():
try:
conn.commit()
except Exception as commit_err:
app.logger.error(f"[DB ERROR] Al confirmar transacción: {commit_err}", exc_info=True)
app.logger.error(
f"[DB ERROR] Al confirmar transacción: {commit_err}", exc_info=True
)
finally:
if conn:
conn.close()
app.logger.info(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Feeds procesados.")
if __name__ == '__main__':
# ---------------------------------------------------------------------------
# Lanzador de la aplicación + scheduler
# ---------------------------------------------------------------------------
if __name__ == "__main__":
scheduler = BackgroundScheduler()
scheduler.add_job(fetch_and_store, 'interval', minutes=2, id='rss_job')
scheduler.add_job(fetch_and_store, "interval", minutes=2, id="rss_job")
scheduler.start()
try:
app.run(host="0.0.0.0", port=5000, debug=True)

View file

@ -1,5 +1,4 @@
flask==2.3.3
Flask==2.3.3
feedparser==6.0.11
mysql-connector-python==8.3.0
APScheduler==3.10.4
psycopg2-binary==2.9.10 # conector PostgreSQL

View file

@ -39,6 +39,11 @@
<option value="{{ pid }}">{{ pnom }}</option>
{% endfor %}
</select>
<!-- Nuevo campo: idioma -->
<label for="idioma">Idioma</label>
<input id="idioma" name="idioma" maxlength="2" placeholder="Ej: es, en, fr">
<button class="btn" type="submit">Añadir</button>
<!-- Datos en JSON para el filtro dinámico de países -->
<script type="application/json" id="paises-data">{{ paises|tojson }}</script>

View file

@ -15,10 +15,12 @@
{% endif %}
<p style="font-size:0.92em;color:#64748b;">
El archivo debe contener las columnas:<br>
<code>id, nombre, [descripcion,] url, categoria_id, categoria, pais_id, pais, activo</code><br>
<small>La columna <b>descripcion</b> es opcional.</small>
<code>id, nombre, [descripcion,] url, categoria_id, categoria, pais_id, pais, idioma, activo, fallos</code><br>
<small>
Las columnas <b>descripcion</b> e <b>idioma</b> son opcionales.<br>
<b>idioma</b> debe ser el código ISO 639-1 de dos letras (ej: es, en, fr...).
</small>
</p>
</div>
<a href="/feeds" class="top-link">← Volver a feeds</a>
{% endblock %}