rss2/routers/pdf.py
2026-01-13 13:39:51 +01:00

88 lines
3.2 KiB
Python
Raw Permalink Blame History

"""
PDF Export router.
"""
from flask import Blueprint, make_response, render_template, url_for
from db import get_conn
from psycopg2 import extras
from weasyprint import HTML
import logging
import re
from io import BytesIO
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Blueprint holding the PDF export routes; routes registered on it are
# served under the "/pdf" URL prefix.
pdf_bp = Blueprint("pdf", __name__, url_prefix="/pdf")
# Control characters stripped before rendering: the C0 range except tab
# (\x09), line feed (\x0A) and carriage return (\x0D), plus DEL and the C1
# range (\x7F-\x9F).
_CONTROL_CHARS_RE = re.compile(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F-\x9F]')


def clean_text(text):
    """Strip tokens and characters that are problematic for PDF generation.

    Args:
        text: The string to clean. Any falsy value (``None``, ``""``) is
            treated as empty.

    Returns:
        The text with ``<unk>`` tokens, replacement-character markers and
        control characters removed and surrounding whitespace stripped;
        ``""`` for falsy input.
    """
    if not text:
        return ""
    # Remove <unk> placeholder tokens.
    text = text.replace('<unk>', '')
    # Remove the literal "<EFBFBD>" marker (the hex spelling of the UTF-8
    # bytes of U+FFFD) ...
    text = text.replace('<EFBFBD>', '')
    # ... and the actual U+FFFD replacement character, which the literal
    # marker above does not match.
    text = text.replace('\ufffd', '')
    # Drop control characters; tab, newline and carriage return are kept.
    text = _CONTROL_CHARS_RE.sub('', text)
    return text.strip()
def _fetch_noticia(noticia_id):
    """Return the noticia row joined with its Spanish translation, category and country as a dict, or None if no row matches."""
    with get_conn() as conn:
        with conn.cursor(cursor_factory=extras.DictCursor) as cur:
            cur.execute("""
                SELECT
                    n.*,
                    t.titulo_trad, t.resumen_trad, t.lang_to,
                    c.nombre as categoria_nombre,
                    p.nombre as pais_nombre
                FROM noticias n
                LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.status = 'done' AND t.lang_to = 'es'
                LEFT JOIN categorias c ON c.id = n.categoria_id
                LEFT JOIN paises p ON p.id = n.pais_id
                WHERE n.id = %s
            """, (noticia_id,))
            row = cur.fetchone()
    return dict(row) if row else None


@pdf_bp.route("/noticia/<noticia_id>")
def export_noticia(noticia_id):
    """Export a news item as a downloadable PDF.

    Args:
        noticia_id: Identifier of the news item, taken from the URL path.

    Returns:
        A PDF attachment response on success, a plain 404 response when no
        row matches ``noticia_id``, or a plain 500 response when the lookup
        or the PDF generation fails.
    """
    try:
        d = _fetch_noticia(noticia_id)
        if d is None:
            return "Noticia no encontrada", 404

        # Prefer the translated content when available, and clean it.
        titulo = clean_text(d.get('titulo_trad') or d.get('titulo'))
        resumen = clean_text(d.get('resumen_trad') or d.get('resumen'))

        # Selected columns are always present as keys, so a NULL column
        # surfaces as None and a dict.get() default would never apply;
        # "or ''" keeps None out of the template.
        html_content = render_template(
            "pdf_template.html",
            titulo=titulo,
            resumen=resumen,
            fecha=d.get('fecha') or '',
            # Presumably a column of noticias itself (arrives via n.*).
            fuente=d.get('fuente_nombre') or '',
            categoria=d.get('categoria_nombre') or '',
            url=d.get('url') or '',
            # External images are disabled to avoid SSL/network errors
            # while rendering.
            imagen_url=None,
        )

        logger.info("Generating PDF for noticia %s", noticia_id)

        # Render the PDF into an in-memory buffer with WeasyPrint.
        pdf_file = BytesIO()
        HTML(string=html_content).write_pdf(pdf_file)
        pdf_bytes = pdf_file.getvalue()

        # noticia_id comes straight from the URL; restrict it to a safe
        # token before placing it in a response header.
        safe_id = re.sub(r'[^A-Za-z0-9_.-]', '_', str(noticia_id))

        response = make_response(pdf_bytes)
        response.headers['Content-Type'] = 'application/pdf'
        response.headers['Content-Disposition'] = f'attachment; filename=noticia_{safe_id}.pdf'

        logger.info("PDF generated successfully for noticia %s", noticia_id)
        return response
    except Exception as e:
        # Top-level boundary for this route: log the full traceback, but do
        # not return the exception text. The body is served as HTML and the
        # error may echo request input or internal details.
        logger.exception("Error generando PDF para noticia %s: %s", noticia_id, e)
        return "Error generando PDF", 500