optimizaciones

2025-11-24 02:37:05 +01:00 · 2025-11-24 02:37:05 +01:00 · 86ee083b90
commit 86ee083b90
parent 937da3f90b
5 changed files with 26 additions and 100 deletions
--- a/feed_processor.py
+++ b/feed_processor.py
@@ -1,5 +1,5 @@
 import hashlib
-import re # <-- CORRECCIÓN: Se importa el módulo de expresiones regulares
+import re
 from datetime import datetime
 import logging
 import feedparser
@@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
 import requests
 import xml.sax._exceptions

-NETWORK_TIMEOUT = 15 # segundos
+NETWORK_TIMEOUT = 15

 def process_single_feed(feed_data):
    """
@@ -55,14 +55,12 @@ def process_single_feed(feed_data):
            noticia_id = hashlib.md5(link.encode()).hexdigest()
            titulo = entry.get("title", "Sin título")
            
-            # --- MEJORA: Se prioriza el contenido completo sobre el resumen para obtener más texto ---
            resumen_html = ""
            if hasattr(entry, 'content') and entry.content:
                resumen_html = entry.content[0].value
            elif hasattr(entry, 'summary'):
                resumen_html = entry.summary

-            # --- CORRECCIÓN: Limpia los códigos de media personalizados y otros artefactos ---
            if resumen_html:
                resumen_html = re.sub(r'\[\[\{.*?\}\]\]', '', resumen_html)

@@ -84,8 +82,17 @@ def process_single_feed(feed_data):
                fecha_publicacion = datetime(*entry.updated_parsed[:6])

            noticias_encontradas.append(
-                (noticia_id, titulo, resumen_texto_plano, link, fecha_publicacion, 
-                 imagen_url, feed_nombre, feed_data['categoria_id'], feed_data['pais_id'])
+                (
+                    noticia_id,
+                    titulo,
+                    resumen_texto_plano,
+                    link,
+                    fecha_publicacion,
+                    imagen_url,
+                    feed_nombre,
+                    feed_data['categoria_id'],
+                    feed_data['pais_id']
+                )
            )

        new_etag = response.headers.get('ETag')
@@ -100,3 +107,4 @@ def process_single_feed(feed_data):
        logging.error(f"Excepción inesperada al procesar el feed {feed_url} (ID: {feed_id}): {e}", exc_info=True)

    return feed_id, noticias_encontradas, new_etag, new_modified, success
+