Actualización del 2025-06-15 a las 22:45:55

2025-06-15 22:45:55 +02:00 · 2025-06-15 22:45:55 +02:00 · eb72ec9e56
commit eb72ec9e56
parent 603149d47a
5 changed files with 114 additions and 94 deletions
--- a/app.py
+++ b/app.py
@ -93,7 +93,8 @@ def home():
                paises = cursor.fetchall()
                sql_params, conditions = [], []
-                sql_base = "SELECT n.fecha, n.titulo, n.resumen, n.url, n.imagen_url, c.nombre AS categoria, p.nombre AS pais, co.nombre AS continente FROM noticias n LEFT JOIN categorias c ON n.categoria_id = c.id LEFT JOIN paises p ON n.pais_id = p.id LEFT JOIN continentes co ON p.continente_id = co.id"
+                # --- CORRECCIÓN: SE AÑADE 'fuente_nombre' AL SELECT ---
                sql_base = "SELECT n.fecha, n.titulo, n.resumen, n.url, n.imagen_url, n.fuente_nombre, c.nombre AS categoria, p.nombre AS pais, co.nombre AS continente FROM noticias n LEFT JOIN categorias c ON n.categoria_id = c.id LEFT JOIN paises p ON n.pais_id = p.id LEFT JOIN continentes co ON p.continente_id = co.id"
                if q:
                    search_query = " & ".join(q.split())
@ -361,40 +362,44 @@ def scrape_url():
            flash("Debes seleccionar una fuente para procesar.", "error")
            return redirect(url_for('scrape_url'))
        source = None
        try:
            with get_conn() as conn:
                with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
                    cursor.execute("SELECT * FROM fuentes_url WHERE id = %s", (source_id,))
                    source = cursor.fetchone()
                if not source:
                    flash("La fuente seleccionada no existe.", "error")
                    return redirect(url_for('scrape_url'))
                lista_noticias, message = process_newspaper_url(source['url'], source['categoria_id'], source['pais_id'], source['idioma'])
                if lista_noticias:
                    # Se necesita una nueva conexión/cursor para la inserción
                    with get_conn() as insert_conn:
                        with insert_conn.cursor() as insert_cursor:
                            insert_query = """
                                INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url, categoria_id, pais_id)
                                VALUES %s
                                ON CONFLICT (url) DO UPDATE SET
                                    titulo = EXCLUDED.titulo,
                                    resumen = EXCLUDED.resumen,
                                    fecha = EXCLUDED.fecha,
                                    imagen_url = EXCLUDED.imagen_url;
                            """
                            psycopg2.extras.execute_values(insert_cursor, insert_query, lista_noticias)
                    flash(f"Se encontraron y guardaron {len(lista_noticias)} noticias desde '{source['nombre']}'.", "success")
                    return redirect(url_for("home"))
                else:
                    flash(message, "warning")
        except psycopg2.Error as db_err:
-            app.logger.error(f"[DB ERROR] Al procesar fuente URL: {db_err}", exc_info=True)
+            app.logger.error(f"[DB ERROR] Al buscar fuente URL: {db_err}", exc_info=True)
-            flash(f"Error de base de datos al procesar la fuente: {db_err}", "error")
+            flash("Error de base de datos al buscar la fuente.", "error")
            return redirect(url_for('scrape_url'))
        if not source:
            flash("La fuente seleccionada no existe.", "error")
            return redirect(url_for('scrape_url'))
        lista_noticias, message = process_newspaper_url(source['nombre'], source['url'], source['categoria_id'], source['pais_id'], source['idioma'])
        if lista_noticias:
            try:
                with get_conn() as conn:
                    with conn.cursor() as cursor:
                        insert_query = """
                            INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url, fuente_nombre, categoria_id, pais_id)
                            VALUES %s
                            ON CONFLICT (url) DO UPDATE SET
                                titulo = EXCLUDED.titulo,
                                resumen = EXCLUDED.resumen,
                                fecha = EXCLUDED.fecha,
                                imagen_url = EXCLUDED.imagen_url;
                        """
                        psycopg2.extras.execute_values(cursor, insert_query, lista_noticias)
                flash(f"Se encontraron y guardaron {len(lista_noticias)} noticias desde '{source['nombre']}'.", "success")
                return redirect(url_for("home"))
            except psycopg2.Error as db_err:
                app.logger.error(f"[DB ERROR] Al insertar noticias scrapeadas: {db_err}", exc_info=True)
                flash(f"Error de base de datos al guardar las noticias: {db_err}", "error")
        else:
            flash(message, "warning")
        return redirect(url_for('scrape_url'))
@ -437,7 +442,7 @@ def backup_noticias():
    try:
        with get_conn() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
-                cursor.execute("SELECT n.id, n.titulo, n.resumen, n.url, n.fecha, n.imagen_url, c.nombre AS categoria, p.nombre AS pais, co.nombre AS continente FROM noticias n LEFT JOIN categorias c ON n.categoria_id = c.id LEFT JOIN paises p ON n.pais_id = p.id LEFT JOIN continentes co ON p.continente_id = co.id ORDER BY n.fecha DESC")
+                cursor.execute("SELECT n.id, n.titulo, n.resumen, n.url, n.fecha, n.imagen_url, n.fuente_nombre, c.nombre AS categoria, p.nombre AS pais, co.nombre AS continente FROM noticias n LEFT JOIN categorias c ON n.categoria_id = c.id LEFT JOIN paises p ON n.pais_id = p.id LEFT JOIN continentes co ON p.continente_id = co.id ORDER BY n.fecha DESC")
                noticias = cursor.fetchall()
        if not noticias:
            flash("No hay noticias para exportar.", "warning")
@ -471,7 +476,7 @@ def backup_completo():
                        writer.writerows([dict(f) for f in feeds_data])
                        zipf.writestr("feeds.csv", output.getvalue())
-                    cursor.execute("SELECT n.id, n.titulo, n.resumen, n.url, n.fecha, n.imagen_url, c.nombre AS categoria, p.nombre AS pais, co.nombre AS continente FROM noticias n LEFT JOIN categorias c ON n.categoria_id = c.id LEFT JOIN paises p ON n.pais_id = p.id LEFT JOIN continentes co ON p.continente_id = co.id ORDER BY n.fecha DESC")
+                    cursor.execute("SELECT n.id, n.titulo, n.resumen, n.url, n.fecha, n.imagen_url, n.fuente_nombre, c.nombre AS categoria, p.nombre AS pais, co.nombre AS continente FROM noticias n LEFT JOIN categorias c ON n.categoria_id = c.id LEFT JOIN paises p ON n.pais_id = p.id LEFT JOIN continentes co ON p.continente_id = co.id ORDER BY n.fecha DESC")
                    noticias_data = cursor.fetchall()
                    if noticias_data:
                        fieldnames_noticias = list(noticias_data[0].keys())
@ -543,7 +548,8 @@ def fetch_and_store():
            with get_conn() as conn:
                with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
                    logging.info("Paso 1: Obteniendo lista de feeds...")
-                    cursor.execute("SELECT id, url, categoria_id, pais_id, last_etag, last_modified FROM feeds WHERE activo = TRUE")
+                    # --- CORRECCIÓN: Se añade 'nombre' al SELECT ---
                    cursor.execute("SELECT id, nombre, url, categoria_id, pais_id, last_etag, last_modified FROM feeds WHERE activo = TRUE")
                    feeds_to_process = cursor.fetchall()
                    logging.info(f"Paso 2: {len(feeds_to_process)} feeds para procesar.")
        except psycopg2.Error as db_err:
@ -608,9 +614,10 @@ def fetch_and_store():
                if todas_las_noticias:
                    logging.info(f"Intentando insertar {len(todas_las_noticias)} noticias en la base de datos.")
                    with conn.cursor() as cursor_news_insert:
                        # --- CORRECCIÓN: Se añade 'fuente_nombre' a la consulta INSERT ---
                        psycopg2.extras.execute_values(
                            cursor_news_insert,
-                            "INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url, categoria_id, pais_id) VALUES %s ON CONFLICT (id) DO NOTHING",
+                            "INSERT INTO noticias (id, titulo, resumen, url, fecha, imagen_url, fuente_nombre, categoria_id, pais_id) VALUES %s ON CONFLICT (url) DO NOTHING",
                            todas_las_noticias
                        )
                        rows_inserted = cursor_news_insert.rowcount
--- a/feed_processor.py
+++ b/feed_processor.py
@ -1,15 +1,12 @@
 # /home/x/rss/feed_processor.py
 import hashlib
 from datetime import datetime
 import logging
 import feedparser
 from bs4 import BeautifulSoup
 import requests
-import xml.sax._exceptions # Make sure this import is present
+import xml.sax._exceptions
-# You might want to define these constants in a central config or pass them
+NETWORK_TIMEOUT = 15 # segundos
 NETWORK_TIMEOUT = 15 # seconds for fetching the feed
 def process_single_feed(feed_data):
    """
@ -17,6 +14,9 @@ def process_single_feed(feed_data):
    """
    feed_id = feed_data['id']
    feed_url = feed_data['url']
    # --- LÍNEA CLAVE ---
    # Obtenemos el nombre del feed para usarlo como fuente de la noticia.
    feed_nombre = feed_data.get('nombre', 'Fuente Desconocida')
    etag = feed_data.get('last_etag')
    modified = feed_data.get('last_modified')
@ -25,43 +25,28 @@ def process_single_feed(feed_data):
    success = False
    try:
-        headers = {}
+        headers = {'User-Agent': 'RssApp/1.0'}
        if etag:
            headers['If-None-Match'] = etag
        if modified:
            headers['If-Modified-Since'] = modified
        response = requests.get(feed_url, headers=headers, timeout=NETWORK_TIMEOUT)
-        response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx)
+        response.raise_for_status()
        if response.status_code == 304:
            logging.info(f"Feed {feed_url} (ID: {feed_id}) no modificado (304).")
            # Return existing etag/modified if not modified, as per standard HTTP caching
            return feed_id, [], etag, modified, True
        parsed = feedparser.parse(response.content)
-        # Check if parsed.bozo is set, meaning there was an issue during parsing
+        if parsed.bozo and isinstance(parsed.bozo_exception, xml.sax._exceptions.SAXParseException):
-        if parsed.bozo:
+            logging.error(f"Feed malformado para {feed_url} (ID: {feed_id}): {parsed.bozo_exception}")
-            # feedparser.bozo_exception will contain the actual exception
+            return feed_id, [], None, None, False
            # We catch specific bozo exceptions for better error logging
            if isinstance(parsed.bozo_exception, (feedparser.CharacterEncodingOverride, feedparser.NonXMLContentType)):
                # These are usually harmless warnings; we can proceed
                logging.warning(f"Advertencia al parsear feed {feed_url} (ID: {feed_id}): {parsed.bozo_exception}")
            elif isinstance(parsed.bozo_exception, xml.sax._exceptions.SAXParseException):
                # This is a critical parsing error (e.g., invalid XML)
                logging.error(f"Feed malformado para {feed_url} (ID: {feed_id}): {parsed.bozo_exception}")
                return feed_id, [], None, None, False # Indicate failure due to parsing error
            else:
                # Catch any other unexpected bozo exceptions
                logging.error(f"Excepción inesperada de bozo en feed {feed_url} (ID: {feed_id}): {parsed.bozo_exception}")
                return feed_id, [], None, None, False # Indicate failure
        # Proceed only if parsing was successful or had minor warnings
        if not parsed.entries:
-             logging.warning(f"Feed {feed_url} (ID: {feed_id}) no contiene entradas.")
+            logging.warning(f"Feed {feed_url} (ID: {feed_id}) no contiene entradas.")
-             # If no entries but parsing was successful, update etag/modified
+            return feed_id, [], parsed.get('etag'), parsed.get('modified'), True
             return feed_id, [], parsed.get('etag'), parsed.get('modified'), True
        for entry in parsed.entries:
            link = entry.get("link")
@ -73,9 +58,7 @@ def process_single_feed(feed_data):
            resumen_html = entry.get("summary", "")
            imagen_url = ""
            # Attempt to get image from media:content or from HTML summary
            if "media_content" in entry and entry.media_content:
                # Assuming the first media_content is the relevant one with a 'url'
                imagen_url = entry.media_content[0].get("url", "")
            elif resumen_html:
                soup = BeautifulSoup(resumen_html, 'html.parser')
@ -84,17 +67,19 @@ def process_single_feed(feed_data):
                    imagen_url = img_tag['src']
            resumen_texto_plano = BeautifulSoup(resumen_html, 'html.parser').get_text(separator=' ', strip=True)
-            fecha_publicacion = datetime.now() # Default to now if no publication date
+            fecha_publicacion = datetime.now()
            if hasattr(entry, 'published_parsed') and entry.published_parsed:
                fecha_publicacion = datetime(*entry.published_parsed[:6])
            elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
                fecha_publicacion = datetime(*entry.updated_parsed[:6])
            # --- LÍNEA CLAVE ---
            # Añadimos 'feed_nombre' a la tupla de datos que se guardará en la BD.
            noticias_encontradas.append(
-                (noticia_id, titulo, resumen_texto_plano, link, fecha_publicacion, imagen_url, feed_data['categoria_id'], feed_data['pais_id'])
+                (noticia_id, titulo, resumen_texto_plano, link, fecha_publicacion, 
                 imagen_url, feed_nombre, feed_data['categoria_id'], feed_data['pais_id'])
            )
        # Get ETag and Last-Modified headers from the response
        new_etag = response.headers.get('ETag')
        new_modified = response.headers.get('Last-Modified')
        success = True
@ -103,7 +88,8 @@ def process_single_feed(feed_data):
        logging.error(f"TIMEOUT al intentar obtener el feed {feed_url} (ID: {feed_id})")
    except requests.exceptions.RequestException as e:
        logging.error(f"Error de HTTP/red para el feed {feed_url} (ID: {feed_id}): {e}")
-    except Exception as e: # General Exception for any other unexpected errors during the process
+    except Exception as e:
        logging.error(f"Excepción inesperada al procesar el feed {feed_url} (ID: {feed_id}): {e}", exc_info=True)
    return feed_id, noticias_encontradas, new_etag, new_modified, success
--- a/install.sh
+++ b/install.sh
@ -47,7 +47,7 @@ rm -f /etc/systemd/system/$APP_NAME*
 systemctl daemon-reload
 echo "   -> Servicios systemd limpiados."
-echo "🟢 Paso 1: Instalando dependencias del sistema (PostgreSQL, Python, Gunicorn...)"
+echo "🟢 Paso 1: Instalando dependencias del sistema..."
 apt-get update
 apt-get install -y wget ca-certificates postgresql postgresql-contrib python3-venv python3-pip python3-dev libpq-dev gunicorn
@ -91,7 +91,7 @@ else
    echo "⚠️ ADVERTENCIA: No se encontró download_models.py. El scraping de URLs puede fallar."
 fi
-echo "📐 Paso 4: Creando esquema de BD, configurando FTS y sembrando datos desde archivos .sql..."
+echo "📐 Paso 4: Creando esquema de BD y sembrando datos..."
 export PGPASSWORD="$DB_PASS"
 psql -U "$DB_USER" -h localhost -d "$DB_NAME" <<SQL
@ -100,7 +100,7 @@ CREATE TABLE IF NOT EXISTS categorias (id SERIAL PRIMARY KEY, nombre VARCHAR(100
 CREATE TABLE IF NOT EXISTS paises (id SERIAL PRIMARY KEY, nombre VARCHAR(100) NOT NULL UNIQUE, continente_id INTEGER REFERENCES continentes(id) ON DELETE SET NULL);
 CREATE TABLE IF NOT EXISTS feeds (id SERIAL PRIMARY KEY, nombre VARCHAR(255), descripcion TEXT, url TEXT NOT NULL UNIQUE, categoria_id INTEGER REFERENCES categorias(id) ON DELETE SET NULL, pais_id INTEGER REFERENCES paises(id) ON DELETE SET NULL, idioma CHAR(2), activo BOOLEAN DEFAULT TRUE, fallos INTEGER DEFAULT 0, last_etag TEXT, last_modified TEXT);
 CREATE TABLE IF NOT EXISTS fuentes_url (id SERIAL PRIMARY KEY, nombre VARCHAR(255) NOT NULL, url TEXT NOT NULL UNIQUE, categoria_id INTEGER REFERENCES categorias(id) ON DELETE SET NULL, pais_id INTEGER REFERENCES paises(id) ON DELETE SET NULL, idioma CHAR(2) DEFAULT 'es');
-CREATE TABLE IF NOT EXISTS noticias (id VARCHAR(32) PRIMARY KEY, titulo TEXT, resumen TEXT, url TEXT NOT NULL UNIQUE, fecha TIMESTAMP, imagen_url TEXT, categoria_id INTEGER REFERENCES categorias(id) ON DELETE SET NULL, pais_id INTEGER REFERENCES paises(id) ON DELETE SET NULL, tsv tsvector);
+CREATE TABLE IF NOT EXISTS noticias (id VARCHAR(32) PRIMARY KEY, titulo TEXT, resumen TEXT, url TEXT NOT NULL UNIQUE, fecha TIMESTAMP, imagen_url TEXT, fuente_nombre VARCHAR(255), categoria_id INTEGER REFERENCES categorias(id) ON DELETE SET NULL, pais_id INTEGER REFERENCES paises(id) ON DELETE SET NULL, tsv tsvector);
 ALTER TABLE noticias ADD COLUMN IF NOT EXISTS tsv tsvector;
 CREATE OR REPLACE FUNCTION noticias_tsv_trigger() RETURNS trigger AS \$\$ BEGIN new.tsv := setweight(to_tsvector('spanish', coalesce(new.titulo,'')), 'A') || setweight(to_tsvector('spanish', coalesce(new.resumen,'')), 'B'); return new; END \$\$ LANGUAGE plpgsql;
 DROP TRIGGER IF EXISTS tsvectorupdate ON noticias;
@ -167,7 +167,7 @@ Environment="DB_PORT=5432"
 Environment="DB_NAME=$DB_NAME"
 Environment="DB_USER=$DB_USER"
 Environment="DB_PASS=$DB_PASS"
-ExecStart=$PYTHON_ENV/bin/gunicorn --workers 3 --bind 0.0.0.0:$WEB_PORT $WSGI_APP_ENTRY
+ExecStart=$PYTHON_ENV/bin/gunicorn --workers 3 --bind 0.0.0.0:$WEB_PORT --timeout 120 $WSGI_APP_ENTRY
 Restart=always
 [Install]
 WantedBy=multi-user.target
--- a/templates/_noticias_list.html
+++ b/templates/_noticias_list.html
@ -1,24 +1,37 @@
-<div class="noticias-list">
+<ul class="noticias-list">
    {% for noticia in noticias %}
-        <article class="noticia-item">
+    <li class="noticia-item">
-            {% if noticia.imagen_url %}
+        {% if noticia.imagen_url %}
-            <div class="noticia-imagen">
+        <div class="noticia-imagen">
-                <a href="{{ noticia.url }}" target="_blank" rel="noopener noreferrer"><img src="{{ noticia.imagen_url }}" alt="{{ noticia.titulo }}" loading="lazy"></a>
+            <a href="{{ noticia.url }}" target="_blank" rel="noopener noreferrer"><img src="{{ noticia.imagen_url }}" alt="Imagen para {{ noticia.titulo }}" loading="lazy"></a>
            </div>
            {% endif %}
            <div class="noticia-texto">
                <h3><a href="{{ noticia.url }}" target="_blank" rel="noopener noreferrer">{{ noticia.titulo }}</a></h3>
                <div class="noticia-meta">
                    <span><i class="far fa-calendar-alt"></i> {{ noticia.fecha.strftime('%d-%m-%Y %H:%M') if noticia.fecha else 'N/D' }}</span> |
                    <span><i class="fas fa-tag"></i> {{ noticia.categoria or 'N/A' }}</span> |
                    <span><i class="fas fa-globe-americas"></i> {{ noticia.pais or 'Global' }}</span>
                </div>
                <p>{{ noticia.resumen | striptags | safe_html | truncate(280) }}</p>
            </div>
        </article>
    {% else %}
        <div class="card" style="text-align:center;">
            <p><i class="fas fa-info-circle"></i> No hay noticias que mostrar con los filtros seleccionados.</p>
        </div>
        {% endif %}
        <div class="noticia-texto">
            <h3><a href="{{ noticia.url }}" target="_blank" rel="noopener noreferrer">{{ noticia.titulo }}</a></h3>
            <div class="noticia-meta">
                <span><i class="far fa-calendar-alt"></i> {{ noticia.fecha.strftime('%d-%m-%Y %H:%M') if noticia.fecha else 'N/D' }}</span>
                <!-- INICIO DE LA MODIFICACIÓN: Se añade la fuente de la noticia -->
                {% if noticia.fuente_nombre %}
                | <span><i class="fas fa-newspaper"></i> <strong>{{ noticia.fuente_nombre }}</strong></span>
                {% endif %}
                <!-- FIN DE LA MODIFICACIÓN -->
                {% if noticia.categoria %}
                | <span><i class="fas fa-tag"></i> {{ noticia.categoria }}</span>
                {% endif %}
                {% if noticia.pais %}
                | <span><i class="fas fa-globe-americas"></i> {{ noticia.pais }}</span>
                {% endif %}
            </div>
            <p>{{ noticia.resumen | safe_html | truncate(280) }}</p>
        </div>
    </li>
    {% else %}
    <li class="text-center p-4">
        <i class="fas fa-info-circle"></i> No hay noticias que mostrar con los filtros seleccionados.
    </li>
    {% endfor %}
-</div>
+</ul>
--- a/url_processor.py
+++ b/url_processor.py
@ -11,19 +11,25 @@ def _process_individual_article(article_url, config):
    Está diseñada para ser ejecutada en un hilo separado.
    """
    try:
        # Es crucial crear un nuevo objeto Article dentro de cada hilo.
        article = newspaper.Article(article_url, config=config)
        article.download()
        # Un artículo necesita ser parseado para tener título, texto, etc.
        article.parse()
        # Si no se pudo obtener título o texto, no es un artículo válido.
        if not article.title or not article.text:
            return None
        # El método nlp() es necesario para el resumen.
        article.nlp()
        return article
    except Exception:
        # Ignoramos errores en artículos individuales (p.ej., enlaces rotos, etc.)
        return None
-def process_newspaper_url(url, categoria_id, pais_id, idioma='es'):
+def process_newspaper_url(source_name, url, categoria_id, pais_id, idioma='es'):
    """
    Explora la URL de un periódico, extrae los artículos que encuentra
    en paralelo y devuelve una lista de noticias listas para la base de datos.
@ -35,32 +41,39 @@ def process_newspaper_url(url, categoria_id, pais_id, idioma='es'):
    try:
        config = Config()
        config.browser_user_agent = 'RssApp/1.0 (Scraper)'
-        config.request_timeout = 15
+        config.request_timeout = 15 # Timeout más corto para artículos individuales.
-        config.memoize_articles = False
+        config.memoize_articles = False # No guardar en caché para obtener siempre lo último.
        # Usamos el idioma proporcionado para mejorar la extracción
        source = newspaper.build(url, config=config, language=idioma)
        # Limitar el número de artículos para no sobrecargar el servidor.
        articles_to_process = source.articles[:25]
        logging.info(f"Fuente construida. Procesando {len(articles_to_process)} artículos en paralelo...")
        # Usamos un ThreadPoolExecutor para procesar los artículos concurrentemente.
        with ThreadPoolExecutor(max_workers=10) as executor:
            # Creamos un futuro para cada URL de artículo.
            future_to_article = {executor.submit(_process_individual_article, article.url, config): article for article in articles_to_process}
            for future in as_completed(future_to_article):
                processed_article = future.result()
                # Si el artículo se procesó correctamente, lo añadimos a la lista.
                if processed_article:
                    noticia_id = hashlib.md5(processed_article.url.encode()).hexdigest()
                    if processed_article.summary:
                        resumen = processed_article.summary
                    else:
                        # Fallback a un extracto del texto si no hay resumen.
                        resumen = (processed_article.text[:400] + '...') if len(processed_article.text) > 400 else processed_article.text
                    fecha = processed_article.publish_date if processed_article.publish_date else datetime.now()
                    # --- LÍNEA CLAVE ---
                    # Añadimos 'source_name' a la tupla de datos
                    todas_las_noticias.append((
                        noticia_id,
                        processed_article.title,
@ -68,6 +81,7 @@ def process_newspaper_url(url, categoria_id, pais_id, idioma='es'):
                        processed_article.url,
                        fecha,
                        processed_article.top_image or '',
                        source_name,
                        categoria_id,
                        pais_id
                    ))