Add image pipeline and demo files from pruebas branch

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
CAPITANSITO 2026-04-01 00:44:37 +02:00
parent 1f7bba58e6
commit 932e8e80db
14 changed files with 2663 additions and 0 deletions

View file

@@ -0,0 +1,30 @@
"""Script de debug para ver qué devuelve la API de Wikipedia/Wikimedia."""
import requests
from wikipedia_image_scraper import (
search_articles, get_article_images, get_image_info, should_skip, SKIP_PATTERNS
)
# Debug walkthrough: search articles -> list the first article's images ->
# dump metadata for a handful of them. Each step prints what the helper
# returned so the raw responses can be inspected by eye.

# 1. Search for articles
print("=== ARTÍCULOS ===")
articles = search_articles("cambio climático", lang="es", limit=2)
for a in articles:
    print(f" {a['title']}")

# 2. Images of the first article
print("\n=== IMÁGENES DEL ARTÍCULO ===")
if articles:
    img_titles = get_article_images(articles[0]["title"], lang="es", limit=10)
else:
    # An empty search result used to crash on articles[0] with IndexError;
    # report it and carry on with no images so the script ends cleanly.
    print(" (sin artículos: la búsqueda no devolvió resultados)")
    img_titles = []
for t in img_titles:
    print(f" {t}")

# 3. Info for the first 5 images
print("\n=== INFO DE CADA IMAGEN ===")
for title in img_titles[:5]:
    print(f"\n Título: {title}")
    info = get_image_info(title)
    if info is None:
        # No metadata came back for this title; nothing more to print.
        print(" → get_image_info devolvió None")
        continue
    # Truncate the URL to 80 characters to keep the output on one line.
    print(f" url: {info.get('url', 'N/A')[:80]}")
    print(f" size: {info.get('width')}x{info.get('height')} {info.get('size_bytes')}B")
    # should_skip yields a (flag, reason) pair; show both.
    skip, motivo = should_skip(title, info)
    print(f" skip: {skip} ({motivo})")