Initial clean commit

This commit is contained in:
jlimolina 2026-01-13 13:39:51 +01:00
commit 6784d81c2c
141 changed files with 25219 additions and 0 deletions

View file

@ -0,0 +1,70 @@
import logging
import sys
import os
from concurrent.futures import ThreadPoolExecutor
# Append the parent of this script's directory to sys.path before the
# project-local imports below (presumably where `db` and `utils` live — confirm).
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from db import get_read_conn
from utils.wiki import fetch_wiki_data
# Configure root logging at INFO with a timestamped format, then create this module's logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
def get_top_entities():
    """Return the most frequently tagged entities of the last 30 days.

    Runs one ranking query per tag type: the top 100 ``persona`` values,
    the top 50 ``organizacion`` values and the top 50 ``lugar`` values.
    The ranking counts ``tags`` rows joined through ``tags_noticia`` to
    ``traducciones`` rows whose ``created_at`` is within the last 30 days.

    Returns:
        list: Unique tag values in ranking order (people first, then
        organizations, then places; most frequent first within each
        type). A value that appears under more than one type is kept at
        its first position only.

    Any exception raised while querying is logged and suppressed; the
    values collected before the failure (possibly none) are returned.
    """
    # (tag type, maximum number of rows) for each category to fetch.
    quotas = (
        ('persona', 100),
        ('organizacion', 50),
        ('lugar', 50),
    )
    query = """
        SELECT t.valor, COUNT(*) as c
        FROM tags t
        JOIN tags_noticia tn ON t.id = tn.tag_id
        JOIN traducciones tr ON tn.traduccion_id = tr.id
        WHERE tr.created_at > NOW() - INTERVAL '30 days'
        AND t.tipo = %s
        GROUP BY t.valor
        ORDER BY c DESC
        LIMIT %s
    """
    entities = []
    try:
        with get_read_conn() as conn:
            with conn.cursor() as cur:
                for tipo, limit in quotas:
                    cur.execute(query, (tipo, limit))
                    entities.extend(row[0] for row in cur.fetchall())
    except Exception as e:
        # Best-effort job: keep whatever was fetched, but record the
        # traceback so the failure can be diagnosed.
        logger.exception(f"Error fetching top entities: {e}")
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # ranking survives (a set would return a hash-dependent order).
    return list(dict.fromkeys(entities))
def precache_entity(name):
    """Fetch wiki data for one entity name and log the outcome.

    Calls ``fetch_wiki_data(name)``, which returns an image/summary pair,
    and logs at INFO level whether either value was truthy. Any exception
    is logged at ERROR level and suppressed, so this always returns
    ``None``.

    NOTE(review): the "Cached" log wording suggests ``fetch_wiki_data``
    stores its result as a side effect — confirm in ``utils.wiki``.
    """
    try:
        image, abstract = fetch_wiki_data(name)
        has_data = bool(image or abstract)
        message = f"✓ Cached: {name}" if has_data else f"✗ No data for: {name}"
        logger.info(message)
    except Exception as exc:
        logger.error(f"Error caching {name}: {exc}")
def run_precache():
    """Pre-cache every top entity using a pool of 10 worker threads.

    Collects the entity names from ``get_top_entities()``, schedules one
    ``precache_entity`` call per name on a ``ThreadPoolExecutor``, and
    logs progress before and after. Leaving the ``with`` block waits for
    all scheduled calls to finish, so the completion message is logged
    only after every worker is done.
    """
    logger.info("Starting entity pre-cache...")
    targets = get_top_entities()
    total = len(targets)
    logger.info(f"Found {total} unique top entities to cache.")
    with ThreadPoolExecutor(max_workers=10) as pool:
        # Schedule everything up front; results are not needed because
        # precache_entity reports its own outcome through the logger.
        for target in targets:
            pool.submit(precache_entity, target)
    logger.info("Pre-cache complete.")
# Run the pre-cache job only when this file is executed as a script.
if __name__ == "__main__":
    run_precache()