Initial clean commit
This commit is contained in:
commit
6784d81c2c
141 changed files with 25219 additions and 0 deletions
70
scripts/precache_entities.py
Normal file
70
scripts/precache_entities.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import logging
|
||||
import sys
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
# Add app to path
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from db import get_read_conn
|
||||
from utils.wiki import fetch_wiki_data
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def get_top_entities(quotas=(("persona", 100), ("organizacion", 50), ("lugar", 50))):
    """Return the most frequently tagged entity names of the last 30 days.

    For every ``(tipo, limit)`` pair in *quotas* the ranking query is run
    once, selecting the ``valor`` of the *limit* most frequent tags of that
    ``tipo`` among ``traducciones`` rows created in the last 30 days.

    Args:
        quotas: Iterable of ``(tipo, limit)`` pairs. The default keeps the
            historical behaviour: top 100 people, 50 organisations and
            50 places.

    Returns:
        A list of unique names. Order is deterministic: quota order first,
        then descending frequency, keeping the first occurrence when the
        same name shows up under several types. If the database access
        fails, the error is logged and whatever was collected so far
        (possibly an empty list) is returned.
    """
    query = """
        SELECT t.valor, COUNT(*) as c
        FROM tags t
        JOIN tags_noticia tn ON t.id = tn.tag_id
        JOIN traducciones tr ON tn.traduccion_id = tr.id
        WHERE tr.created_at > NOW() - INTERVAL '30 days'
        AND t.tipo = %s
        GROUP BY t.valor
        ORDER BY c DESC
        LIMIT %s
    """

    entities = []
    try:
        with get_read_conn() as conn:
            with conn.cursor() as cur:
                for tipo, limit in quotas:
                    cur.execute(query, (tipo, limit))
                    entities.extend(row[0] for row in cur.fetchall())
    except Exception as exc:
        # Best-effort: keep partial results, but record the full traceback.
        logger.exception("Error fetching top entities: %s", exc)

    # dict.fromkeys de-duplicates while preserving the ranking order,
    # unlike set(), whose iteration order is arbitrary.
    return list(dict.fromkeys(entities))
|
||||
|
||||
def precache_entity(name):
    """Look up wiki data for one entity and log the outcome.

    Calls ``fetch_wiki_data(name)``; any caching is presumably a side effect
    of that helper (its implementation is not visible here -- confirm).
    Exceptions are caught and logged so that one failing entity never
    aborts a batch run.

    Args:
        name: Entity name to look up. Empty or ``None`` names are skipped.

    Returns:
        ``True`` if an image or a summary was obtained, ``False`` if nothing
        was found, the name was empty, or the lookup raised.
    """
    if not name:
        # Nothing meaningful to look up; avoid a pointless remote call.
        return False

    try:
        img, summary = fetch_wiki_data(name)
    except Exception as exc:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Error caching %s: %s", name, exc)
        return False

    if img or summary:
        logger.info(f"✓ Cached: {name}")
        return True

    logger.info(f"✗ No data for: {name}")
    return False
|
||||
|
||||
def run_precache(max_workers=10):
    """Pre-cache wiki data for the current top entities using a thread pool.

    Fetches the entity list via ``get_top_entities()`` and runs
    ``precache_entity`` for each name concurrently.

    Args:
        max_workers: Size of the thread pool. Defaults to 10, the value
            that was previously hard-coded.

    Returns:
        None. When no entities are available a warning is logged and the
        function returns without starting any worker.
    """
    logger.info("Starting entity pre-cache...")
    entities = get_top_entities()
    logger.info(f"Found {len(entities)} unique top entities to cache.")

    if not entities:
        # Usually means the query failed or the tables are empty; do not
        # report a successful run in that case.
        logger.warning("No entities to pre-cache; skipping.")
        return

    # Leaving the ``with`` block waits for every submitted task to finish.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        executor.map(precache_entity, entities)

    logger.info("Pre-cache complete.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_precache()
|
||||
Loading…
Add table
Add a link
Reference in a new issue