varios cambios estéticos y optimizaciones
This commit is contained in:
parent
e3a99d9604
commit
9a243db633
8 changed files with 64 additions and 105 deletions
|
|
@ -105,6 +105,8 @@ CHUNK_BY_SENTENCES = _env_bool("CHUNK_BY_SENTENCES", default=True)
|
|||
# Configurable through the "CHUNK_MAX_TOKENS" key (default 900).
# NOTE(review): presumably the token budget per text chunk — confirm at the usage site.
CHUNK_MAX_TOKENS = _env_int("CHUNK_MAX_TOKENS", default=900)
|
||||
# Configurable through the "CHUNK_OVERLAP_SENTS" key (default 0).
# NOTE(review): presumably the number of sentences shared between consecutive
# chunks — confirm at the usage site.
CHUNK_OVERLAP_SENTS = _env_int("CHUNK_OVERLAP_SENTS", default=0)
|
||||
|
||||
# pais_id used by ensure_identity_spanish() to select the noticias that receive
# an identity (copied, untranslated) 'es' row in traducciones.
# Configurable through the "IDENTITY_PAISES_ES" key (default 58).
IDENTITY_PAISES_ES = _env_int("IDENTITY_PAISES_ES", default=58)
|
||||
|
||||
# Spanish abbreviations (written without their trailing period).
# NOTE(review): presumably used to avoid splitting sentences on the period that
# follows an abbreviation — usage is not visible here, confirm.
_ABBR = ("Sr", "Sra", "Dr", "Dra", "Ing", "Lic", "pág", "etc")
|
||||
# Marker character associated with _ABBR.
# NOTE(review): presumably a temporary placeholder substituted for abbreviation
# periods during sentence splitting — usage is not visible here, confirm.
_ABBR_MARK = "§"
|
||||
|
||||
|
|
@ -216,6 +218,8 @@ def ensure_indexes(conn):
|
|||
|
||||
|
||||
def ensure_pending(conn, lang_to: str, enqueue_limit: int):
|
||||
if enqueue_limit <= 0:
|
||||
return
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
|
|
@ -236,7 +240,44 @@ def ensure_pending(conn, lang_to: str, enqueue_limit: int):
|
|||
conn.commit()
|
||||
|
||||
|
||||
def ensure_identity_spanish(conn, lang_to: str, enqueue_limit: int):
    """Insert identity 'es' translations for noticias of the configured country.

    For noticias whose ``pais_id`` equals ``IDENTITY_PAISES_ES`` and that have
    no row in ``traducciones`` for the target language, a row is inserted that
    copies ``titulo`` and ``resumen`` unchanged, with ``lang_from = 'es'`` and
    ``status = 'done'``.

    Args:
        conn: Open database connection (provides ``cursor()`` and ``commit()``).
        lang_to: Target language; normalized with ``normalize_lang``. Nothing
            is done unless it normalizes to ``"es"``.
        enqueue_limit: Maximum number of noticias to process in this call;
            values <= 0 make the call a no-op.

    Returns:
        None.
    """
    target = normalize_lang(lang_to, "es") or "es"

    # Identity rows only make sense for a Spanish target and a positive quota.
    if target != "es" or enqueue_limit <= 0:
        return

    LOG.info(
        "Creando traducciones identidad ES para pais_id=%s (hasta %s noticias)…",
        IDENTITY_PAISES_ES,
        enqueue_limit,
    )

    # Anti-join (LEFT JOIN ... WHERE t.id IS NULL) keeps only noticias that
    # still lack a translation row for the target language; newest first.
    sql = """
            INSERT INTO traducciones (noticia_id, lang_from, lang_to, titulo_trad, resumen_trad, status)
            SELECT sub.id, 'es', %s, sub.titulo, sub.resumen, 'done'
            FROM (
                SELECT n.id, n.titulo, n.resumen
                FROM noticias n
                LEFT JOIN traducciones t
                  ON t.noticia_id = n.id AND t.lang_to = %s
                WHERE t.id IS NULL
                  AND n.pais_id = %s
                ORDER BY n.fecha DESC NULLS LAST, n.id
                LIMIT %s
            ) AS sub;
            """
    params = (target, target, IDENTITY_PAISES_ES, enqueue_limit)

    with conn.cursor() as cur:
        cur.execute(sql, params)
    conn.commit()
|
||||
|
||||
|
||||
def fetch_pending_batch(conn, lang_to: str, batch_size: int):
|
||||
if batch_size <= 0:
|
||||
return []
|
||||
|
||||
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
|
|
@ -865,7 +906,10 @@ def main():
|
|||
ensure_indexes(conn)
|
||||
for lt in TARGET_LANGS:
|
||||
lt = normalize_lang(lt, "es") or "es"
|
||||
|
||||
ensure_identity_spanish(conn, lt, ENQUEUE_MAX)
|
||||
ensure_pending(conn, lt, ENQUEUE_MAX)
|
||||
|
||||
while True:
|
||||
rows = fetch_pending_batch(conn, lt, BATCH_SIZE)
|
||||
if not rows:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue