mejora de la semántica

This commit is contained in:
jlimolina 2025-11-19 21:29:15 +01:00
parent d508dc2058
commit cb8f69fb93
10 changed files with 191 additions and 227 deletions

View file

@ -80,32 +80,24 @@ services:
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASS=${DB_PASS}
- TARGET_LANGS=es
- TRANSLATOR_BATCH=8
- TRANSLATOR_BATCH=32
- ENQUEUE=200
- TRANSLATOR_SLEEP_IDLE=5
- MAX_SRC_TOKENS=680
- MAX_NEW_TOKENS=400
- NUM_BEAMS_TITLE=2
- NUM_BEAMS_BODY=1
- UNIVERSAL_MODEL=facebook/nllb-200-1.3B
- CHUNK_BY_SENTENCES=True
- CHUNK_MAX_TOKENS=700
- CHUNK_MAX_TOKENS=400
- CHUNK_OVERLAP_SENTS=1
- CLEAN_ARTICLE=1
- DEVICE=cuda
- PYTHONUNBUFFERED=1
- HF_HOME=/root/.cache/huggingface
- TOKENIZERS_PARALLELISM=false
- PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:64,garbage_collection_threshold:0.9
- NVIDIA_VISIBLE_DEVICES=all
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
volumes:
@ -149,11 +141,12 @@ services:
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASS=${DB_PASS}
- EMB_MODEL=sentence-transformers/all-MiniLM-L6-v2
- EMB_BATCH=64
- EMB_SLEEP=5
- EMB_BATCH=256
- EMB_SLEEP_IDLE=5
- EMB_LANGS=es
- EMB_LIMIT=5000
- DEVICE=cuda
- PYTHONUNBUFFERED=1
- HF_HOME=/root/.cache/huggingface
- TOKENIZERS_PARALLELISM=false
@ -163,7 +156,7 @@ services:
db:
condition: service_healthy
restart: always
# gpus: all
gpus: all
related:
build:
@ -178,7 +171,6 @@ services:
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASS=${DB_PASS}
- RELATED_TOPK=10
- RELATED_BATCH_IDS=200
- RELATED_BATCH_SIM=2000