jlimolina 2025-11-24 23:06:26 +01:00
parent 86ee083b90
commit e3a99d9604
8 changed files with 489 additions and 483 deletions


@@ -13,7 +13,9 @@ services:
PGDATA: /var/lib/postgresql/data/18/main
command: ["postgres", "-c", "max_connections=400"]
volumes:
- /datos/rss/postgres/18:/var/lib/postgresql/data
# Postgres data inside the project
- ./pgdata:/var/lib/postgresql/data
# Initialization scripts
- ./init-db:/docker-entrypoint-initdb.d:ro
restart: always
healthcheck:
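
This hunk moves the Postgres data directory into the project tree (./pgdata) and mounts ./init-db for first-run initialization scripts; the container still starts with max_connections=400. A minimal sanity check, not part of the repo, assuming psycopg2 is installed and the DB_* variables from .env are exported in the shell, could confirm that the override took effect:

    import os
    import psycopg2  # pip install psycopg2-binary

    conn = psycopg2.connect(
        host=os.environ.get("DB_HOST", "localhost"),
        port=int(os.environ.get("DB_PORT", "5432")),
        dbname=os.environ["DB_NAME"],
        user=os.environ["DB_USER"],
        password=os.environ["DB_PASS"],
    )
    with conn, conn.cursor() as cur:
        cur.execute("SHOW max_connections;")
        print(cur.fetchone()[0])  # expect 400, per the compose command override
    conn.close()
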
@@ -61,18 +63,19 @@ services:
- DB_USER=${DB_USER}
- DB_PASS=${DB_PASS}
- SECRET_KEY=${SECRET_KEY}
- RSS_MAX_WORKERS=8
- RSS_MAX_WORKERS=16
depends_on:
db:
condition: service_healthy
restart: always
translator:
# --- GPU translation worker: enqueues + translates ---
translator_gpu:
build:
context: .
args:
TORCH_CUDA: cu121
container_name: rss_translator
container_name: rss_translator_gpu
command: bash -lc "python translation_worker.py"
environment:
- DB_HOST=db
@@ -81,19 +84,19 @@ services:
- DB_USER=${DB_USER}
- DB_PASS=${DB_PASS}
- TARGET_LANGS=es
- TRANSLATOR_BATCH=32
- ENQUEUE=200
- TRANSLATOR_BATCH=16
- ENQUEUE=200 # THIS one enqueues new translations
- TRANSLATOR_SLEEP_IDLE=5
- MAX_SRC_TOKENS=680
- MAX_NEW_TOKENS=400
- NUM_BEAMS_TITLE=2
- NUM_BEAMS_TITLE=1
- NUM_BEAMS_BODY=1
- UNIVERSAL_MODEL=facebook/nllb-200-1.3B
- UNIVERSAL_MODEL=facebook/nllb-200-distilled-600M
- CHUNK_BY_SENTENCES=True
- CHUNK_MAX_TOKENS=400
- CHUNK_OVERLAP_SENTS=1
- CLEAN_ARTICLE=1
- DEVICE=cuda
- DEVICE=cuda # GPU
- PYTHONUNBUFFERED=1
- HF_HOME=/root/.cache/huggingface
- TOKENIZERS_PARALLELISM=false
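
The GPU translator switches from nllb-200-1.3B to the distilled 600M checkpoint, halves the batch to 16 and drops title decoding to greedy (NUM_BEAMS_TITLE=1). translation_worker.py itself is not part of this diff, so the following is only a hedged sketch, under the assumption that it loads UNIVERSAL_MODEL with the standard transformers seq2seq API and honors DEVICE, MAX_SRC_TOKENS, MAX_NEW_TOKENS and the beam settings:

    import os
    import torch
    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

    # Assumption: this approximates translation_worker.py; its source is not shown in this commit.
    model_name = os.environ.get("UNIVERSAL_MODEL", "facebook/nllb-200-distilled-600M")
    device = os.environ.get("DEVICE", "cpu")
    if device == "cuda" and not torch.cuda.is_available():
        device = "cpu"  # fall back if the GPU is not visible to the container

    tokenizer = AutoTokenizer.from_pretrained(model_name)  # cached under HF_HOME
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

    def translate(text: str, tgt_lang: str = "spa_Latn", num_beams: int = 1) -> str:
        # NLLB uses FLORES-200 codes; the forced BOS token selects the target language (es -> spa_Latn).
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=int(os.environ.get("MAX_SRC_TOKENS", "680")),
        ).to(device)
        output = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
            max_new_tokens=int(os.environ.get("MAX_NEW_TOKENS", "400")),
            num_beams=num_beams,
        )
        return tokenizer.batch_decode(output, skip_special_tokens=True)[0]
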
@@ -101,13 +104,52 @@ services:
- NVIDIA_VISIBLE_DEVICES=all
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
volumes:
- /datos/rss/hf_cache:/root/.cache/huggingface
# HF model cache inside the project
- ./hf_cache:/root/.cache/huggingface
depends_on:
db:
condition: service_healthy
restart: always
gpus: all
# --- CPU translation worker: ONLY processes pending items ---
translator_cpu:
build:
context: .
args:
TORCH_CUDA: cu121
container_name: rss_translator_cpu
command: bash -lc "python translation_worker.py"
environment:
- DB_HOST=db
- DB_PORT=5432
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASS=${DB_PASS}
- TARGET_LANGS=es
- TRANSLATOR_BATCH=8 # smaller batch for the CPU
- ENQUEUE=0 # does NOT enqueue new translations
- TRANSLATOR_SLEEP_IDLE=5
- MAX_SRC_TOKENS=680
- MAX_NEW_TOKENS=400
- NUM_BEAMS_TITLE=1
- NUM_BEAMS_BODY=1
- UNIVERSAL_MODEL=facebook/nllb-200-distilled-600M
- CHUNK_BY_SENTENCES=True
- CHUNK_MAX_TOKENS=400
- CHUNK_OVERLAP_SENTS=1
- CLEAN_ARTICLE=1
- DEVICE=cpu # force CPU
- PYTHONUNBUFFERED=1
- HF_HOME=/root/.cache/huggingface
- TOKENIZERS_PARALLELISM=false
volumes:
- ./hf_cache:/root/.cache/huggingface
depends_on:
db:
condition: service_healthy
restart: always
ner:
build:
context: .
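
The new translator_cpu service reuses the same image and model cache but sets DEVICE=cpu, a smaller batch, and ENQUEUE=0, so only the GPU worker discovers new work while both instances drain the pending queue. A hedged sketch of that split follows; the real loop lives in translation_worker.py, which is not in this diff, and the injected callables below are hypothetical placeholders for its DB helpers:

    import os
    import time

    ENQUEUE = int(os.environ.get("ENQUEUE", "0"))                  # 200 on the GPU worker, 0 on the CPU worker
    BATCH = int(os.environ.get("TRANSLATOR_BATCH", "8"))
    SLEEP_IDLE = int(os.environ.get("TRANSLATOR_SLEEP_IDLE", "5"))

    def run(enqueue_new, fetch_pending, translate_batch):
        """enqueue_new / fetch_pending / translate_batch stand in for the worker's queue helpers."""
        while True:
            if ENQUEUE > 0:
                enqueue_new(limit=ENQUEUE)        # only the enqueuing (GPU) instance adds new items
            batch = fetch_pending(limit=BATCH)    # both instances work through pending translations
            if not batch:
                time.sleep(SLEEP_IDLE)
                continue
            translate_batch(batch)
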
@@ -141,7 +183,7 @@ services:
- DB_NAME=${DB_NAME}
- DB_USER=${DB_USER}
- DB_PASS=${DB_PASS}
- EMB_MODEL=sentence-transformers/paraphrase-multilingual-mpnet-base-v2
- EMB_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
- EMB_BATCH=256
- EMB_SLEEP_IDLE=5
- EMB_LANGS=es
@@ -151,7 +193,8 @@ services:
- HF_HOME=/root/.cache/huggingface
- TOKENIZERS_PARALLELISM=false
volumes:
- /datos/rss/hf_cache:/root/.cache/huggingface
# Reuses the same HF cache
- ./hf_cache:/root/.cache/huggingface
depends_on:
db:
condition: service_healthy
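
The embeddings worker swaps paraphrase-multilingual-mpnet-base-v2 for the lighter MiniLM-L12-v2 model and shares the same ./hf_cache mount. As an illustration only (the embeddings worker's source is not part of this commit), encoding a batch from these variables with sentence-transformers would look roughly like:

    import os
    from sentence_transformers import SentenceTransformer

    # Assumption: the worker uses sentence-transformers directly; only the env vars come from this diff.
    model = SentenceTransformer(
        os.environ.get("EMB_MODEL", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    )  # first run downloads into HF_HOME, i.e. ./hf_cache on the host

    texts = ["Texto de ejemplo en español.", "Otro artículo pendiente de vectorizar."]
    vectors = model.encode(
        texts,
        batch_size=int(os.environ.get("EMB_BATCH", "256")),
        normalize_embeddings=True,
    )
    print(vectors.shape)  # (2, 384): MiniLM-L12-v2 produces 384-dimensional vectors
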