retoques
This commit is contained in:
parent
86ee083b90
commit
e3a99d9604
8 changed files with 489 additions and 483 deletions
|
|
@@ -13,7 +13,9 @@ services:
|
|||
PGDATA: /var/lib/postgresql/data/18/main
|
||||
command: ["postgres", "-c", "max_connections=400"]
|
||||
volumes:
|
||||
- /datos/rss/postgres/18:/var/lib/postgresql/data
|
||||
# Datos de Postgres dentro del proyecto
|
||||
- ./pgdata:/var/lib/postgresql/data
|
||||
# Scripts de inicialización
|
||||
- ./init-db:/docker-entrypoint-initdb.d:ro
|
||||
restart: always
|
||||
healthcheck:
|
||||
|
|
@@ -61,18 +63,19 @@ services:
|
|||
- DB_USER=${DB_USER}
|
||||
- DB_PASS=${DB_PASS}
|
||||
- SECRET_KEY=${SECRET_KEY}
|
||||
- RSS_MAX_WORKERS=8
|
||||
- RSS_MAX_WORKERS=16
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: always
|
||||
|
||||
translator:
|
||||
# --- Worker de traducción en GPU: encola + traduce ---
|
||||
translator_gpu:
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
TORCH_CUDA: cu121
|
||||
container_name: rss_translator
|
||||
container_name: rss_translator_gpu
|
||||
command: bash -lc "python translation_worker.py"
|
||||
environment:
|
||||
- DB_HOST=db
|
||||
|
|
@@ -81,19 +84,19 @@ services:
|
|||
- DB_USER=${DB_USER}
|
||||
- DB_PASS=${DB_PASS}
|
||||
- TARGET_LANGS=es
|
||||
- TRANSLATOR_BATCH=32
|
||||
- ENQUEUE=200
|
||||
- TRANSLATOR_BATCH=16
|
||||
- ENQUEUE=200 # ESTE encola traducciones nuevas
|
||||
- TRANSLATOR_SLEEP_IDLE=5
|
||||
- MAX_SRC_TOKENS=680
|
||||
- MAX_NEW_TOKENS=400
|
||||
- NUM_BEAMS_TITLE=2
|
||||
- NUM_BEAMS_TITLE=1
|
||||
- NUM_BEAMS_BODY=1
|
||||
- UNIVERSAL_MODEL=facebook/nllb-200-1.3B
|
||||
- UNIVERSAL_MODEL=facebook/nllb-200-distilled-600M
|
||||
- CHUNK_BY_SENTENCES=True
|
||||
- CHUNK_MAX_TOKENS=400
|
||||
- CHUNK_OVERLAP_SENTS=1
|
||||
- CLEAN_ARTICLE=1
|
||||
- DEVICE=cuda
|
||||
- DEVICE=cuda # GPU
|
||||
- PYTHONUNBUFFERED=1
|
||||
- HF_HOME=/root/.cache/huggingface
|
||||
- TOKENIZERS_PARALLELISM=false
|
||||
|
|
@@ -101,13 +104,52 @@ services:
|
|||
- NVIDIA_VISIBLE_DEVICES=all
|
||||
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
volumes:
|
||||
- /datos/rss/hf_cache:/root/.cache/huggingface
|
||||
# Cache de modelos HF dentro del proyecto
|
||||
- ./hf_cache:/root/.cache/huggingface
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: always
|
||||
gpus: all
|
||||
|
||||
# --- Worker de traducción en CPU: SOLO procesa pendientes ---
|
||||
translator_cpu:
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
TORCH_CUDA: cu121
|
||||
container_name: rss_translator_cpu
|
||||
command: bash -lc "python translation_worker.py"
|
||||
environment:
|
||||
- DB_HOST=db
|
||||
- DB_PORT=5432
|
||||
- DB_NAME=${DB_NAME}
|
||||
- DB_USER=${DB_USER}
|
||||
- DB_PASS=${DB_PASS}
|
||||
- TARGET_LANGS=es
|
||||
- TRANSLATOR_BATCH=8 # batch más pequeño para CPU
|
||||
- ENQUEUE=0 # NO encola nuevas traducciones
|
||||
- TRANSLATOR_SLEEP_IDLE=5
|
||||
- MAX_SRC_TOKENS=680
|
||||
- MAX_NEW_TOKENS=400
|
||||
- NUM_BEAMS_TITLE=1
|
||||
- NUM_BEAMS_BODY=1
|
||||
- UNIVERSAL_MODEL=facebook/nllb-200-distilled-600M
|
||||
- CHUNK_BY_SENTENCES=True
|
||||
- CHUNK_MAX_TOKENS=400
|
||||
- CHUNK_OVERLAP_SENTS=1
|
||||
- CLEAN_ARTICLE=1
|
||||
- DEVICE=cpu # Fuerza CPU
|
||||
- PYTHONUNBUFFERED=1
|
||||
- HF_HOME=/root/.cache/huggingface
|
||||
- TOKENIZERS_PARALLELISM=false
|
||||
volumes:
|
||||
- ./hf_cache:/root/.cache/huggingface
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: always
|
||||
|
||||
ner:
|
||||
build:
|
||||
context: .
|
||||
|
|
@@ -141,7 +183,7 @@ services:
|
|||
- DB_NAME=${DB_NAME}
|
||||
- DB_USER=${DB_USER}
|
||||
- DB_PASS=${DB_PASS}
|
||||
- EMB_MODEL=sentence-transformers/paraphrase-multilingual-mpnet-base-v2
|
||||
- EMB_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
|
||||
- EMB_BATCH=256
|
||||
- EMB_SLEEP_IDLE=5
|
||||
- EMB_LANGS=es
|
||||
|
|
@@ -151,7 +193,8 @@ services:
|
|||
- HF_HOME=/root/.cache/huggingface
|
||||
- TOKENIZERS_PARALLELISM=false
|
||||
volumes:
|
||||
- /datos/rss/hf_cache:/root/.cache/huggingface
|
||||
# Reutiliza el mismo cache HF
|
||||
- ./hf_cache:/root/.cache/huggingface
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue