Mejoras: NER, embeddings, dashboard, docker-compose y limpieza
This commit is contained in:
parent
6c5aff9936
commit
d508dc2058
19 changed files with 2218 additions and 1185 deletions
|
|
@@ -1,20 +1,27 @@
|
|||
services:
|
||||
db:
|
||||
image: postgres:15
|
||||
image: postgres:18
|
||||
container_name: rss_db
|
||||
environment:
|
||||
- POSTGRES_DB=${DB_NAME}
|
||||
- POSTGRES_USER=${DB_USER}
|
||||
- POSTGRES_PASSWORD=${DB_PASS}
|
||||
POSTGRES_DB: ${DB_NAME}
|
||||
POSTGRES_USER: ${DB_USER}
|
||||
POSTGRES_PASSWORD: ${DB_PASS}
|
||||
POSTGRES_INITDB_ARGS: "--encoding=UTF8 --locale=C.UTF-8"
|
||||
LANG: C.UTF-8
|
||||
LC_ALL: C.UTF-8
|
||||
TZ: Europe/Madrid
|
||||
PGDATA: /var/lib/postgresql/data/18/main
|
||||
command: ["postgres", "-c", "max_connections=400"]
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
- ./init-db:/docker-entrypoint-initdb.d
|
||||
- /datos/rss/postgres/18:/var/lib/postgresql/data
|
||||
- ./init-db:/docker-entrypoint-initdb.d:ro
|
||||
restart: always
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${DB_USER} -d ${DB_NAME}"]
|
||||
test: ["CMD-SHELL", "pg_isready -h 127.0.0.1 -p 5432 -U $$POSTGRES_USER -d $$POSTGRES_DB || exit 1"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
retries: 30
|
||||
start_period: 20s
|
||||
|
||||
web:
|
||||
build:
|
||||
|
|
@@ -22,7 +29,7 @@ services:
|
|||
args:
|
||||
TORCH_CUDA: cu121
|
||||
container_name: rss_web
|
||||
command: gunicorn --bind 0.0.0.0:8000 --workers 3 app:app
|
||||
command: bash -lc "gunicorn --bind 0.0.0.0:8000 --workers 3 --timeout 120 app:app"
|
||||
ports:
|
||||
- "8001:8000"
|
||||
environment:
|
||||
|
|
@@ -46,7 +53,7 @@ services:
|
|||
args:
|
||||
TORCH_CUDA: cu121
|
||||
container_name: rss_scheduler
|
||||
command: python scheduler.py
|
||||
command: bash -lc "python scheduler.py"
|
||||
environment:
|
||||
- DB_HOST=db
|
||||
- DB_PORT=5432
|
||||
|
|
@@ -54,6 +61,7 @@ services:
|
|||
- DB_USER=${DB_USER}
|
||||
- DB_PASS=${DB_PASS}
|
||||
- SECRET_KEY=${SECRET_KEY}
|
||||
- RSS_MAX_WORKERS=8
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
|
|
@@ -65,7 +73,7 @@ services:
|
|||
args:
|
||||
TORCH_CUDA: cu121
|
||||
container_name: rss_translator
|
||||
command: python translation_worker.py
|
||||
command: bash -lc "python translation_worker.py"
|
||||
environment:
|
||||
- DB_HOST=db
|
||||
- DB_PORT=5432
|
||||
|
|
@@ -101,7 +109,7 @@ services:
|
|||
- NVIDIA_VISIBLE_DEVICES=all
|
||||
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
volumes:
|
||||
- hf_cache:/root/.cache/huggingface
|
||||
- /datos/rss/hf_cache:/root/.cache/huggingface
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
|
|
@@ -114,7 +122,7 @@ services:
|
|||
args:
|
||||
TORCH_CUDA: cu121
|
||||
container_name: rss_ner
|
||||
command: python ner_worker.py
|
||||
command: bash -lc "python ner_worker.py"
|
||||
environment:
|
||||
- DB_HOST=db
|
||||
- DB_PORT=5432
|
||||
|
|
@@ -128,7 +136,61 @@ services:
|
|||
condition: service_healthy
|
||||
restart: always
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
hf_cache:
|
||||
embeddings:
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
TORCH_CUDA: cu121
|
||||
container_name: rss_embeddings
|
||||
command: bash -lc "python embeddings_worker.py"
|
||||
environment:
|
||||
- DB_HOST=db
|
||||
- DB_PORT=5432
|
||||
- DB_NAME=${DB_NAME}
|
||||
- DB_USER=${DB_USER}
|
||||
- DB_PASS=${DB_PASS}
|
||||
|
||||
- EMB_MODEL=sentence-transformers/all-MiniLM-L6-v2
|
||||
- EMB_BATCH=64
|
||||
- EMB_SLEEP=5
|
||||
|
||||
- PYTHONUNBUFFERED=1
|
||||
- HF_HOME=/root/.cache/huggingface
|
||||
- TOKENIZERS_PARALLELISM=false
|
||||
volumes:
|
||||
- /datos/rss/hf_cache:/root/.cache/huggingface
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: always
|
||||
# gpus: all
|
||||
|
||||
related:
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
TORCH_CUDA: cu121
|
||||
container_name: rss_related
|
||||
command: bash -lc "python related_worker.py"
|
||||
environment:
|
||||
- DB_HOST=db
|
||||
- DB_PORT=5432
|
||||
- DB_NAME=${DB_NAME}
|
||||
- DB_USER=${DB_USER}
|
||||
- DB_PASS=${DB_PASS}
|
||||
|
||||
- RELATED_TOPK=10
|
||||
- RELATED_BATCH_IDS=200
|
||||
- RELATED_BATCH_SIM=2000
|
||||
- RELATED_SLEEP=10
|
||||
- RELATED_MIN_SCORE=0.0
|
||||
- RELATED_WINDOW_H=0
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: always
|
||||
|
||||
networks:
|
||||
default:
|
||||
name: rss_default
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue