---
# ==================================================================================
# RSS2 stack — Postgres, Redis, Go/Python workers, Qdrant, and monitoring.
# NOTE(review): DB_USER/DB_PASS (workers) and POSTGRES_USER/POSTGRES_PASSWORD (db)
# are independent variables that must be given matching values in .env — confirm.
# ==================================================================================

services:
  db:
    image: postgres:18-alpine
    container_name: rss2_db
    shm_size: 4gb
    environment:
      POSTGRES_DB: ${POSTGRES_DB:-rss}
      POSTGRES_USER: ${POSTGRES_USER:-rss}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_INITDB_ARGS: "--encoding=UTF8 --locale=C.UTF-8"
      LANG: C.UTF-8
      LC_ALL: C.UTF-8
      TZ: Europe/Madrid
      PGDATA: /var/lib/postgresql/data/18/main
    volumes:
      - ./data/pgdata:/var/lib/postgresql/data
      - ./init-db:/docker-entrypoint-initdb.d:rw
      - ./docker-entrypoint-db.sh:/docker-entrypoint-db.sh:ro
    # Custom entrypoint wraps the stock postgres entrypoint.
    entrypoint: ["bash", "/docker-entrypoint-db.sh"]
    networks:
      backend:
        aliases:
          - db
          - rss2_db
    restart: unless-stopped
    healthcheck:
      # $$VAR defers expansion to the container shell, where POSTGRES_* are set.
      test: ["CMD-SHELL", "pg_isready -h 127.0.0.1 -p 5432 -U $$POSTGRES_USER -d $$POSTGRES_DB || exit 1"]
      interval: 5s
      timeout: 5s
      retries: 30
      start_period: 20s
    deploy:
      resources:
        limits:
          memory: 8G
        reservations:
          memory: 4G

  redis:
    image: redis:7-alpine
    container_name: rss2_redis
    environment:
      TZ: Europe/Madrid
    # SECURITY: Redis runs with authentication enabled.
    command: >
      redis-server
      --appendonly yes
      --maxmemory 512mb
      --maxmemory-policy allkeys-lru
      --requirepass ${REDIS_PASSWORD}
    volumes:
      - ./data/redis-data:/data
      - /etc/timezone:/etc/timezone:ro
      - /etc/localtime:/etc/localtime:ro
    networks:
      backend:
        aliases:
          - redis
          - rss2_redis
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "--no-auth-warning", "-a", "${REDIS_PASSWORD}", "ping"]
      interval: 5s
      timeout: 3s
      retries: 5
    deploy:
      resources:
        limits:
          memory: 768M
        reservations:
          memory: 512M

  # ==================================================================================
  # RSS INGESTOR (Go) — fetches feeds into the database
  # ==================================================================================
  rss-ingestor-go:
    build:
      context: ./rss-ingestor-go
      dockerfile: Dockerfile
    container_name: rss2_ingestor_go
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      RSS_MAX_WORKERS: "100"
      RSS_POKE_INTERVAL_MIN: "15"
      TZ: Europe/Madrid
    networks:
      - backend
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 2G
        reservations:
          memory: 512M

  # ==================================================================================
  # LANGUAGE DETECTION WORKER (Python)
  # ==================================================================================
  langdetect:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: rss2_langdetect_py
    command: bash -lc "python -m workers.langdetect_worker"
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      LANG_DETECT_SLEEP: "60"
      LANG_DETECT_BATCH: "1000"
      TZ: Europe/Madrid
    volumes:
      - ./workers:/app/workers
    networks:
      - backend
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 512M

  # ==================================================================================
  # SCRAPER WORKER (Go) — extracts articles from URLs
  # ==================================================================================
  scraper:
    build:
      context: .
      dockerfile: Dockerfile.scraper
    container_name: rss2_scraper
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      SCRAPER_SLEEP: "60"
      SCRAPER_BATCH: "10"
      TZ: Europe/Madrid
    networks:
      - backend
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 512M

  # ==================================================================================
  # DISCOVERY WORKER (Go) — discovers RSS feeds
  # ==================================================================================
  discovery:
    build:
      context: .
      dockerfile: Dockerfile.discovery
    container_name: rss2_discovery
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      DISCOVERY_INTERVAL: "900"
      DISCOVERY_BATCH: "10"
      MAX_FEEDS_PER_URL: "5"
      TZ: Europe/Madrid
    networks:
      - backend
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 512M

  # ==================================================================================
  # WIKI WORKER (Go) — Wikipedia info and thumbnails
  # ==================================================================================
  wiki-worker:
    build:
      context: .
      dockerfile: Dockerfile.wiki
    container_name: rss2_wiki_worker
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      WIKI_SLEEP: "10"
      TZ: Europe/Madrid
    volumes:
      - ./data/wiki_images:/app/data/wiki_images
    networks:
      - backend
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256M

  # ==================================================================================
  # BACKEND GO (REST API)
  # ==================================================================================
  backend-go:
    build:
      context: ./backend
      dockerfile: Dockerfile
    container_name: rss2_backend_go
    environment:
      TZ: Europe/Madrid
      DATABASE_URL: postgres://${POSTGRES_USER:-rss}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-rss}?sslmode=disable
      # FIX: no fallback password here — the redis service uses ${REDIS_PASSWORD}
      # with no default, so a fallback here would silently mismatch when the
      # variable is unset (and hard-coded a credential in the compose file).
      REDIS_URL: redis://:${REDIS_PASSWORD}@redis:6379
      SECRET_KEY: ${SECRET_KEY:-change_this_to_a_long_random_string}
      SERVER_PORT: "8080"
    volumes:
      - ./data/wiki_images:/app/data/wiki_images
    networks:
      - backend
      - frontend
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
    restart: unless-stopped

  # ==================================================================================
  # FRONTEND REACT
  # ==================================================================================
  rss2_frontend:
    build:
      context: ./frontend
      dockerfile: Dockerfile
    container_name: rss2_frontend
    environment:
      TZ: Europe/Madrid
      VITE_API_URL: /api
    networks:
      - frontend
    depends_on:
      - backend-go
    restart: unless-stopped

  # ==================================================================================
  # NGINX (port 8001 — serves React + proxies the API)
  # ==================================================================================
  nginx:
    image: nginx:alpine
    container_name: rss2_nginx
    ports:
      - "8001:80"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    networks:
      - frontend
    depends_on:
      - rss2_frontend
      - backend-go
    restart: unless-stopped

  # ==================================================================================
  # TRANSLATOR CPU (CTranslate2) — scale with: docker compose up -d --scale translator=3
  # ==================================================================================
  translator:
    build:
      context: .
      dockerfile: Dockerfile.translator
    image: rss2-translator:latest
    # No container_name: this service is meant to be scaled to multiple replicas.
    command: bash -lc "python -m workers.ctranslator_worker"
    security_opt:
      - seccomp=unconfined
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      TARGET_LANGS: es
      TRANSLATOR_BATCH: "32"
      CT2_MODEL_PATH: /app/models/nllb-ct2
      CT2_DEVICE: cpu
      CT2_COMPUTE_TYPE: int8
      UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M
      HF_HOME: /app/hf_cache
      TZ: Europe/Madrid
      TRANSLATOR_ID: ${TRANSLATOR_ID:-}
    volumes:
      - ./workers:/app/workers
      - ./hf_cache:/app/hf_cache
      - ./models:/app/models
    networks:
      - backend
    # Only started when the cpu-only profile is enabled.
    profiles:
      - cpu-only
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped

  # ==================================================================================
  # TRANSLATION SCHEDULER — creates translation jobs
  # ==================================================================================
  translation-scheduler:
    build:
      context: .
      dockerfile: Dockerfile.scheduler
    image: rss2-scheduler:latest
    container_name: rss2_translation_scheduler
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      TARGET_LANGS: es
      SCHEDULER_BATCH: "1000"
      SCHEDULER_SLEEP: "30"
      TZ: Europe/Madrid
    volumes:
      - ./workers:/app/workers
    networks:
      - backend
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256M
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped

  # ==================================================================================
  # TRANSLATOR GPU (CTranslate2 with CUDA)
  # ==================================================================================
  translator-gpu:
    build:
      context: .
      dockerfile: Dockerfile.translator-gpu
    image: rss2-translator-gpu:latest
    container_name: rss2_translator_gpu
    command: bash -lc "python -m workers.ctranslator_worker"
    security_opt:
      - seccomp=unconfined
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      TARGET_LANGS: es
      TRANSLATOR_BATCH: "64"
      CT2_MODEL_PATH: /app/models/nllb-ct2
      CT2_DEVICE: cuda
      CT2_COMPUTE_TYPE: float16
      UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M
      HF_HOME: /app/hf_cache
      TZ: Europe/Madrid
    volumes:
      - ./workers:/app/workers
      - ./hf_cache:/app/hf_cache
      - ./models:/app/models
    networks:
      - backend
    deploy:
      resources:
        limits:
          memory: 4G
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped

  # ==================================================================================
  # EMBEDDINGS WORKER (Python, GPU)
  # ==================================================================================
  embeddings:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: rss2_embeddings_py
    command: bash -lc "python -m workers.embeddings_worker"
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      EMB_MODEL: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
      EMB_BATCH: "64"
      EMB_SLEEP_IDLE: "5"
      EMB_LANGS: es
      EMB_LIMIT: "1000"
      DEVICE: cuda
      HF_HOME: /app/hf_cache
      TZ: Europe/Madrid
    volumes:
      - ./workers:/app/workers
      - ./hf_cache:/app/hf_cache
    networks:
      - backend
    deploy:
      resources:
        limits:
          memory: 6G
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped

  # ==================================================================================
  # TOPICS WORKER (Go) — topic and country matching
  # ==================================================================================
  topics:
    build:
      context: .
      dockerfile: Dockerfile.topics
    container_name: rss2_topics
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      TOPICS_SLEEP: "10"
      TOPICS_BATCH: "500"
      TZ: Europe/Madrid
    networks:
      - backend
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 512M

  # ==================================================================================
  # RELATED WORKER (Go) — related news
  # ==================================================================================
  related:
    build:
      context: .
      dockerfile: Dockerfile.related
    container_name: rss2_related
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      RELATED_SLEEP: "10"
      RELATED_BATCH: "200"
      RELATED_TOPK: "10"
      EMB_MODEL: mxbai-embed-large
      TZ: Europe/Madrid
    networks:
      - backend
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 1G

  # ==================================================================================
  # QDRANT — vector database
  # ==================================================================================
  qdrant:
    image: qdrant/qdrant:latest
    container_name: rss2_qdrant
    environment:
      TZ: Europe/Madrid
      QDRANT__SERVICE__GRPC_PORT: "6334"
    volumes:
      - ./data/qdrant_storage:/qdrant/storage
      - /etc/timezone:/etc/timezone:ro
      - /etc/localtime:/etc/localtime:ro
    networks:
      - backend
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '4'
          memory: 4G
        reservations:
          memory: 2G

  # ==================================================================================
  # QDRANT WORKER (Go) — vectorization and semantic search
  # ==================================================================================
  qdrant-worker:
    build:
      context: .
      dockerfile: Dockerfile.qdrant
    container_name: rss2_qdrant_worker
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      QDRANT_HOST: qdrant
      QDRANT_PORT: "6333"
      QDRANT_COLLECTION: news_vectors
      OLLAMA_URL: http://ollama:11434
      QDRANT_SLEEP: "30"
      QDRANT_BATCH: "100"
      TZ: Europe/Madrid
    networks:
      - backend
    depends_on:
      db:
        condition: service_healthy
      # qdrant has no healthcheck, so only wait for the container to start.
      qdrant:
        condition: service_started
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 1G

  # ==================================================================================
  # NER WORKER (Python) — entity extraction
  # ==================================================================================
  ner:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: rss2_ner
    command: bash -lc "python -m workers.ner_worker"
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      NER_LANG: es
      NER_BATCH: "64"
      HF_HOME: /app/hf_cache
      TZ: Europe/Madrid
    volumes:
      - ./workers:/app/workers
      - ./hf_cache:/app/hf_cache
    networks:
      - backend
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 2G

  # ==================================================================================
  # CLUSTER WORKER (Python) — news grouping
  # ==================================================================================
  cluster:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: rss2_cluster_py
    command: bash -lc "python -m workers.cluster_worker"
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      EVENT_DIST_THRESHOLD: "0.35"
      EMB_MODEL: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
      TZ: Europe/Madrid
    volumes:
      - ./workers:/app/workers
    networks:
      - backend
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 2G

  # ==================================================================================
  # LLM CATEGORIZER (Python) — categorization via Ollama
  # ==================================================================================
  llm-categorizer:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: rss2_llm_categorizer
    command: bash -lc "python -m workers.simple_categorizer_worker"
    environment:
      DB_HOST: db
      DB_PORT: "5432"
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      CATEGORIZER_BATCH_SIZE: "10"
      CATEGORIZER_SLEEP_IDLE: "5"
      TZ: Europe/Madrid
    volumes:
      - ./workers:/app/workers
    networks:
      - backend
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 1G

  # ==================================================================================
  # MONITORING STACK — SECURED
  # ==================================================================================
  prometheus:
    image: prom/prometheus:latest
    container_name: rss2_prometheus
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    # SECURITY: no ports exposed — access only via Grafana or an SSH tunnel.
    # ports:
    #   - "9090:9090"
    networks:
      - monitoring
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 2G

  grafana:
    image: grafana/grafana:latest
    container_name: rss2_grafana
    # SECURITY: bound to localhost only; for remote access use an SSH tunnel:
    # ssh -L 3001:localhost:3001 user@server
    ports:
      - "127.0.0.1:3001:3000"
    environment:
      # SECURITY: change this password in production.
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_PASSWORD:-change_this_password}
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_SERVER_ROOT_URL: http://localhost:3001
      GF_SECURITY_COOKIE_SECURE: "false"
      GF_SECURITY_COOKIE_SAMESITE: lax
    volumes:
      - grafana_data:/var/lib/grafana
    networks:
      - monitoring
    depends_on:
      - prometheus
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 1G

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    container_name: rss2_cadvisor
    # SECURITY: no ports exposed — internal access only.
    # ports:
    #   - "8081:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    devices:
      - /dev/kmsg
    networks:
      - monitoring
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 512M

# ==================================================================================
# SEGMENTED NETWORKS
# ==================================================================================
networks:
  # Frontend network — nginx and the web app only.
  frontend:
    name: rss2_frontend
    driver: bridge
    internal: false
  # Backend network — database, workers, redis, qdrant.
  backend:
    name: rss2_backend
    driver: bridge
    internal: false  # External access allowed (required by the ingestor).
  # Monitoring network — Prometheus, Grafana, cAdvisor.
  monitoring:
    name: rss2_monitoring
    driver: bridge
    internal: true

volumes:
  prometheus_data:
  grafana_data:
  # NOTE(review): torch_extensions is not mounted by any service above — confirm
  # whether it is still needed before removing.
  torch_extensions: