go integration and wikipedia

2026-03-28 18:30:07 +01:00 · 2026-03-28 18:30:07 +01:00 · ee90335b92
commit ee90335b92
parent 47a252e339
7828 changed files with 1307913 additions and 20807 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -12,47 +12,16 @@ services:
      LC_ALL: C.UTF-8
      TZ: Europe/Madrid
      PGDATA: /var/lib/postgresql/data/18/main
-    command:
-      [
-        "postgres",
-        "-c",
-        "max_connections=200",
-        "-c",
-        "shared_buffers=4GB",
-        "-c",
-        "effective_cache_size=12GB",
-        "-c",
-        "work_mem=16MB",
-        "-c",
-        "maintenance_work_mem=512MB",
-        "-c",
-        "autovacuum_max_workers=3",
-        "-c",
-        "autovacuum_vacuum_scale_factor=0.02",
-        "-c",
-        "autovacuum_vacuum_cost_limit=1000",
-        # Parallel Query Optimization (Adjusted)
-        "-c",
-        "max_worker_processes=8",
-        "-c",
-        "max_parallel_workers=6",
-        "-c",
-        "max_parallel_workers_per_gather=2",
-        # Streaming Replication
-        "-c",
-        "wal_level=replica",
-        "-c",
-        "max_wal_senders=5",
-        "-c",
-        "wal_keep_size=1GB",
-        "-c",
-        "hot_standby=on"
-      ]
    volumes:
-      - ./pgdata:/var/lib/postgresql/data
-      - ./init-db:/docker-entrypoint-initdb.d:ro
+      - ./data/pgdata:/var/lib/postgresql/data
+      - ./init-db:/docker-entrypoint-initdb.d:rw
+      - ./docker-entrypoint-db.sh:/docker-entrypoint-db.sh:ro
+    entrypoint: ["bash", "/docker-entrypoint-db.sh"]
    networks:
-      - backend
+      backend:
+        aliases:
+          - db
+          - rss2_db
    restart: unless-stopped
    healthcheck:
      test: [ "CMD-SHELL", "pg_isready -h 127.0.0.1 -p 5432 -U $$POSTGRES_USER -d $$POSTGRES_DB || exit 1" ]
@ -67,40 +36,6 @@ services:
        reservations:
          memory: 4G

-  db-replica:
-    build:
-      context: .
-      dockerfile: Dockerfile.replica
-    container_name: rss2_db_replica
-    shm_size: 2gb
-    environment:
-      POSTGRES_DB: ${POSTGRES_DB:-rss}
-      POSTGRES_USER: ${POSTGRES_USER:-rss}
-      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
-      PGDATA: /var/lib/postgresql/data
-      TZ: Europe/Madrid
-    command: [ "postgres", "-c", "max_connections=200", "-c", "shared_buffers=256MB", "-c", "effective_cache_size=2GB", "-c", "hot_standby=on", "-c", "max_worker_processes=16", "-c", "hot_standby_feedback=on", "-c", "max_standby_streaming_delay=300s" ]
-    volumes:
-      - ./pgdata-replica:/var/lib/postgresql/data
-    networks:
-      - backend
-    depends_on:
-      db:
-        condition: service_healthy
-    restart: unless-stopped
-    healthcheck:
-      test: [ "CMD-SHELL", "pg_isready -h 127.0.0.1 -p 5432 -U rss -d rss || exit 1" ]
-      interval: 5s
-      timeout: 5s
-      retries: 30
-      start_period: 30s
-    deploy:
-      resources:
-        limits:
-          memory: 4G
-        reservations:
-          memory: 2G
-
  redis:
    image: redis:7-alpine
    container_name: rss2_redis
@ -110,11 +45,14 @@ services:
    command: >
      redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru --requirepass ${REDIS_PASSWORD}
    volumes:
-      - ./redis-data:/data
+      - ./data/redis-data:/data
      - /etc/timezone:/etc/timezone:ro
      - /etc/localtime:/etc/localtime:ro
    networks:
-      - backend
+      backend:
+        aliases:
+          - redis
+          - rss2_redis
    restart: unless-stopped
    healthcheck:
      test: [ "CMD", "redis-cli", "--no-auth-warning", "-a", "${REDIS_PASSWORD}", "ping" ]
@ -156,73 +94,80 @@ services:
        reservations:
          memory: 512M

-  rss-tasks:
-    build: .
-    container_name: rss2_tasks_py
-    command: bash -lc "python -m scheduler"
+  langdetect:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: rss2_langdetect_py
+    command: bash -lc "python -m workers.langdetect_worker"
    environment:
      DB_HOST: db
      DB_PORT: 5432
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
-      REDIS_HOST: redis
-      REDIS_PORT: 6379
-      REDIS_PASSWORD: ${REDIS_PASSWORD}
+      LANG_DETECT_SLEEP: 60
+      LANG_DETECT_BATCH: 1000
      TZ: Europe/Madrid
+    volumes:
+      - ./workers:/app/workers
    networks:
      - backend
    depends_on:
      db:
        condition: service_healthy
-      redis:
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          cpus: '0.5'
+          memory: 512M
+
+  # ==================================================================================
+  # SCRAPER WORKER (Go) - Extracts articles from URLs
+  # ==================================================================================
+  scraper:
+    build:
+      context: .
+      dockerfile: Dockerfile.scraper
+    container_name: rss2_scraper
+    environment:
+      DB_HOST: db
+      DB_PORT: 5432
+      DB_NAME: ${DB_NAME:-rss}
+      DB_USER: ${DB_USER:-rss}
+      DB_PASS: ${DB_PASS}
+      SCRAPER_SLEEP: 60
+      SCRAPER_BATCH: 10
+      TZ: Europe/Madrid
+    networks:
+      - backend
+    depends_on:
+      db:
        condition: service_healthy
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '1'
-          memory: 1G
+          memory: 512M

-  url-worker:
+  # ==================================================================================
+  # DISCOVERY WORKER (Go) - Discovers RSS feeds
+  # ==================================================================================
+  discovery:
    build:
      context: .
-      dockerfile: Dockerfile.url_worker
-    container_name: rss2_url_worker
-    command: bash -lc "python -m workers.url_worker_daemon"
+      dockerfile: Dockerfile.discovery
+    container_name: rss2_discovery
    environment:
      DB_HOST: db
      DB_PORT: 5432
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
-      DB_READ_HOST: db
-      DB_WRITE_HOST: db
-      TZ: Europe/Madrid
-    networks:
-      - backend
-    depends_on:
-      db:
-        condition: service_healthy
-    restart: unless-stopped
-    deploy:
-      resources:
-        limits:
-          cpus: '2'
-          memory: 2G
-
-  url-discovery-worker:
-    build: .
-    container_name: rss2_url_discovery
-    command: bash -lc "python -m workers.url_discovery_worker"
-    environment:
-      DB_HOST: db
-      DB_PORT: 5432
-      DB_NAME: ${DB_NAME:-rss}
-      DB_USER: ${DB_USER:-rss}
-      DB_PASS: ${DB_PASS}
-      URL_DISCOVERY_INTERVAL_MIN: 15
-      URL_DISCOVERY_BATCH_SIZE: 10
+      DISCOVERY_INTERVAL: 900
+      DISCOVERY_BATCH: 10
      MAX_FEEDS_PER_URL: 5
      TZ: Europe/Madrid
    networks:
@ -235,104 +180,109 @@ services:
      resources:
        limits:
          cpus: '1'
-          memory: 1G
+          memory: 512M

-  rss2_web:
-    build: .
-    container_name: rss2_web
-    command: bash -lc "gunicorn --config gunicorn_config.py app:app"
-    volumes:
-      # SEGURIDAD: Código en read-only donde sea posible
-      - ./app.py:/app/app.py:ro
-      - ./routers:/app/routers:ro
-      - ./models:/app/models:ro
-      - ./utils:/app/utils:ro
-      - ./templates:/app/templates:ro
-      - ./static:/app/static:ro
-      - ./config.py:/app/config.py:ro
-      - ./db.py:/app/db.py:ro
-      - ./cache.py:/app/cache.py:ro
-      - ./gunicorn_config.py:/app/gunicorn_config.py:ro
-      # Directorios escribibles
-      - ./hf_cache:/app/hf_cache
-      - ./data:/app/data
+  # ==================================================================================
+  # WIKI WORKER (Go) - Wikipedia info and thumbnails
+  # ==================================================================================
+  wiki-worker:
+    build:
+      context: .
+      dockerfile: Dockerfile.wiki
+    container_name: rss2_wiki_worker
    environment:
      DB_HOST: db
      DB_PORT: 5432
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
-      DB_READ_HOST: db
-      DB_WRITE_HOST: db
-      REDIS_HOST: redis
-      REDIS_PORT: 6379
-      REDIS_PASSWORD: ${REDIS_PASSWORD}
-      QDRANT_HOST: qdrant
-      QDRANT_PORT: 6333
-      QDRANT_COLLECTION_NAME: ${QDRANT_COLLECTION_NAME:-news_vectors}
-      EMB_MODEL: ${EMB_MODEL:-sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2}
-      SECRET_KEY: ${SECRET_KEY}
-      GUNICORN_WORKERS: 8
-      ALLTALK_URL: http://host.docker.internal:7851
+      WIKI_SLEEP: 10
      TZ: Europe/Madrid
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
+    volumes:
+      - ./data/wiki_images:/app/data/wiki_images
    networks:
-      - frontend
      - backend
    depends_on:
      db:
        condition: service_healthy
-      # db-replica:
-      #   condition: service_healthy
-      redis:
-        condition: service_healthy
-      qdrant:
-        condition: service_started
    restart: unless-stopped
    deploy:
      resources:
        limits:
-          cpus: '8'
-          memory: 8G
-        reservations:
-          memory: 4G
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [ gpu ]
+          cpus: '0.5'
+          memory: 256M

+  # ==================================================================================
+  # BACKEND GO (REST API) - on backend + frontend networks; REDIS_PASSWORD and SECRET_KEY have no fallback
+  # ==================================================================================
+  backend-go:
+    build:
+      context: ./backend
+      dockerfile: Dockerfile
+    container_name: rss2_backend_go
+    environment:
+      TZ: Europe/Madrid
+      DATABASE_URL: postgres://${POSTGRES_USER:-rss}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-rss}?sslmode=disable  # NOTE(review): workers read DB_USER/DB_PASS; confirm both sets match
+      REDIS_URL: redis://:${REDIS_PASSWORD}@redis:6379  # no fallback: must equal the redis service's --requirepass value
+      SECRET_KEY: "${SECRET_KEY:?SECRET_KEY must be set}"  # fail fast rather than run with a publicly known default key
+      SERVER_PORT: "8080"
+    volumes:
+      - ./data/wiki_images:/app/data/wiki_images  # same host directory the wiki-worker service mounts
+    networks:
+      - backend
+      - frontend
+    depends_on:
+      db:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+    restart: unless-stopped
+
+  # ==================================================================================
+  # FRONTEND REACT
+  # ==================================================================================
+  rss2_frontend:  # React frontend image built from ./frontend
+    build:
+      context: ./frontend
+      dockerfile: Dockerfile
+    container_name: rss2_frontend
+    environment:
+      TZ: Europe/Madrid
+      VITE_API_URL: /api  # NOTE(review): Vite normally inlines VITE_* at build time; confirm a runtime env var is actually read
+    networks:
+      - frontend  # no ports published; reachable only from the frontend network
+    depends_on:
+      - backend-go  # short form: start order only, no health condition
+    restart: unless-stopped
+
+  # ==================================================================================
+  # NGINX (port 8001 - serves React + proxies the API)
+  # ==================================================================================
  nginx:
    image: nginx:alpine
    container_name: rss2_nginx
-    environment:
-      TZ: Europe/Madrid
    ports:
-      # ÚNICO puerto expuesto públicamente
      - "8001:80"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
-      - ./static:/app/static:ro
-      - /etc/timezone:/etc/timezone:ro
-      - /etc/localtime:/etc/localtime:ro
    networks:
      - frontend
    depends_on:
-      - rss2_web
+      - rss2_frontend
+      - backend-go
    restart: unless-stopped
-    deploy:
-      resources:
-        limits:
-          cpus: '2'
-          memory: 512M

+  # ==================================================================================
+  # TRANSLATOR CPU (CTranslate2) - Scale with: docker compose up -d --scale translator=3
+  # ==================================================================================
  translator:
    build:
      context: .
-      dockerfile: Dockerfile
+      dockerfile: Dockerfile.translator
    image: rss2-translator:latest
-    container_name: rss2_translator_py
-    command: bash -lc "python -m workers.translation_worker"
+    command: bash -lc "python -m workers.ctranslator_worker"
+    security_opt:
+      - seccomp=unconfined
    environment:
      DB_HOST: db
      DB_PORT: 5432
@ -340,41 +290,36 @@ services:
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      TARGET_LANGS: es
-      TRANSLATOR_BATCH: 128
-      ENQUEUE: 300
-      # CTranslate2 configuration
+      TRANSLATOR_BATCH: 32
      CT2_MODEL_PATH: /app/models/nllb-ct2
-      CT2_DEVICE: cuda
-      CT2_COMPUTE_TYPE: int8_float16
+      CT2_DEVICE: cpu
+      CT2_COMPUTE_TYPE: int8
      UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M
      HF_HOME: /app/hf_cache
      TZ: Europe/Madrid
+      TRANSLATOR_ID: ${TRANSLATOR_ID:-}
    volumes:
+      - ./workers:/app/workers
      - ./hf_cache:/app/hf_cache
      - ./models:/app/models
    networks:
      - backend
-    deploy:
-      resources:
-        limits:
-          memory: 8G
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [ gpu ]
+    profiles:
+      - cpu-only
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped

-  translator2:
+  # ==================================================================================
+  # TRANSLATION SCHEDULER - Creates translation jobs
+  # ==================================================================================
+  translation-scheduler:
    build:
      context: .
-      dockerfile: Dockerfile
-    image: rss2-translator2:latest
-    container_name: rss2_translator_py2
-    command: bash -lc "python -m workers.translation_worker"
+      dockerfile: Dockerfile.scheduler
+    image: rss2-scheduler:latest
+    container_name: rss2_translation_scheduler
    environment:
      DB_HOST: db
      DB_PORT: 5432
@ -382,40 +327,35 @@ services:
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      TARGET_LANGS: es
-      TRANSLATOR_BATCH: 128
-      ENQUEUE: 300
-      CT2_MODEL_PATH: /app/models/nllb-ct2
-      CT2_DEVICE: cuda
-      CT2_COMPUTE_TYPE: int8_float16
-      UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M
-      HF_HOME: /app/hf_cache
+      SCHEDULER_BATCH: 1000
+      SCHEDULER_SLEEP: 30
      TZ: Europe/Madrid
    volumes:
-      - ./hf_cache:/app/hf_cache
-      - ./models:/app/models
+      - ./workers:/app/workers
    networks:
      - backend
    deploy:
      resources:
        limits:
-          memory: 8G
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [ gpu ]
+          cpus: '0.5'
+          memory: 256M
    depends_on:
      db:
        condition: service_healthy
    restart: unless-stopped

-  translator3:
+  # ==================================================================================
+  # TRANSLATOR GPU (CTranslate2 with CUDA)
+  # ==================================================================================
+  translator-gpu:
    build:
      context: .
-      dockerfile: Dockerfile
-    image: rss2-translator3:latest
-    container_name: rss2_translator_py3
-    command: bash -lc "python -m workers.translation_worker"
+      dockerfile: Dockerfile.translator-gpu
+    image: rss2-translator-gpu:latest
+    container_name: rss2_translator_gpu
+    command: bash -lc "python -m workers.ctranslator_worker"
+    security_opt:
+      - seccomp=unconfined
    environment:
      DB_HOST: db
      DB_PORT: 5432
@ -423,14 +363,15 @@ services:
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
      TARGET_LANGS: es
-      TRANSLATOR_BATCH: 128
-      ENQUEUE: 300
+      TRANSLATOR_BATCH: 64
      CT2_MODEL_PATH: /app/models/nllb-ct2
      CT2_DEVICE: cuda
-      CT2_COMPUTE_TYPE: int8_float16
+      CT2_COMPUTE_TYPE: float16
      UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M
      HF_HOME: /app/hf_cache
+      TZ: Europe/Madrid
    volumes:
+      - ./workers:/app/workers
      - ./hf_cache:/app/hf_cache
      - ./models:/app/models
    networks:
@ -438,7 +379,7 @@ services:
    deploy:
      resources:
        limits:
-          memory: 8G
+          memory: 4G
        reservations:
          devices:
            - driver: nvidia
@ -470,6 +411,7 @@ services:
      HF_HOME: /app/hf_cache
      TZ: Europe/Madrid
    volumes:
+      - ./workers:/app/workers
      - ./hf_cache:/app/hf_cache
    networks:
      - backend
@ -487,19 +429,53 @@ services:
        condition: service_healthy
    restart: unless-stopped

-  related:
+  # ==================================================================================
+  # TOPICS WORKER (Go) - Topic and country matching
+  # ==================================================================================
+  topics:
    build:
      context: .
-      dockerfile: Dockerfile
-    container_name: rss2_related_py
-    command: bash -lc "python -m workers.related_worker"
+      dockerfile: Dockerfile.topics
+    container_name: rss2_topics
    environment:
      DB_HOST: db
      DB_PORT: 5432
      DB_NAME: ${DB_NAME:-rss}
      DB_USER: ${DB_USER:-rss}
      DB_PASS: ${DB_PASS}
-      RELATED_WINDOW_H: 168
+      TOPICS_SLEEP: 10
+      TOPICS_BATCH: 500
+      TZ: Europe/Madrid
+    networks:
+      - backend
+    depends_on:
+      db:
+        condition: service_healthy
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          cpus: '1'
+          memory: 512M
+
+  # ==================================================================================
+  # RELATED WORKER (Go) - Related news
+  # ==================================================================================
+  related:
+    build:
+      context: .
+      dockerfile: Dockerfile.related
+    container_name: rss2_related
+    environment:
+      DB_HOST: db
+      DB_PORT: 5432
+      DB_NAME: ${DB_NAME:-rss}
+      DB_USER: ${DB_USER:-rss}
+      DB_PASS: ${DB_PASS}
+      RELATED_SLEEP: 10
+      RELATED_BATCH: 200
+      RELATED_TOPK: 10
+      EMB_MODEL: mxbai-embed-large
      TZ: Europe/Madrid
    networks:
      - backend
@ -513,6 +489,99 @@ services:
          cpus: '1'
          memory: 1G

+  qdrant:  # Qdrant server used by qdrant-worker (QDRANT_HOST: qdrant)
+    image: qdrant/qdrant:latest  # NOTE(review): floating tag; consider pinning a release for reproducible deploys
+    container_name: rss2_qdrant
+    environment:
+      TZ: Europe/Madrid
+      QDRANT__SERVICE__GRPC_PORT: 6334
+    volumes:
+      - ./data/qdrant_storage:/qdrant/storage  # storage kept on the host under ./data
+      - /etc/timezone:/etc/timezone:ro
+      - /etc/localtime:/etc/localtime:ro
+    networks:
+      - backend  # no ports published; reachable only from the backend network
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          cpus: '4'
+          memory: 4G
+        reservations:
+          memory: 2G
+
+  # ==================================================================================
+  # QDRANT WORKER (Go) - Vectorization and semantic search
+  # ==================================================================================
+  qdrant-worker:
+    build:
+      context: .
+      dockerfile: Dockerfile.qdrant
+    container_name: rss2_qdrant_worker
+    environment:
+      DB_HOST: db
+      DB_PORT: 5432
+      DB_NAME: ${DB_NAME:-rss}
+      DB_USER: ${DB_USER:-rss}
+      DB_PASS: ${DB_PASS}
+      QDRANT_HOST: qdrant
+      QDRANT_PORT: 6333
+      QDRANT_COLLECTION: news_vectors  # fixed value; not overridable from the environment
+      OLLAMA_URL: http://ollama:11434  # NOTE(review): no ollama service or depends_on entry is visible here; confirm the host resolves
+      QDRANT_SLEEP: 30
+      QDRANT_BATCH: 100
+      TZ: Europe/Madrid
+    networks:
+      - backend
+    depends_on:
+      db:
+        condition: service_healthy
+      qdrant:
+        condition: service_started  # waits for start only; the qdrant service defines no healthcheck
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          cpus: '1'
+          memory: 1G
+
+  # ==================================================================================
+  # NER WORKER (Python) - Entity extraction
+  # ==================================================================================
+  ner:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: rss2_ner
+    command: bash -lc "python -m workers.ner_worker"
+    environment:
+      DB_HOST: db
+      DB_PORT: 5432
+      DB_NAME: ${DB_NAME:-rss}
+      DB_USER: ${DB_USER:-rss}
+      DB_PASS: ${DB_PASS}
+      NER_LANG: es
+      NER_BATCH: 64
+      HF_HOME: /app/hf_cache
+      TZ: Europe/Madrid
+    volumes:
+      - ./workers:/app/workers  # bind-mounts the host ./workers directory into the container
+      - ./hf_cache:/app/hf_cache  # same path as HF_HOME above
+    networks:
+      - backend
+    depends_on:
+      db:
+        condition: service_healthy
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          cpus: '2'
+          memory: 2G
+
+  # ==================================================================================
+  # CLUSTER WORKER (Python) - News clustering
+  # ==================================================================================
  cluster:
    build:
      context: .
@ -528,34 +597,8 @@ services:
      EVENT_DIST_THRESHOLD: 0.35
      EMB_MODEL: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
      TZ: Europe/Madrid
-    networks:
-      - backend
-    depends_on:
-      db:
-        condition: service_healthy
-    restart: unless-stopped
-    deploy:
-      resources:
-        limits:
-          cpus: '2'
-          memory: 2G
-
-  ner:
-    build: .
-    container_name: rss2_ner
-    command: bash -lc "python -m workers.ner_worker"
-    environment:
-      DB_HOST: db
-      DB_PORT: 5432
-      DB_NAME: ${DB_NAME:-rss}
-      DB_USER: ${DB_USER:-rss}
-      DB_PASS: ${DB_PASS}
-      NER_LANG: es
-      NER_BATCH: 64
-      HF_HOME: /app/hf_cache
-      TZ: Europe/Madrid
    volumes:
-      - ./hf_cache:/app/hf_cache
+      - ./workers:/app/workers
    networks:
      - backend
    depends_on:
@ -568,33 +611,13 @@ services:
          cpus: '2'
          memory: 2G

-  topics:
+  # ==================================================================================
+  # LLM CATEGORIZER (Python) - Categorization with Ollama
+  # ==================================================================================
+  llm-categorizer:
    build:
      context: .
      dockerfile: Dockerfile
-    container_name: rss2_topics_worker
-    command: bash -lc "python -m workers.topics_worker"
-    environment:
-      DB_HOST: db
-      DB_PORT: 5432
-      DB_NAME: ${DB_NAME:-rss}
-      DB_USER: ${DB_USER:-rss}
-      DB_PASS: ${DB_PASS}
-      TZ: Europe/Madrid
-    networks:
-      - backend
-    depends_on:
-      db:
-        condition: service_healthy
-    restart: unless-stopped
-    deploy:
-      resources:
-        limits:
-          cpus: '1'
-          memory: 1G
-
-  llm-categorizer:
-    build: .
    container_name: rss2_llm_categorizer
    command: bash -lc "python -m workers.simple_categorizer_worker"
    environment:
@ -606,6 +629,8 @@ services:
      CATEGORIZER_BATCH_SIZE: 10
      CATEGORIZER_SLEEP_IDLE: 5
      TZ: Europe/Madrid
+    volumes:
+      - ./workers:/app/workers
    networks:
      - backend
    depends_on:
@ -618,72 +643,6 @@ services:
          cpus: '2'
          memory: 1G

-  qdrant:
-    image: qdrant/qdrant:latest
-    container_name: rss2_qdrant
-    environment:
-      TZ: Europe/Madrid
-      QDRANT__SERVICE__GRPC_PORT: 6334
-    # SEGURIDAD: Puertos NO expuestos - solo acceso interno
-    # ports:
-    #   - "6333:6333"
-    #   - "6334:6334"
-    volumes:
-      - ./qdrant_storage:/qdrant/storage
-      - /etc/timezone:/etc/timezone:ro
-      - /etc/localtime:/etc/localtime:ro
-    networks:
-      - backend
-    restart: unless-stopped
-    deploy:
-      resources:
-        limits:
-          cpus: '4'
-          memory: 4G
-        reservations:
-          memory: 2G
-
-  qdrant-worker:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    container_name: rss2_qdrant_worker
-    command: bash -lc "python -m workers.qdrant_worker"
-    environment:
-      DB_HOST: db
-      DB_PORT: 5432
-      DB_NAME: ${DB_NAME:-rss}
-      DB_USER: ${DB_USER:-rss}
-      DB_PASS: ${DB_PASS}
-      DB_READ_HOST: db
-      DB_WRITE_HOST: db
-      QDRANT_HOST: qdrant
-      QDRANT_PORT: 6333
-      QDRANT_COLLECTION_NAME: ${QDRANT_COLLECTION_NAME:-news_vectors}
-      EMB_MODEL: ${EMB_MODEL:-sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2}
-      EMB_DEVICE: cpu
-      QDRANT_BATCH_SIZE: ${QDRANT_BATCH_SIZE:-100}
-      QDRANT_SLEEP_IDLE: ${QDRANT_SLEEP_IDLE:-30}
-      HF_HOME: /app/hf_cache
-      TZ: Europe/Madrid
-    volumes:
-      - ./hf_cache:/app/hf_cache
-    networks:
-      - backend
-    depends_on:
-      db:
-        condition: service_healthy
-      # db-replica:
-      #   condition: service_healthy
-      qdrant:
-        condition: service_started
-    restart: unless-stopped
-    deploy:
-      resources:
-        limits:
-          cpus: '2'
-          memory: 4G
-
  # ==================================================================================
  # MONITORING STACK - SECURED
  # ==================================================================================