From 10d51c3c521d0224c78ee3601528c384b1c4d9a9 Mon Sep 17 00:00:00 2001 From: SITO Date: Mon, 30 Mar 2026 20:49:30 +0200 Subject: [PATCH] feat(deploy): despliegue nativo Debian sin Docker - Elimina todos los Dockerfiles y docker-compose.yml - Elimina scripts Docker (start_docker, reset_and_deploy, deploy-clean) - Agrega deploy/debian/ con despliegue nativo via systemd: - install.sh: instalacion completa en Debian (PostgreSQL, Redis, Qdrant binario, Go, Python venv, nginx, frontend compilado) - build.sh: recompila binarios Go y frontend sin reinstalar - env.example: variables de entorno sin referencias Docker - nginx.conf: sirve React estatico + proxy al API Go en localhost - systemd/*.service: 16 servicios (8 Go + 7 Python + Qdrant) Todos los hostnames Docker (db, redis, qdrant) reemplazados por 127.0.0.1 Co-Authored-By: Claude Sonnet 4.6 --- Dockerfile | 50 -- Dockerfile.discovery | 31 - Dockerfile.qdrant | 34 - Dockerfile.related | 32 - Dockerfile.scheduler | 23 - Dockerfile.scraper | 32 - Dockerfile.topics | 30 - Dockerfile.translator | 43 - Dockerfile.translator-gpu | 48 -- Dockerfile.wiki | 31 - backend/Dockerfile | 24 - deploy-clean.sh | 47 -- deploy/debian/build.sh | 69 ++ deploy/debian/env.example | 104 +++ deploy/debian/install.sh | 294 +++++++ deploy/debian/nginx.conf | 91 +++ deploy/debian/systemd/rss2-backend.service | 24 + .../debian/systemd/rss2-categorizer.service | 25 + deploy/debian/systemd/rss2-cluster.service | 25 + deploy/debian/systemd/rss2-discovery.service | 26 + deploy/debian/systemd/rss2-embeddings.service | 30 + deploy/debian/systemd/rss2-ingestor.service | 26 + deploy/debian/systemd/rss2-langdetect.service | 25 + deploy/debian/systemd/rss2-ner.service | 26 + .../debian/systemd/rss2-qdrant-worker.service | 28 + deploy/debian/systemd/rss2-qdrant.service | 25 + deploy/debian/systemd/rss2-related.service | 26 + deploy/debian/systemd/rss2-scraper.service | 25 + deploy/debian/systemd/rss2-topics.service | 25 + .../rss2-translation-scheduler.service | 26 + deploy/debian/systemd/rss2-translator.service | 31 + deploy/debian/systemd/rss2-wiki.service | 24 + docker-compose.yml | 748 ------------------ docker-entrypoint-db.sh | 42 - frontend/Dockerfile | 19 - monitoring/prometheus.yml | 21 - reset_and_deploy.sh | 14 - rss-ingestor-go/Dockerfile | 27 - start_docker.sh | 23 - 39 files changed, 975 insertions(+), 1319 deletions(-) delete mode 100644 Dockerfile delete mode 100644 Dockerfile.discovery delete mode 100644 Dockerfile.qdrant delete mode 100644 Dockerfile.related delete mode 100644 Dockerfile.scheduler delete mode 100644 Dockerfile.scraper delete mode 100644 Dockerfile.topics delete mode 100644 Dockerfile.translator delete mode 100644 Dockerfile.translator-gpu delete mode 100644 Dockerfile.wiki delete mode 100644 backend/Dockerfile delete mode 100755 deploy-clean.sh create mode 100755 deploy/debian/build.sh create mode 100644 deploy/debian/env.example create mode 100755 deploy/debian/install.sh create mode 100644 deploy/debian/nginx.conf create mode 100644 deploy/debian/systemd/rss2-backend.service create mode 100644 deploy/debian/systemd/rss2-categorizer.service create mode 100644 deploy/debian/systemd/rss2-cluster.service create mode 100644 deploy/debian/systemd/rss2-discovery.service create mode 100644 deploy/debian/systemd/rss2-embeddings.service create mode 100644 deploy/debian/systemd/rss2-ingestor.service create mode 100644 deploy/debian/systemd/rss2-langdetect.service create mode 100644 deploy/debian/systemd/rss2-ner.service create mode 100644 deploy/debian/systemd/rss2-qdrant-worker.service create mode 100644 deploy/debian/systemd/rss2-qdrant.service create mode 100644 deploy/debian/systemd/rss2-related.service create mode 100644 deploy/debian/systemd/rss2-scraper.service create mode 100644 deploy/debian/systemd/rss2-topics.service create mode 100644 deploy/debian/systemd/rss2-translation-scheduler.service create mode 100644 deploy/debian/systemd/rss2-translator.service create mode 100644 deploy/debian/systemd/rss2-wiki.service delete mode 100644 docker-compose.yml delete mode 100755 docker-entrypoint-db.sh delete mode 100644 frontend/Dockerfile delete mode 100644 monitoring/prometheus.yml delete mode 100755 reset_and_deploy.sh delete mode 100644 rss-ingestor-go/Dockerfile delete mode 100755 start_docker.sh diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index abd9f82..0000000 --- a/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -FROM python:3.11-slim - -WORKDIR /app - -RUN apt-get update && apt-get install -y --no-install-recommends \ - libpq-dev gcc git curl \ - && rm -rf /var/lib/apt/lists/* - -ENV PYTHONUNBUFFERED=1 \ - PIP_DISABLE_PIP_VERSION_CHECK=1 \ - TOKENIZERS_PARALLELISM=false \ - HF_HOME=/root/.cache/huggingface - -COPY requirements.txt . -RUN pip install --no-cache-dir --upgrade pip - -RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu121 - -RUN pip install --no-cache-dir \ - ctranslate2 \ - sentencepiece \ - transformers==4.44.0 \ - protobuf==3.20.3 \ - "numpy<2" \ - psycopg2-binary \ - redis \ - requests \ - beautifulsoup4 \ - lxml \ - langdetect \ - nltk \ - scikit-learn \ - pandas \ - sentence-transformers \ - spacy - -RUN python -m spacy download es_core_news_lg - -COPY workers/ ./workers/ -COPY init-db/ ./init-db/ -COPY migrations/ ./migrations/ -COPY entity_config.json . - -ENV DB_HOST=db -ENV DB_PORT=5432 -ENV DB_NAME=rss -ENV DB_USER=rss -ENV DB_PASS=x - -CMD ["python", "-m", "workers.embeddings_worker"] diff --git a/Dockerfile.discovery b/Dockerfile.discovery deleted file mode 100644 index 90e405d..0000000 --- a/Dockerfile.discovery +++ /dev/null @@ -1,31 +0,0 @@ -FROM golang:1.22-alpine AS builder - -ENV GOTOOLCHAIN=auto - -RUN apk add --no-cache git - -WORKDIR /app - -COPY backend/go.mod backend/go.sum ./ -RUN go mod download - -COPY backend/ ./ - -RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/discovery ./cmd/discovery - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=builder /bin/discovery /bin/discovery - -ENV DB_HOST=db \ - DB_PORT=5432 \ - DB_NAME=rss \ - DB_USER=rss \ - DB_PASS=rss \ - DISCOVERY_INTERVAL=900 \ - DISCOVERY_BATCH=10 \ - MAX_FEEDS_PER_URL=5 - -ENTRYPOINT ["/bin/discovery"] diff --git a/Dockerfile.qdrant b/Dockerfile.qdrant deleted file mode 100644 index e80bfae..0000000 --- a/Dockerfile.qdrant +++ /dev/null @@ -1,34 +0,0 @@ -FROM golang:1.22-alpine AS builder - -ENV GOTOOLCHAIN=auto - -RUN apk add --no-cache git - -WORKDIR /app - -COPY backend/go.mod backend/go.sum ./ -RUN go mod download - -COPY backend/ ./ - -RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/qdrant-worker ./cmd/qdrant - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=builder /bin/qdrant-worker /bin/qdrant-worker - -ENV DB_HOST=db \ - DB_PORT=5432 \ - DB_NAME=rss \ - DB_USER=rss \ - DB_PASS=rss \ - QDRANT_HOST=qdrant \ - QDRANT_PORT=6333 \ - QDRANT_COLLECTION=news_vectors \ - OLLAMA_URL=http://ollama:11434 \ - QDRANT_SLEEP=30 \ - QDRANT_BATCH=100 - -ENTRYPOINT ["/bin/qdrant-worker"] diff --git a/Dockerfile.related b/Dockerfile.related deleted file mode 100644 index 12e011d..0000000 --- a/Dockerfile.related +++ /dev/null @@ -1,32 +0,0 @@ -FROM golang:1.22-alpine AS builder - -ENV GOTOOLCHAIN=auto - -RUN apk add --no-cache git - -WORKDIR /app - -COPY backend/go.mod backend/go.sum ./ -RUN go mod download - -COPY backend/ ./ - -RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/related ./cmd/related - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=builder /bin/related /bin/related - -ENV DB_HOST=db \ - DB_PORT=5432 \ - DB_NAME=rss \ - DB_USER=rss \ - DB_PASS=rss \ - RELATED_SLEEP=10 \ - RELATED_BATCH=200 \ - RELATED_TOPK=10 \ - EMB_MODEL=mxbai-embed-large - -ENTRYPOINT ["/bin/related"] diff --git a/Dockerfile.scheduler b/Dockerfile.scheduler deleted file mode 100644 index 4a81d3e..0000000 --- a/Dockerfile.scheduler +++ /dev/null @@ -1,23 +0,0 @@ -FROM python:3.11-slim - -WORKDIR /app - -RUN apt-get update && apt-get install -y --no-install-recommends \ - libpq-dev \ - && rm -rf /var/lib/apt/lists/* - -ENV PYTHONUNBUFFERED=1 - -COPY requirements.txt . -RUN pip install --no-cache-dir --upgrade pip -RUN pip install --no-cache-dir psycopg2-binary langdetect - -COPY workers/translation_scheduler.py ./workers/ - -ENV DB_HOST=db -ENV DB_PORT=5432 -ENV DB_NAME=rss -ENV DB_USER=rss -ENV DB_PASS=x - -CMD ["python", "workers/translation_scheduler.py"] diff --git a/Dockerfile.scraper b/Dockerfile.scraper deleted file mode 100644 index 9a32bff..0000000 --- a/Dockerfile.scraper +++ /dev/null @@ -1,32 +0,0 @@ -FROM golang:1.22-alpine AS builder - -ENV GOTOOLCHAIN=auto - -RUN apk add --no-cache git - -WORKDIR /app - -COPY backend/go.mod backend/go.sum ./ -RUN go mod download - -COPY backend/ ./ - -RUN go mod tidy - -RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/scraper ./cmd/scraper - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=builder /bin/scraper /bin/scraper - -ENV DB_HOST=db \ - DB_PORT=5432 \ - DB_NAME=rss \ - DB_USER=rss \ - DB_PASS=rss \ - SCRAPER_SLEEP=60 \ - SCRAPER_BATCH=10 - -ENTRYPOINT ["/bin/scraper"] diff --git a/Dockerfile.topics b/Dockerfile.topics deleted file mode 100644 index fc82ea7..0000000 --- a/Dockerfile.topics +++ /dev/null @@ -1,30 +0,0 @@ -FROM golang:1.22-alpine AS builder - -ENV GOTOOLCHAIN=auto - -RUN apk add --no-cache git - -WORKDIR /app - -COPY backend/go.mod backend/go.sum ./ -RUN go mod download - -COPY backend/ ./ - -RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/topics ./cmd/topics - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=builder /bin/topics /bin/topics - -ENV DB_HOST=db \ - DB_PORT=5432 \ - DB_NAME=rss \ - DB_USER=rss \ - DB_PASS=rss \ - TOPICS_SLEEP=10 \ - TOPICS_BATCH=500 - -ENTRYPOINT ["/bin/topics"] diff --git a/Dockerfile.translator b/Dockerfile.translator deleted file mode 100644 index e6a96be..0000000 --- a/Dockerfile.translator +++ /dev/null @@ -1,43 +0,0 @@ -FROM python:3.11-slim-bookworm - -RUN apt-get update && apt-get install -y --no-install-recommends \ - patchelf libpq-dev gcc git curl wget \ - && rm -rf /var/lib/apt/lists/* - -ENV PYTHONUNBUFFERED=1 \ - PIP_DISABLE_PIP_VERSION_CHECK=1 \ - TOKENIZERS_PARALLELISM=false \ - HF_HOME=/root/.cache/huggingface - -WORKDIR /app - -COPY requirements.txt . -RUN pip install --no-cache-dir --upgrade pip - -RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cpu - -RUN pip install --no-cache-dir \ - ctranslate2==3.24.0 \ - sentencepiece \ - transformers==4.36.0 \ - protobuf==3.20.3 \ - "numpy<2" \ - psycopg2-binary \ - langdetect - -# === ARREGLAR EL EXECUTABLE STACK === -RUN find /usr/local/lib/python3.11/site-packages/ctranslate2* \ - -name "libctranslate2-*.so.*" -o -name "libctranslate2.so*" | \ - xargs -I {} patchelf --clear-execstack {} || true - -COPY workers/ ./workers/ -COPY init-db/ ./init-db/ -COPY migrations/ ./migrations/ - -ENV DB_HOST=db -ENV DB_PORT=5432 -ENV DB_NAME=rss -ENV DB_USER=rss -ENV DB_PASS=x - -CMD ["python", "-m", "workers.ctranslator_worker"] diff --git a/Dockerfile.translator-gpu b/Dockerfile.translator-gpu deleted file mode 100644 index c3a990b..0000000 --- a/Dockerfile.translator-gpu +++ /dev/null @@ -1,48 +0,0 @@ -FROM python:3.11-slim-bookworm - -RUN apt-get update && apt-get install -y --no-install-recommends \ - patchelf libpq-dev gcc git curl wget \ - && rm -rf /var/lib/apt/lists/* - -ENV PYTHONUNBUFFERED=1 \ - PIP_DISABLE_PIP_VERSION_CHECK=1 \ - TOKENIZERS_PARALLELISM=false \ - HF_HOME=/root/.cache/huggingface - -WORKDIR /app - -COPY requirements.txt . -RUN pip install --no-cache-dir --upgrade pip - -# Install PyTorch with CUDA support (cu118 for broader compatibility) -RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu118 - -RUN pip install --no-cache-dir \ - ctranslate2==3.24.0 \ - sentencepiece \ - transformers==4.36.0 \ - protobuf==3.20.3 \ - "numpy<2" \ - psycopg2-binary \ - langdetect - -# Fix executable stack -RUN find /usr/local/lib/python3.11/site-packages/ctranslate2* \ - -name "libctranslate2-*.so.*" -o -name "libctranslate2.so*" | \ - xargs -I {} patchelf --clear-execstack {} || true - -COPY workers/ ./workers/ -COPY init-db/ ./init-db/ -COPY migrations/ ./migrations/ - -ENV DB_HOST=db -ENV DB_PORT=5432 -ENV DB_NAME=rss -ENV DB_USER=rss -ENV DB_PASS=x - -# GPU Configuration - Override with: docker run --gpus all -ENV CT2_DEVICE=cuda -ENV CT2_COMPUTE_TYPE=float16 - -CMD ["python", "-m", "workers.ctranslator_worker"] diff --git a/Dockerfile.wiki b/Dockerfile.wiki deleted file mode 100644 index fbd84e0..0000000 --- a/Dockerfile.wiki +++ /dev/null @@ -1,31 +0,0 @@ -FROM golang:alpine AS builder - -ENV GOTOOLCHAIN=auto - -RUN apk add --no-cache git - -WORKDIR /app - -COPY backend/go.mod backend/go.sum ./ -RUN go mod download - -COPY backend/ ./ - -RUN go mod tidy - -RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/wiki_worker ./cmd/wiki_worker - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=builder /bin/wiki_worker /bin/wiki_worker - -ENV DB_HOST=db \ - DB_PORT=5432 \ - DB_NAME=rss \ - DB_USER=rss \ - DB_PASS=rss \ - WIKI_SLEEP=10 - -ENTRYPOINT ["/bin/wiki_worker"] diff --git a/backend/Dockerfile b/backend/Dockerfile deleted file mode 100644 index 6d232b9..0000000 --- a/backend/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM golang:1.23 AS builder - -WORKDIR /app - -RUN apt-get update && apt-get install -y gcc musl-dev git - -COPY go.mod go.sum ./ -RUN go mod download - -COPY . . - -RUN CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o /server ./cmd/server - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata postgresql-client - -WORKDIR /app - -COPY --from=builder /server . - -EXPOSE 8080 - -CMD ["./server"] diff --git a/deploy-clean.sh b/deploy-clean.sh deleted file mode 100755 index 6f2ccb6..0000000 --- a/deploy-clean.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -# Script para despliegue limpio de RSS2 - -echo "=== RSS2 Clean Deployment Script ===" -echo "" - -# Detener contenedores -echo "1. Deteniendo contenedores..." -docker compose down -v 2>/dev/null - -# Eliminar volúmenes de datos (si hay permisos) -echo "2. Eliminando volúmenes de datos..." -docker volume rm rss2_db 2>/dev/null || true -docker volume rm rss2_redis 2>/dev/null || true - -# Si los volúmenes Docker tienen problemas, intentar con rm -echo " Intentando limpiar /data/..." -sudo rm -rf /datos/rss2/data/pgdata 2>/dev/null || true -sudo rm -rf /datos/rss2/data/redis-data 2>/dev/null || true - -# Iniciar base de datos -echo "3. Iniciando base de datos..." -docker compose up -d db - -# Esperar a que esté lista -echo "4. Esperando a que la base de datos esté lista..." -sleep 10 - -# Verificar estado -if docker compose ps db | grep -q "healthy"; then - echo " ✓ Base de datos iniciada correctamente" - - # Ejecutar script de schema - echo "5. Ejecutando script de inicialización..." - docker compose exec -T db psql -U rss -d rss -f /docker-entrypoint-initdb.d/00-complete-schema.sql 2>&1 | tail -5 - - # Iniciar demás servicios - echo "6. Iniciando servicios..." - docker compose up -d redis backend-go rss2_frontend nginx rss-ingestor-go - - echo "" - echo "=== Despliegue completado ===" - echo "Accede a: http://localhost:8001" -else - echo " ✗ Error: La base de datos no está healthy" - docker compose logs db -fi diff --git a/deploy/debian/build.sh b/deploy/debian/build.sh new file mode 100755 index 0000000..2699652 --- /dev/null +++ b/deploy/debian/build.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# ============================================================================= +# RSS2 - Recompila binarios y frontend (sin reinstalar el sistema) +# Usar despues de actualizar el codigo: bash build.sh +# ============================================================================= +set -euo pipefail + +RSS2_HOME="/opt/rss2" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" + +export PATH=$PATH:/usr/local/go/bin + +GREEN='\033[0;32m'; YELLOW='\033[1;33m'; NC='\033[0m' +info() { echo -e "${GREEN}[BUILD]${NC} $*"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } + +# --- Go Backend + Workers --- +if [[ -d "$REPO_ROOT/backend" ]]; then + info "Compilando backend Go..." + (cd "$REPO_ROOT/backend" && \ + CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o "$RSS2_HOME/bin/server" ./cmd/server) + info " [OK] server" + + for cmd in scraper discovery wiki_worker topics related qdrant; do + [[ -d "$REPO_ROOT/backend/cmd/$cmd" ]] || continue + (cd "$REPO_ROOT/backend" && \ + CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o "$RSS2_HOME/bin/$cmd" "./cmd/$cmd") + info " [OK] $cmd" + done +fi + +# --- Ingestor Go --- +if [[ -d "$REPO_ROOT/rss-ingestor-go" ]]; then + info "Compilando ingestor Go..." + (cd "$REPO_ROOT/rss-ingestor-go" && \ + CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o "$RSS2_HOME/bin/ingestor" .) + info " [OK] ingestor" +fi + +# --- Frontend React --- +if [[ -d "$REPO_ROOT/frontend" ]]; then + info "Compilando frontend React..." + (cd "$REPO_ROOT/frontend" && \ + npm install --silent && \ + VITE_API_URL=/api npm run build -- --outDir "$RSS2_HOME/frontend/dist") + info " [OK] frontend" +fi + +# --- Workers Python --- +info "Sincronizando workers Python..." +rsync -a --delete "$REPO_ROOT/workers/" "$RSS2_HOME/src/workers/" +cp "$REPO_ROOT/entity_config.json" "$RSS2_HOME/src/" 2>/dev/null || true +info " [OK] workers Python" + +chown -R rss2:rss2 "$RSS2_HOME/bin" "$RSS2_HOME/frontend/dist" "$RSS2_HOME/src" + +# --- Restart servicios --- +info "Reiniciando servicios..." +GO_SERVICES=(rss2-backend rss2-ingestor rss2-scraper rss2-discovery rss2-wiki rss2-topics rss2-related rss2-qdrant-worker) +PY_SERVICES=(rss2-langdetect rss2-translation-scheduler rss2-translator rss2-embeddings rss2-ner rss2-cluster rss2-categorizer) + +for svc in "${GO_SERVICES[@]}" "${PY_SERVICES[@]}"; do + systemctl is-active --quiet "$svc" && systemctl restart "$svc" && info " restarted $svc" || true +done + +systemctl reload nginx 2>/dev/null || true + +info "Build completado." diff --git a/deploy/debian/env.example b/deploy/debian/env.example new file mode 100644 index 0000000..52b91e7 --- /dev/null +++ b/deploy/debian/env.example @@ -0,0 +1,104 @@ +# ============================================================================= +# RSS2 - Variables de entorno para despliegue Debian nativo +# Copiar a /opt/rss2/.env y editar valores antes de instalar +# ============================================================================= + +# --- PostgreSQL --- +POSTGRES_DB=rss +POSTGRES_USER=rss +POSTGRES_PASSWORD=CAMBIA_ESTO_postgres_password + +# Usadas por workers Go (equivalente a DATABASE_URL) +DB_HOST=127.0.0.1 +DB_PORT=5432 +DB_NAME=rss +DB_USER=rss +DB_PASS=CAMBIA_ESTO_postgres_password + +# URL completa para backend API Go +DATABASE_URL=postgres://rss:CAMBIA_ESTO_postgres_password@127.0.0.1:5432/rss?sslmode=disable + +# --- Redis --- +REDIS_PASSWORD=CAMBIA_ESTO_redis_password +REDIS_URL=redis://:CAMBIA_ESTO_redis_password@127.0.0.1:6379 + +# --- JWT Secret (minimo 32 caracteres, aleatorio) --- +SECRET_KEY=CAMBIA_ESTO_jwt_secret_muy_largo_y_aleatorio + +# --- Backend API --- +SERVER_PORT=8080 + +# --- Zona horaria --- +TZ=Europe/Madrid + +# --- HuggingFace cache (modelos ML) --- +HF_HOME=/opt/rss2/hf_cache + +# --- Qdrant (local, sin Docker) --- +QDRANT_HOST=127.0.0.1 +QDRANT_PORT=6333 +QDRANT_COLLECTION=news_vectors + +# --- Translator (NLLB-200 via CTranslate2) --- +TARGET_LANGS=es +TRANSLATOR_BATCH=32 +CT2_MODEL_PATH=/opt/rss2/models/nllb-ct2 +CT2_DEVICE=cpu +CT2_COMPUTE_TYPE=int8 +UNIVERSAL_MODEL=facebook/nllb-200-distilled-600M + +# --- Embeddings --- +EMB_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 +EMB_BATCH=64 +EMB_SLEEP_IDLE=5 +EMB_LANGS=es +EMB_LIMIT=1000 +DEVICE=cpu + +# --- NER --- +NER_LANG=es +NER_BATCH=64 + +# --- Ingestor RSS --- +RSS_MAX_WORKERS=100 +RSS_POKE_INTERVAL_MIN=15 + +# --- Scraper --- +SCRAPER_SLEEP=60 +SCRAPER_BATCH=10 + +# --- Discovery --- +DISCOVERY_INTERVAL=900 +DISCOVERY_BATCH=10 +MAX_FEEDS_PER_URL=5 + +# --- Wiki Worker --- +WIKI_SLEEP=10 + +# --- Topics --- +TOPICS_SLEEP=10 +TOPICS_BATCH=500 + +# --- Related --- +RELATED_SLEEP=10 +RELATED_BATCH=200 +RELATED_TOPK=10 + +# --- Cluster --- +EVENT_DIST_THRESHOLD=0.35 + +# --- Categorizer --- +CATEGORIZER_BATCH_SIZE=10 +CATEGORIZER_SLEEP_IDLE=5 + +# --- Scheduler traduccion --- +SCHEDULER_BATCH=1000 +SCHEDULER_SLEEP=30 + +# --- Lang Detect --- +LANG_DETECT_SLEEP=60 +LANG_DETECT_BATCH=1000 + +# --- Qdrant Worker --- +QDRANT_SLEEP=30 +QDRANT_BATCH=100 diff --git a/deploy/debian/install.sh b/deploy/debian/install.sh new file mode 100755 index 0000000..00da025 --- /dev/null +++ b/deploy/debian/install.sh @@ -0,0 +1,294 @@ +#!/usr/bin/env bash +# ============================================================================= +# RSS2 - Instalacion en Debian (sin Docker) +# Ejecutar como root: bash install.sh +# ============================================================================= +set -euo pipefail + +RSS2_USER="rss2" +RSS2_HOME="/opt/rss2" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" + +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; NC='\033[0m' +info() { echo -e "${GREEN}[INFO]${NC} $*"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +error() { echo -e "${RED}[ERROR]${NC} $*"; exit 1; } + +[[ "$EUID" -ne 0 ]] && error "Ejecutar como root: sudo bash install.sh" + +# ============================================================================= +# 1. DEPENDENCIAS DEL SISTEMA +# ============================================================================= +info "Instalando dependencias del sistema..." +apt-get update -qq +apt-get install -y --no-install-recommends \ + curl wget git build-essential \ + postgresql postgresql-client \ + redis-server \ + nginx \ + python3 python3-pip python3-venv python3-dev \ + nodejs npm \ + ca-certificates tzdata \ + libpq-dev + +# Go (si no esta instalado o version < 1.22) +if ! command -v go &>/dev/null || [[ "$(go version | awk '{print $3}' | tr -d 'go')" < "1.22" ]]; then + info "Instalando Go 1.23..." + GO_VERSION="1.23.4" + ARCH=$(dpkg --print-architecture) + case "$ARCH" in + amd64) GO_ARCH="amd64" ;; + arm64) GO_ARCH="arm64" ;; + *) error "Arquitectura no soportada: $ARCH" ;; + esac + curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-${GO_ARCH}.tar.gz" -o /tmp/go.tar.gz + rm -rf /usr/local/go + tar -C /usr/local -xzf /tmp/go.tar.gz + echo 'export PATH=$PATH:/usr/local/go/bin' > /etc/profile.d/go.sh + export PATH=$PATH:/usr/local/go/bin + rm /tmp/go.tar.gz +fi +info "Go: $(go version)" + +# Qdrant (binario oficial) +if [[ ! -f "$RSS2_HOME/qdrant/qdrant" ]]; then + info "Descargando Qdrant..." + QDRANT_VERSION="v1.12.1" + ARCH=$(dpkg --print-architecture) + case "$ARCH" in + amd64) QDRANT_ARCH="x86_64-unknown-linux-musl" ;; + arm64) QDRANT_ARCH="aarch64-unknown-linux-musl" ;; + *) error "Arquitectura no soportada para Qdrant: $ARCH" ;; + esac + mkdir -p "$RSS2_HOME/qdrant" + curl -fsSL "https://github.com/qdrant/qdrant/releases/download/${QDRANT_VERSION}/qdrant-${QDRANT_ARCH}.tar.gz" \ + -o /tmp/qdrant.tar.gz + tar -C "$RSS2_HOME/qdrant" -xzf /tmp/qdrant.tar.gz + chmod +x "$RSS2_HOME/qdrant/qdrant" + rm /tmp/qdrant.tar.gz +fi + +# ============================================================================= +# 2. USUARIO Y DIRECTORIOS +# ============================================================================= +info "Creando usuario $RSS2_USER y directorios..." +id "$RSS2_USER" &>/dev/null || useradd -r -m -d "$RSS2_HOME" -s /bin/bash "$RSS2_USER" + +mkdir -p \ + "$RSS2_HOME/bin" \ + "$RSS2_HOME/src" \ + "$RSS2_HOME/data/wiki_images" \ + "$RSS2_HOME/data/qdrant_storage" \ + "$RSS2_HOME/hf_cache" \ + "$RSS2_HOME/models" \ + "$RSS2_HOME/frontend/dist" \ + "$RSS2_HOME/logs" + +# ============================================================================= +# 3. CONFIGURACION ENTORNO +# ============================================================================= +if [[ ! -f "$RSS2_HOME/.env" ]]; then + if [[ -f "$SCRIPT_DIR/env.example" ]]; then + cp "$SCRIPT_DIR/env.example" "$RSS2_HOME/.env" + warn "Copia env.example en $RSS2_HOME/.env - EDITA LAS CONTRASENAS antes de continuar" + warn "Presiona Enter cuando hayas editado el .env, o Ctrl+C para salir" + read -r + else + error "No se encontro env.example en $SCRIPT_DIR" + fi +fi + +# ============================================================================= +# 4. POSTGRESQL +# ============================================================================= +info "Configurando PostgreSQL..." +source "$RSS2_HOME/.env" 2>/dev/null || true + +DB_NAME="${POSTGRES_DB:-rss}" +DB_USER="${POSTGRES_USER:-rss}" +DB_PASS="${POSTGRES_PASSWORD:-changeme}" + +systemctl enable --now postgresql + +# Crear usuario y base de datos si no existen +sudo -u postgres psql -tc "SELECT 1 FROM pg_roles WHERE rolname='$DB_USER'" | grep -q 1 || \ + sudo -u postgres psql -c "CREATE USER $DB_USER WITH PASSWORD '$DB_PASS';" + +sudo -u postgres psql -tc "SELECT 1 FROM pg_database WHERE datname='$DB_NAME'" | grep -q 1 || \ + sudo -u postgres createdb -O "$DB_USER" "$DB_NAME" + +# Ejecutar migraciones SQL +if [[ -d "$REPO_ROOT/migrations" ]]; then + info "Ejecutando migraciones..." + for sql_file in "$REPO_ROOT/migrations"/*.sql; do + [[ -f "$sql_file" ]] || continue + info " Aplicando $(basename "$sql_file")..." + sudo -u postgres psql -d "$DB_NAME" -f "$sql_file" 2>/dev/null || warn " (ya aplicada o error ignorado)" + done +fi + +# Ejecutar init-db scripts (schema inicial) +if [[ -d "$REPO_ROOT/init-db" ]]; then + info "Ejecutando scripts de init-db..." + for sql_file in "$REPO_ROOT/init-db"/*.sql; do + [[ -f "$sql_file" ]] || continue + info " $(basename "$sql_file")..." + sudo -u postgres psql -d "$DB_NAME" -f "$sql_file" 2>/dev/null || warn " (ya aplicada o error ignorado)" + done +fi + +# ============================================================================= +# 5. REDIS +# ============================================================================= +info "Configurando Redis..." +REDIS_PASS="${REDIS_PASSWORD:-changeme_redis}" + +# Agregar autenticacion y limites de memoria a redis.conf +REDIS_CONF="/etc/redis/redis.conf" +grep -q "requirepass $REDIS_PASS" "$REDIS_CONF" 2>/dev/null || { + echo "requirepass $REDIS_PASS" >> "$REDIS_CONF" + echo "maxmemory 512mb" >> "$REDIS_CONF" + echo "maxmemory-policy allkeys-lru" >> "$REDIS_CONF" + echo "appendonly yes" >> "$REDIS_CONF" +} +systemctl enable --now redis-server + +# ============================================================================= +# 6. PYTHON VIRTUALENV + DEPENDENCIAS ML +# ============================================================================= +info "Creando virtualenv Python y instalando dependencias..." +python3 -m venv "$RSS2_HOME/venv" +"$RSS2_HOME/venv/bin/pip" install --upgrade pip -q + +if [[ -f "$REPO_ROOT/requirements.txt" ]]; then + "$RSS2_HOME/venv/bin/pip" install -r "$REPO_ROOT/requirements.txt" -q +fi + +# spaCy modelo en español +"$RSS2_HOME/venv/bin/python" -m spacy download es_core_news_lg 2>/dev/null || \ + warn "spaCy model es_core_news_lg no se pudo descargar, hazlo manualmente" + +# Copiar workers Python al directorio de trabajo +info "Copiando workers Python..." +rsync -a --delete "$REPO_ROOT/workers/" "$RSS2_HOME/src/workers/" +cp "$REPO_ROOT/entity_config.json" "$RSS2_HOME/src/" 2>/dev/null || true + +# ============================================================================= +# 7. COMPILAR GO (backend + workers) +# ============================================================================= +info "Compilando binarios Go..." +export PATH=$PATH:/usr/local/go/bin +export GOPATH=/tmp/go-build-rss2 + +# Backend API +if [[ -d "$REPO_ROOT/backend" ]]; then + (cd "$REPO_ROOT/backend" && \ + CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o "$RSS2_HOME/bin/server" ./cmd/server && \ + info " [OK] server") || warn " [FAIL] server" + for cmd in scraper discovery wiki_worker topics related qdrant; do + [[ -d "$REPO_ROOT/backend/cmd/$cmd" ]] || continue + (cd "$REPO_ROOT/backend" && \ + CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o "$RSS2_HOME/bin/$cmd" "./cmd/$cmd" && \ + info " [OK] $cmd") || warn " [FAIL] $cmd" + done +fi + +# RSS Ingestor Go (repo separado) +if [[ -d "$REPO_ROOT/rss-ingestor-go" ]]; then + (cd "$REPO_ROOT/rss-ingestor-go" && \ + CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o "$RSS2_HOME/bin/ingestor" . && \ + info " [OK] ingestor") || warn " [FAIL] ingestor" +fi + +# ============================================================================= +# 8. FRONTEND REACT +# ============================================================================= +info "Compilando frontend React..." +if [[ -d "$REPO_ROOT/frontend" ]]; then + (cd "$REPO_ROOT/frontend" && \ + npm install --silent && \ + VITE_API_URL=/api npm run build -- --outDir "$RSS2_HOME/frontend/dist" && \ + info " [OK] frontend compilado") || warn " [FAIL] frontend" +fi + +# ============================================================================= +# 9. NGINX +# ============================================================================= +info "Configurando Nginx..." +cp "$SCRIPT_DIR/nginx.conf" /etc/nginx/nginx.conf +nginx -t && systemctl enable --now nginx && systemctl reload nginx + +# ============================================================================= +# 10. SYSTEMD SERVICES +# ============================================================================= +info "Instalando servicios systemd..." +SERVICES=( + rss2-qdrant + rss2-backend + rss2-ingestor + rss2-scraper + rss2-discovery + rss2-wiki + rss2-topics + rss2-related + rss2-qdrant-worker + rss2-langdetect + rss2-translation-scheduler + rss2-translator + rss2-embeddings + rss2-ner + rss2-cluster + rss2-categorizer +) + +for svc in "${SERVICES[@]}"; do + svc_file="$SCRIPT_DIR/systemd/${svc}.service" + if [[ -f "$svc_file" ]]; then + cp "$svc_file" "/etc/systemd/system/${svc}.service" + else + warn "No se encontro $svc_file" + fi +done + +systemctl daemon-reload + +for svc in "${SERVICES[@]}"; do + systemctl enable "$svc" 2>/dev/null || true +done + +# ============================================================================= +# 11. PERMISOS FINALES +# ============================================================================= +info "Ajustando permisos..." +chown -R "$RSS2_USER:$RSS2_USER" "$RSS2_HOME" +chmod 600 "$RSS2_HOME/.env" + +# ============================================================================= +# 12. ARRANCAR SERVICIOS +# ============================================================================= +info "Arrancando servicios..." +# Infraestructura primero +systemctl start rss2-qdrant +sleep 3 + +# API y workers Go +for svc in rss2-backend rss2-ingestor rss2-scraper rss2-discovery rss2-wiki rss2-topics rss2-related rss2-qdrant-worker; do + systemctl start "$svc" || warn "No se pudo arrancar $svc" +done + +# Workers Python (modelos pesados, arrancan despues) +for svc in rss2-langdetect rss2-translation-scheduler rss2-translator rss2-embeddings rss2-ner rss2-cluster rss2-categorizer; do + systemctl start "$svc" || warn "No se pudo arrancar $svc" +done + +# ============================================================================= +echo "" +info "=============================================" +info " RSS2 instalado en $RSS2_HOME" +info " Acceder en: http://$(hostname -I | awk '{print $1}'):8001" +info "" +info " Ver logs: journalctl -u rss2-backend -f" +info " Ver estado: systemctl status rss2-backend" +info " Editar env: nano $RSS2_HOME/.env" +info "=============================================" diff --git a/deploy/debian/nginx.conf b/deploy/debian/nginx.conf new file mode 100644 index 0000000..5407ce8 --- /dev/null +++ b/deploy/debian/nginx.conf @@ -0,0 +1,91 @@ +user www-data; +worker_processes auto; +error_log /var/log/nginx/error.log warn; +pid /run/nginx.pid; + +events { + worker_connections 2048; + use epoll; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + types_hash_max_size 2048; + client_max_body_size 100M; + + gzip on; + gzip_vary on; + gzip_proxied any; + gzip_comp_level 6; + gzip_types text/plain text/css text/javascript + application/json application/javascript + application/xml text/xml; + + # Go API backend (proceso nativo en localhost) + upstream api_backend { + server 127.0.0.1:8080; + keepalive 32; + } + + server { + listen 8001; + server_name _; + + client_body_timeout 60s; + client_header_timeout 60s; + send_timeout 300s; + + # Frontend React (archivos estaticos compilados) + root /opt/rss2/frontend/dist; + index index.html; + + location / { + try_files $uri $uri/ /index.html; + } + + # Imagenes Wikipedia servidas directamente + location /wiki-images/ { + alias /opt/rss2/data/wiki_images/; + expires 7d; + add_header Cache-Control "public, immutable"; + } + + # Proxy al API Go + location /api/ { + proxy_pass http://api_backend/api/; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Connection ""; + + proxy_connect_timeout 60s; + proxy_send_timeout 300s; + proxy_read_timeout 300s; + } + + location /health { + access_log off; + return 200 "ok"; + } + + location ~ /\. { + deny all; + access_log off; + log_not_found off; + } + } +} diff --git a/deploy/debian/systemd/rss2-backend.service b/deploy/debian/systemd/rss2-backend.service new file mode 100644 index 0000000..f50239f --- /dev/null +++ b/deploy/debian/systemd/rss2-backend.service @@ -0,0 +1,24 @@ +[Unit] +Description=RSS2 Backend API (Go) +After=network.target postgresql.service redis.service +Requires=postgresql.service redis.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +ExecStart=/opt/rss2/bin/server +Restart=always +RestartSec=5 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-backend + +# Limites de recursos +LimitNOFILE=65536 +MemoryMax=1G + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-categorizer.service b/deploy/debian/systemd/rss2-categorizer.service new file mode 100644 index 0000000..61ecf9a --- /dev/null +++ b/deploy/debian/systemd/rss2-categorizer.service @@ -0,0 +1,25 @@ +[Unit] +Description=RSS2 Categorizer Worker (Python) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=CATEGORIZER_BATCH_SIZE=10 +Environment=CATEGORIZER_SLEEP_IDLE=5 +ExecStart=/opt/rss2/venv/bin/python -m workers.simple_categorizer_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-categorizer + +MemoryMax=1G +CPUQuota=200% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-cluster.service b/deploy/debian/systemd/rss2-cluster.service new file mode 100644 index 0000000..dd990fb --- /dev/null +++ b/deploy/debian/systemd/rss2-cluster.service @@ -0,0 +1,25 @@ +[Unit] +Description=RSS2 Cluster Worker - Agrupacion de noticias (Python) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=EVENT_DIST_THRESHOLD=0.35 +Environment=EMB_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 +ExecStart=/opt/rss2/venv/bin/python -m workers.cluster_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-cluster + +MemoryMax=2G +CPUQuota=200% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-discovery.service b/deploy/debian/systemd/rss2-discovery.service new file mode 100644 index 0000000..c9a435b --- /dev/null +++ b/deploy/debian/systemd/rss2-discovery.service @@ -0,0 +1,26 @@ +[Unit] +Description=RSS2 Discovery de Feeds (Go) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=DISCOVERY_INTERVAL=900 +Environment=DISCOVERY_BATCH=10 +Environment=MAX_FEEDS_PER_URL=5 +ExecStart=/opt/rss2/bin/discovery +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-discovery + +MemoryMax=512M +CPUQuota=100% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-embeddings.service b/deploy/debian/systemd/rss2-embeddings.service new file mode 100644 index 0000000..efb0dbd --- /dev/null +++ b/deploy/debian/systemd/rss2-embeddings.service @@ -0,0 +1,30 @@ +[Unit] +Description=RSS2 Embeddings Worker (Python) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=EMB_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 +Environment=EMB_BATCH=64 +Environment=EMB_SLEEP_IDLE=5 +Environment=EMB_LANGS=es +Environment=EMB_LIMIT=1000 +Environment=DEVICE=cpu +Environment=HF_HOME=/opt/rss2/hf_cache +ExecStart=/opt/rss2/venv/bin/python -m workers.embeddings_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-embeddings + +MemoryMax=3G +CPUQuota=200% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-ingestor.service b/deploy/debian/systemd/rss2-ingestor.service new file mode 100644 index 0000000..8a29a74 --- /dev/null +++ b/deploy/debian/systemd/rss2-ingestor.service @@ -0,0 +1,26 @@ +[Unit] +Description=RSS2 Ingestor RSS (Go) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=RSS_MAX_WORKERS=100 +Environment=RSS_POKE_INTERVAL_MIN=15 +ExecStart=/opt/rss2/bin/ingestor +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-ingestor + +LimitNOFILE=65536 +MemoryMax=2G +CPUQuota=200% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-langdetect.service b/deploy/debian/systemd/rss2-langdetect.service new file mode 100644 index 0000000..18c2732 --- /dev/null +++ b/deploy/debian/systemd/rss2-langdetect.service @@ -0,0 +1,25 @@ +[Unit] +Description=RSS2 Language Detection Worker (Python) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=LANG_DETECT_SLEEP=60 +Environment=LANG_DETECT_BATCH=1000 +ExecStart=/opt/rss2/venv/bin/python -m workers.langdetect_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-langdetect + +MemoryMax=512M +CPUQuota=50% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-ner.service b/deploy/debian/systemd/rss2-ner.service new file mode 100644 index 0000000..6f43c78 --- /dev/null +++ b/deploy/debian/systemd/rss2-ner.service @@ -0,0 +1,26 @@ +[Unit] +Description=RSS2 NER Worker - Extraccion de Entidades (Python) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=NER_LANG=es +Environment=NER_BATCH=64 +Environment=HF_HOME=/opt/rss2/hf_cache +ExecStart=/opt/rss2/venv/bin/python -m workers.ner_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-ner + +MemoryMax=2G +CPUQuota=200% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-qdrant-worker.service b/deploy/debian/systemd/rss2-qdrant-worker.service new file mode 100644 index 0000000..6334fd1 --- /dev/null +++ b/deploy/debian/systemd/rss2-qdrant-worker.service @@ -0,0 +1,28 @@ +[Unit] +Description=RSS2 Qdrant Sync Worker (Go) +After=network.target postgresql.service rss2-qdrant.service +Requires=postgresql.service rss2-qdrant.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=QDRANT_HOST=127.0.0.1 +Environment=QDRANT_PORT=6333 +Environment=QDRANT_COLLECTION=news_vectors +Environment=QDRANT_SLEEP=30 +Environment=QDRANT_BATCH=100 +ExecStart=/opt/rss2/bin/qdrant_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-qdrant-worker + +MemoryMax=1G +CPUQuota=100% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-qdrant.service b/deploy/debian/systemd/rss2-qdrant.service new file mode 100644 index 0000000..59b8651 --- /dev/null +++ b/deploy/debian/systemd/rss2-qdrant.service @@ -0,0 +1,25 @@ +[Unit] +Description=Qdrant Vector Database +After=network.target + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/qdrant +ExecStart=/opt/rss2/qdrant/qdrant +Restart=always +RestartSec=5 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-qdrant + +Environment=QDRANT__SERVICE__HTTP_PORT=6333 +Environment=QDRANT__SERVICE__GRPC_PORT=6334 +Environment=QDRANT__STORAGE__STORAGE_PATH=/opt/rss2/data/qdrant_storage + +MemoryMax=4G +CPUQuota=400% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-related.service b/deploy/debian/systemd/rss2-related.service new file mode 100644 index 0000000..cc671f9 --- /dev/null +++ b/deploy/debian/systemd/rss2-related.service @@ -0,0 +1,26 @@ +[Unit] +Description=RSS2 Related News Worker (Go) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=RELATED_SLEEP=10 +Environment=RELATED_BATCH=200 +Environment=RELATED_TOPK=10 +ExecStart=/opt/rss2/bin/related +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-related + +MemoryMax=1G +CPUQuota=100% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-scraper.service b/deploy/debian/systemd/rss2-scraper.service new file mode 100644 index 0000000..83929c4 --- /dev/null +++ b/deploy/debian/systemd/rss2-scraper.service @@ -0,0 +1,25 @@ +[Unit] +Description=RSS2 Scraper HTML (Go) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=SCRAPER_SLEEP=60 +Environment=SCRAPER_BATCH=10 +ExecStart=/opt/rss2/bin/scraper +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-scraper + +MemoryMax=512M +CPUQuota=100% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-topics.service b/deploy/debian/systemd/rss2-topics.service new file mode 100644 index 0000000..f9ab9b5 --- /dev/null +++ b/deploy/debian/systemd/rss2-topics.service @@ -0,0 +1,25 @@ +[Unit] +Description=RSS2 Topics Worker (Go) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=TOPICS_SLEEP=10 +Environment=TOPICS_BATCH=500 +ExecStart=/opt/rss2/bin/topics +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-topics + +MemoryMax=512M +CPUQuota=100% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-translation-scheduler.service b/deploy/debian/systemd/rss2-translation-scheduler.service new file mode 100644 index 0000000..a46f3ad --- /dev/null +++ b/deploy/debian/systemd/rss2-translation-scheduler.service @@ -0,0 +1,26 @@ +[Unit] +Description=RSS2 Translation Scheduler (Python) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=TARGET_LANGS=es +Environment=SCHEDULER_BATCH=1000 +Environment=SCHEDULER_SLEEP=30 +ExecStart=/opt/rss2/venv/bin/python -m workers.translation_scheduler +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-translation-scheduler + +MemoryMax=256M +CPUQuota=50% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-translator.service b/deploy/debian/systemd/rss2-translator.service new file mode 100644 index 0000000..90528e8 --- /dev/null +++ b/deploy/debian/systemd/rss2-translator.service @@ -0,0 +1,31 @@ +[Unit] +Description=RSS2 Translator Worker NLLB-200 (Python) +After=network.target postgresql.service rss2-translation-scheduler.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=TARGET_LANGS=es +Environment=TRANSLATOR_BATCH=32 +Environment=CT2_MODEL_PATH=/opt/rss2/models/nllb-ct2 +Environment=CT2_DEVICE=cpu +Environment=CT2_COMPUTE_TYPE=int8 +Environment=UNIVERSAL_MODEL=facebook/nllb-200-distilled-600M +Environment=HF_HOME=/opt/rss2/hf_cache +ExecStart=/opt/rss2/venv/bin/python -m workers.ctranslator_worker +Restart=always +RestartSec=15 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-translator + +# El modelo NLLB-200 consume bastante RAM en CPU +MemoryMax=4G +CPUQuota=200% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-wiki.service b/deploy/debian/systemd/rss2-wiki.service new file mode 100644 index 0000000..01891dd --- /dev/null +++ b/deploy/debian/systemd/rss2-wiki.service @@ -0,0 +1,24 @@ +[Unit] +Description=RSS2 Wiki Worker - imagenes Wikipedia (Go) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=WIKI_SLEEP=10 +ExecStart=/opt/rss2/bin/wiki_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-wiki + +MemoryMax=256M +CPUQuota=50% + +[Install] +WantedBy=multi-user.target diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index b126c81..0000000 --- a/docker-compose.yml +++ /dev/null @@ -1,748 +0,0 @@ -services: - db: - image: postgres:18-alpine - container_name: rss2_db - shm_size: 4gb - environment: - POSTGRES_DB: ${POSTGRES_DB:-rss} - POSTGRES_USER: ${POSTGRES_USER:-rss} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - POSTGRES_INITDB_ARGS: "--encoding=UTF8 --locale=C.UTF-8" - LANG: C.UTF-8 - LC_ALL: C.UTF-8 - TZ: Europe/Madrid - PGDATA: /var/lib/postgresql/data/18/main - volumes: - - ./data/pgdata:/var/lib/postgresql/data - - ./init-db:/docker-entrypoint-initdb.d:rw - - ./docker-entrypoint-db.sh:/docker-entrypoint-db.sh:ro - entrypoint: ["bash", "/docker-entrypoint-db.sh"] - networks: - backend: - aliases: - - db - - rss2_db - restart: unless-stopped - healthcheck: - test: [ "CMD-SHELL", "pg_isready -h 127.0.0.1 -p 5432 -U $$POSTGRES_USER -d $$POSTGRES_DB || exit 1" ] - interval: 5s - timeout: 5s - retries: 30 - start_period: 20s - deploy: - resources: - limits: - memory: 8G - reservations: - memory: 4G - - redis: - image: redis:7-alpine - container_name: rss2_redis - environment: - TZ: Europe/Madrid - # SEGURIDAD: Redis con autenticación - command: > - redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru --requirepass ${REDIS_PASSWORD} - volumes: - - ./data/redis-data:/data - - /etc/timezone:/etc/timezone:ro - - /etc/localtime:/etc/localtime:ro - networks: - backend: - aliases: - - redis - - rss2_redis - restart: unless-stopped - healthcheck: - test: [ "CMD", "redis-cli", "--no-auth-warning", "-a", "${REDIS_PASSWORD}", "ping" ] - interval: 5s - timeout: 3s - retries: 5 - deploy: - resources: - limits: - memory: 768M - reservations: - memory: 512M - - rss-ingestor-go: - build: - context: ./rss-ingestor-go - dockerfile: Dockerfile - container_name: rss2_ingestor_go - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - RSS_MAX_WORKERS: 100 - RSS_POKE_INTERVAL_MIN: 15 - TZ: Europe/Madrid - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '2' - memory: 2G - reservations: - memory: 512M - - langdetect: - build: - context: . - dockerfile: Dockerfile - container_name: rss2_langdetect_py - command: bash -lc "python -m workers.langdetect_worker" - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - LANG_DETECT_SLEEP: 60 - LANG_DETECT_BATCH: 1000 - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '0.5' - memory: 512M - - # ================================================================================== - # SCRAPER WORKER (Go) - Extrae artículos de URLs - # ================================================================================== - scraper: - build: - context: . - dockerfile: Dockerfile.scraper - container_name: rss2_scraper - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - SCRAPER_SLEEP: 60 - SCRAPER_BATCH: 10 - TZ: Europe/Madrid - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 512M - - # ================================================================================== - # DISCOVERY WORKER (Go) - Descubre RSS feeds - # ================================================================================== - discovery: - build: - context: . - dockerfile: Dockerfile.discovery - container_name: rss2_discovery - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - DISCOVERY_INTERVAL: 900 - DISCOVERY_BATCH: 10 - MAX_FEEDS_PER_URL: 5 - TZ: Europe/Madrid - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 512M - - # ================================================================================== - # WIKI WORKER (Go) - Wikipedia info and thumbnails - # ================================================================================== - wiki-worker: - build: - context: . - dockerfile: Dockerfile.wiki - container_name: rss2_wiki_worker - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - WIKI_SLEEP: 10 - TZ: Europe/Madrid - volumes: - - ./data/wiki_images:/app/data/wiki_images - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '0.5' - memory: 256M - - # ================================================================================== - # BACKEND GO (API REST) - # ================================================================================== - backend-go: - build: - context: ./backend - dockerfile: Dockerfile - container_name: rss2_backend_go - environment: - TZ: Europe/Madrid - DATABASE_URL: postgres://${POSTGRES_USER:-rss}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-rss}?sslmode=disable - REDIS_URL: redis://:${REDIS_PASSWORD:-rss_redis_pass_2024}@redis:6379 - SECRET_KEY: ${SECRET_KEY:-change_this_to_a_long_random_string} - SERVER_PORT: "8080" - volumes: - - ./data/wiki_images:/app/data/wiki_images - networks: - - backend - - frontend - depends_on: - db: - condition: service_healthy - redis: - condition: service_healthy - restart: unless-stopped - - # ================================================================================== - # FRONTEND REACT - # ================================================================================== - rss2_frontend: - build: - context: ./frontend - dockerfile: Dockerfile - container_name: rss2_frontend - environment: - TZ: Europe/Madrid - VITE_API_URL: /api - networks: - - frontend - depends_on: - - backend-go - restart: unless-stopped - - # ================================================================================== - # NGINX (Puerto 8001 - sirve React + proxy API) - # ================================================================================== - nginx: - image: nginx:alpine - container_name: rss2_nginx - ports: - - "8001:80" - volumes: - - ./nginx.conf:/etc/nginx/nginx.conf:ro - networks: - - frontend - depends_on: - - rss2_frontend - - backend-go - restart: unless-stopped - - # ================================================================================== - # TRANSLATOR CPU (CTranslate2) - Scale with: docker compose up -d --scale translator=3 - # ================================================================================== - translator: - build: - context: . - dockerfile: Dockerfile.translator - image: rss2-translator:latest - command: bash -lc "python -m workers.ctranslator_worker" - security_opt: - - seccomp=unconfined - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - TARGET_LANGS: es - TRANSLATOR_BATCH: 32 - CT2_MODEL_PATH: /app/models/nllb-ct2 - CT2_DEVICE: cpu - CT2_COMPUTE_TYPE: int8 - UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M - HF_HOME: /app/hf_cache - TZ: Europe/Madrid - TRANSLATOR_ID: ${TRANSLATOR_ID:-} - volumes: - - ./workers:/app/workers - - ./hf_cache:/app/hf_cache - - ./models:/app/models - networks: - - backend - profiles: - - cpu-only - depends_on: - db: - condition: service_healthy - restart: unless-stopped - - # ================================================================================== - # TRANSLATION SCHEDULER - Creates translation jobs - # ================================================================================== - translation-scheduler: - build: - context: . - dockerfile: Dockerfile.scheduler - image: rss2-scheduler:latest - container_name: rss2_translation_scheduler - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - TARGET_LANGS: es - SCHEDULER_BATCH: 1000 - SCHEDULER_SLEEP: 30 - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - networks: - - backend - deploy: - resources: - limits: - cpus: '0.5' - memory: 256M - depends_on: - db: - condition: service_healthy - restart: unless-stopped - - # ================================================================================== - # TRANSLATOR GPU (CTranslate2 with CUDA) - # ================================================================================== - translator-gpu: - build: - context: . - dockerfile: Dockerfile.translator-gpu - image: rss2-translator-gpu:latest - container_name: rss2_translator_gpu - command: bash -lc "python -m workers.ctranslator_worker" - security_opt: - - seccomp=unconfined - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - TARGET_LANGS: es - TRANSLATOR_BATCH: 64 - CT2_MODEL_PATH: /app/models/nllb-ct2 - CT2_DEVICE: cuda - CT2_COMPUTE_TYPE: float16 - UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M - HF_HOME: /app/hf_cache - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - - ./hf_cache:/app/hf_cache - - ./models:/app/models - networks: - - backend - deploy: - resources: - limits: - memory: 4G - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [ gpu ] - depends_on: - db: - condition: service_healthy - restart: unless-stopped - - embeddings: - build: - context: . - dockerfile: Dockerfile - container_name: rss2_embeddings_py - command: bash -lc "python -m workers.embeddings_worker" - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - EMB_MODEL: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 - EMB_BATCH: 64 - EMB_SLEEP_IDLE: 5 - EMB_LANGS: es - EMB_LIMIT: 1000 - DEVICE: cuda - HF_HOME: /app/hf_cache - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - - ./hf_cache:/app/hf_cache - networks: - - backend - deploy: - resources: - limits: - memory: 6G - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [ gpu ] - depends_on: - db: - condition: service_healthy - restart: unless-stopped - - # ================================================================================== - # TOPICS WORKER (Go) - Matching temas y países - # ================================================================================== - topics: - build: - context: . - dockerfile: Dockerfile.topics - container_name: rss2_topics - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - TOPICS_SLEEP: 10 - TOPICS_BATCH: 500 - TZ: Europe/Madrid - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 512M - - # ================================================================================== - # RELATED WORKER (Go) - Noticias relacionadas - # ================================================================================== - related: - build: - context: . - dockerfile: Dockerfile.related - container_name: rss2_related - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - RELATED_SLEEP: 10 - RELATED_BATCH: 200 - RELATED_TOPK: 10 - EMB_MODEL: mxbai-embed-large - TZ: Europe/Madrid - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 1G - - qdrant: - image: qdrant/qdrant:latest - container_name: rss2_qdrant - environment: - TZ: Europe/Madrid - QDRANT__SERVICE__GRPC_PORT: 6334 - volumes: - - ./data/qdrant_storage:/qdrant/storage - - /etc/timezone:/etc/timezone:ro - - /etc/localtime:/etc/localtime:ro - networks: - - backend - restart: unless-stopped - deploy: - resources: - limits: - cpus: '4' - memory: 4G - reservations: - memory: 2G - - # ================================================================================== - # QDRANT WORKER (Go) - Vectorización y búsqueda semántica - # ================================================================================== - qdrant-worker: - build: - context: . - dockerfile: Dockerfile.qdrant - container_name: rss2_qdrant_worker - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - QDRANT_HOST: qdrant - QDRANT_PORT: 6333 - QDRANT_COLLECTION: news_vectors - OLLAMA_URL: http://ollama:11434 - QDRANT_SLEEP: 30 - QDRANT_BATCH: 100 - TZ: Europe/Madrid - networks: - - backend - depends_on: - db: - condition: service_healthy - qdrant: - condition: service_started - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 1G - - # ================================================================================== - # NER WORKER (Python) - Extracción de entidades - # ================================================================================== - ner: - build: - context: . - dockerfile: Dockerfile - container_name: rss2_ner - command: bash -lc "python -m workers.ner_worker" - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - NER_LANG: es - NER_BATCH: 64 - HF_HOME: /app/hf_cache - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - - ./hf_cache:/app/hf_cache - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '2' - memory: 2G - - # ================================================================================== - # CLUSTER WORKER (Python) - Agrupación de noticias - # ================================================================================== - cluster: - build: - context: . - dockerfile: Dockerfile - container_name: rss2_cluster_py - command: bash -lc "python -m workers.cluster_worker" - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - EVENT_DIST_THRESHOLD: 0.35 - EMB_MODEL: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '2' - memory: 2G - - # ================================================================================== - # LLM CATEGORIZER (Python) - Categorización con Ollama - # ================================================================================== - llm-categorizer: - build: - context: . - dockerfile: Dockerfile - container_name: rss2_llm_categorizer - command: bash -lc "python -m workers.simple_categorizer_worker" - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - CATEGORIZER_BATCH_SIZE: 10 - CATEGORIZER_SLEEP_IDLE: 5 - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '2' - memory: 1G - - # ================================================================================== - # MONITORING STACK - SECURED - # ================================================================================== - - prometheus: - image: prom/prometheus:latest - container_name: rss2_prometheus - volumes: - - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro - - prometheus_data:/prometheus - command: - - '--config.file=/etc/prometheus/prometheus.yml' - - '--storage.tsdb.path=/prometheus' - - '--web.console.libraries=/usr/share/prometheus/console_libraries' - - '--web.console.templates=/usr/share/prometheus/consoles' - # SEGURIDAD: Sin exposición de puertos - acceso solo vía Grafana o túnel SSH - # ports: - # - "9090:9090" - networks: - - monitoring - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 2G - - grafana: - image: grafana/grafana:latest - container_name: rss2_grafana - # SEGURIDAD: Acceso solo en localhost o vía túnel SSH - # Para acceso remoto, usar túnel SSH: ssh -L 3001:localhost:3001 user@server - ports: - - "127.0.0.1:3001:3000" - environment: - # SEGURIDAD: Cambiar este password en producción - - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-change_this_password} - - GF_USERS_ALLOW_SIGN_UP=false - - GF_SERVER_ROOT_URL=http://localhost:3001 - - GF_SECURITY_COOKIE_SECURE=false - - GF_SECURITY_COOKIE_SAMESITE=lax - volumes: - - grafana_data:/var/lib/grafana - networks: - - monitoring - depends_on: - - prometheus - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 1G - - cadvisor: - image: gcr.io/cadvisor/cadvisor:latest - container_name: rss2_cadvisor - # SEGURIDAD: Sin exposición de puertos - solo acceso interno - # ports: - # - "8081:8080" - volumes: - - /:/rootfs:ro - - /var/run:/var/run:ro - - /sys:/sys:ro - - /var/lib/docker/:/var/lib/docker:ro - - /dev/disk/:/dev/disk:ro - devices: - - /dev/kmsg - networks: - - monitoring - restart: unless-stopped - deploy: - resources: - limits: - cpus: '0.5' - memory: 512M - -# ================================================================================== -# REDES SEGMENTADAS -# ================================================================================== -networks: - # Red frontal - Solo nginx y web app - frontend: - name: rss2_frontend - driver: bridge - internal: false - - # Red backend - Base de datos, workers, redis, qdrant - backend: - name: rss2_backend - driver: bridge - internal: false # Acceso externo permitido (necesario para ingestor) - - # Red de monitoreo - Prometheus, Grafana, cAdvisor - monitoring: - name: rss2_monitoring - driver: bridge - internal: true - -volumes: - prometheus_data: - grafana_data: - torch_extensions: diff --git a/docker-entrypoint-db.sh b/docker-entrypoint-db.sh deleted file mode 100755 index 1eb2722..0000000 --- a/docker-entrypoint-db.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -e - -# Detectar si la base de datos necesita reinicialización -PGDATA_DIR="/var/lib/postgresql/data/18/main" - -echo "RSS2: Checking database integrity..." - -# Si no existe el archivo de versión, es una base de datos nueva -if [ ! -f "$PGDATA_DIR/PG_VERSION" ]; then - echo "RSS2: New database - will be initialized by docker-entrypoint" -else - # Verificar si la base de datos es funcional - if ! pg_isready -h localhost -p 5432 -U "${POSTGRES_USER:-rss}" 2>/dev/null; then - echo "RSS2: Database appears corrupted - removing old data files for fresh initialization..." - # Eliminar solo los archivos de datos, no todo el directorio - rm -rf "$PGDATA_DIR"/* - echo "RSS2: Data files removed - docker-entrypoint will initialize fresh database" - else - echo "RSS2: Database is healthy" - fi -fi - -# Ejecutar el entrypoint original con los parámetros de PostgreSQL -exec docker-entrypoint.sh \ - postgres \ - -c max_connections=200 \ - -c shared_buffers=4GB \ - -c effective_cache_size=12GB \ - -c work_mem=16MB \ - -c maintenance_work_mem=512MB \ - -c autovacuum_max_workers=3 \ - -c autovacuum_vacuum_scale_factor=0.02 \ - -c autovacuum_vacuum_cost_limit=1000 \ - -c max_worker_processes=8 \ - -c max_parallel_workers=6 \ - -c max_parallel_workers_per_gather=2 \ - -c wal_level=replica \ - -c max_wal_senders=5 \ - -c wal_keep_size=1GB \ - -c hot_standby=on \ - "$@" diff --git a/frontend/Dockerfile b/frontend/Dockerfile deleted file mode 100644 index 5f06218..0000000 --- a/frontend/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM node:20-alpine AS builder - -WORKDIR /app - -COPY package*.json ./ -RUN npm install - -COPY . . -RUN npm run build - -FROM nginx:alpine - -COPY --from=builder /app/dist /usr/share/nginx/html - -COPY nginx.conf /etc/nginx/nginx.conf - -EXPOSE 80 - -CMD ["nginx", "-g", "daemon off;"] diff --git a/monitoring/prometheus.yml b/monitoring/prometheus.yml deleted file mode 100644 index 6cc80ce..0000000 --- a/monitoring/prometheus.yml +++ /dev/null @@ -1,21 +0,0 @@ -global: - scrape_interval: 15s - -scrape_configs: - - job_name: 'prometheus' - static_configs: - - targets: ['localhost:9090'] - - - job_name: 'cadvisor' - static_configs: - - targets: ['cadvisor:8080'] - - # If we had Node Exporter (for host metrics): - # - job_name: 'node_exporter' - # static_configs: - # - targets: ['node-exporter:9100'] - - # If the app exposes metrics (e.g. Flask/Gunicorn with prometheus_client) - # - job_name: 'rss2_web' - # static_configs: - # - targets: ['rss2_web:8000'] diff --git a/reset_and_deploy.sh b/reset_and_deploy.sh deleted file mode 100755 index c103aef..0000000 --- a/reset_and_deploy.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -echo "Stopping all containers..." -docker-compose down - -echo "Removing data volumes..." -# Use sudo if necessary, or ensure current user has permissions -rm -rf data/pgdata data/pgdata-replica data/redis-data data/qdrant_storage - -echo "Starting deployment from scratch..." -docker-compose up -d --build - -echo "Deployment complete. Checking status..." -docker-compose ps diff --git a/rss-ingestor-go/Dockerfile b/rss-ingestor-go/Dockerfile deleted file mode 100644 index b75cbaa..0000000 --- a/rss-ingestor-go/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -FROM golang:alpine AS builder - -WORKDIR /app - -# Install git and SSL certs -RUN apk add --no-cache git ca-certificates - -# Copy source code immediately -COPY . . - -# Download dependencies -RUN go mod tidy && go mod download - -# Build the Go app -RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o rss-ingestor . - -# Final stage -FROM alpine:latest - -WORKDIR /root/ - -# Copy the Pre-built binary file from the previous stage -COPY --from=builder /app/rss-ingestor . -COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ - -# Command to run the executable -CMD ["./rss-ingestor"] diff --git a/start_docker.sh b/start_docker.sh deleted file mode 100755 index 1aef36d..0000000 --- a/start_docker.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Script para iniciar los servicios de Docker -# Ejecutar con: sudo ./start_docker.sh - -set -e -cd "$(dirname "$0")" - -echo "=== RSS2 Docker Services ===" - -# Verificación de modelo eliminada (script de conversión no disponible) - -echo "" -echo "Iniciando servicios Docker..." -docker compose up -d --build - -echo "" -echo "✓ Servicios iniciados" -echo "" -echo "Para ver los logs:" -echo " docker compose logs -f translator" -echo "" -echo "Para verificar el estado:" -echo " docker compose ps"