diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index abd9f82..0000000 --- a/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -FROM python:3.11-slim - -WORKDIR /app - -RUN apt-get update && apt-get install -y --no-install-recommends \ - libpq-dev gcc git curl \ - && rm -rf /var/lib/apt/lists/* - -ENV PYTHONUNBUFFERED=1 \ - PIP_DISABLE_PIP_VERSION_CHECK=1 \ - TOKENIZERS_PARALLELISM=false \ - HF_HOME=/root/.cache/huggingface - -COPY requirements.txt . -RUN pip install --no-cache-dir --upgrade pip - -RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu121 - -RUN pip install --no-cache-dir \ - ctranslate2 \ - sentencepiece \ - transformers==4.44.0 \ - protobuf==3.20.3 \ - "numpy<2" \ - psycopg2-binary \ - redis \ - requests \ - beautifulsoup4 \ - lxml \ - langdetect \ - nltk \ - scikit-learn \ - pandas \ - sentence-transformers \ - spacy - -RUN python -m spacy download es_core_news_lg - -COPY workers/ ./workers/ -COPY init-db/ ./init-db/ -COPY migrations/ ./migrations/ -COPY entity_config.json . - -ENV DB_HOST=db -ENV DB_PORT=5432 -ENV DB_NAME=rss -ENV DB_USER=rss -ENV DB_PASS=x - -CMD ["python", "-m", "workers.embeddings_worker"] diff --git a/Dockerfile.discovery b/Dockerfile.discovery deleted file mode 100644 index 90e405d..0000000 --- a/Dockerfile.discovery +++ /dev/null @@ -1,31 +0,0 @@ -FROM golang:1.22-alpine AS builder - -ENV GOTOOLCHAIN=auto - -RUN apk add --no-cache git - -WORKDIR /app - -COPY backend/go.mod backend/go.sum ./ -RUN go mod download - -COPY backend/ ./ - -RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/discovery ./cmd/discovery - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=builder /bin/discovery /bin/discovery - -ENV DB_HOST=db \ - DB_PORT=5432 \ - DB_NAME=rss \ - DB_USER=rss \ - DB_PASS=rss \ - DISCOVERY_INTERVAL=900 \ - DISCOVERY_BATCH=10 \ - MAX_FEEDS_PER_URL=5 - -ENTRYPOINT ["/bin/discovery"] diff --git a/Dockerfile.qdrant b/Dockerfile.qdrant deleted file mode 100644 index e80bfae..0000000 --- a/Dockerfile.qdrant +++ /dev/null @@ -1,34 +0,0 @@ -FROM golang:1.22-alpine AS builder - -ENV GOTOOLCHAIN=auto - -RUN apk add --no-cache git - -WORKDIR /app - -COPY backend/go.mod backend/go.sum ./ -RUN go mod download - -COPY backend/ ./ - -RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/qdrant-worker ./cmd/qdrant - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=builder /bin/qdrant-worker /bin/qdrant-worker - -ENV DB_HOST=db \ - DB_PORT=5432 \ - DB_NAME=rss \ - DB_USER=rss \ - DB_PASS=rss \ - QDRANT_HOST=qdrant \ - QDRANT_PORT=6333 \ - QDRANT_COLLECTION=news_vectors \ - OLLAMA_URL=http://ollama:11434 \ - QDRANT_SLEEP=30 \ - QDRANT_BATCH=100 - -ENTRYPOINT ["/bin/qdrant-worker"] diff --git a/Dockerfile.related b/Dockerfile.related deleted file mode 100644 index 12e011d..0000000 --- a/Dockerfile.related +++ /dev/null @@ -1,32 +0,0 @@ -FROM golang:1.22-alpine AS builder - -ENV GOTOOLCHAIN=auto - -RUN apk add --no-cache git - -WORKDIR /app - -COPY backend/go.mod backend/go.sum ./ -RUN go mod download - -COPY backend/ ./ - -RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/related ./cmd/related - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=builder /bin/related /bin/related - -ENV DB_HOST=db \ - DB_PORT=5432 \ - DB_NAME=rss \ - DB_USER=rss \ - DB_PASS=rss \ - RELATED_SLEEP=10 \ - RELATED_BATCH=200 \ - RELATED_TOPK=10 \ - EMB_MODEL=mxbai-embed-large - -ENTRYPOINT ["/bin/related"] diff --git a/Dockerfile.scheduler b/Dockerfile.scheduler deleted file mode 100644 index 4a81d3e..0000000 --- a/Dockerfile.scheduler +++ /dev/null @@ -1,23 +0,0 @@ -FROM python:3.11-slim - -WORKDIR /app - -RUN apt-get update && apt-get install -y --no-install-recommends \ - libpq-dev \ - && rm -rf /var/lib/apt/lists/* - -ENV PYTHONUNBUFFERED=1 - -COPY requirements.txt . -RUN pip install --no-cache-dir --upgrade pip -RUN pip install --no-cache-dir psycopg2-binary langdetect - -COPY workers/translation_scheduler.py ./workers/ - -ENV DB_HOST=db -ENV DB_PORT=5432 -ENV DB_NAME=rss -ENV DB_USER=rss -ENV DB_PASS=x - -CMD ["python", "workers/translation_scheduler.py"] diff --git a/Dockerfile.scraper b/Dockerfile.scraper deleted file mode 100644 index 9a32bff..0000000 --- a/Dockerfile.scraper +++ /dev/null @@ -1,32 +0,0 @@ -FROM golang:1.22-alpine AS builder - -ENV GOTOOLCHAIN=auto - -RUN apk add --no-cache git - -WORKDIR /app - -COPY backend/go.mod backend/go.sum ./ -RUN go mod download - -COPY backend/ ./ - -RUN go mod tidy - -RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/scraper ./cmd/scraper - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=builder /bin/scraper /bin/scraper - -ENV DB_HOST=db \ - DB_PORT=5432 \ - DB_NAME=rss \ - DB_USER=rss \ - DB_PASS=rss \ - SCRAPER_SLEEP=60 \ - SCRAPER_BATCH=10 - -ENTRYPOINT ["/bin/scraper"] diff --git a/Dockerfile.topics b/Dockerfile.topics deleted file mode 100644 index fc82ea7..0000000 --- a/Dockerfile.topics +++ /dev/null @@ -1,30 +0,0 @@ -FROM golang:1.22-alpine AS builder - -ENV GOTOOLCHAIN=auto - -RUN apk add --no-cache git - -WORKDIR /app - -COPY backend/go.mod backend/go.sum ./ -RUN go mod download - -COPY backend/ ./ - -RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/topics ./cmd/topics - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=builder /bin/topics /bin/topics - -ENV DB_HOST=db \ - DB_PORT=5432 \ - DB_NAME=rss \ - DB_USER=rss \ - DB_PASS=rss \ - TOPICS_SLEEP=10 \ - TOPICS_BATCH=500 - -ENTRYPOINT ["/bin/topics"] diff --git a/Dockerfile.translator b/Dockerfile.translator deleted file mode 100644 index e6a96be..0000000 --- a/Dockerfile.translator +++ /dev/null @@ -1,43 +0,0 @@ -FROM python:3.11-slim-bookworm - -RUN apt-get update && apt-get install -y --no-install-recommends \ - patchelf libpq-dev gcc git curl wget \ - && rm -rf /var/lib/apt/lists/* - -ENV PYTHONUNBUFFERED=1 \ - PIP_DISABLE_PIP_VERSION_CHECK=1 \ - TOKENIZERS_PARALLELISM=false \ - HF_HOME=/root/.cache/huggingface - -WORKDIR /app - -COPY requirements.txt . -RUN pip install --no-cache-dir --upgrade pip - -RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cpu - -RUN pip install --no-cache-dir \ - ctranslate2==3.24.0 \ - sentencepiece \ - transformers==4.36.0 \ - protobuf==3.20.3 \ - "numpy<2" \ - psycopg2-binary \ - langdetect - -# === ARREGLAR EL EXECUTABLE STACK === -RUN find /usr/local/lib/python3.11/site-packages/ctranslate2* \ - -name "libctranslate2-*.so.*" -o -name "libctranslate2.so*" | \ - xargs -I {} patchelf --clear-execstack {} || true - -COPY workers/ ./workers/ -COPY init-db/ ./init-db/ -COPY migrations/ ./migrations/ - -ENV DB_HOST=db -ENV DB_PORT=5432 -ENV DB_NAME=rss -ENV DB_USER=rss -ENV DB_PASS=x - -CMD ["python", "-m", "workers.ctranslator_worker"] diff --git a/Dockerfile.translator-gpu b/Dockerfile.translator-gpu deleted file mode 100644 index c3a990b..0000000 --- a/Dockerfile.translator-gpu +++ /dev/null @@ -1,48 +0,0 @@ -FROM python:3.11-slim-bookworm - -RUN apt-get update && apt-get install -y --no-install-recommends \ - patchelf libpq-dev gcc git curl wget \ - && rm -rf /var/lib/apt/lists/* - -ENV PYTHONUNBUFFERED=1 \ - PIP_DISABLE_PIP_VERSION_CHECK=1 \ - TOKENIZERS_PARALLELISM=false \ - HF_HOME=/root/.cache/huggingface - -WORKDIR /app - -COPY requirements.txt . -RUN pip install --no-cache-dir --upgrade pip - -# Install PyTorch with CUDA support (cu118 for broader compatibility) -RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu118 - -RUN pip install --no-cache-dir \ - ctranslate2==3.24.0 \ - sentencepiece \ - transformers==4.36.0 \ - protobuf==3.20.3 \ - "numpy<2" \ - psycopg2-binary \ - langdetect - -# Fix executable stack -RUN find /usr/local/lib/python3.11/site-packages/ctranslate2* \ - -name "libctranslate2-*.so.*" -o -name "libctranslate2.so*" | \ - xargs -I {} patchelf --clear-execstack {} || true - -COPY workers/ ./workers/ -COPY init-db/ ./init-db/ -COPY migrations/ ./migrations/ - -ENV DB_HOST=db -ENV DB_PORT=5432 -ENV DB_NAME=rss -ENV DB_USER=rss -ENV DB_PASS=x - -# GPU Configuration - Override with: docker run --gpus all -ENV CT2_DEVICE=cuda -ENV CT2_COMPUTE_TYPE=float16 - -CMD ["python", "-m", "workers.ctranslator_worker"] diff --git a/Dockerfile.wiki b/Dockerfile.wiki deleted file mode 100644 index fbd84e0..0000000 --- a/Dockerfile.wiki +++ /dev/null @@ -1,31 +0,0 @@ -FROM golang:alpine AS builder - -ENV GOTOOLCHAIN=auto - -RUN apk add --no-cache git - -WORKDIR /app - -COPY backend/go.mod backend/go.sum ./ -RUN go mod download - -COPY backend/ ./ - -RUN go mod tidy - -RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/wiki_worker ./cmd/wiki_worker - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=builder /bin/wiki_worker /bin/wiki_worker - -ENV DB_HOST=db \ - DB_PORT=5432 \ - DB_NAME=rss \ - DB_USER=rss \ - DB_PASS=rss \ - WIKI_SLEEP=10 - -ENTRYPOINT ["/bin/wiki_worker"] diff --git a/backend/Dockerfile b/backend/Dockerfile deleted file mode 100644 index 6d232b9..0000000 --- a/backend/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM golang:1.23 AS builder - -WORKDIR /app - -RUN apt-get update && apt-get install -y gcc musl-dev git - -COPY go.mod go.sum ./ -RUN go mod download - -COPY . . - -RUN CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o /server ./cmd/server - -FROM alpine:3.19 - -RUN apk add --no-cache ca-certificates tzdata postgresql-client - -WORKDIR /app - -COPY --from=builder /server . - -EXPOSE 8080 - -CMD ["./server"] diff --git a/deploy-clean.sh b/deploy-clean.sh deleted file mode 100755 index 6f2ccb6..0000000 --- a/deploy-clean.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -# Script para despliegue limpio de RSS2 - -echo "=== RSS2 Clean Deployment Script ===" -echo "" - -# Detener contenedores -echo "1. Deteniendo contenedores..." -docker compose down -v 2>/dev/null - -# Eliminar volúmenes de datos (si hay permisos) -echo "2. Eliminando volúmenes de datos..." -docker volume rm rss2_db 2>/dev/null || true -docker volume rm rss2_redis 2>/dev/null || true - -# Si los volúmenes Docker tienen problemas, intentar con rm -echo " Intentando limpiar /data/..." -sudo rm -rf /datos/rss2/data/pgdata 2>/dev/null || true -sudo rm -rf /datos/rss2/data/redis-data 2>/dev/null || true - -# Iniciar base de datos -echo "3. Iniciando base de datos..." -docker compose up -d db - -# Esperar a que esté lista -echo "4. Esperando a que la base de datos esté lista..." -sleep 10 - -# Verificar estado -if docker compose ps db | grep -q "healthy"; then - echo " ✓ Base de datos iniciada correctamente" - - # Ejecutar script de schema - echo "5. Ejecutando script de inicialización..." - docker compose exec -T db psql -U rss -d rss -f /docker-entrypoint-initdb.d/00-complete-schema.sql 2>&1 | tail -5 - - # Iniciar demás servicios - echo "6. Iniciando servicios..." - docker compose up -d redis backend-go rss2_frontend nginx rss-ingestor-go - - echo "" - echo "=== Despliegue completado ===" - echo "Accede a: http://localhost:8001" -else - echo " ✗ Error: La base de datos no está healthy" - docker compose logs db -fi diff --git a/deploy/debian/build.sh b/deploy/debian/build.sh new file mode 100755 index 0000000..2699652 --- /dev/null +++ b/deploy/debian/build.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# ============================================================================= +# RSS2 - Recompila binarios y frontend (sin reinstalar el sistema) +# Usar despues de actualizar el codigo: bash build.sh +# ============================================================================= +set -euo pipefail + +RSS2_HOME="/opt/rss2" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" + +export PATH=$PATH:/usr/local/go/bin + +GREEN='\033[0;32m'; YELLOW='\033[1;33m'; NC='\033[0m' +info() { echo -e "${GREEN}[BUILD]${NC} $*"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } + +# --- Go Backend + Workers --- +if [[ -d "$REPO_ROOT/backend" ]]; then + info "Compilando backend Go..." + (cd "$REPO_ROOT/backend" && \ + CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o "$RSS2_HOME/bin/server" ./cmd/server) + info " [OK] server" + + for cmd in scraper discovery wiki_worker topics related qdrant; do + [[ -d "$REPO_ROOT/backend/cmd/$cmd" ]] || continue + (cd "$REPO_ROOT/backend" && \ + CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o "$RSS2_HOME/bin/$cmd" "./cmd/$cmd") + info " [OK] $cmd" + done +fi + +# --- Ingestor Go --- +if [[ -d "$REPO_ROOT/rss-ingestor-go" ]]; then + info "Compilando ingestor Go..." + (cd "$REPO_ROOT/rss-ingestor-go" && \ + CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o "$RSS2_HOME/bin/ingestor" .) + info " [OK] ingestor" +fi + +# --- Frontend React --- +if [[ -d "$REPO_ROOT/frontend" ]]; then + info "Compilando frontend React..." + (cd "$REPO_ROOT/frontend" && \ + npm install --silent && \ + VITE_API_URL=/api npm run build -- --outDir "$RSS2_HOME/frontend/dist") + info " [OK] frontend" +fi + +# --- Workers Python --- +info "Sincronizando workers Python..." +rsync -a --delete "$REPO_ROOT/workers/" "$RSS2_HOME/src/workers/" +cp "$REPO_ROOT/entity_config.json" "$RSS2_HOME/src/" 2>/dev/null || true +info " [OK] workers Python" + +chown -R rss2:rss2 "$RSS2_HOME/bin" "$RSS2_HOME/frontend/dist" "$RSS2_HOME/src" + +# --- Restart servicios --- +info "Reiniciando servicios..." +GO_SERVICES=(rss2-backend rss2-ingestor rss2-scraper rss2-discovery rss2-wiki rss2-topics rss2-related rss2-qdrant-worker) +PY_SERVICES=(rss2-langdetect rss2-translation-scheduler rss2-translator rss2-embeddings rss2-ner rss2-cluster rss2-categorizer) + +for svc in "${GO_SERVICES[@]}" "${PY_SERVICES[@]}"; do + systemctl is-active --quiet "$svc" && systemctl restart "$svc" && info " restarted $svc" || true +done + +systemctl reload nginx 2>/dev/null || true + +info "Build completado." diff --git a/deploy/debian/env.example b/deploy/debian/env.example new file mode 100644 index 0000000..52b91e7 --- /dev/null +++ b/deploy/debian/env.example @@ -0,0 +1,104 @@ +# ============================================================================= +# RSS2 - Variables de entorno para despliegue Debian nativo +# Copiar a /opt/rss2/.env y editar valores antes de instalar +# ============================================================================= + +# --- PostgreSQL --- +POSTGRES_DB=rss +POSTGRES_USER=rss +POSTGRES_PASSWORD=CAMBIA_ESTO_postgres_password + +# Usadas por workers Go (equivalente a DATABASE_URL) +DB_HOST=127.0.0.1 +DB_PORT=5432 +DB_NAME=rss +DB_USER=rss +DB_PASS=CAMBIA_ESTO_postgres_password + +# URL completa para backend API Go +DATABASE_URL=postgres://rss:CAMBIA_ESTO_postgres_password@127.0.0.1:5432/rss?sslmode=disable + +# --- Redis --- +REDIS_PASSWORD=CAMBIA_ESTO_redis_password +REDIS_URL=redis://:CAMBIA_ESTO_redis_password@127.0.0.1:6379 + +# --- JWT Secret (minimo 32 caracteres, aleatorio) --- +SECRET_KEY=CAMBIA_ESTO_jwt_secret_muy_largo_y_aleatorio + +# --- Backend API --- +SERVER_PORT=8080 + +# --- Zona horaria --- +TZ=Europe/Madrid + +# --- HuggingFace cache (modelos ML) --- +HF_HOME=/opt/rss2/hf_cache + +# --- Qdrant (local, sin Docker) --- +QDRANT_HOST=127.0.0.1 +QDRANT_PORT=6333 +QDRANT_COLLECTION=news_vectors + +# --- Translator (NLLB-200 via CTranslate2) --- +TARGET_LANGS=es +TRANSLATOR_BATCH=32 +CT2_MODEL_PATH=/opt/rss2/models/nllb-ct2 +CT2_DEVICE=cpu +CT2_COMPUTE_TYPE=int8 +UNIVERSAL_MODEL=facebook/nllb-200-distilled-600M + +# --- Embeddings --- +EMB_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 +EMB_BATCH=64 +EMB_SLEEP_IDLE=5 +EMB_LANGS=es +EMB_LIMIT=1000 +DEVICE=cpu + +# --- NER --- +NER_LANG=es +NER_BATCH=64 + +# --- Ingestor RSS --- +RSS_MAX_WORKERS=100 +RSS_POKE_INTERVAL_MIN=15 + +# --- Scraper --- +SCRAPER_SLEEP=60 +SCRAPER_BATCH=10 + +# --- Discovery --- +DISCOVERY_INTERVAL=900 +DISCOVERY_BATCH=10 +MAX_FEEDS_PER_URL=5 + +# --- Wiki Worker --- +WIKI_SLEEP=10 + +# --- Topics --- +TOPICS_SLEEP=10 +TOPICS_BATCH=500 + +# --- Related --- +RELATED_SLEEP=10 +RELATED_BATCH=200 +RELATED_TOPK=10 + +# --- Cluster --- +EVENT_DIST_THRESHOLD=0.35 + +# --- Categorizer --- +CATEGORIZER_BATCH_SIZE=10 +CATEGORIZER_SLEEP_IDLE=5 + +# --- Scheduler traduccion --- +SCHEDULER_BATCH=1000 +SCHEDULER_SLEEP=30 + +# --- Lang Detect --- +LANG_DETECT_SLEEP=60 +LANG_DETECT_BATCH=1000 + +# --- Qdrant Worker --- +QDRANT_SLEEP=30 +QDRANT_BATCH=100 diff --git a/deploy/debian/install.sh b/deploy/debian/install.sh new file mode 100755 index 0000000..00da025 --- /dev/null +++ b/deploy/debian/install.sh @@ -0,0 +1,294 @@ +#!/usr/bin/env bash +# ============================================================================= +# RSS2 - Instalacion en Debian (sin Docker) +# Ejecutar como root: bash install.sh +# ============================================================================= +set -euo pipefail + +RSS2_USER="rss2" +RSS2_HOME="/opt/rss2" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" + +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; NC='\033[0m' +info() { echo -e "${GREEN}[INFO]${NC} $*"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +error() { echo -e "${RED}[ERROR]${NC} $*"; exit 1; } + +[[ "$EUID" -ne 0 ]] && error "Ejecutar como root: sudo bash install.sh" + +# ============================================================================= +# 1. DEPENDENCIAS DEL SISTEMA +# ============================================================================= +info "Instalando dependencias del sistema..." +apt-get update -qq +apt-get install -y --no-install-recommends \ + curl wget git build-essential \ + postgresql postgresql-client \ + redis-server \ + nginx \ + python3 python3-pip python3-venv python3-dev \ + nodejs npm \ + ca-certificates tzdata \ + libpq-dev + +# Go (si no esta instalado o version < 1.22) +if ! command -v go &>/dev/null || [[ "$(go version | awk '{print $3}' | tr -d 'go')" < "1.22" ]]; then + info "Instalando Go 1.23..." + GO_VERSION="1.23.4" + ARCH=$(dpkg --print-architecture) + case "$ARCH" in + amd64) GO_ARCH="amd64" ;; + arm64) GO_ARCH="arm64" ;; + *) error "Arquitectura no soportada: $ARCH" ;; + esac + curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-${GO_ARCH}.tar.gz" -o /tmp/go.tar.gz + rm -rf /usr/local/go + tar -C /usr/local -xzf /tmp/go.tar.gz + echo 'export PATH=$PATH:/usr/local/go/bin' > /etc/profile.d/go.sh + export PATH=$PATH:/usr/local/go/bin + rm /tmp/go.tar.gz +fi +info "Go: $(go version)" + +# Qdrant (binario oficial) +if [[ ! -f "$RSS2_HOME/qdrant/qdrant" ]]; then + info "Descargando Qdrant..." + QDRANT_VERSION="v1.12.1" + ARCH=$(dpkg --print-architecture) + case "$ARCH" in + amd64) QDRANT_ARCH="x86_64-unknown-linux-musl" ;; + arm64) QDRANT_ARCH="aarch64-unknown-linux-musl" ;; + *) error "Arquitectura no soportada para Qdrant: $ARCH" ;; + esac + mkdir -p "$RSS2_HOME/qdrant" + curl -fsSL "https://github.com/qdrant/qdrant/releases/download/${QDRANT_VERSION}/qdrant-${QDRANT_ARCH}.tar.gz" \ + -o /tmp/qdrant.tar.gz + tar -C "$RSS2_HOME/qdrant" -xzf /tmp/qdrant.tar.gz + chmod +x "$RSS2_HOME/qdrant/qdrant" + rm /tmp/qdrant.tar.gz +fi + +# ============================================================================= +# 2. USUARIO Y DIRECTORIOS +# ============================================================================= +info "Creando usuario $RSS2_USER y directorios..." +id "$RSS2_USER" &>/dev/null || useradd -r -m -d "$RSS2_HOME" -s /bin/bash "$RSS2_USER" + +mkdir -p \ + "$RSS2_HOME/bin" \ + "$RSS2_HOME/src" \ + "$RSS2_HOME/data/wiki_images" \ + "$RSS2_HOME/data/qdrant_storage" \ + "$RSS2_HOME/hf_cache" \ + "$RSS2_HOME/models" \ + "$RSS2_HOME/frontend/dist" \ + "$RSS2_HOME/logs" + +# ============================================================================= +# 3. CONFIGURACION ENTORNO +# ============================================================================= +if [[ ! -f "$RSS2_HOME/.env" ]]; then + if [[ -f "$SCRIPT_DIR/env.example" ]]; then + cp "$SCRIPT_DIR/env.example" "$RSS2_HOME/.env" + warn "Copia env.example en $RSS2_HOME/.env - EDITA LAS CONTRASENAS antes de continuar" + warn "Presiona Enter cuando hayas editado el .env, o Ctrl+C para salir" + read -r + else + error "No se encontro env.example en $SCRIPT_DIR" + fi +fi + +# ============================================================================= +# 4. POSTGRESQL +# ============================================================================= +info "Configurando PostgreSQL..." +source "$RSS2_HOME/.env" 2>/dev/null || true + +DB_NAME="${POSTGRES_DB:-rss}" +DB_USER="${POSTGRES_USER:-rss}" +DB_PASS="${POSTGRES_PASSWORD:-changeme}" + +systemctl enable --now postgresql + +# Crear usuario y base de datos si no existen +sudo -u postgres psql -tc "SELECT 1 FROM pg_roles WHERE rolname='$DB_USER'" | grep -q 1 || \ + sudo -u postgres psql -c "CREATE USER $DB_USER WITH PASSWORD '$DB_PASS';" + +sudo -u postgres psql -tc "SELECT 1 FROM pg_database WHERE datname='$DB_NAME'" | grep -q 1 || \ + sudo -u postgres createdb -O "$DB_USER" "$DB_NAME" + +# Ejecutar migraciones SQL +if [[ -d "$REPO_ROOT/migrations" ]]; then + info "Ejecutando migraciones..." + for sql_file in "$REPO_ROOT/migrations"/*.sql; do + [[ -f "$sql_file" ]] || continue + info " Aplicando $(basename "$sql_file")..." + sudo -u postgres psql -d "$DB_NAME" -f "$sql_file" 2>/dev/null || warn " (ya aplicada o error ignorado)" + done +fi + +# Ejecutar init-db scripts (schema inicial) +if [[ -d "$REPO_ROOT/init-db" ]]; then + info "Ejecutando scripts de init-db..." + for sql_file in "$REPO_ROOT/init-db"/*.sql; do + [[ -f "$sql_file" ]] || continue + info " $(basename "$sql_file")..." + sudo -u postgres psql -d "$DB_NAME" -f "$sql_file" 2>/dev/null || warn " (ya aplicada o error ignorado)" + done +fi + +# ============================================================================= +# 5. REDIS +# ============================================================================= +info "Configurando Redis..." +REDIS_PASS="${REDIS_PASSWORD:-changeme_redis}" + +# Agregar autenticacion y limites de memoria a redis.conf +REDIS_CONF="/etc/redis/redis.conf" +grep -q "requirepass $REDIS_PASS" "$REDIS_CONF" 2>/dev/null || { + echo "requirepass $REDIS_PASS" >> "$REDIS_CONF" + echo "maxmemory 512mb" >> "$REDIS_CONF" + echo "maxmemory-policy allkeys-lru" >> "$REDIS_CONF" + echo "appendonly yes" >> "$REDIS_CONF" +} +systemctl enable --now redis-server + +# ============================================================================= +# 6. PYTHON VIRTUALENV + DEPENDENCIAS ML +# ============================================================================= +info "Creando virtualenv Python y instalando dependencias..." +python3 -m venv "$RSS2_HOME/venv" +"$RSS2_HOME/venv/bin/pip" install --upgrade pip -q + +if [[ -f "$REPO_ROOT/requirements.txt" ]]; then + "$RSS2_HOME/venv/bin/pip" install -r "$REPO_ROOT/requirements.txt" -q +fi + +# spaCy modelo en español +"$RSS2_HOME/venv/bin/python" -m spacy download es_core_news_lg 2>/dev/null || \ + warn "spaCy model es_core_news_lg no se pudo descargar, hazlo manualmente" + +# Copiar workers Python al directorio de trabajo +info "Copiando workers Python..." +rsync -a --delete "$REPO_ROOT/workers/" "$RSS2_HOME/src/workers/" +cp "$REPO_ROOT/entity_config.json" "$RSS2_HOME/src/" 2>/dev/null || true + +# ============================================================================= +# 7. COMPILAR GO (backend + workers) +# ============================================================================= +info "Compilando binarios Go..." +export PATH=$PATH:/usr/local/go/bin +export GOPATH=/tmp/go-build-rss2 + +# Backend API +if [[ -d "$REPO_ROOT/backend" ]]; then + (cd "$REPO_ROOT/backend" && \ + CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o "$RSS2_HOME/bin/server" ./cmd/server && \ + info " [OK] server") || warn " [FAIL] server" + for cmd in scraper discovery wiki_worker topics related qdrant; do + [[ -d "$REPO_ROOT/backend/cmd/$cmd" ]] || continue + (cd "$REPO_ROOT/backend" && \ + CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o "$RSS2_HOME/bin/$cmd" "./cmd/$cmd" && \ + info " [OK] $cmd") || warn " [FAIL] $cmd" + done +fi + +# RSS Ingestor Go (repo separado) +if [[ -d "$REPO_ROOT/rss-ingestor-go" ]]; then + (cd "$REPO_ROOT/rss-ingestor-go" && \ + CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o "$RSS2_HOME/bin/ingestor" . && \ + info " [OK] ingestor") || warn " [FAIL] ingestor" +fi + +# ============================================================================= +# 8. FRONTEND REACT +# ============================================================================= +info "Compilando frontend React..." +if [[ -d "$REPO_ROOT/frontend" ]]; then + (cd "$REPO_ROOT/frontend" && \ + npm install --silent && \ + VITE_API_URL=/api npm run build -- --outDir "$RSS2_HOME/frontend/dist" && \ + info " [OK] frontend compilado") || warn " [FAIL] frontend" +fi + +# ============================================================================= +# 9. NGINX +# ============================================================================= +info "Configurando Nginx..." +cp "$SCRIPT_DIR/nginx.conf" /etc/nginx/nginx.conf +nginx -t && systemctl enable --now nginx && systemctl reload nginx + +# ============================================================================= +# 10. SYSTEMD SERVICES +# ============================================================================= +info "Instalando servicios systemd..." +SERVICES=( + rss2-qdrant + rss2-backend + rss2-ingestor + rss2-scraper + rss2-discovery + rss2-wiki + rss2-topics + rss2-related + rss2-qdrant-worker + rss2-langdetect + rss2-translation-scheduler + rss2-translator + rss2-embeddings + rss2-ner + rss2-cluster + rss2-categorizer +) + +for svc in "${SERVICES[@]}"; do + svc_file="$SCRIPT_DIR/systemd/${svc}.service" + if [[ -f "$svc_file" ]]; then + cp "$svc_file" "/etc/systemd/system/${svc}.service" + else + warn "No se encontro $svc_file" + fi +done + +systemctl daemon-reload + +for svc in "${SERVICES[@]}"; do + systemctl enable "$svc" 2>/dev/null || true +done + +# ============================================================================= +# 11. PERMISOS FINALES +# ============================================================================= +info "Ajustando permisos..." +chown -R "$RSS2_USER:$RSS2_USER" "$RSS2_HOME" +chmod 600 "$RSS2_HOME/.env" + +# ============================================================================= +# 12. ARRANCAR SERVICIOS +# ============================================================================= +info "Arrancando servicios..." +# Infraestructura primero +systemctl start rss2-qdrant +sleep 3 + +# API y workers Go +for svc in rss2-backend rss2-ingestor rss2-scraper rss2-discovery rss2-wiki rss2-topics rss2-related rss2-qdrant-worker; do + systemctl start "$svc" || warn "No se pudo arrancar $svc" +done + +# Workers Python (modelos pesados, arrancan despues) +for svc in rss2-langdetect rss2-translation-scheduler rss2-translator rss2-embeddings rss2-ner rss2-cluster rss2-categorizer; do + systemctl start "$svc" || warn "No se pudo arrancar $svc" +done + +# ============================================================================= +echo "" +info "=============================================" +info " RSS2 instalado en $RSS2_HOME" +info " Acceder en: http://$(hostname -I | awk '{print $1}'):8001" +info "" +info " Ver logs: journalctl -u rss2-backend -f" +info " Ver estado: systemctl status rss2-backend" +info " Editar env: nano $RSS2_HOME/.env" +info "=============================================" diff --git a/deploy/debian/nginx.conf b/deploy/debian/nginx.conf new file mode 100644 index 0000000..5407ce8 --- /dev/null +++ b/deploy/debian/nginx.conf @@ -0,0 +1,91 @@ +user www-data; +worker_processes auto; +error_log /var/log/nginx/error.log warn; +pid /run/nginx.pid; + +events { + worker_connections 2048; + use epoll; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + types_hash_max_size 2048; + client_max_body_size 100M; + + gzip on; + gzip_vary on; + gzip_proxied any; + gzip_comp_level 6; + gzip_types text/plain text/css text/javascript + application/json application/javascript + application/xml text/xml; + + # Go API backend (proceso nativo en localhost) + upstream api_backend { + server 127.0.0.1:8080; + keepalive 32; + } + + server { + listen 8001; + server_name _; + + client_body_timeout 60s; + client_header_timeout 60s; + send_timeout 300s; + + # Frontend React (archivos estaticos compilados) + root /opt/rss2/frontend/dist; + index index.html; + + location / { + try_files $uri $uri/ /index.html; + } + + # Imagenes Wikipedia servidas directamente + location /wiki-images/ { + alias /opt/rss2/data/wiki_images/; + expires 7d; + add_header Cache-Control "public, immutable"; + } + + # Proxy al API Go + location /api/ { + proxy_pass http://api_backend/api/; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Connection ""; + + proxy_connect_timeout 60s; + proxy_send_timeout 300s; + proxy_read_timeout 300s; + } + + location /health { + access_log off; + return 200 "ok"; + } + + location ~ /\. { + deny all; + access_log off; + log_not_found off; + } + } +} diff --git a/deploy/debian/systemd/rss2-backend.service b/deploy/debian/systemd/rss2-backend.service new file mode 100644 index 0000000..f50239f --- /dev/null +++ b/deploy/debian/systemd/rss2-backend.service @@ -0,0 +1,24 @@ +[Unit] +Description=RSS2 Backend API (Go) +After=network.target postgresql.service redis.service +Requires=postgresql.service redis.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +ExecStart=/opt/rss2/bin/server +Restart=always +RestartSec=5 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-backend + +# Limites de recursos +LimitNOFILE=65536 +MemoryMax=1G + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-categorizer.service b/deploy/debian/systemd/rss2-categorizer.service new file mode 100644 index 0000000..61ecf9a --- /dev/null +++ b/deploy/debian/systemd/rss2-categorizer.service @@ -0,0 +1,25 @@ +[Unit] +Description=RSS2 Categorizer Worker (Python) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=CATEGORIZER_BATCH_SIZE=10 +Environment=CATEGORIZER_SLEEP_IDLE=5 +ExecStart=/opt/rss2/venv/bin/python -m workers.simple_categorizer_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-categorizer + +MemoryMax=1G +CPUQuota=200% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-cluster.service b/deploy/debian/systemd/rss2-cluster.service new file mode 100644 index 0000000..dd990fb --- /dev/null +++ b/deploy/debian/systemd/rss2-cluster.service @@ -0,0 +1,25 @@ +[Unit] +Description=RSS2 Cluster Worker - Agrupacion de noticias (Python) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=EVENT_DIST_THRESHOLD=0.35 +Environment=EMB_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 +ExecStart=/opt/rss2/venv/bin/python -m workers.cluster_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-cluster + +MemoryMax=2G +CPUQuota=200% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-discovery.service b/deploy/debian/systemd/rss2-discovery.service new file mode 100644 index 0000000..c9a435b --- /dev/null +++ b/deploy/debian/systemd/rss2-discovery.service @@ -0,0 +1,26 @@ +[Unit] +Description=RSS2 Discovery de Feeds (Go) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=DISCOVERY_INTERVAL=900 +Environment=DISCOVERY_BATCH=10 +Environment=MAX_FEEDS_PER_URL=5 +ExecStart=/opt/rss2/bin/discovery +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-discovery + +MemoryMax=512M +CPUQuota=100% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-embeddings.service b/deploy/debian/systemd/rss2-embeddings.service new file mode 100644 index 0000000..efb0dbd --- /dev/null +++ b/deploy/debian/systemd/rss2-embeddings.service @@ -0,0 +1,30 @@ +[Unit] +Description=RSS2 Embeddings Worker (Python) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=EMB_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 +Environment=EMB_BATCH=64 +Environment=EMB_SLEEP_IDLE=5 +Environment=EMB_LANGS=es +Environment=EMB_LIMIT=1000 +Environment=DEVICE=cpu +Environment=HF_HOME=/opt/rss2/hf_cache +ExecStart=/opt/rss2/venv/bin/python -m workers.embeddings_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-embeddings + +MemoryMax=3G +CPUQuota=200% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-ingestor.service b/deploy/debian/systemd/rss2-ingestor.service new file mode 100644 index 0000000..8a29a74 --- /dev/null +++ b/deploy/debian/systemd/rss2-ingestor.service @@ -0,0 +1,26 @@ +[Unit] +Description=RSS2 Ingestor RSS (Go) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=RSS_MAX_WORKERS=100 +Environment=RSS_POKE_INTERVAL_MIN=15 +ExecStart=/opt/rss2/bin/ingestor +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-ingestor + +LimitNOFILE=65536 +MemoryMax=2G +CPUQuota=200% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-langdetect.service b/deploy/debian/systemd/rss2-langdetect.service new file mode 100644 index 0000000..18c2732 --- /dev/null +++ b/deploy/debian/systemd/rss2-langdetect.service @@ -0,0 +1,25 @@ +[Unit] +Description=RSS2 Language Detection Worker (Python) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=LANG_DETECT_SLEEP=60 +Environment=LANG_DETECT_BATCH=1000 +ExecStart=/opt/rss2/venv/bin/python -m workers.langdetect_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-langdetect + +MemoryMax=512M +CPUQuota=50% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-ner.service b/deploy/debian/systemd/rss2-ner.service new file mode 100644 index 0000000..6f43c78 --- /dev/null +++ b/deploy/debian/systemd/rss2-ner.service @@ -0,0 +1,26 @@ +[Unit] +Description=RSS2 NER Worker - Extraccion de Entidades (Python) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=NER_LANG=es +Environment=NER_BATCH=64 +Environment=HF_HOME=/opt/rss2/hf_cache +ExecStart=/opt/rss2/venv/bin/python -m workers.ner_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-ner + +MemoryMax=2G +CPUQuota=200% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-qdrant-worker.service b/deploy/debian/systemd/rss2-qdrant-worker.service new file mode 100644 index 0000000..6334fd1 --- /dev/null +++ b/deploy/debian/systemd/rss2-qdrant-worker.service @@ -0,0 +1,28 @@ +[Unit] +Description=RSS2 Qdrant Sync Worker (Go) +After=network.target postgresql.service rss2-qdrant.service +Requires=postgresql.service rss2-qdrant.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=QDRANT_HOST=127.0.0.1 +Environment=QDRANT_PORT=6333 +Environment=QDRANT_COLLECTION=news_vectors +Environment=QDRANT_SLEEP=30 +Environment=QDRANT_BATCH=100 +ExecStart=/opt/rss2/bin/qdrant_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-qdrant-worker + +MemoryMax=1G +CPUQuota=100% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-qdrant.service b/deploy/debian/systemd/rss2-qdrant.service new file mode 100644 index 0000000..59b8651 --- /dev/null +++ b/deploy/debian/systemd/rss2-qdrant.service @@ -0,0 +1,25 @@ +[Unit] +Description=Qdrant Vector Database +After=network.target + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/qdrant +ExecStart=/opt/rss2/qdrant/qdrant +Restart=always +RestartSec=5 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-qdrant + +Environment=QDRANT__SERVICE__HTTP_PORT=6333 +Environment=QDRANT__SERVICE__GRPC_PORT=6334 +Environment=QDRANT__STORAGE__STORAGE_PATH=/opt/rss2/data/qdrant_storage + +MemoryMax=4G +CPUQuota=400% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-related.service b/deploy/debian/systemd/rss2-related.service new file mode 100644 index 0000000..cc671f9 --- /dev/null +++ b/deploy/debian/systemd/rss2-related.service @@ -0,0 +1,26 @@ +[Unit] +Description=RSS2 Related News Worker (Go) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=RELATED_SLEEP=10 +Environment=RELATED_BATCH=200 +Environment=RELATED_TOPK=10 +ExecStart=/opt/rss2/bin/related +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-related + +MemoryMax=1G +CPUQuota=100% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-scraper.service b/deploy/debian/systemd/rss2-scraper.service new file mode 100644 index 0000000..83929c4 --- /dev/null +++ b/deploy/debian/systemd/rss2-scraper.service @@ -0,0 +1,25 @@ +[Unit] +Description=RSS2 Scraper HTML (Go) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=SCRAPER_SLEEP=60 +Environment=SCRAPER_BATCH=10 +ExecStart=/opt/rss2/bin/scraper +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-scraper + +MemoryMax=512M +CPUQuota=100% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-topics.service b/deploy/debian/systemd/rss2-topics.service new file mode 100644 index 0000000..f9ab9b5 --- /dev/null +++ b/deploy/debian/systemd/rss2-topics.service @@ -0,0 +1,25 @@ +[Unit] +Description=RSS2 Topics Worker (Go) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=TOPICS_SLEEP=10 +Environment=TOPICS_BATCH=500 +ExecStart=/opt/rss2/bin/topics +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-topics + +MemoryMax=512M +CPUQuota=100% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-translation-scheduler.service b/deploy/debian/systemd/rss2-translation-scheduler.service new file mode 100644 index 0000000..a46f3ad --- /dev/null +++ b/deploy/debian/systemd/rss2-translation-scheduler.service @@ -0,0 +1,26 @@ +[Unit] +Description=RSS2 Translation Scheduler (Python) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=TARGET_LANGS=es +Environment=SCHEDULER_BATCH=1000 +Environment=SCHEDULER_SLEEP=30 +ExecStart=/opt/rss2/venv/bin/python -m workers.translation_scheduler +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-translation-scheduler + +MemoryMax=256M +CPUQuota=50% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-translator.service b/deploy/debian/systemd/rss2-translator.service new file mode 100644 index 0000000..90528e8 --- /dev/null +++ b/deploy/debian/systemd/rss2-translator.service @@ -0,0 +1,31 @@ +[Unit] +Description=RSS2 Translator Worker NLLB-200 (Python) +After=network.target postgresql.service rss2-translation-scheduler.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2/src +EnvironmentFile=/opt/rss2/.env +Environment=TARGET_LANGS=es +Environment=TRANSLATOR_BATCH=32 +Environment=CT2_MODEL_PATH=/opt/rss2/models/nllb-ct2 +Environment=CT2_DEVICE=cpu +Environment=CT2_COMPUTE_TYPE=int8 +Environment=UNIVERSAL_MODEL=facebook/nllb-200-distilled-600M +Environment=HF_HOME=/opt/rss2/hf_cache +ExecStart=/opt/rss2/venv/bin/python -m workers.ctranslator_worker +Restart=always +RestartSec=15 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-translator + +# El modelo NLLB-200 consume bastante RAM en CPU +MemoryMax=4G +CPUQuota=200% + +[Install] +WantedBy=multi-user.target diff --git a/deploy/debian/systemd/rss2-wiki.service b/deploy/debian/systemd/rss2-wiki.service new file mode 100644 index 0000000..01891dd --- /dev/null +++ b/deploy/debian/systemd/rss2-wiki.service @@ -0,0 +1,24 @@ +[Unit] +Description=RSS2 Wiki Worker - imagenes Wikipedia (Go) +After=network.target postgresql.service +Requires=postgresql.service + +[Service] +Type=simple +User=rss2 +Group=rss2 +WorkingDirectory=/opt/rss2 +EnvironmentFile=/opt/rss2/.env +Environment=WIKI_SLEEP=10 +ExecStart=/opt/rss2/bin/wiki_worker +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=rss2-wiki + +MemoryMax=256M +CPUQuota=50% + +[Install] +WantedBy=multi-user.target diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index b126c81..0000000 --- a/docker-compose.yml +++ /dev/null @@ -1,748 +0,0 @@ -services: - db: - image: postgres:18-alpine - container_name: rss2_db - shm_size: 4gb - environment: - POSTGRES_DB: ${POSTGRES_DB:-rss} - POSTGRES_USER: ${POSTGRES_USER:-rss} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - POSTGRES_INITDB_ARGS: "--encoding=UTF8 --locale=C.UTF-8" - LANG: C.UTF-8 - LC_ALL: C.UTF-8 - TZ: Europe/Madrid - PGDATA: /var/lib/postgresql/data/18/main - volumes: - - ./data/pgdata:/var/lib/postgresql/data - - ./init-db:/docker-entrypoint-initdb.d:rw - - ./docker-entrypoint-db.sh:/docker-entrypoint-db.sh:ro - entrypoint: ["bash", "/docker-entrypoint-db.sh"] - networks: - backend: - aliases: - - db - - rss2_db - restart: unless-stopped - healthcheck: - test: [ "CMD-SHELL", "pg_isready -h 127.0.0.1 -p 5432 -U $$POSTGRES_USER -d $$POSTGRES_DB || exit 1" ] - interval: 5s - timeout: 5s - retries: 30 - start_period: 20s - deploy: - resources: - limits: - memory: 8G - reservations: - memory: 4G - - redis: - image: redis:7-alpine - container_name: rss2_redis - environment: - TZ: Europe/Madrid - # SEGURIDAD: Redis con autenticación - command: > - redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru --requirepass ${REDIS_PASSWORD} - volumes: - - ./data/redis-data:/data - - /etc/timezone:/etc/timezone:ro - - /etc/localtime:/etc/localtime:ro - networks: - backend: - aliases: - - redis - - rss2_redis - restart: unless-stopped - healthcheck: - test: [ "CMD", "redis-cli", "--no-auth-warning", "-a", "${REDIS_PASSWORD}", "ping" ] - interval: 5s - timeout: 3s - retries: 5 - deploy: - resources: - limits: - memory: 768M - reservations: - memory: 512M - - rss-ingestor-go: - build: - context: ./rss-ingestor-go - dockerfile: Dockerfile - container_name: rss2_ingestor_go - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - RSS_MAX_WORKERS: 100 - RSS_POKE_INTERVAL_MIN: 15 - TZ: Europe/Madrid - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '2' - memory: 2G - reservations: - memory: 512M - - langdetect: - build: - context: . - dockerfile: Dockerfile - container_name: rss2_langdetect_py - command: bash -lc "python -m workers.langdetect_worker" - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - LANG_DETECT_SLEEP: 60 - LANG_DETECT_BATCH: 1000 - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '0.5' - memory: 512M - - # ================================================================================== - # SCRAPER WORKER (Go) - Extrae artículos de URLs - # ================================================================================== - scraper: - build: - context: . - dockerfile: Dockerfile.scraper - container_name: rss2_scraper - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - SCRAPER_SLEEP: 60 - SCRAPER_BATCH: 10 - TZ: Europe/Madrid - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 512M - - # ================================================================================== - # DISCOVERY WORKER (Go) - Descubre RSS feeds - # ================================================================================== - discovery: - build: - context: . - dockerfile: Dockerfile.discovery - container_name: rss2_discovery - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - DISCOVERY_INTERVAL: 900 - DISCOVERY_BATCH: 10 - MAX_FEEDS_PER_URL: 5 - TZ: Europe/Madrid - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 512M - - # ================================================================================== - # WIKI WORKER (Go) - Wikipedia info and thumbnails - # ================================================================================== - wiki-worker: - build: - context: . - dockerfile: Dockerfile.wiki - container_name: rss2_wiki_worker - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - WIKI_SLEEP: 10 - TZ: Europe/Madrid - volumes: - - ./data/wiki_images:/app/data/wiki_images - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '0.5' - memory: 256M - - # ================================================================================== - # BACKEND GO (API REST) - # ================================================================================== - backend-go: - build: - context: ./backend - dockerfile: Dockerfile - container_name: rss2_backend_go - environment: - TZ: Europe/Madrid - DATABASE_URL: postgres://${POSTGRES_USER:-rss}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-rss}?sslmode=disable - REDIS_URL: redis://:${REDIS_PASSWORD:-rss_redis_pass_2024}@redis:6379 - SECRET_KEY: ${SECRET_KEY:-change_this_to_a_long_random_string} - SERVER_PORT: "8080" - volumes: - - ./data/wiki_images:/app/data/wiki_images - networks: - - backend - - frontend - depends_on: - db: - condition: service_healthy - redis: - condition: service_healthy - restart: unless-stopped - - # ================================================================================== - # FRONTEND REACT - # ================================================================================== - rss2_frontend: - build: - context: ./frontend - dockerfile: Dockerfile - container_name: rss2_frontend - environment: - TZ: Europe/Madrid - VITE_API_URL: /api - networks: - - frontend - depends_on: - - backend-go - restart: unless-stopped - - # ================================================================================== - # NGINX (Puerto 8001 - sirve React + proxy API) - # ================================================================================== - nginx: - image: nginx:alpine - container_name: rss2_nginx - ports: - - "8001:80" - volumes: - - ./nginx.conf:/etc/nginx/nginx.conf:ro - networks: - - frontend - depends_on: - - rss2_frontend - - backend-go - restart: unless-stopped - - # ================================================================================== - # TRANSLATOR CPU (CTranslate2) - Scale with: docker compose up -d --scale translator=3 - # ================================================================================== - translator: - build: - context: . - dockerfile: Dockerfile.translator - image: rss2-translator:latest - command: bash -lc "python -m workers.ctranslator_worker" - security_opt: - - seccomp=unconfined - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - TARGET_LANGS: es - TRANSLATOR_BATCH: 32 - CT2_MODEL_PATH: /app/models/nllb-ct2 - CT2_DEVICE: cpu - CT2_COMPUTE_TYPE: int8 - UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M - HF_HOME: /app/hf_cache - TZ: Europe/Madrid - TRANSLATOR_ID: ${TRANSLATOR_ID:-} - volumes: - - ./workers:/app/workers - - ./hf_cache:/app/hf_cache - - ./models:/app/models - networks: - - backend - profiles: - - cpu-only - depends_on: - db: - condition: service_healthy - restart: unless-stopped - - # ================================================================================== - # TRANSLATION SCHEDULER - Creates translation jobs - # ================================================================================== - translation-scheduler: - build: - context: . - dockerfile: Dockerfile.scheduler - image: rss2-scheduler:latest - container_name: rss2_translation_scheduler - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - TARGET_LANGS: es - SCHEDULER_BATCH: 1000 - SCHEDULER_SLEEP: 30 - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - networks: - - backend - deploy: - resources: - limits: - cpus: '0.5' - memory: 256M - depends_on: - db: - condition: service_healthy - restart: unless-stopped - - # ================================================================================== - # TRANSLATOR GPU (CTranslate2 with CUDA) - # ================================================================================== - translator-gpu: - build: - context: . - dockerfile: Dockerfile.translator-gpu - image: rss2-translator-gpu:latest - container_name: rss2_translator_gpu - command: bash -lc "python -m workers.ctranslator_worker" - security_opt: - - seccomp=unconfined - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - TARGET_LANGS: es - TRANSLATOR_BATCH: 64 - CT2_MODEL_PATH: /app/models/nllb-ct2 - CT2_DEVICE: cuda - CT2_COMPUTE_TYPE: float16 - UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M - HF_HOME: /app/hf_cache - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - - ./hf_cache:/app/hf_cache - - ./models:/app/models - networks: - - backend - deploy: - resources: - limits: - memory: 4G - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [ gpu ] - depends_on: - db: - condition: service_healthy - restart: unless-stopped - - embeddings: - build: - context: . - dockerfile: Dockerfile - container_name: rss2_embeddings_py - command: bash -lc "python -m workers.embeddings_worker" - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - EMB_MODEL: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 - EMB_BATCH: 64 - EMB_SLEEP_IDLE: 5 - EMB_LANGS: es - EMB_LIMIT: 1000 - DEVICE: cuda - HF_HOME: /app/hf_cache - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - - ./hf_cache:/app/hf_cache - networks: - - backend - deploy: - resources: - limits: - memory: 6G - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [ gpu ] - depends_on: - db: - condition: service_healthy - restart: unless-stopped - - # ================================================================================== - # TOPICS WORKER (Go) - Matching temas y países - # ================================================================================== - topics: - build: - context: . - dockerfile: Dockerfile.topics - container_name: rss2_topics - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - TOPICS_SLEEP: 10 - TOPICS_BATCH: 500 - TZ: Europe/Madrid - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 512M - - # ================================================================================== - # RELATED WORKER (Go) - Noticias relacionadas - # ================================================================================== - related: - build: - context: . - dockerfile: Dockerfile.related - container_name: rss2_related - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - RELATED_SLEEP: 10 - RELATED_BATCH: 200 - RELATED_TOPK: 10 - EMB_MODEL: mxbai-embed-large - TZ: Europe/Madrid - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 1G - - qdrant: - image: qdrant/qdrant:latest - container_name: rss2_qdrant - environment: - TZ: Europe/Madrid - QDRANT__SERVICE__GRPC_PORT: 6334 - volumes: - - ./data/qdrant_storage:/qdrant/storage - - /etc/timezone:/etc/timezone:ro - - /etc/localtime:/etc/localtime:ro - networks: - - backend - restart: unless-stopped - deploy: - resources: - limits: - cpus: '4' - memory: 4G - reservations: - memory: 2G - - # ================================================================================== - # QDRANT WORKER (Go) - Vectorización y búsqueda semántica - # ================================================================================== - qdrant-worker: - build: - context: . - dockerfile: Dockerfile.qdrant - container_name: rss2_qdrant_worker - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - QDRANT_HOST: qdrant - QDRANT_PORT: 6333 - QDRANT_COLLECTION: news_vectors - OLLAMA_URL: http://ollama:11434 - QDRANT_SLEEP: 30 - QDRANT_BATCH: 100 - TZ: Europe/Madrid - networks: - - backend - depends_on: - db: - condition: service_healthy - qdrant: - condition: service_started - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 1G - - # ================================================================================== - # NER WORKER (Python) - Extracción de entidades - # ================================================================================== - ner: - build: - context: . - dockerfile: Dockerfile - container_name: rss2_ner - command: bash -lc "python -m workers.ner_worker" - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - NER_LANG: es - NER_BATCH: 64 - HF_HOME: /app/hf_cache - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - - ./hf_cache:/app/hf_cache - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '2' - memory: 2G - - # ================================================================================== - # CLUSTER WORKER (Python) - Agrupación de noticias - # ================================================================================== - cluster: - build: - context: . - dockerfile: Dockerfile - container_name: rss2_cluster_py - command: bash -lc "python -m workers.cluster_worker" - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - EVENT_DIST_THRESHOLD: 0.35 - EMB_MODEL: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '2' - memory: 2G - - # ================================================================================== - # LLM CATEGORIZER (Python) - Categorización con Ollama - # ================================================================================== - llm-categorizer: - build: - context: . - dockerfile: Dockerfile - container_name: rss2_llm_categorizer - command: bash -lc "python -m workers.simple_categorizer_worker" - environment: - DB_HOST: db - DB_PORT: 5432 - DB_NAME: ${DB_NAME:-rss} - DB_USER: ${DB_USER:-rss} - DB_PASS: ${DB_PASS} - CATEGORIZER_BATCH_SIZE: 10 - CATEGORIZER_SLEEP_IDLE: 5 - TZ: Europe/Madrid - volumes: - - ./workers:/app/workers - networks: - - backend - depends_on: - db: - condition: service_healthy - restart: unless-stopped - deploy: - resources: - limits: - cpus: '2' - memory: 1G - - # ================================================================================== - # MONITORING STACK - SECURED - # ================================================================================== - - prometheus: - image: prom/prometheus:latest - container_name: rss2_prometheus - volumes: - - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro - - prometheus_data:/prometheus - command: - - '--config.file=/etc/prometheus/prometheus.yml' - - '--storage.tsdb.path=/prometheus' - - '--web.console.libraries=/usr/share/prometheus/console_libraries' - - '--web.console.templates=/usr/share/prometheus/consoles' - # SEGURIDAD: Sin exposición de puertos - acceso solo vía Grafana o túnel SSH - # ports: - # - "9090:9090" - networks: - - monitoring - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 2G - - grafana: - image: grafana/grafana:latest - container_name: rss2_grafana - # SEGURIDAD: Acceso solo en localhost o vía túnel SSH - # Para acceso remoto, usar túnel SSH: ssh -L 3001:localhost:3001 user@server - ports: - - "127.0.0.1:3001:3000" - environment: - # SEGURIDAD: Cambiar este password en producción - - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-change_this_password} - - GF_USERS_ALLOW_SIGN_UP=false - - GF_SERVER_ROOT_URL=http://localhost:3001 - - GF_SECURITY_COOKIE_SECURE=false - - GF_SECURITY_COOKIE_SAMESITE=lax - volumes: - - grafana_data:/var/lib/grafana - networks: - - monitoring - depends_on: - - prometheus - restart: unless-stopped - deploy: - resources: - limits: - cpus: '1' - memory: 1G - - cadvisor: - image: gcr.io/cadvisor/cadvisor:latest - container_name: rss2_cadvisor - # SEGURIDAD: Sin exposición de puertos - solo acceso interno - # ports: - # - "8081:8080" - volumes: - - /:/rootfs:ro - - /var/run:/var/run:ro - - /sys:/sys:ro - - /var/lib/docker/:/var/lib/docker:ro - - /dev/disk/:/dev/disk:ro - devices: - - /dev/kmsg - networks: - - monitoring - restart: unless-stopped - deploy: - resources: - limits: - cpus: '0.5' - memory: 512M - -# ================================================================================== -# REDES SEGMENTADAS -# ================================================================================== -networks: - # Red frontal - Solo nginx y web app - frontend: - name: rss2_frontend - driver: bridge - internal: false - - # Red backend - Base de datos, workers, redis, qdrant - backend: - name: rss2_backend - driver: bridge - internal: false # Acceso externo permitido (necesario para ingestor) - - # Red de monitoreo - Prometheus, Grafana, cAdvisor - monitoring: - name: rss2_monitoring - driver: bridge - internal: true - -volumes: - prometheus_data: - grafana_data: - torch_extensions: diff --git a/docker-entrypoint-db.sh b/docker-entrypoint-db.sh deleted file mode 100755 index 1eb2722..0000000 --- a/docker-entrypoint-db.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -e - -# Detectar si la base de datos necesita reinicialización -PGDATA_DIR="/var/lib/postgresql/data/18/main" - -echo "RSS2: Checking database integrity..." - -# Si no existe el archivo de versión, es una base de datos nueva -if [ ! -f "$PGDATA_DIR/PG_VERSION" ]; then - echo "RSS2: New database - will be initialized by docker-entrypoint" -else - # Verificar si la base de datos es funcional - if ! pg_isready -h localhost -p 5432 -U "${POSTGRES_USER:-rss}" 2>/dev/null; then - echo "RSS2: Database appears corrupted - removing old data files for fresh initialization..." - # Eliminar solo los archivos de datos, no todo el directorio - rm -rf "$PGDATA_DIR"/* - echo "RSS2: Data files removed - docker-entrypoint will initialize fresh database" - else - echo "RSS2: Database is healthy" - fi -fi - -# Ejecutar el entrypoint original con los parámetros de PostgreSQL -exec docker-entrypoint.sh \ - postgres \ - -c max_connections=200 \ - -c shared_buffers=4GB \ - -c effective_cache_size=12GB \ - -c work_mem=16MB \ - -c maintenance_work_mem=512MB \ - -c autovacuum_max_workers=3 \ - -c autovacuum_vacuum_scale_factor=0.02 \ - -c autovacuum_vacuum_cost_limit=1000 \ - -c max_worker_processes=8 \ - -c max_parallel_workers=6 \ - -c max_parallel_workers_per_gather=2 \ - -c wal_level=replica \ - -c max_wal_senders=5 \ - -c wal_keep_size=1GB \ - -c hot_standby=on \ - "$@" diff --git a/frontend/Dockerfile b/frontend/Dockerfile deleted file mode 100644 index 5f06218..0000000 --- a/frontend/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM node:20-alpine AS builder - -WORKDIR /app - -COPY package*.json ./ -RUN npm install - -COPY . . -RUN npm run build - -FROM nginx:alpine - -COPY --from=builder /app/dist /usr/share/nginx/html - -COPY nginx.conf /etc/nginx/nginx.conf - -EXPOSE 80 - -CMD ["nginx", "-g", "daemon off;"] diff --git a/monitoring/prometheus.yml b/monitoring/prometheus.yml deleted file mode 100644 index 6cc80ce..0000000 --- a/monitoring/prometheus.yml +++ /dev/null @@ -1,21 +0,0 @@ -global: - scrape_interval: 15s - -scrape_configs: - - job_name: 'prometheus' - static_configs: - - targets: ['localhost:9090'] - - - job_name: 'cadvisor' - static_configs: - - targets: ['cadvisor:8080'] - - # If we had Node Exporter (for host metrics): - # - job_name: 'node_exporter' - # static_configs: - # - targets: ['node-exporter:9100'] - - # If the app exposes metrics (e.g. Flask/Gunicorn with prometheus_client) - # - job_name: 'rss2_web' - # static_configs: - # - targets: ['rss2_web:8000'] diff --git a/reset_and_deploy.sh b/reset_and_deploy.sh deleted file mode 100755 index c103aef..0000000 --- a/reset_and_deploy.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -echo "Stopping all containers..." -docker-compose down - -echo "Removing data volumes..." -# Use sudo if necessary, or ensure current user has permissions -rm -rf data/pgdata data/pgdata-replica data/redis-data data/qdrant_storage - -echo "Starting deployment from scratch..." -docker-compose up -d --build - -echo "Deployment complete. Checking status..." -docker-compose ps diff --git a/rss-ingestor-go/Dockerfile b/rss-ingestor-go/Dockerfile deleted file mode 100644 index b75cbaa..0000000 --- a/rss-ingestor-go/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -FROM golang:alpine AS builder - -WORKDIR /app - -# Install git and SSL certs -RUN apk add --no-cache git ca-certificates - -# Copy source code immediately -COPY . . - -# Download dependencies -RUN go mod tidy && go mod download - -# Build the Go app -RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o rss-ingestor . - -# Final stage -FROM alpine:latest - -WORKDIR /root/ - -# Copy the Pre-built binary file from the previous stage -COPY --from=builder /app/rss-ingestor . -COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ - -# Command to run the executable -CMD ["./rss-ingestor"] diff --git a/start_docker.sh b/start_docker.sh deleted file mode 100755 index 1aef36d..0000000 --- a/start_docker.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Script para iniciar los servicios de Docker -# Ejecutar con: sudo ./start_docker.sh - -set -e -cd "$(dirname "$0")" - -echo "=== RSS2 Docker Services ===" - -# Verificación de modelo eliminada (script de conversión no disponible) - -echo "" -echo "Iniciando servicios Docker..." -docker compose up -d --build - -echo "" -echo "✓ Servicios iniciados" -echo "" -echo "Para ver los logs:" -echo " docker compose logs -f translator" -echo "" -echo "Para verificar el estado:" -echo " docker compose ps"