refactor: reorganizar estructura de archivos en raíz

Antes la raíz tenía más de 20 archivos sueltos. Ahora están organizados en:

  docs/       10 archivos .md de documentación técnica
  scripts/    3 scripts utilitarios (credentials, migrate, verify)
  config/     entity_config.json (aliases y blacklist NER)
  data/       feeds.csv (feeds precargados)

Eliminados restos de Docker que ya no aplican:
  .dockerignore, .env.example, .env.secure.example, nginx.conf (raíz)

Makefile: eliminado el target docker-build; añadidos install/rebuild/check/poc

Referencias actualizadas en:
  deploy/debian/install.sh  entity_config.json -> config/entity_config.json
  deploy/debian/build.sh    entity_config.json -> config/entity_config.json
  README.md                 links a docs/ y data/ actualizados,
                            árbol de estructura del repo reescrito

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
SITO 2026-03-30 22:29:50 +02:00
parent ec839b5b54
commit b3bf3d7a7f
23 changed files with 59 additions and 335 deletions

View file

@ -1,13 +0,0 @@
.git
pgdata
pgdata-replica
pgdata-replica.old.*
pgdata.failed_restore
redis-data
hf_cache
qdrant_storage
venv
__pycache__
*.pyc
*.log

View file

@ -1,67 +0,0 @@
# Example environment file. Values such as "change_this_password" and
# "change_this_to_a_long_random_string" are placeholders and must be replaced
# with real secrets before use.
# NOTE(review): hostnames like "db", "db-replica", "redis" and "qdrant" look
# like container/service names — confirm against the deployment in use.

# Database Configuration
POSTGRES_DB=rss
POSTGRES_USER=rss
# Placeholder password — replace before deploying.
POSTGRES_PASSWORD=change_this_password
# The DB_* values below repeat the POSTGRES_* values above; presumably they
# must be kept in sync (verify against the application's config loader).
DB_NAME=rss
DB_USER=rss
DB_PASS=change_this_password
DB_HOST=db
DB_PORT=5432
# Separate hosts for writes and reads (the read host points at a replica).
DB_WRITE_HOST=db
DB_READ_HOST=db-replica
# Redis Configuration
REDIS_HOST=redis
REDIS_PORT=6379
# Application Secrets
# Placeholder — replace with a long random string.
SECRET_KEY=change_this_to_a_long_random_string
# External Services
ALLTALK_URL=http://host.docker.internal:7851
# AI Models & Workers
RSS_MAX_WORKERS=3
# Translation Pipeline
TARGET_LANGS=es
TRANSLATOR_BATCH=16
SCHEDULER_BATCH=2000
SCHEDULER_SLEEP=30
LANG_DETECT_BATCH=1000
LANG_DETECT_SLEEP=60
# RSS Ingestor Configuration
RSS_POKE_INTERVAL_MIN=15
RSS_MAX_FAILURES=10
RSS_FEED_TIMEOUT=60
# URL Feed Discovery Worker
URL_DISCOVERY_INTERVAL_MIN=15
URL_DISCOVERY_BATCH_SIZE=10
MAX_FEEDS_PER_URL=5
# CTranslate2 / AI Model Paths
CT2_MODEL_PATH=/app/models/nllb-ct2
# NOTE(review): "cuda" here and in EMB_DEVICE presumably requires a GPU host —
# confirm what value CPU-only machines should use.
CT2_DEVICE=cuda
CT2_COMPUTE_TYPE=int8_float16
UNIVERSAL_MODEL=facebook/nllb-200-distilled-600M
# Embeddings
EMB_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
EMB_BATCH=64
EMB_DEVICE=cuda
# NER
NER_LANG=es
NER_BATCH=64
# Flask / Gunicorn
GUNICORN_WORKERS=8
# 0 = debug mode off.
FLASK_DEBUG=0
# Qdrant Configuration
QDRANT_HOST=qdrant
QDRANT_PORT=6333
QDRANT_COLLECTION_NAME=news_vectors
QDRANT_BATCH_SIZE=100
QDRANT_SLEEP_IDLE=30

View file

@ -1,117 +0,0 @@
# ==================================================================================
# SEGURIDAD: CONFIGURACIÓN DE PRODUCCIÓN
# ==================================================================================
#
# IMPORTANTE:
# 1. Copia este archivo a .env
# 2. Cambia TODOS los valores de contraseñas y secrets
# 3. NO subas el .env resultante (con los valores reales) a repositorios públicos
# 4. Añade .env al .gitignore
#
# ==================================================================================
# ==================================================================================
# DATABASE CONFIGURATION - PostgreSQL
# ==================================================================================
POSTGRES_DB=rss
POSTGRES_USER=rss
# CRÍTICO: Genera una contraseña fuerte (mínimo 32 caracteres aleatorios)
# Ejemplo para generar: openssl rand -base64 32
POSTGRES_PASSWORD=CAMBIAR_ESTO_POR_UNA_CONTRASEÑA_FUERTE_DE_32_CARACTERES
DB_NAME=rss
DB_USER=rss
DB_PASS=CAMBIAR_ESTO_POR_UNA_CONTRASEÑA_FUERTE_DE_32_CARACTERES
DB_HOST=db
DB_PORT=5432
DB_WRITE_HOST=db
DB_READ_HOST=db-replica
# ==================================================================================
# REDIS CONFIGURATION - Autenticación habilitada
# ==================================================================================
REDIS_HOST=redis
REDIS_PORT=6379
# CRÍTICO: Genera una contraseña fuerte para Redis
# Ejemplo: openssl rand -base64 32
REDIS_PASSWORD=CAMBIAR_ESTO_POR_UNA_CONTRASEÑA_FUERTE_REDIS
# ==================================================================================
# APPLICATION SECRETS
# ==================================================================================
# CRÍTICO: Secret key para Flask - debe ser único y secreto
# Genera con: python -c "import secrets; print(secrets.token_hex(32))"
SECRET_KEY=CAMBIAR_ESTO_POR_UN_TOKEN_HEX_DE_64_CARACTERES
# ==================================================================================
# MONITORING - Grafana
# ==================================================================================
# IMPORTANTE: Cambia el password de admin de Grafana
GRAFANA_PASSWORD=CAMBIAR_ESTO_POR_UNA_CONTRASEÑA_FUERTE_GRAFANA
# ==================================================================================
# EXTERNAL SERVICES
# ==================================================================================
ALLTALK_URL=http://host.docker.internal:7851
# ==================================================================================
# AI MODELS & WORKERS
# ==================================================================================
RSS_MAX_WORKERS=3
TARGET_LANGS=es
TRANSLATOR_BATCH=128
ENQUEUE=300
# RSS Ingestor Configuration
RSS_POKE_INTERVAL_MIN=15
RSS_MAX_FAILURES=10
RSS_FEED_TIMEOUT=60
# URL Feed Discovery Worker
URL_DISCOVERY_INTERVAL_MIN=15
URL_DISCOVERY_BATCH_SIZE=10
MAX_FEEDS_PER_URL=5
# CTranslate2 / AI Model Paths
CT2_MODEL_PATH=/app/models/nllb-ct2
CT2_DEVICE=cuda
CT2_COMPUTE_TYPE=int8_float16
UNIVERSAL_MODEL=facebook/nllb-200-distilled-600M
# Embeddings
EMB_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
EMB_BATCH=64
EMB_DEVICE=cuda
# NER
NER_LANG=es
NER_BATCH=64
# Flask / Gunicorn
GUNICORN_WORKERS=8
FLASK_DEBUG=0
# Qdrant Configuration
QDRANT_HOST=qdrant
QDRANT_PORT=6333
QDRANT_COLLECTION_NAME=news_vectors
QDRANT_BATCH_SIZE=100
QDRANT_SLEEP_IDLE=30
# ==================================================================================
# COMANDOS ÚTILES PARA GENERAR CONTRASEÑAS SEGURAS
# ==================================================================================
#
# PostgreSQL Password (32 bytes aleatorios, 44 caracteres en base64):
# openssl rand -base64 32
#
# Redis Password (32 bytes aleatorios, 44 caracteres en base64):
# openssl rand -base64 32
#
# Flask Secret Key (64 hex chars):
# python -c "import secrets; print(secrets.token_hex(32))"
#
# Grafana Password (fuerte):
# openssl rand -base64 24
#
# ==================================================================================

View file

@ -1,6 +1,6 @@
# RSS2 Workers Makefile # RSS2 Workers Makefile
.PHONY: all build clean deps ingestor scraper discovery topics related qdrant server .PHONY: all build clean deps ingestor scraper discovery topics related qdrant server install rebuild check poc
# Binary output directory # Binary output directory
BIN_DIR := bin BIN_DIR := bin
@ -69,21 +69,16 @@ run-qdrant:
DB_HOST=localhost DB_PORT=5432 DB_NAME=rss DB_USER=rss DB_PASS=rss \ DB_HOST=localhost DB_PORT=5432 DB_NAME=rss DB_USER=rss DB_PASS=rss \
QDRANT_HOST=localhost QDRANT_PORT=6333 OLLAMA_URL=http://localhost:11434 $(QDRANT) QDRANT_HOST=localhost QDRANT_PORT=6333 OLLAMA_URL=http://localhost:11434 $(QDRANT)
# Docker builds # Despliegue en Debian (sin Docker)
docker-build: install:
docker build -t rss2-ingestor -f rss-ingestor-go/Dockerfile ./rss-ingestor-go sudo bash deploy/debian/prerequisites.sh
docker build -t rss2-server -f backend/Dockerfile ./backend sudo bash deploy/debian/install.sh
docker build -t rss2-scraper -f Dockerfile.scraper ./backend
docker build -t rss2-discovery -f Dockerfile.discovery ./backend rebuild:
docker build -t rss2-topics -f Dockerfile.topics ./backend sudo bash deploy/debian/build.sh
docker build -t rss2-related -f Dockerfile.related ./backend
docker build -t rss2-qdrant -f Dockerfile.qdrant ./backend check:
docker build -t rss2-langdetect -f Dockerfile . bash deploy/debian/check.sh
docker build -t rss2-scheduler -f Dockerfile.scheduler .
docker build -t rss2-translator -f Dockerfile.translator . poc:
docker build -t rss2-translator-gpu -f Dockerfile.translator-gpu . bash poc/poc.sh
docker build -t rss2-embeddings -f Dockerfile.embeddings_worker .
docker build -t rss2-ner -f Dockerfile .
docker build -t rss2-llm-categorizer -f Dockerfile.llm_worker .
docker build -t rss2-frontend -f frontend/Dockerfile ./frontend
docker build -t rss2-nginx -f Dockerfile.nginx .

View file

@ -109,7 +109,7 @@ Compila los binarios Go, el frontend React, crea los servicios systemd y arranca
http://IP_DEL_SERVIDOR:8001 http://IP_DEL_SERVIDOR:8001
``` ```
Guía completa: [DEPLOY_DEBIAN.md](DEPLOY_DEBIAN.md) Guía completa: [docs/DEPLOY_DEBIAN.md](docs/DEPLOY_DEBIAN.md)
--- ---
@ -155,23 +155,46 @@ sudo bash deploy/debian/build.sh
## Estructura del repositorio ## Estructura del repositorio
``` ```
├── backend/ Go — API REST + workers (scraper, discovery, wiki, topics, related, qdrant) ├── README.md
├── rss-ingestor-go/ Go — Ingestor de feeds RSS ├── requirements.txt Dependencias Python para workers ML
├── frontend/ React + TypeScript + Tailwind ├── Makefile Compilación local de binarios Go
├── workers/ Python — ML workers (traducción, embeddings, NER, cluster, categorización)
├── init-db/ SQL — Schema y datos iniciales ├── backend/ Go — API REST + workers (scraper, discovery, wiki, topics, related, qdrant)
├── migrations/ SQL — Migraciones incrementales ├── rss-ingestor-go/ Go — Ingestor de feeds RSS
├── deploy/debian/ Scripts de despliegue para Debian sin Docker ├── frontend/ React + TypeScript + Tailwind
│ ├── prerequisites.sh Instala todas las dependencias del sistema ├── workers/ Python — Workers ML (traducción, embeddings, NER, cluster, categorización)
│ ├── install.sh Instalación completa
│ ├── build.sh Recompila y reinicia tras actualizar código ├── init-db/ SQL — Schema completo y datos iniciales
│ ├── env.example Plantilla de variables de entorno ├── migrations/ SQL — Migraciones incrementales
│ ├── nginx.conf Configuración nginx para despliegue nativo
│ └── systemd/ Ficheros de servicio systemd (16 servicios) ├── config/
├── poc/ │ └── entity_config.json Aliases y blacklist para normalización de entidades NER
│ ├── poc.sh POC local con datos de prueba (sin Docker, sin ML)
│ └── seed.sql Datos de muestra para el POC ├── data/
├── feeds.csv Feeds RSS precargados para importar desde el admin │ └── feeds.csv Feeds RSS precargados para importar desde el admin
├── entity_config.json Aliases y blacklist para normalización de entidades NER
└── DEPLOY_DEBIAN.md Guía detallada de despliegue en Debian ├── docs/
│ ├── DEPLOY_DEBIAN.md Guía detallada de despliegue en Debian
│ ├── SECURITY_GUIDE.md Guía de seguridad
│ ├── SECURITY_AUDIT.md Resultado del audit de seguridad
│ ├── QDRANT_SETUP.md Configuración de Qdrant
│ └── ... Resto de documentación técnica
├── scripts/
│ ├── generate_secure_credentials.sh
│ ├── migrate_to_secure.sh
│ └── verify_security.sh
├── deploy/debian/ Despliegue nativo en Debian (sin Docker)
│ ├── prerequisites.sh Instala todas las dependencias del sistema
│ ├── install.sh Instalación completa
│ ├── build.sh Recompila y reinicia tras actualizar código
│ ├── check.sh Diagnóstico del sistema
│ ├── env.example Plantilla de variables de entorno
│ ├── nginx.conf Configuración nginx
│ └── systemd/ 16 ficheros de servicio systemd
└── poc/
├── poc.sh POC local en 2 minutos (sin Docker, sin ML)
└── seed.sql 10 noticias de muestra en español
``` ```

View file

@ -55,7 +55,7 @@ fi
# --- Workers Python --- # --- Workers Python ---
info "Sincronizando workers Python..." info "Sincronizando workers Python..."
rsync -a --delete "$REPO_ROOT/workers/" "$RSS2_HOME/src/workers/" rsync -a --delete "$REPO_ROOT/workers/" "$RSS2_HOME/src/workers/"
cp "$REPO_ROOT/entity_config.json" "$RSS2_HOME/src/" 2>/dev/null || true cp "$REPO_ROOT/config/entity_config.json" "$RSS2_HOME/src/" 2>/dev/null || true
info " [OK] workers Python" info " [OK] workers Python"
chown -R rss2:rss2 "$RSS2_HOME/bin" "$RSS2_HOME/frontend/dist" "$RSS2_HOME/src" chown -R rss2:rss2 "$RSS2_HOME/bin" "$RSS2_HOME/frontend/dist" "$RSS2_HOME/src"

View file

@ -172,7 +172,7 @@ fi
# Copiar workers Python al directorio de trabajo # Copiar workers Python al directorio de trabajo
info "Copiando workers Python..." info "Copiando workers Python..."
rsync -a --delete "$REPO_ROOT/workers/" "$RSS2_HOME/src/workers/" rsync -a --delete "$REPO_ROOT/workers/" "$RSS2_HOME/src/workers/"
cp "$REPO_ROOT/entity_config.json" "$RSS2_HOME/src/" 2>/dev/null || true cp "$REPO_ROOT/config/entity_config.json" "$RSS2_HOME/src/" 2>/dev/null || true
# ============================================================================= # =============================================================================
# 7. COMPILAR GO (backend + workers) # 7. COMPILAR GO (backend + workers)

View file

@ -1,97 +0,0 @@
# nginx reverse-proxy configuration: forwards "/" to a frontend upstream and
# "/api/" to an API upstream, answers "/health" directly, and denies access to
# dot-files.
# Worker processes run as the "nginx" user.
user nginx;
# Let nginx pick the number of worker processes automatically.
worker_processes auto;
# Log only messages of severity "warn" and above.
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;
events {
# Maximum simultaneous connections per worker process.
worker_connections 2048;
# Use the epoll connection-processing method.
use epoll;
}
http {
include /etc/nginx/mime.types;
# Fallback MIME type for files not matched by mime.types.
default_type application/octet-stream;
# Access-log line format named "main", including the X-Forwarded-For header.
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 65;
types_hash_max_size 2048;
# Accept request bodies up to 100 MB.
client_max_body_size 100M;
# Compress responses of the listed MIME types, including proxied responses.
gzip on;
gzip_vary on;
gzip_proxied any;
gzip_comp_level 6;
gzip_types text/plain text/css text/javascript
application/json application/javascript
application/xml text/xml;
# Upstream for Go API
# NOTE(review): "backend-go" and "rss2_frontend" below look like container
# hostnames — confirm they resolve in the target environment.
upstream api_backend {
server backend-go:8080;
# Keep up to 32 idle connections to the upstream open per worker.
keepalive 32;
}
# Upstream for React Frontend
upstream frontend {
server rss2_frontend:80;
# Keep up to 16 idle connections to the upstream open per worker.
keepalive 16;
}
server {
listen 80;
# Catch-all server name: matches any Host header.
server_name _;
client_body_timeout 60s;
client_header_timeout 60s;
send_timeout 300s;
# Serve React Frontend
location / {
proxy_pass http://frontend;
proxy_http_version 1.1;
# Forward the client's Upgrade header (WebSocket-style protocol upgrade).
proxy_set_header Upgrade $http_upgrade;
# NOTE(review): Connection is set to 'upgrade' for every request, not only
# upgrade handshakes — confirm this is intended together with the upstream
# "keepalive 16" above.
proxy_set_header Connection 'upgrade';
# Pass the original host and client address information to the upstream.
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_cache_bypass $http_upgrade;
}
# Proxy to Go API
location /api/ {
# The matched "/api/" prefix is replaced with "/api/", so the request path
# reaches the upstream unchanged.
proxy_pass http://api_backend/api/;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Clear the Connection header; combined with HTTP/1.1 above this allows the
# upstream keepalive connections to be reused.
proxy_set_header Connection "";
# Long send/read timeouts (300s) for slow API responses.
proxy_connect_timeout 60s;
proxy_send_timeout 300s;
proxy_read_timeout 300s;
}
# Health check
# Answered by nginx itself with a fixed 200 "ok"; not written to the access log.
location /health {
access_log off;
return 200 "ok";
}
# Block sensitive files
# Regex match: any request path containing "/." is denied and not logged.
location ~ /\. {
deny all;
access_log off;
log_not_found off;
}
}
}