go integration and wikipedia

This commit is contained in:
jlimolina 2026-03-28 18:30:07 +01:00
parent 47a252e339
commit ee90335b92
7828 changed files with 1307913 additions and 20807 deletions

View file

@ -22,9 +22,13 @@ ALLTALK_URL=http://host.docker.internal:7851
# AI Models & Workers
RSS_MAX_WORKERS=3
# Translation Pipeline
TARGET_LANGS=es
TRANSLATOR_BATCH=128
ENQUEUE=300
TRANSLATOR_BATCH=16
SCHEDULER_BATCH=2000
SCHEDULER_SLEEP=30
LANG_DETECT_BATCH=1000
LANG_DETECT_SLEEP=60
# RSS Ingestor Configuration
RSS_POKE_INTERVAL_MIN=15

View file

@ -1,67 +1,50 @@
FROM python:3.11-slim
# CUDA o CPU
ARG TORCH_CUDA=cu121
WORKDIR /app
# --------------------------------------------------------
# Dependencias del sistema
# --------------------------------------------------------
RUN apt-get update && apt-get install -y --no-install-recommends \
libpq-dev \
gcc \
git \
libcairo2 \
libpango-1.0-0 \
libpangocairo-1.0-0 \
libgdk-pixbuf-2.0-0 \
libffi-dev \
shared-mime-info \
libpq-dev gcc git curl \
&& rm -rf /var/lib/apt/lists/*
ENV PYTHONUNBUFFERED=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1 \
TOKENIZERS_PARALLELISM=false \
HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
HF_HOME=/root/.cache/huggingface
# --------------------------------------------------------
# Instalación de requirements
# --------------------------------------------------------
COPY requirements.txt .
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel
RUN pip install --no-cache-dir --upgrade pip
# Instalar PyTorch según GPU/CPU
RUN if [ "$TORCH_CUDA" = "cu121" ]; then \
pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cu121 \
torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 ; \
else \
pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu \
torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 ; \
fi
RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu121
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install --no-cache-dir \
ctranslate2 \
sentencepiece \
transformers==4.44.0 \
protobuf==3.20.3 \
"numpy<2" \
psycopg2-binary \
redis \
requests \
beautifulsoup4 \
lxml \
langdetect \
nltk \
scikit-learn \
pandas \
sentence-transformers \
spacy
# Instalar ctranslate2 con soporte CUDA
RUN if [ "$TORCH_CUDA" = "cu121" ]; then \
pip install --no-cache-dir ctranslate2 ; \
else \
pip install --no-cache-dir ctranslate2 ; \
fi
RUN python -m spacy download es_core_news_lg
# Descargar modelo spaCy ES
RUN python -m spacy download es_core_news_md || true
COPY workers/ ./workers/
COPY init-db/ ./init-db/
COPY migrations/ ./migrations/
COPY entity_config.json .
# --------------------------------------------------------
# Copiar TODO el proyecto rss2/
# --------------------------------------------------------
COPY . .
# --------------------------------------------------------
# Puede descargar modelos NLLB o Sentence-BERT si existe
# --------------------------------------------------------
RUN python download_models.py || true
EXPOSE 8000
ENV DB_HOST=db
ENV DB_PORT=5432
ENV DB_NAME=rss
ENV DB_USER=rss
ENV DB_PASS=x
CMD ["python", "-m", "workers.embeddings_worker"]

31
Dockerfile.discovery Normal file
View file

@ -0,0 +1,31 @@
# Multi-stage build for the feed-discovery worker.
# Stage 1: compile a static Go binary with the full toolchain.
FROM golang:1.22-alpine AS builder
# Allow the toolchain to auto-upgrade if go.mod requires a newer Go release.
ENV GOTOOLCHAIN=auto
# git is needed so `go mod download` can fetch VCS-hosted modules.
RUN apk add --no-cache git
WORKDIR /app
# Copy module manifests first so the dependency layer stays cached
# across source-only changes.
COPY backend/go.mod backend/go.sum ./
RUN go mod download
COPY backend/ ./
# CGO disabled -> fully static binary, runnable on plain Alpine.
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/discovery ./cmd/discovery
# Stage 2: minimal runtime image (certs + tzdata + binary only).
FROM alpine:3.19
RUN apk add --no-cache ca-certificates tzdata
COPY --from=builder /bin/discovery /bin/discovery
# Connection/tuning defaults; override at deploy time.
# NOTE(review): DB_PASS here is a placeholder default — inject the real
# secret via runtime environment, never bake it into the image.
ENV DB_HOST=db \
DB_PORT=5432 \
DB_NAME=rss \
DB_USER=rss \
DB_PASS=rss \
DISCOVERY_INTERVAL=900 \
DISCOVERY_BATCH=10 \
MAX_FEEDS_PER_URL=5
ENTRYPOINT ["/bin/discovery"]

View file

@ -1,53 +0,0 @@
# GPU image for the LLM categorizer worker (ExLlamaV2 on CUDA 12.1).
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
# Avoid interactive prompts during apt installs.
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
# Install system dependencies.
RUN apt-get update && apt-get install -y \
python3.10 \
python3-pip \
git \
wget \
&& rm -rf /var/lib/apt/lists/*
# Create working directory.
WORKDIR /app
# Upgrade pip and build tooling.
RUN pip3 install --upgrade pip setuptools wheel
# Install PyTorch dependencies (CUDA 12.1 wheels).
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
# Install ExLlamaV2 (quantized-LLM inference backend).
RUN pip3 install exllamav2
# Install remaining Python dependencies.
# NOTE(review): none of these pins a version — builds are not reproducible.
RUN pip3 install \
psycopg2-binary \
huggingface-hub \
sentencepiece \
ninja
# Install python-is-python3 so `python` resolves to python3.
RUN apt-get update && apt-get install -y python-is-python3 && rm -rf /var/lib/apt/lists/*
# Copy only the worker code this image runs.
COPY workers/llm_categorizer_worker.py /app/workers/llm_categorizer_worker.py
COPY workers/__init__.py /app/workers/__init__.py
# Create directories for models and the HF cache.
RUN mkdir -p /app/models/llm /app/hf_cache
# Environment variables for model caching.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in newer transformers
# releases in favour of HF_HOME — kept for backward compatibility.
ENV HF_HOME=/app/hf_cache
ENV TRANSFORMERS_CACHE=/app/hf_cache
# Optional healthcheck — trivially succeeds; it only proves the container
# can spawn python3, not that the worker is making progress.
HEALTHCHECK --interval=60s --timeout=10s --start-period=120s \
CMD python3 -c "import sys; sys.exit(0)" || exit 1
# Default command.
CMD ["python3", "-m", "workers.llm_categorizer_worker"]

34
Dockerfile.qdrant Normal file
View file

@ -0,0 +1,34 @@
# Multi-stage build for the Qdrant vectorisation worker.
FROM golang:1.22-alpine AS builder
# Allow toolchain auto-upgrade when go.mod demands a newer Go.
ENV GOTOOLCHAIN=auto
# git is required for fetching VCS-hosted Go modules.
RUN apk add --no-cache git
WORKDIR /app
# Manifests first: keeps the module-download layer cached across source edits.
COPY backend/go.mod backend/go.sum ./
RUN go mod download
COPY backend/ ./
# Static binary (CGO off) so it runs on plain Alpine.
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/qdrant-worker ./cmd/qdrant
# Minimal runtime stage.
FROM alpine:3.19
RUN apk add --no-cache ca-certificates tzdata
COPY --from=builder /bin/qdrant-worker /bin/qdrant-worker
# Defaults for DB, Qdrant and Ollama endpoints; override at deploy time.
# NOTE(review): DB_PASS is a placeholder — inject the real secret at runtime.
ENV DB_HOST=db \
DB_PORT=5432 \
DB_NAME=rss \
DB_USER=rss \
DB_PASS=rss \
QDRANT_HOST=qdrant \
QDRANT_PORT=6333 \
QDRANT_COLLECTION=news_vectors \
OLLAMA_URL=http://ollama:11434 \
QDRANT_SLEEP=30 \
QDRANT_BATCH=100
ENTRYPOINT ["/bin/qdrant-worker"]

32
Dockerfile.related Normal file
View file

@ -0,0 +1,32 @@
# Multi-stage build for the related-news worker.
FROM golang:1.22-alpine AS builder
# Allow toolchain auto-upgrade when go.mod demands a newer Go.
ENV GOTOOLCHAIN=auto
# git is required for fetching VCS-hosted Go modules.
RUN apk add --no-cache git
WORKDIR /app
# Manifests first for a cacheable dependency layer.
COPY backend/go.mod backend/go.sum ./
RUN go mod download
COPY backend/ ./
# Static binary (CGO off) so it runs on plain Alpine.
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/related ./cmd/related
# Minimal runtime stage.
FROM alpine:3.19
RUN apk add --no-cache ca-certificates tzdata
COPY --from=builder /bin/related /bin/related
# Tuning + connection defaults; override at deploy time.
# NOTE(review): DB_PASS is a placeholder — inject the real secret at runtime.
ENV DB_HOST=db \
DB_PORT=5432 \
DB_NAME=rss \
DB_USER=rss \
DB_PASS=rss \
RELATED_SLEEP=10 \
RELATED_BATCH=200 \
RELATED_TOPK=10 \
EMB_MODEL=mxbai-embed-large
ENTRYPOINT ["/bin/related"]

View file

@ -1,12 +0,0 @@
# PostgreSQL streaming-replica image: stock postgres plus an init script
# that configures replication on first start.
FROM postgres:18-alpine
# Copy initialization script (runs automatically on first container init).
COPY init-replica/init-replica.sh /docker-entrypoint-initdb.d/
# Make script executable
RUN chmod +x /docker-entrypoint-initdb.d/init-replica.sh
# Set environment for replication
# NOTE(review): REPLICATION_PASSWORD is hardcoded here — override it at
# runtime; a baked-in credential is visible in the image history.
ENV PRIMARY_HOST=db
ENV REPLICATION_USER=replicator
ENV REPLICATION_PASSWORD=replica_password

23
Dockerfile.scheduler Normal file
View file

@ -0,0 +1,23 @@
# Lightweight Python image for the translation scheduler.
FROM python:3.11-slim
WORKDIR /app
# libpq headers for PostgreSQL client builds.
# NOTE(review): psycopg2-binary ships prebuilt wheels, so libpq-dev may be
# vestigial — confirm before removing.
RUN apt-get update && apt-get install -y --no-install-recommends \
libpq-dev \
&& rm -rf /var/lib/apt/lists/*
ENV PYTHONUNBUFFERED=1
# NOTE(review): requirements.txt is copied but never pip-installed — only
# the two packages below are installed. Confirm whether this COPY is needed.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir psycopg2-binary langdetect
# Only the scheduler script is shipped into this image.
COPY workers/translation_scheduler.py ./workers/
# Default DB connection values; DB_PASS is a placeholder to override.
ENV DB_HOST=db
ENV DB_PORT=5432
ENV DB_NAME=rss
ENV DB_USER=rss
ENV DB_PASS=x
CMD ["python", "workers/translation_scheduler.py"]

32
Dockerfile.scraper Normal file
View file

@ -0,0 +1,32 @@
# Multi-stage build for the deep-scraper worker.
FROM golang:1.22-alpine AS builder
# Allow toolchain auto-upgrade when go.mod demands a newer Go.
ENV GOTOOLCHAIN=auto
# git is required for fetching VCS-hosted Go modules.
RUN apk add --no-cache git
WORKDIR /app
# Manifests first for a cacheable dependency layer.
COPY backend/go.mod backend/go.sum ./
RUN go mod download
COPY backend/ ./
# NOTE(review): `go mod tidy` at build time can rewrite go.mod/go.sum and
# hits the network, making builds non-reproducible. The sibling worker
# Dockerfiles omit it — prefer tidying locally and committing the result.
RUN go mod tidy
# Static binary (CGO off) so it runs on plain Alpine.
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/scraper ./cmd/scraper
# Minimal runtime stage.
FROM alpine:3.19
RUN apk add --no-cache ca-certificates tzdata
COPY --from=builder /bin/scraper /bin/scraper
# Connection + tuning defaults; DB_PASS is a placeholder to override.
ENV DB_HOST=db \
DB_PORT=5432 \
DB_NAME=rss \
DB_USER=rss \
DB_PASS=rss \
SCRAPER_SLEEP=60 \
SCRAPER_BATCH=10
ENTRYPOINT ["/bin/scraper"]

30
Dockerfile.topics Normal file
View file

@ -0,0 +1,30 @@
# Multi-stage build for the topics/country matcher worker.
FROM golang:1.22-alpine AS builder
# Allow toolchain auto-upgrade when go.mod demands a newer Go.
ENV GOTOOLCHAIN=auto
# git is required for fetching VCS-hosted Go modules.
RUN apk add --no-cache git
WORKDIR /app
# Manifests first for a cacheable dependency layer.
COPY backend/go.mod backend/go.sum ./
RUN go mod download
COPY backend/ ./
# Static binary (CGO off) so it runs on plain Alpine.
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/topics ./cmd/topics
# Minimal runtime stage.
FROM alpine:3.19
RUN apk add --no-cache ca-certificates tzdata
COPY --from=builder /bin/topics /bin/topics
# Connection + tuning defaults; DB_PASS is a placeholder to override.
ENV DB_HOST=db \
DB_PORT=5432 \
DB_NAME=rss \
DB_USER=rss \
DB_PASS=rss \
TOPICS_SLEEP=10 \
TOPICS_BATCH=500
ENTRYPOINT ["/bin/topics"]

43
Dockerfile.translator Normal file
View file

@ -0,0 +1,43 @@
# CPU translation worker: NLLB-200 served through CTranslate2.
FROM python:3.11-slim-bookworm
# patchelf is needed below to clear the executable-stack flag on the
# ctranslate2 shared libraries.
RUN apt-get update && apt-get install -y --no-install-recommends \
patchelf libpq-dev gcc git curl wget \
&& rm -rf /var/lib/apt/lists/*
ENV PYTHONUNBUFFERED=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1 \
TOKENIZERS_PARALLELISM=false \
HF_HOME=/root/.cache/huggingface
WORKDIR /app
# NOTE(review): requirements.txt is copied but not pip-installed — the
# explicit pinned list below is what actually gets installed. Confirm intent.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip
# CPU-only torch wheel keeps the image far smaller than the CUDA build.
RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir \
ctranslate2==3.24.0 \
sentencepiece \
transformers==4.36.0 \
protobuf==3.20.3 \
"numpy<2" \
psycopg2-binary \
langdetect
# === FIX THE EXECUTABLE STACK ===
# Clear the executable-stack ELF flag on libctranslate2 so it loads on
# hosts with hardened (noexec-stack) kernels; best-effort (`|| true`).
RUN find /usr/local/lib/python3.11/site-packages/ctranslate2* \
-name "libctranslate2-*.so.*" -o -name "libctranslate2.so*" | \
xargs -I {} patchelf --clear-execstack {} || true
COPY workers/ ./workers/
COPY init-db/ ./init-db/
COPY migrations/ ./migrations/
# Default DB connection; DB_PASS is a placeholder to override at runtime.
ENV DB_HOST=db
ENV DB_PORT=5432
ENV DB_NAME=rss
ENV DB_USER=rss
ENV DB_PASS=x
CMD ["python", "-m", "workers.ctranslator_worker"]

48
Dockerfile.translator-gpu Normal file
View file

@ -0,0 +1,48 @@
# GPU translation worker: same stack as Dockerfile.translator but with
# CUDA-enabled torch and CT2 configured for float16 on cuda.
FROM python:3.11-slim-bookworm
# patchelf is needed below to clear the executable-stack flag on the
# ctranslate2 shared libraries.
RUN apt-get update && apt-get install -y --no-install-recommends \
patchelf libpq-dev gcc git curl wget \
&& rm -rf /var/lib/apt/lists/*
ENV PYTHONUNBUFFERED=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1 \
TOKENIZERS_PARALLELISM=false \
HF_HOME=/root/.cache/huggingface
WORKDIR /app
# NOTE(review): requirements.txt is copied but not pip-installed — the
# explicit pinned list below is what actually gets installed. Confirm intent.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip
# Install PyTorch with CUDA support (cu118 for broader compatibility)
RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu118
RUN pip install --no-cache-dir \
ctranslate2==3.24.0 \
sentencepiece \
transformers==4.36.0 \
protobuf==3.20.3 \
"numpy<2" \
psycopg2-binary \
langdetect
# Fix executable stack
# (clears the noexec-stack ELF flag on libctranslate2; best-effort)
RUN find /usr/local/lib/python3.11/site-packages/ctranslate2* \
-name "libctranslate2-*.so.*" -o -name "libctranslate2.so*" | \
xargs -I {} patchelf --clear-execstack {} || true
COPY workers/ ./workers/
COPY init-db/ ./init-db/
COPY migrations/ ./migrations/
# Default DB connection; DB_PASS is a placeholder to override at runtime.
ENV DB_HOST=db
ENV DB_PORT=5432
ENV DB_NAME=rss
ENV DB_USER=rss
ENV DB_PASS=x
# GPU Configuration - Override with: docker run --gpus all
ENV CT2_DEVICE=cuda
ENV CT2_COMPUTE_TYPE=float16
CMD ["python", "-m", "workers.ctranslator_worker"]

View file

@ -1,29 +0,0 @@
# URL worker image: fetches article pages and extracts text via newspaper3k.
FROM python:3.10-slim
WORKDIR /app
# Install system dependencies for lxml and general build
RUN apt-get update && apt-get install -y \
gcc \
libxml2-dev \
libxslt-dev \
python3-dev \
&& rm -rf /var/lib/apt/lists/*
# Install python dependencies
# NOTE(review): unpinned versions — builds are not reproducible.
RUN pip install --no-cache-dir \
psycopg2-binary \
requests \
newspaper3k \
lxml_html_clean \
python-dotenv
# Copy application code
COPY . /app
# Set environment
ENV PYTHONPATH=/app
ENV PYTHONUNBUFFERED=1
# Run the worker daemon
CMD ["python", "-m", "workers.url_worker_daemon"]

31
Dockerfile.wiki Normal file
View file

@ -0,0 +1,31 @@
# Multi-stage build for the Wikipedia-enrichment worker.
# Fix: pin the Go base tag (was unpinned `golang:alpine`) to match every
# other worker Dockerfile in this repo and keep builds reproducible.
FROM golang:1.22-alpine AS builder
# Allow toolchain auto-upgrade when go.mod demands a newer Go.
ENV GOTOOLCHAIN=auto
# git is required for fetching VCS-hosted Go modules.
RUN apk add --no-cache git
WORKDIR /app
# Manifests first for a cacheable dependency layer.
COPY backend/go.mod backend/go.sum ./
RUN go mod download
COPY backend/ ./
# NOTE(review): `go mod tidy` can rewrite go.mod/go.sum at build time and
# hits the network; ideally tidy locally and commit the result instead.
RUN go mod tidy
# Static binary (CGO off) so it runs on plain Alpine.
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/wiki_worker ./cmd/wiki_worker
# Minimal runtime stage: certificates, tzdata and the static binary.
FROM alpine:3.19
RUN apk add --no-cache ca-certificates tzdata
COPY --from=builder /bin/wiki_worker /bin/wiki_worker
# Defaults only — override DB_PASS at runtime rather than baking secrets in.
ENV DB_HOST=db \
    DB_PORT=5432 \
    DB_NAME=rss \
    DB_USER=rss \
    DB_PASS=rss \
    WIKI_SLEEP=10
ENTRYPOINT ["/bin/wiki_worker"]

89
Makefile Normal file
View file

@ -0,0 +1,89 @@
# RSS2 Workers Makefile
#
# Fixes in this revision:
#  * Worker/server build targets previously ran `cd backend && go build -o $(X)`,
#    which wrote binaries under backend/bin/, while the run-* targets execute
#    $(X) relative to the repo root (bin/). Outputs now use ../$(X) so build
#    and run paths agree (matching the existing `ingestor` target's pattern).
#  * docker-build previously passed `./backend` as the build context to the
#    root-level worker Dockerfiles, but those Dockerfiles COPY `backend/...`
#    paths and therefore require the repo root (`.`) as context.
.PHONY: all build clean deps ingestor scraper discovery topics related qdrant server
# Binary output directory
BIN_DIR := bin
# Main binaries
SERVER := $(BIN_DIR)/server
INGESTOR := $(BIN_DIR)/rss-ingestor
SCRAPER := $(BIN_DIR)/scraper
DISCOVERY := $(BIN_DIR)/discovery
TOPICS := $(BIN_DIR)/topics
RELATED := $(BIN_DIR)/related
QDRANT := $(BIN_DIR)/qdrant-worker

all: deps build

deps:
	cd backend && go mod download
	cd backend && go mod tidy

# Build all workers
build: ingestor scraper discovery topics related qdrant server

# Ingestor (its own module; output path is relative to the repo root)
ingestor:
	cd rss-ingestor-go && go build -o ../$(INGESTOR) .

# Server
server:
	cd backend && go build -o ../$(SERVER) ./cmd/server

# Workers
scraper:
	cd backend && go build -o ../$(SCRAPER) ./cmd/scraper
discovery:
	cd backend && go build -o ../$(DISCOVERY) ./cmd/discovery
topics:
	cd backend && go build -o ../$(TOPICS) ./cmd/topics
related:
	cd backend && go build -o ../$(RELATED) ./cmd/related
qdrant:
	cd backend && go build -o ../$(QDRANT) ./cmd/qdrant

# Clean
clean:
	rm -rf $(BIN_DIR)
	cd backend && go clean

# Run workers locally (requires DB and services)
run-scraper:
	DB_HOST=localhost DB_PORT=5432 DB_NAME=rss DB_USER=rss DB_PASS=rss $(SCRAPER)
run-discovery:
	DB_HOST=localhost DB_PORT=5432 DB_NAME=rss DB_USER=rss DB_PASS=rss $(DISCOVERY)
run-topics:
	DB_HOST=localhost DB_PORT=5432 DB_NAME=rss DB_USER=rss DB_PASS=rss $(TOPICS)
run-related:
	DB_HOST=localhost DB_PORT=5432 DB_NAME=rss DB_USER=rss DB_PASS=rss RELATED_SLEEP=10 $(RELATED)
run-qdrant:
	DB_HOST=localhost DB_PORT=5432 DB_NAME=rss DB_USER=rss DB_PASS=rss \
	QDRANT_HOST=localhost QDRANT_PORT=6333 OLLAMA_URL=http://localhost:11434 $(QDRANT)

# Docker builds (root context for Dockerfiles that COPY backend/... paths)
docker-build:
	docker build -t rss2-ingestor -f rss-ingestor-go/Dockerfile ./rss-ingestor-go
	docker build -t rss2-server -f backend/Dockerfile ./backend
	docker build -t rss2-scraper -f Dockerfile.scraper .
	docker build -t rss2-discovery -f Dockerfile.discovery .
	docker build -t rss2-topics -f Dockerfile.topics .
	docker build -t rss2-related -f Dockerfile.related .
	docker build -t rss2-qdrant -f Dockerfile.qdrant .
	docker build -t rss2-langdetect -f Dockerfile .
	docker build -t rss2-scheduler -f Dockerfile.scheduler .
	docker build -t rss2-translator -f Dockerfile.translator .
	docker build -t rss2-translator-gpu -f Dockerfile.translator-gpu .
	docker build -t rss2-embeddings -f Dockerfile.embeddings_worker .
	docker build -t rss2-ner -f Dockerfile .
	docker build -t rss2-llm-categorizer -f Dockerfile.llm_worker .
	docker build -t rss2-frontend -f frontend/Dockerfile ./frontend
	docker build -t rss2-nginx -f Dockerfile.nginx .

175
README.md
View file

@ -1,135 +1,124 @@
# RSS2 - Plataforma de Inteligencia de Noticias con IA 🚀
# RSS2 - AI-Powered News Intelligence Platform
RSS2 es una plataforma avanzada de agregación, traducción, análisis y vectorización de noticias diseñada para transformar flujos masivos de información en inteligencia accionable. Utiliza una arquitectura de **microservicios híbrida (Go + Python)** con modelos de **Inteligencia Artificial** de vanguardia para ofrecer búsqueda semántica, clasificación inteligente y automatización de contenidos.
RSS2 es una plataforma avanzada de agregación, traducción, análisis y vectorización de noticias, diseñada para transformar flujos masivos de información en inteligencia accionable. Utiliza una arquitectura híbrida de microservicios (Go + Python) integrada con modelos de inteligencia artificial de última generación para ofrecer búsqueda semántica, clasificación inteligente y automatización de contenidos.
---
## ✨ Características Principales
## 🚀 Capacidades Principales
* 🤖 **Categorización Inteligente (LLM)**: Clasificación de noticias mediante **Mistral-7B** local (ExLlamaV2/GPTQ), procesando lotes de alta velocidad.
* 🔍 **Búsqueda Semántica**: Motor vectorial **Qdrant** para encontrar noticias por contexto y significado, no solo por palabras clave.
* 🌍 **Traducción Neuronal de Alta Calidad**: Integración con **NLLB-200** para traducir noticias de múltiples idiomas al español con validación post-proceso para evitar repeticiones.
* 📊 **Inteligencia de Entidades**: Extracción automática y normalización de Personas, Organizaciones y Lugares para análisis de tendencias.
* 📺 **Automatización de Video**: Generación automática de noticias en formato video y gestión de "parrillas" de programación.
* 📄 **Exportación Inteligente**: Generación de informes en **PDF** con diseño profesional y limpieza de ruido de red.
* 🔔 **Notificaciones en Tiempo Real**: API de monitoreo para detectar eventos importantes al instante.
* ⭐ **Gestión de Favoritos**: Sistema robusto para guardar y organizar noticias, compatible con usuarios y sesiones temporales.
* **Enriquecimiento con Wikipedia**: Sistema automatizado que detecta personas y organizaciones, descarga sus biografías e imágenes oficiales de Wikipedia para mostrarlas en tooltips interactivos con avatares circulares.
* **Categorización Inteligente (LLM)**: Clasificación de noticias mediante una instancia local de Mistral-7B / Llama-3 (vía Ollama), procesando contenido en tiempo real.
* **Búsqueda Semántica**: Motor vectorial Qdrant para descubrir noticias por contexto y significado, yendo más allá de las palabras clave tradicionales.
* **Traducción Neuronal de Alta Calidad**: Integración de NLLB-200 (vía CTranslate2) para traducir noticias de múltiples idiomas al español con precisión profesional.
* **Inteligencia de Entidades (NER)**: Extracción y normalización automática de Personas, Organizaciones y Lugares para análisis de tendencias y mapeo de relaciones.
* **Búsqueda de Noticias Relacionadas**: Algoritmos de similitud que agrupan noticias sobre el mismo tema automáticamente.
---
## 🏗️ Arquitectura de Servicios (Docker)
El sistema está orquestado mediante Docker Compose, garantizando aislamiento y escalabilidad.
El sistema se orquesta mediante Docker Compose y se divide en capas especializadas:
### 🌐 Core & Acceso (Frontend)
### Capa de Acceso y API
| Servicio | Tecnología | Descripción |
|----------|------------|-------------|
|---------|------------|-------------|
| **`nginx`** | Nginx Alpine | Gateway y Proxy Inverso (Puerto **8001**). |
| **`rss2_web`** | Flask + Gunicorn | API principal e Interfaz Web de usuario. |
| **`rss2_frontend`** | React + Vite | Interfaz web de usuario moderna y responsiva. |
| **`backend-go`** | Go + Gin | API REST principal y gestión de lógica de negocio. |
### 📥 Ingesta y Descubrimiento (Backend)
### Ingesta y Descubrimiento (Go)
| Servicio | Tecnología | Descripción |
|----------|------------|-------------|
| **`rss-ingestor-go`** | **Go** | Crawler de ultra-alto rendimiento (Cientos de feeds/min). |
| **`url-worker`** | Python | Scraper profundo con limpieza de HTML via `newspaper3k`. |
| **`url-discovery`** | Python | Agente autónomo para el descubrimiento de nuevos feeds. |
|---------|------------|-------------|
| **`rss-ingestor-go`** | Go | Crawler de alto rendimiento para feeds RSS. |
| **`scraper`** | Go | Scraper profundo con sanitización de HTML y extracción de texto. |
| **`discovery`** | Go | Agente autónomo para descubrir nuevos feeds a partir de URLs. |
### 🧠 Procesamiento de IA (Background Workers)
| Servicio | Modelo / Función | Descripción |
|----------|-------------------|-------------|
| **`llm-categorizer`** | **Mistral-7B** | Categorización contextual avanzada (15 categorías). |
| **`translator`** (x3) | **NLLB-200** | Traducción neural masiva escalada horizontalmente. |
| **`embeddings`** | **S-Transformers** | Conversión de texto a vectores para búsqueda semántica. |
| **`ner`** | **Spacy/BERT** | Extracción de entidades (Personas, Lugares, Orgs). |
| **`cluster` & `related`**| Algoritmos Propios | Agrupación de eventos y detección de noticias relacionadas. |
### Procesamiento de Datos e IA (Go & Python)
| Servicio | Tecnología | Descripción |
|---------|------------|-------------|
| **`translator`** | NLLB-200 (CPU) | Traducción neuronal optimizada con CTranslate2. |
| **`translator-gpu`**| NLLB-200 (GPU) | Traducción acelerada por hardware (CUDA). |
| **`wiki-worker`** | Go | **[NUEVO]** Integración con Wikipedia y gestión de imágenes locales. |
| **`embeddings`** | S-Transformers | Generación de vectores para búsqueda semántica. |
| **`ner`** | Spacy / BERT | Reconocimiento de entidades nombradas (NER). |
| **`llm-categorizer`**| Ollama / Mistral | Clasificación avanzada mediante modelos de lenguaje. |
| **`topics`** | Go | Matcher automático de países y temas predefinidos. |
| **`related`** | Go | Motor de detección de noticias relacionadas. |
### 💾 Almacenamiento y Datos
| Servicio | Rol | Descripción |
|----------|-----|-------------|
| **`db`** | **PostgreSQL 18** | Almacenamiento relacional principal y metadatos. |
| **`qdrant`** | **Vector DB** | Motor de búsqueda por similitud de alta velocidad. |
| **`redis`** | **Redis 7** | Gestión de colas de tareas (Celery-style) y caché. |
### Capa de Almacenamiento
| Servicio | Tecnología | Descripción |
|---------|------------|-------------|
| **`db`** | PostgreSQL 18 | Base de datos relacional principal. |
| **`qdrant`** | Qdrant | Base de datos vectorial para búsqueda por similitud. |
| **`redis`** | Redis 7 | Colas de mensajes y caché de alto desempeño. |
---
## 🚀 Guía de Inicio Rápido
## ⚙️ Guía de Configuración
### 1. Preparación
### 1. Requisitos de Hardware
* **Modo Básico (CPU)**: 4+ Cores CPU, 8GB RAM.
* **Modo Avanzado (IA)**: NVIDIA GPU con 8GB+ VRAM (mínimo recomendado para LLM y Traducción GPU).
### 2. Instalación Rápida
```bash
git clone <repo>
git clone <repo_url>
cd rss2
./generate_secure_credentials.sh # Genera .env seguro y contraseñas robustas
cp .env.example .env
# Edita .env con tus credenciales
docker compose up -d
```
### 2. Configuración de Modelos (IA)
Para activar la categorización inteligente y traducción, descarga los modelos:
```bash
./scripts/download_llm_model.sh # Recomendado: Mistral-7B GPTQ
python3 scripts/download_models.py # Modelos NLLB y Embeddings
```
### 3. Escalado de Workers (¡Importante!)
Para aumentar la velocidad de procesamiento (especialmente la traducción), puedes escalar los workers:
### 3. Arranque del Sistema
```bash
./start_docker.sh # Script de inicio con verificación de dependencias
# Ejecutar 4 traductores en paralelo
docker compose up -d --scale translator=4
# Si usas GPU y tienes capacidad
docker compose up -d --scale translator-gpu=2
```
---
## 📖 Documentación Especializada
## 🛡️ Administración y Mantenimiento
Consulte nuestras guías detalladas para configuraciones específicas:
### Copias de Seguridad (Backups)
Desde el panel de Administración (`/admin/settings`), puedes realizar:
* **Backup Completo**: Volcado SQL de toda la base de datos.
* **Backup de Noticias (ZIP)**: **[NUEVO]** Genera un archivo comprimido que incluye las tablas de noticias, traducciones y todas sus etiquetas. Ideal para migraciones de contenido.
* 📘 **[QUICKSTART_LLM.md](QUICKSTART_LLM.md)**: Guía rápida para el categorizador Mistral-7B.
* 🚀 **[DEPLOY.md](DEPLOY.md)**: Guía detallada de despliegue en nuevos servidores.
* 📊 **[TRANSLATION_FIX_SUMMARY.md](TRANSLATION_FIX_SUMMARY.md)**: Resumen de mejoras en calidad de traducción.
* 🛡️ **[SECURITY_GUIDE.md](SECURITY_GUIDE.md)**: Manual avanzado de seguridad y endurecimiento.
* 🏗️ **[QDRANT_SETUP.md](QDRANT_SETUP.md)**: Configuración y migración de la base de datos vectorial.
* 📑 **[FUNCIONES_DE_ARCHIVOS.md](FUNCIONES_DE_ARCHIVOS.md)**: Inventario detallado de la lógica del proyecto.
### Variables de Entorno Clave (`.env`)
| Variable | Descripción |
|----------|-------------|
| `WIKI_SLEEP` | Tiempo de espera entre peticiones a Wikipedia (evita bloqueos). |
| `SCHEDULER_BATCH`| Cantidad de noticias a enviar a traducir por ciclo. |
| `TARGET_LANGS` | Idiomas destino (ej: `es`). |
| `OLLAMA_HOST` | Dirección del servidor Ollama para categorización. |
---
## 💻 Requisitos de Hardware
## 📖 Documentación de la API (Campos Wikipedia)
Para un rendimiento óptimo, se recomienda:
* **GPU**: NVIDIA (mínimo 12GB VRAM para Mistral-7B y traducción simultánea).
* **Drivers**: NVIDIA Container Toolkit instalado.
* **AllTalk TTS**: Instancia activa (puerto 7851) para la generación de audio en videos.
Las respuestas de noticias ahora incluyen el objeto `entities` enriquecido:
---
## 🔧 Operaciones y Mantenimiento
### Verificación de Calidad de Traducción
El sistema incluye herramientas para asegurar la calidad de los datos:
```bash
# Monitorear calidad en tiempo real
docker exec rss2_web python3 scripts/monitor_translation_quality.py --watch
# Limpiar automáticamente traducciones defectuosas
docker exec rss2_web python3 scripts/clean_repetitive_translations.py
```
### Gestión de Contenidos
```bash
# Generar videos de noticias destacadas
docker exec rss2_web python3 scripts/generar_videos_noticias.py
# Iniciar migración a Qdrant (Vectores)
docker exec rss2_web python3 scripts/migrate_to_qdrant.py
```
### Diagnóstico de Ingesta (Feeds)
```bash
docker exec rss2_web python3 scripts/diagnose_rss.py --url <FEED_URL>
```json
{
"id": 67449,
"titulo": "...",
"entities": [
{
"valor": "Apple",
"tipo": "organizacion",
"wiki_summary": "Apple Inc. es una empresa estadounidense...",
"wiki_url": "https://es.wikipedia.org/wiki/Apple",
"image_path": "/api/wiki-images/wiki_5723.png"
}
]
}
```
---
## 📊 Observabilidad
Acceso a métricas de rendimiento (Solo vía Localhost/Tunel):
* **Grafana**: [http://localhost:3001](http://localhost:3001) (Admin/Pass en `.env`)
* **Proxy Nginx**: [http://localhost:8001](http://localhost:8001)
---
**RSS2** - *Transformando noticias en inteligencia con IA Local.*
**RSS2** - *Transformando noticias en inteligencia con IA localizada.*

61
app.py
View file

@ -1,61 +0,0 @@
from flask import Flask
from config import SECRET_KEY
from utils import safe_html, format_date, country_flag
from routers.home import home_bp
from routers.feeds import feeds_bp
from routers.urls import urls_bp
from routers.noticia import noticia_bp
from routers.backup import backup_bp
from routers.config import config_bp
from routers.favoritos import favoritos_bp
from routers.search import search_bp
from routers.rss import rss_bp
from routers.stats import stats_bp
from routers.pdf import pdf_bp
from routers.notifications import notifications_bp
from routers.auth import auth_bp
from routers.account import account_bp
from routers.parrillas import parrillas_bp
def create_app() -> Flask:
    """Build and configure the Flask application.

    Installs the Jinja filters used by the templates and registers every
    router blueprint, preserving the original registration order.

    Returns:
        The fully wired Flask application instance.
    """
    flask_app = Flask(__name__)
    flask_app.config["SECRET_KEY"] = SECRET_KEY

    # Template filters used throughout the views.
    for filter_name, filter_fn in (
        ("safe_html", safe_html),
        ("format_date", format_date),
        ("country_flag", country_flag),
    ):
        flask_app.jinja_env.filters[filter_name] = filter_fn

    # Blueprints available at module import time.
    for blueprint in (
        home_bp,
        feeds_bp,
        urls_bp,
        noticia_bp,
        backup_bp,
        config_bp,
        favoritos_bp,
        search_bp,
        rss_bp,
        stats_bp,
        pdf_bp,
        notifications_bp,
    ):
        flask_app.register_blueprint(blueprint)

    # These two routers are imported inside the factory — presumably to
    # avoid a circular import at module load; confirm before hoisting.
    from routers.conflicts import conflicts_bp
    from routers.topics import topics_bp

    for blueprint in (conflicts_bp, topics_bp, auth_bp, account_bp, parrillas_bp):
        flask_app.register_blueprint(blueprint)

    return flask_app


app = create_app()

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8001, debug=True)

24
backend/Dockerfile Normal file
View file

@ -0,0 +1,24 @@
# --- Build stage --------------------------------------------------------
FROM golang:1.23 AS builder
WORKDIR /app
# Fix: add --no-install-recommends and clean the apt lists in the same
# layer (DL3009/DL3015) — keeps the builder layer lean and cache-friendly.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc \
        git \
        musl-dev \
    && rm -rf /var/lib/apt/lists/*
# Module manifests first so the dependency layer survives source edits.
COPY go.mod go.sum ./
RUN go mod download
COPY . .
# CGO disabled -> fully static binary, runnable on plain Alpine.
RUN CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o /server ./cmd/server

# --- Runtime stage ------------------------------------------------------
FROM alpine:3.19
# postgresql-client is included for ad-hoc DB inspection/migrations.
RUN apk add --no-cache ca-certificates tzdata postgresql-client
WORKDIR /app
COPY --from=builder /server .
# Documentation only: the API listens on 8080.
EXPOSE 8080
CMD ["./server"]

View file

@ -0,0 +1,468 @@
package main
import (
"context"
"fmt"
"log"
"net/http"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"time"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/mmcdole/gofeed"
"github.com/rss2/backend/internal/workers"
)
// Package-level state shared by the discovery worker loop.
var (
// Structured logger; initialised in init() with the [DISCOVERY] prefix.
logger *log.Logger
// NOTE(review): `pool` is declared but not visibly used in this chunk —
// possibly dead, or used by code outside this view; confirm before removing.
pool *workers.Config
// PostgreSQL connection pool used by all queries in this file.
dbPool *pgxpool.Pool
// Seconds between discovery cycles (env DISCOVERY_INTERVAL, default 900).
sleepSec = 900 // 15 minutes
// Max URL sources processed per cycle (env DISCOVERY_BATCH, default 10).
batchSize = 10
)
// URLSource mirrors one row of the fuentes_url table: a page that may
// host one or more RSS/Atom feeds to be discovered.
type URLSource struct {
ID int64
Nombre string
URL string
// Optional metadata columns; SQL NULLs scan into nil pointers.
CategoriaID *int64
PaisID *int64
Idioma *string
}
// init wires the package logger (stdout, "[DISCOVERY]" prefix) before main runs.
func init() {
logger = log.New(os.Stdout, "[DISCOVERY] ", log.LstdFlags)
}
// loadConfig refreshes the loop tuning knobs from the environment,
// falling back to the compiled-in defaults when unset or non-numeric.
func loadConfig() {
sleepSec = getEnvInt("DISCOVERY_INTERVAL", 900)
batchSize = getEnvInt("DISCOVERY_BATCH", 10)
}
func getEnvInt(key string, defaultValue int) int {
if value := os.Getenv(key); value != "" {
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
}
return defaultValue
}
// getPendingURLs fetches up to batchSize active URL sources to inspect,
// prioritising never-checked sources, then previously-errored ones, then
// sources that yielded no feeds, and finally everything else oldest-first.
// Fix: surface iteration failures via rows.Err() — previously a dropped
// connection mid-iteration returned a silently truncated slice as success.
func getPendingURLs(ctx context.Context) ([]URLSource, error) {
	rows, err := dbPool.Query(ctx, `
SELECT id, nombre, url, categoria_id, pais_id, idioma
FROM fuentes_url
WHERE active = TRUE
ORDER BY
CASE
WHEN last_check IS NULL THEN 1
WHEN last_status = 'error' THEN 2
WHEN last_status = 'no_feeds' THEN 3
ELSE 4
END,
last_check ASC NULLS FIRST
LIMIT $1
`, batchSize)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var sources []URLSource
	for rows.Next() {
		var s URLSource
		if err := rows.Scan(&s.ID, &s.Nombre, &s.URL, &s.CategoriaID, &s.PaisID, &s.Idioma); err != nil {
			// Skip malformed rows rather than aborting the whole batch.
			continue
		}
		sources = append(sources, s)
	}
	// A failed iteration must not masquerade as a successful (short) batch.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return sources, nil
}
// updateURLStatus records the outcome of checking a URL source: the check
// timestamp, a status keyword (observed values elsewhere in this file:
// "error", "no_feeds"), a human-readable message and the last HTTP code.
func updateURLStatus(ctx context.Context, urlID int64, status, message string, httpCode int) error {
_, err := dbPool.Exec(ctx, `
UPDATE fuentes_url
SET last_check = NOW(),
last_status = $1,
status_message = $2,
last_http_code = $3
WHERE id = $4
`, status, message, httpCode, urlID)
return err
}
// discoverFeeds checks whether pageURL is itself a valid, non-empty feed;
// if not, it falls back to scanning the page for feed links.
// Fix: reject HTTP error responses (status >= 400) explicitly — previously
// a 404/500 error page was handed to the feed parser, and its parse
// failure silently fell through to the HTML link finder.
func discoverFeeds(pageURL string) ([]string, error) {
	client := &http.Client{
		Timeout: 15 * time.Second,
	}
	req, err := http.NewRequest("GET", pageURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; RSS2Bot/1.0)")
	req.Header.Set("Accept", "application/rss+xml, application/atom+xml, application/xml, text/xml, text/html")
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode >= 400 {
		return nil, fmt.Errorf("GET %s: unexpected status %d", pageURL, resp.StatusCode)
	}
	// Try to parse the body as a feed first.
	parser := gofeed.NewParser()
	feed, err := parser.Parse(resp.Body)
	if err == nil && feed != nil && len(feed.Items) > 0 {
		// The URL itself is a valid feed with at least one entry.
		return []string{pageURL}, nil
	}
	// Otherwise look for feed references inside the HTML page.
	return findFeedLinksInHTML(pageURL)
}
// findFeedLinksInHTML is a stub: it should fetch baseURL and scan the HTML
// for <link rel="alternate"> style RSS/Atom references, but currently it
// always returns an empty slice — so discoverFeeds only recognises URLs
// that are themselves feeds.
func findFeedLinksInHTML(baseURL string) ([]string, error) {
// Simple feed link finder - returns empty for now
// In production, use goquery to parse HTML and find RSS/Atom links
return []string{}, nil
}
// parseFeed downloads feedURL with a 30s timeout and a browser-like
// User-Agent, then parses the body as RSS/Atom via gofeed.
// NOTE(review): the HTTP status code is not checked here — an error page
// body is handed to the parser and surfaces only as a parse error.
func parseFeed(feedURL string) (*gofeed.Feed, error) {
client := &http.Client{
Timeout: 30 * time.Second,
}
req, err := http.NewRequest("GET", feedURL, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; RSS2Bot/1.0)")
req.Header.Set("Accept", "application/rss+xml, application/atom+xml, application/xml, text/xml")
resp, err := client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
parser := gofeed.NewParser()
return parser.Parse(resp.Body)
}
// getFeedMetadata downloads feedURL (via parseFeed) and extracts the feed's
// title, description, declared language and entry count. The title falls
// back to "Feed sin título" when absent; the description is capped at 500
// characters.
// Fix: truncate the description on a rune boundary — the previous byte
// slice (description[:500]) could split a multi-byte UTF-8 character,
// producing an invalid string tail.
func getFeedMetadata(feedURL string) (title, description, language string, entryCount int, err error) {
	feed, err := parseFeed(feedURL)
	if err != nil {
		return "", "", "", 0, err
	}
	title = feed.Title
	if title == "" {
		title = "Feed sin título"
	}
	description = feed.Description
	if runes := []rune(description); len(runes) > 500 {
		description = string(runes[:500])
	}
	language = feed.Language
	entryCount = len(feed.Items)
	return title, description, language, entryCount, nil
}
// analyzeFeed guesses a country (and, eventually, a category) for a feed
// from case-insensitive keyword hits in its title and description.
// Returns empty strings when nothing matches; the url parameter is
// currently unused. Heuristic only — a production system would use a
// classifier or an external API.
func analyzeFeed(title, url, description string) (country, category string) {
	haystack := strings.ToLower(title) + " " + strings.ToLower(description)
	// Keyword lists per country; first match wins (map order is random,
	// so inputs matching several countries resolve non-deterministically).
	keywordsByCountry := map[string][]string{
		"España":      {"españa", "español", "madrid", "barcelona"},
		"Argentina":   {"argentino", "buenos aires"},
		"México":      {"méxico", "mexicano", "cdmx", "ciudad de méxico"},
		"Colombia":    {"colombiano", "bogotá"},
		"Chile":       {"chileno", "santiago"},
		"Perú":        {"peruano", "lima"},
		"EE.UU.":      {"estados unidos", "washington", "trump", "biden"},
		"Reino Unido": {"reino unido", "londres", "uk"},
		"Francia":     {"francia", "parís"},
		"Alemania":    {"alemania", "berlín"},
	}
	for name, keywords := range keywordsByCountry {
		for _, keyword := range keywords {
			if strings.Contains(haystack, keyword) {
				return name, ""
			}
		}
	}
	return "", ""
}
// getCountryIDByName resolves a country name (case-insensitive) to its id
// in the paises table; the driver error (e.g. no rows) is returned when the
// name is unknown.
func getCountryIDByName(ctx context.Context, countryName string) (*int64, error) {
	var countryID int64
	row := dbPool.QueryRow(ctx, "SELECT id FROM paises WHERE LOWER(nombre) = LOWER($1)", countryName)
	if err := row.Scan(&countryID); err != nil {
		return nil, err
	}
	return &countryID, nil
}
// getCategoryIDByName resolves a category name (case-insensitive) to its id
// in the categorias table; the driver error is returned when not found.
func getCategoryIDByName(ctx context.Context, categoryName string) (*int64, error) {
	var categoryID int64
	row := dbPool.QueryRow(ctx, "SELECT id FROM categorias WHERE LOWER(nombre) = LOWER($1)", categoryName)
	if err := row.Scan(&categoryID); err != nil {
		return nil, err
	}
	return &categoryID, nil
}
// createPendingFeed inserts (or refreshes) a feeds_pending row so an admin
// can review a discovered feed before it goes live. Values are read from
// metadata defensively; missing or mistyped entries fall back to zero values.
func createPendingFeed(ctx context.Context, fuenteURLID int64, feedURL string, metadata map[string]interface{}) error {
	// Comma-ok assertion: the previous unchecked metadata["title"].(string)
	// panicked when the key was absent or held a non-string value.
	feedTitle := ""
	if t, ok := metadata["title"].(string); ok {
		feedTitle = t
	}
	if feedTitle == "" {
		feedTitle = "Feed sin título"
	}
	description := ""
	if d, ok := metadata["description"].(string); ok {
		description = d
	}
	language := ""
	if l, ok := metadata["language"].(string); ok {
		language = l
	}
	entryCount := 0
	if c, ok := metadata["entry_count"].(int); ok {
		entryCount = c
	}
	detectedCountry := ""
	if dc, ok := metadata["detected_country"].(string); ok {
		detectedCountry = dc
	}
	// Best-effort FK lookups: a failed resolution just leaves the column NULL.
	var detectedCountryID *int64
	if detectedCountry != "" {
		if cid, err := getCountryIDByName(ctx, detectedCountry); err == nil {
			detectedCountryID = cid
		}
	}
	suggestedCategory := ""
	if sc, ok := metadata["suggested_category"].(string); ok {
		suggestedCategory = sc
	}
	var suggestedCategoryID *int64
	if suggestedCategory != "" {
		if caid, err := getCategoryIDByName(ctx, suggestedCategory); err == nil {
			suggestedCategoryID = caid
		}
	}
	_, err := dbPool.Exec(ctx, `
	INSERT INTO feeds_pending (
		fuente_url_id, feed_url, feed_title, feed_description,
		feed_language, feed_type, entry_count,
		detected_country_id, suggested_categoria_id,
		discovered_at
	)
	VALUES ($1, $2, $3, $4, $5, 'rss', $6, $7, $8, NOW())
	ON CONFLICT (feed_url) DO UPDATE
	SET feed_title = EXCLUDED.feed_title,
		discovered_at = NOW()
	`, fuenteURLID, feedURL, feedTitle, description, language, entryCount, detectedCountryID, suggestedCategoryID)
	return err
}
// createFeedDirectly inserts an active feed row from live feed metadata,
// falling back to the source's configured language when the feed declares
// none. Returns true when a new row was created, false when the URL already
// existed.
func createFeedDirectly(ctx context.Context, feedURL string, fuenteURLID *int64, categoriaID, paisID *int64, idioma *string) (bool, error) {
	title, description, language, _, err := getFeedMetadata(feedURL)
	if err != nil {
		return false, err
	}
	if language == "" && idioma != nil {
		language = *idioma
	}
	// Exec + RowsAffected instead of RETURNING id: with ON CONFLICT DO
	// NOTHING, RETURNING yields no row on conflict, so the previous Scan
	// failed with a no-rows error and "feed already exists" was reported as
	// an error instead of (false, nil).
	tag, err := dbPool.Exec(ctx, `
	INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma, fuente_url_id, activo)
	VALUES ($1, $2, $3, $4, $5, $6, $7, TRUE)
	ON CONFLICT (url) DO NOTHING
	`, title, description, feedURL, categoriaID, paisID, language, fuenteURLID)
	if err != nil {
		return false, err
	}
	return tag.RowsAffected() > 0, nil
}
// processURLSource discovers feeds at one URL source, then either creates
// feeds directly (when the source already has category AND country assigned)
// or queues them in feeds_pending for review, and finally records an
// aggregate status on the source row.
func processURLSource(ctx context.Context, source URLSource) {
	// truncate bounds status messages to the column width; the previous
	// err.Error()[:200] sliced unconditionally and panicked whenever the
	// message was shorter than 200 bytes.
	truncate := func(s string, n int) string {
		if len(s) > n {
			return s[:n]
		}
		return s
	}
	logger.Printf("Processing: %s (%s)", source.Nombre, source.URL)
	feeds, err := discoverFeeds(source.URL)
	if err != nil {
		logger.Printf("Error discovering feeds: %v", err)
		updateURLStatus(ctx, source.ID, "error", truncate(err.Error(), 200), 0)
		return
	}
	if len(feeds) == 0 {
		logger.Printf("No feeds found for: %s", source.URL)
		updateURLStatus(ctx, source.ID, "no_feeds", "No feeds found", 200)
		return
	}
	logger.Printf("Found %d feeds for %s", len(feeds), source.URL)
	maxFeeds := getEnvInt("MAX_FEEDS_PER_URL", 5)
	if len(feeds) > maxFeeds {
		feeds = feeds[:maxFeeds]
	}
	// Sources with both FKs preassigned skip manual review.
	autoApprove := source.CategoriaID != nil && source.PaisID != nil
	created := 0
	pending := 0
	existing := 0
	errors := 0
	for _, feedURL := range feeds {
		title, description, language, entryCount, err := getFeedMetadata(feedURL)
		if err != nil {
			logger.Printf("Error parsing feed %s: %v", feedURL, err)
			errors++
			continue
		}
		detectedCountry, suggestedCategory := analyzeFeed(title, feedURL, description)
		metadata := map[string]interface{}{
			"title":              title,
			"description":        description,
			"language":           language,
			"entry_count":        entryCount,
			"detected_country":   detectedCountry,
			"suggested_category": suggestedCategory,
		}
		if !autoApprove {
			if err := createPendingFeed(ctx, source.ID, feedURL, metadata); err != nil {
				logger.Printf("Error creating pending feed: %v", err)
				errors++
			} else {
				pending++
			}
		} else {
			createdFeed, err := createFeedDirectly(ctx, feedURL, &source.ID, source.CategoriaID, source.PaisID, source.Idioma)
			if err != nil {
				logger.Printf("Error creating feed: %v", err)
				errors++
			} else if createdFeed {
				created++
			} else {
				existing++
			}
		}
		time.Sleep(1 * time.Second) // Rate limiting between feed fetches
	}
	// Summarize the per-feed outcomes onto the source row.
	var status string
	var message string
	if created > 0 || pending > 0 {
		status = "success"
		parts := []string{}
		if created > 0 {
			parts = append(parts, fmt.Sprintf("%d created", created))
		}
		if pending > 0 {
			parts = append(parts, fmt.Sprintf("%d pending", pending))
		}
		message = strings.Join(parts, ", ")
	} else if existing > 0 {
		status = "existing"
		message = fmt.Sprintf("%d already existed", existing)
	} else {
		status = "error"
		message = fmt.Sprintf("%d errors", errors)
	}
	updateURLStatus(ctx, source.ID, status, message, 200)
	logger.Printf("Processed %s: created=%d, pending=%d, existing=%d, errors=%d",
		source.URL, created, pending, existing, errors)
}
// main runs the RSS discovery worker: connect to Postgres, then every
// sleepSec seconds fetch pending URL sources and attempt feed discovery on
// each, pausing between sources to avoid hammering remote hosts.
func main() {
loadConfig()
logger.Println("Starting RSS Discovery Worker")
cfg := workers.LoadDBConfig()
if err := workers.Connect(cfg); err != nil {
logger.Fatalf("Failed to connect to database: %v", err)
}
dbPool = workers.GetPool()
// NOTE: os.Exit in the signal goroutine below bypasses deferred calls.
defer workers.Close()
logger.Println("Connected to PostgreSQL")
ctx := context.Background()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
go func() {
<-sigChan
logger.Println("Shutting down...")
os.Exit(0)
}()
logger.Printf("Config: interval=%ds, batch=%d", sleepSec, batchSize)
ticker := time.NewTicker(time.Duration(sleepSec) * time.Second)
defer ticker.Stop()
for {
select {
case <-ticker.C:
sources, err := getPendingURLs(ctx)
if err != nil {
logger.Printf("Error fetching URLs: %v", err)
continue
}
if len(sources) == 0 {
logger.Println("No pending URLs to process")
continue
}
logger.Printf("Processing %d sources", len(sources))
for _, source := range sources {
processURLSource(ctx, source)
// Per-source rate limiting.
time.Sleep(2 * time.Second)
}
}
}
}

391
backend/cmd/qdrant/main.go Normal file
View file

@ -0,0 +1,391 @@
package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"os/signal"
"strconv"
"syscall"
"time"
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rss2/backend/internal/workers"
)
// Package-level state for the vectorization worker, populated by init and
// loadConfig.
var (
logger *log.Logger
dbPool *pgxpool.Pool
qdrantURL string // base URL of the Qdrant HTTP API (http://host:port)
ollamaURL string // base URL of the Ollama HTTP API
collection = "news_vectors" // Qdrant collection name (QDRANT_COLLECTION)
sleepSec = 30 // seconds between processing cycles (QDRANT_SLEEP)
batchSize = 100 // max translations fetched per cycle (QDRANT_BATCH)
)
// init wires the worker's prefixed logger before main runs.
func init() {
logger = log.New(os.Stdout, "[QDRANT] ", log.LstdFlags)
}
// loadConfig populates the package-level configuration from environment
// variables, falling back to compiled-in defaults.
func loadConfig() {
	sleepSec = getEnvInt("QDRANT_SLEEP", 30)
	batchSize = getEnvInt("QDRANT_BATCH", 100)
	ollamaURL = getEnv("OLLAMA_URL", "http://ollama:11434")
	collection = getEnv("QDRANT_COLLECTION", "news_vectors")
	host := getEnv("QDRANT_HOST", "localhost")
	port := getEnvInt("QDRANT_PORT", 6333)
	qdrantURL = fmt.Sprintf("http://%s:%d", host, port)
}
func getEnv(key, defaultValue string) string {
if value := os.Getenv(key); value != "" {
return value
}
return defaultValue
}
func getEnvInt(key string, defaultValue int) int {
if value := os.Getenv(key); value != "" {
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
}
return defaultValue
}
// Translation is one translated news row joined with its source noticia,
// carrying everything needed to build a Qdrant point payload.
type Translation struct {
ID int64 // traducciones.id
NoticiaID int64 // noticias.id
Lang string // target language (lang_to, trimmed)
Titulo string // translated title
Resumen string // translated summary
URL string // original article URL
Fecha *time.Time // publication date; nil when unknown
FuenteNombre string // source/feed display name
CategoriaID *int64 // optional category FK
PaisID *int64 // optional country FK
}
// getPendingTranslations fetches up to batchSize finished ('done')
// translations that have not yet been vectorized, oldest first.
func getPendingTranslations(ctx context.Context) ([]Translation, error) {
	rows, err := dbPool.Query(ctx, `
	SELECT
		t.id as traduccion_id,
		t.noticia_id,
		TRIM(t.lang_to) as lang,
		t.titulo_trad as titulo,
		t.resumen_trad as resumen,
		n.url,
		n.fecha,
		n.fuente_nombre,
		n.categoria_id,
		n.pais_id
	FROM traducciones t
	INNER JOIN noticias n ON t.noticia_id = n.id
	WHERE t.vectorized = FALSE
	AND t.status = 'done'
	ORDER BY t.created_at ASC
	LIMIT $1
	`, batchSize)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var translations []Translation
	for rows.Next() {
		var t Translation
		if err := rows.Scan(
			&t.ID, &t.NoticiaID, &t.Lang, &t.Titulo, &t.Resumen,
			&t.URL, &t.Fecha, &t.FuenteNombre, &t.CategoriaID, &t.PaisID,
		); err != nil {
			// Skip malformed rows but keep processing the batch.
			logger.Printf("scan error: %v", err)
			continue
		}
		translations = append(translations, t)
	}
	// Surface iteration errors (e.g. a dropped connection) instead of
	// silently returning a partial batch as if it were complete.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return translations, nil
}
type EmbeddingRequest struct {
Model string `json:"model"`
Input string `json:"input"`
}
type EmbeddingResponse struct {
Embedding []float64 `json:"embedding"`
}
// generateEmbedding asks Ollama for an embedding of text using the
// mxbai-embed-large model; network plus inference are bounded by a 60s
// client timeout.
func generateEmbedding(text string) ([]float64, error) {
	payload, err := json.Marshal(EmbeddingRequest{
		Model: "mxbai-embed-large",
		Input: text,
	})
	if err != nil {
		return nil, err
	}
	httpClient := &http.Client{Timeout: 60 * time.Second}
	resp, err := httpClient.Post(ollamaURL+"/api/embeddings", "application/json", bytes.NewReader(payload))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Ollama returned status %d", resp.StatusCode)
	}
	var out EmbeddingResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return nil, err
	}
	return out.Embedding, nil
}
// QdrantPoint is one vector point in a Qdrant upsert: an id (string UUID or
// unsigned integer — hence interface{}), the dense vector, and a free-form
// JSON payload.
type QdrantPoint struct {
ID interface{} `json:"id"`
Vector []float64 `json:"vector"`
Payload map[string]interface{} `json:"payload"`
}
// QdrantUpsertRequest is the body sent to /collections/{name}/points.
type QdrantUpsertRequest struct {
Points []QdrantPoint `json:"points"`
}
// ensureCollection creates the Qdrant collection if it does not already
// exist, probing the embedding model once to learn the vector dimension.
func ensureCollection() error {
	req, err := http.NewRequest("GET", qdrantURL+"/collections/"+collection, nil)
	if err != nil {
		return err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode == 200 {
		logger.Printf("Collection %s already exists", collection)
		return nil
	}
	// Learn the embedding dimension from a probe call.
	emb, err := generateEmbedding("test")
	if err != nil {
		return fmt.Errorf("failed to get embedding dimension: %w", err)
	}
	dimension := len(emb)
	// Qdrant creates collections via PUT /collections/{name}: the name goes
	// in the path, not the body. The previous POST /collections with a
	// "name" field is not a valid create call, and its status was never
	// checked, so failures went unnoticed.
	createReq := map[string]interface{}{
		"vectors": map[string]interface{}{
			"size":     dimension,
			"distance": "Cosine",
		},
	}
	body, _ := json.Marshal(createReq)
	putReq, err := http.NewRequest(http.MethodPut, qdrantURL+"/collections/"+collection, bytes.NewReader(body))
	if err != nil {
		return err
	}
	putReq.Header.Set("Content-Type", "application/json")
	resp2, err := http.DefaultClient.Do(putReq)
	if err != nil {
		return err
	}
	defer resp2.Body.Close()
	if resp2.StatusCode != 200 {
		respBody, _ := io.ReadAll(resp2.Body)
		return fmt.Errorf("Qdrant create returned status %d: %s", resp2.StatusCode, string(respBody))
	}
	logger.Printf("Created collection %s with dimension %d", collection, dimension)
	return nil
}
// uploadToQdrant upserts one point per translation whose embedding is
// non-nil. Point ids are deterministic UUIDs derived from the traduccion id
// (uuid.NewSHA1 over a stable string), so re-processing a row overwrites its
// existing point instead of duplicating it, and the id stored in Postgres
// can be recomputed — the previous random uuid.New() here never matched the
// separate random ids main stored in the database.
func uploadToQdrant(translations []Translation, embeddings [][]float64) error {
	points := make([]QdrantPoint, 0, len(translations))
	for i, t := range translations {
		if embeddings[i] == nil {
			continue
		}
		pointID := uuid.NewSHA1(uuid.NameSpaceURL, []byte(fmt.Sprintf("traduccion:%d", t.ID))).String()
		payload := map[string]interface{}{
			"news_id":       t.NoticiaID,
			"traduccion_id": t.ID,
			"titulo":        t.Titulo,
			"resumen":       t.Resumen,
			"url":           t.URL,
			"fuente_nombre": t.FuenteNombre,
			"lang":          t.Lang,
		}
		if t.Fecha != nil {
			payload["fecha"] = t.Fecha.Format(time.RFC3339)
		}
		if t.CategoriaID != nil {
			payload["categoria_id"] = *t.CategoriaID
		}
		if t.PaisID != nil {
			payload["pais_id"] = *t.PaisID
		}
		points = append(points, QdrantPoint{
			ID:      pointID,
			Vector:  embeddings[i],
			Payload: payload,
		})
	}
	if len(points) == 0 {
		return nil
	}
	reqBody := QdrantUpsertRequest{Points: points}
	body, err := json.Marshal(reqBody)
	if err != nil {
		return err
	}
	url := fmt.Sprintf("%s/collections/%s/points", qdrantURL, collection)
	resp, err := http.Post(url, "application/json", bytes.NewReader(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 && resp.StatusCode != 202 {
		respBody, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("Qdrant returned status %d: %s", resp.StatusCode, string(respBody))
	}
	return nil
}
// updateTranslationStatus marks each translation that has a non-empty point
// id as vectorized, storing the Qdrant point id. Per-row failures are
// logged and skipped; the function itself always returns nil.
func updateTranslationStatus(ctx context.Context, translations []Translation, pointIDs []string) error {
	const q = `
	UPDATE traducciones
	SET
		vectorized = TRUE,
		vectorization_date = NOW(),
		qdrant_point_id = $1
	WHERE id = $2
	`
	for idx, tr := range translations {
		if idx >= len(pointIDs) || pointIDs[idx] == "" {
			continue
		}
		if _, err := dbPool.Exec(ctx, q, pointIDs[idx], tr.ID); err != nil {
			logger.Printf("Error updating translation %d: %v", tr.ID, err)
		}
	}
	return nil
}
// getStats reports vectorization progress for Spanish ('es') translations:
// total rows, rows already vectorized, and finished rows still pending.
func getStats(ctx context.Context) (total, vectorized, pending int, err error) {
	const q = `
	SELECT
		COUNT(*) as total,
		COUNT(*) FILTER (WHERE vectorized = TRUE) as vectorized,
		COUNT(*) FILTER (WHERE vectorized = FALSE AND status = 'done') as pending
	FROM traducciones
	WHERE lang_to = 'es'
	`
	err = dbPool.QueryRow(ctx, q).Scan(&total, &vectorized, &pending)
	return
}
// main runs the vectorization worker loop: every sleepSec seconds it fetches
// pending translations, embeds them via Ollama, upserts them into Qdrant and
// marks them vectorized in Postgres.
func main() {
	loadConfig()
	logger.Println("Starting Qdrant Vectorization Worker")
	cfg := workers.LoadDBConfig()
	if err := workers.Connect(cfg); err != nil {
		logger.Fatalf("Failed to connect to database: %v", err)
	}
	dbPool = workers.GetPool()
	defer workers.Close()
	logger.Println("Connected to PostgreSQL")
	ctx := context.Background()
	if err := ensureCollection(); err != nil {
		logger.Printf("Warning: Could not ensure collection: %v", err)
	}
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		<-sigChan
		logger.Println("Shutting down...")
		os.Exit(0)
	}()
	logger.Printf("Config: qdrant=%s, ollama=%s, collection=%s, sleep=%ds, batch=%d",
		qdrantURL, ollamaURL, collection, sleepSec, batchSize)
	totalProcessed := 0
	for {
		select {
		case <-time.After(time.Duration(sleepSec) * time.Second):
			translations, err := getPendingTranslations(ctx)
			if err != nil {
				logger.Printf("Error fetching pending translations: %v", err)
				continue
			}
			if len(translations) == 0 {
				logger.Println("No pending translations to process")
				continue
			}
			logger.Printf("Processing %d translations...", len(translations))
			// Generate embeddings; failed items stay nil.
			embeddings := make([][]float64, len(translations))
			for i, t := range translations {
				text := fmt.Sprintf("%s %s", t.Titulo, t.Resumen)
				emb, err := generateEmbedding(text)
				if err != nil {
					logger.Printf("Error generating embedding for %d: %v", t.ID, err)
					continue
				}
				embeddings[i] = emb
			}
			if err := uploadToQdrant(translations, embeddings); err != nil {
				logger.Printf("Error uploading to Qdrant: %v", err)
				continue
			}
			// Mark ONLY translations whose embedding succeeded: an empty
			// point id makes updateTranslationStatus skip the row, so failed
			// rows are retried next cycle (previously every row was marked
			// vectorized regardless of failure, losing data). The id is the
			// same deterministic UUID uploadToQdrant derives from the
			// traduccion id, so the stored id matches the point in Qdrant.
			pointIDs := make([]string, len(translations))
			succeeded := 0
			for i, t := range translations {
				if embeddings[i] == nil {
					continue
				}
				pointIDs[i] = uuid.NewSHA1(uuid.NameSpaceURL, []byte(fmt.Sprintf("traduccion:%d", t.ID))).String()
				succeeded++
			}
			if err := updateTranslationStatus(ctx, translations, pointIDs); err != nil {
				logger.Printf("Error updating status: %v", err)
			}
			totalProcessed += succeeded
			logger.Printf("Processed %d translations (total: %d)", succeeded, totalProcessed)
			total, vectorized, pending, err := getStats(ctx)
			if err == nil {
				logger.Printf("Stats: total=%d, vectorized=%d, pending=%d", total, vectorized, pending)
			}
		}
	}
}

384
backend/cmd/related/main.go Normal file
View file

@ -0,0 +1,384 @@
package main
import (
	"context"
	"log"
	"math"
	"os"
	"os/signal"
	"sort"
	"strconv"
	"syscall"
	"time"

	"github.com/jackc/pgx/v5/pgxpool"
	"github.com/rss2/backend/internal/workers"
)
// Worker configuration and shared state, populated by init and loadConfig.
var (
logger *log.Logger
dbPool *pgxpool.Pool
sleepSec = 10 // seconds between batches (RELATED_SLEEP)
topK = 10 // related items kept per translation (RELATED_TOPK)
batchSz = 200 // pending translations handled per batch (RELATED_BATCH)
minScore = 0.0 // minimum cosine similarity to store (RELATED_MIN_SCORE)
)
// init wires the worker's prefixed logger before main runs.
func init() {
logger = log.New(os.Stdout, "[RELATED] ", log.LstdFlags)
}
// loadConfig reads worker tuning knobs from the environment, keeping the
// compiled-in defaults when a variable is unset or invalid.
func loadConfig() {
sleepSec = getEnvInt("RELATED_SLEEP", 10)
topK = getEnvInt("RELATED_TOPK", 10)
batchSz = getEnvInt("RELATED_BATCH", 200)
minScore = getEnvFloat("RELATED_MIN_SCORE", 0.0)
}
func getEnvInt(key string, defaultValue int) int {
if value := os.Getenv(key); value != "" {
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
}
return defaultValue
}
func getEnvFloat(key string, defaultValue float64) float64 {
if value := os.Getenv(key); value != "" {
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
}
return defaultValue
}
// Translation is one Spanish translation row together with its stored
// embedding vector, loaded for in-memory similarity ranking.
type Translation struct {
ID int64 // traducciones.id
Titulo string // translated title (may be empty)
Resumen string // translated summary (may be empty)
Embedding []float64 // dense vector from traduccion_embeddings
}
// ensureSchema idempotently creates the tables and indexes this worker uses:
// related_noticias (its output: scored pairs) and traduccion_embeddings (its
// input vectors, produced by another worker), plus supporting indexes.
func ensureSchema(ctx context.Context) error {
_, err := dbPool.Exec(ctx, `
CREATE TABLE IF NOT EXISTS related_noticias (
traduccion_id INTEGER REFERENCES traducciones(id) ON DELETE CASCADE,
related_traduccion_id INTEGER REFERENCES traducciones(id) ON DELETE CASCADE,
score FLOAT NOT NULL DEFAULT 0,
created_at TIMESTAMP DEFAULT NOW(),
PRIMARY KEY (traduccion_id, related_traduccion_id)
);
`)
if err != nil {
return err
}
// Ensure traduccion_embeddings table exists
_, err = dbPool.Exec(ctx, `
CREATE TABLE IF NOT EXISTS traduccion_embeddings (
id SERIAL PRIMARY KEY,
traduccion_id INTEGER NOT NULL REFERENCES traducciones(id) ON DELETE CASCADE,
model TEXT NOT NULL,
dim INTEGER NOT NULL,
embedding DOUBLE PRECISION[] NOT NULL,
created_at TIMESTAMP DEFAULT NOW(),
UNIQUE (traduccion_id, model)
);
`)
if err != nil {
return err
}
// Lookup indexes used by fetchAllEmbeddings / fetchPendingIDs.
_, err = dbPool.Exec(ctx, `
CREATE INDEX IF NOT EXISTS idx_tr_emb_model ON traduccion_embeddings(model);
`)
if err != nil {
return err
}
_, err = dbPool.Exec(ctx, `
CREATE INDEX IF NOT EXISTS idx_tr_emb_traduccion_id ON traduccion_embeddings(traduccion_id);
`)
return err
}
// fetchAllEmbeddings loads every finished Spanish translation that has an
// embedding for the given model, vectors included.
func fetchAllEmbeddings(ctx context.Context, model string) ([]Translation, error) {
	rows, err := dbPool.Query(ctx, `
	SELECT e.traduccion_id,
		COALESCE(NULLIF(t.titulo_trad,''), ''),
		COALESCE(NULLIF(t.resumen_trad,''), ''),
		e.embedding
	FROM traduccion_embeddings e
	JOIN traducciones t ON t.id = e.traduccion_id
	WHERE e.model = $1
	AND t.status = 'done'
	AND t.lang_to = 'es'
	`, model)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var translations []Translation
	for rows.Next() {
		var t Translation
		if err := rows.Scan(&t.ID, &t.Titulo, &t.Resumen, &t.Embedding); err != nil {
			// Skip malformed rows but keep the rest of the corpus.
			continue
		}
		translations = append(translations, t)
	}
	// Surface iteration errors instead of silently returning a truncated
	// corpus, which would skew every similarity ranking this cycle.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return translations, nil
}
// fetchPendingIDs returns up to limit translation ids that have an embedding
// for model but no related_noticias rows yet, newest first.
func fetchPendingIDs(ctx context.Context, model string, limit int) ([]int64, error) {
	rows, err := dbPool.Query(ctx, `
	SELECT t.id
	FROM traducciones t
	JOIN traduccion_embeddings e ON e.traduccion_id = t.id AND e.model = $1
	LEFT JOIN related_noticias r ON r.traduccion_id = t.id
	WHERE t.lang_to = 'es'
	AND t.status = 'done'
	GROUP BY t.id
	HAVING COUNT(r.related_traduccion_id) = 0
	ORDER BY t.id DESC
	LIMIT $2
	`, model, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var ids []int64
	for rows.Next() {
		var id int64
		if err := rows.Scan(&id); err != nil {
			continue
		}
		ids = append(ids, id)
	}
	// Surface iteration errors instead of silently returning a partial list.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return ids, nil
}
func cosineSimilarity(a, b []float64) float64 {
if len(a) != len(b) || len(a) == 0 {
return 0
}
var dotProduct, normA, normB float64
for i := range a {
dotProduct += a[i] * b[i]
normA += a[i] * a[i]
normB += b[i] * b[i]
}
normA = sqrt(normA)
normB = sqrt(normB)
if normA == 0 || normB == 0 {
return 0
}
return dotProduct / (normA * normB)
}
func sqrt(x float64) float64 {
if x <= 0 {
return 0
}
// Simple Newton-Raphson
z := x
for i := 0; i < 20; i++ {
z = (z + x/z) / 2
}
return z
}
// findTopK returns up to k candidate ids most cosine-similar to query,
// sorted by descending score. The query itself and candidates scoring at or
// below minScore are excluded.
func findTopK(query Embedding, candidates []Translation, k int, minScore float64) []struct {
	ID    int64
	Score float64
} {
	type sim struct {
		id    int64
		score float64
	}
	var sims []sim
	for _, c := range candidates {
		// c.ID is already int64; the previous int64(c.ID) casts were no-ops.
		if c.ID == query.ID {
			continue
		}
		score := cosineSimilarity(query.Embedding, c.Embedding)
		if score <= minScore {
			continue
		}
		sims = append(sims, sim{c.ID, score})
	}
	// O(n log n) stable sort replaces the previous O(n^2) bubble-style
	// selection, which dominated runtime as the corpus grew.
	sort.SliceStable(sims, func(i, j int) bool { return sims[i].score > sims[j].score })
	if len(sims) > k {
		sims = sims[:k]
	}
	result := make([]struct {
		ID    int64
		Score float64
	}, len(sims))
	for i, s := range sims {
		result[i] = struct {
			ID    int64
			Score float64
		}{s.id, s.score}
	}
	return result
}
// Embedding pairs a traduccion id with its dense vector for in-memory
// similarity lookups.
type Embedding struct {
ID int64
Embedding []float64
}
// findEmbeddingByID does a linear scan for the embedding with the given
// traduccion id, returning a pointer into the slice or nil when absent.
func findEmbeddingByID(embeddings []Embedding, id int64) *Embedding {
	for idx := range embeddings {
		if embeddings[idx].ID == id {
			return &embeddings[idx]
		}
	}
	return nil
}
// insertRelated upserts the (traduccion, related, score) pairs into
// related_noticias. Zero or negative scores are skipped; per-row errors are
// logged but never fatal.
func insertRelated(ctx context.Context, traduccionID int64, related []struct {
	ID    int64
	Score float64
}) error {
	if len(related) == 0 {
		return nil
	}
	const q = `
	INSERT INTO related_noticias (traduccion_id, related_traduccion_id, score)
	VALUES ($1, $2, $3)
	ON CONFLICT (traduccion_id, related_traduccion_id)
	DO UPDATE SET score = EXCLUDED.score
	`
	for _, rel := range related {
		if rel.Score <= 0 {
			continue
		}
		if _, err := dbPool.Exec(ctx, q, traduccionID, rel.ID, rel.Score); err != nil {
			logger.Printf("Error inserting related: %v", err)
		}
	}
	return nil
}
// processBatch computes related-news links for one batch: it loads every
// embedding for model into memory (brute-force search over the corpus),
// then for each pending translation ranks all candidates by cosine
// similarity and stores the top-K pairs. Returns how many translations got
// related rows.
func processBatch(ctx context.Context, model string) (int, error) {
// Fetch all embeddings once
allTranslations, err := fetchAllEmbeddings(ctx, model)
if err != nil {
return 0, err
}
if len(allTranslations) == 0 {
return 0, nil
}
// Convert to Embedding format for easier lookup
var allEmbeddings []Embedding
for _, t := range allTranslations {
if t.Embedding != nil {
allEmbeddings = append(allEmbeddings, Embedding{ID: t.ID, Embedding: t.Embedding})
}
}
// Get pending IDs
pendingIDs, err := fetchPendingIDs(ctx, model, batchSz)
if err != nil {
return 0, err
}
if len(pendingIDs) == 0 {
return 0, nil
}
processed := 0
for _, tradID := range pendingIDs {
emb := findEmbeddingByID(allEmbeddings, tradID)
if emb == nil {
// No (non-empty) embedding loaded for this translation; skip it.
continue
}
topRelated := findTopK(*emb, allTranslations, topK, minScore)
if err := insertRelated(ctx, tradID, topRelated); err != nil {
logger.Printf("Error inserting related for %d: %v", tradID, err)
continue
}
processed++
}
return processed, nil
}
// main runs the related-news worker: connect, ensure schema, then every
// sleepSec seconds process one batch of pending translations.
func main() {
loadConfig()
logger.Println("Starting Related News Worker")
cfg := workers.LoadDBConfig()
if err := workers.Connect(cfg); err != nil {
logger.Fatalf("Failed to connect to database: %v", err)
}
dbPool = workers.GetPool()
// NOTE: os.Exit in the signal goroutine below bypasses deferred calls.
defer workers.Close()
ctx := context.Background()
// Ensure schema
if err := ensureSchema(ctx); err != nil {
logger.Printf("Error ensuring schema: %v", err)
}
// Embedding model whose vectors are ranked; must match the producer side.
model := os.Getenv("EMB_MODEL")
if model == "" {
model = "mxbai-embed-large"
}
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
go func() {
<-sigChan
logger.Println("Shutting down...")
os.Exit(0)
}()
logger.Printf("Config: sleep=%ds, topK=%d, batch=%d, model=%s", sleepSec, topK, batchSz, model)
for {
select {
case <-time.After(time.Duration(sleepSec) * time.Second):
count, err := processBatch(ctx, model)
if err != nil {
logger.Printf("Error processing batch: %v", err)
continue
}
if count > 0 {
logger.Printf("Generated related news for %d translations", count)
}
}
}
}

330
backend/cmd/scraper/main.go Normal file
View file

@ -0,0 +1,330 @@
package main
import (
"context"
"crypto/md5"
"fmt"
"log"
"net/http"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rss2/backend/internal/workers"
)
// Package-level state for the scraper worker.
// NOTE(review): dbPool is declared as *workers.Config (not a pool) and is
// never used in this file — all queries go through pool. Looks like a
// leftover; confirm before removing.
var (
logger *log.Logger
dbPool *workers.Config
pool *pgxpool.Pool
sleepInterval = 60 // seconds between scraping cycles (SCRAPER_SLEEP)
batchSize = 10 // configured batch size (SCRAPER_BATCH); only logged in the visible code
)
// URLSource is one row of fuentes_url: a page to scrape plus its optional
// category/country/language assignments.
type URLSource struct {
ID int64
Nombre string // display name of the source
URL string // page to scrape
CategoriaID *int64 // optional category FK
PaisID *int64 // optional country FK
Idioma *string // optional language code
Active bool // fuentes_url.activo
}
// Article holds the fields scraped from one page before persisting it as a
// noticia.
type Article struct {
Title string
Summary string // meta description, capped at 500 characters
Content string // main body text from common article selectors
URL string
ImageURL string // og:image, may be empty
PubDate *time.Time // nil when the page exposes no date
}
// init wires the worker's prefixed logger before main runs.
func init() {
	// log.New already writes to os.Stdout; the extra logger.SetOutput(os.Stdout)
	// call the original made was redundant and has been dropped.
	logger = log.New(os.Stdout, "[SCRAPER] ", log.LstdFlags)
}
// loadConfig reads the scraper's tuning knobs from the environment, keeping
// the compiled-in defaults when a variable is unset or invalid.
func loadConfig() {
sleepInterval = getEnvInt("SCRAPER_SLEEP", 60)
batchSize = getEnvInt("SCRAPER_BATCH", 10)
}
func getEnvInt(key string, defaultValue int) int {
if value := os.Getenv(key); value != "" {
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
}
return defaultValue
}
// getActiveURLs loads all active rows from fuentes_url.
func getActiveURLs(ctx context.Context) ([]URLSource, error) {
	rows, err := pool.Query(ctx, `
	SELECT id, nombre, url, categoria_id, pais_id, idioma, activo
	FROM fuentes_url
	WHERE activo = true
	`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var sources []URLSource
	for rows.Next() {
		var s URLSource
		err := rows.Scan(&s.ID, &s.Nombre, &s.URL, &s.CategoriaID, &s.PaisID, &s.Idioma, &s.Active)
		if err != nil {
			// Skip malformed rows but keep the rest of the batch.
			continue
		}
		sources = append(sources, s)
	}
	// Report iteration errors (e.g. a dropped connection) instead of
	// silently returning a partial list as if it were complete.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return sources, nil
}
// updateSourceStatus stamps the latest check result (status, message, HTTP
// code, timestamp) onto a fuentes_url row.
func updateSourceStatus(ctx context.Context, sourceID int64, status, message string, httpCode int) error {
	const q = `
	UPDATE fuentes_url
	SET last_check = NOW(),
		last_status = $1,
		status_message = $2,
		last_http_code = $3
	WHERE id = $4
	`
	_, err := pool.Exec(ctx, q, status, message, httpCode, sourceID)
	return err
}
// extractArticle fetches source.URL and scrapes title, summary, main image
// and body text, using OpenGraph/meta tags with common-selector fallbacks.
func extractArticle(source URLSource) (*Article, error) {
	client := &http.Client{
		Timeout: 30 * time.Second,
	}
	req, err := http.NewRequest("GET", source.URL, nil)
	if err != nil {
		return nil, err
	}
	// Browser-like headers: many sites refuse obviously non-browser clients.
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
	req.Header.Set("Accept-Language", "en-US,en;q=0.5")
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
	}
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}
	article := &Article{
		URL: source.URL,
	}
	// Title: og:title, then meta title, then first <h1>, then <title>.
	article.Title = doc.Find("meta[property='og:title']").First().AttrOr("content", "")
	if article.Title == "" {
		article.Title = doc.Find("meta[name='title']").First().AttrOr("content", "")
	}
	if article.Title == "" {
		article.Title = doc.Find("h1").First().Text()
	}
	if article.Title == "" {
		article.Title = doc.Find("title").First().Text()
	}
	// Summary: og:description, then meta description.
	article.Summary = doc.Find("meta[property='og:description']").First().AttrOr("content", "")
	if article.Summary == "" {
		article.Summary = doc.Find("meta[name='description']").First().AttrOr("content", "")
	}
	article.ImageURL = doc.Find("meta[property='og:image']").First().AttrOr("content", "")
	// Main content: first match wins among common article containers.
	contentSelectors := []string{
		"article",
		"[role='main']",
		"main",
		".article-content",
		".post-content",
		".entry-content",
		".content",
		"#content",
	}
	for _, sel := range contentSelectors {
		content := doc.Find(sel).First()
		if content.Length() > 0 {
			article.Content = content.Text()
			break
		}
	}
	article.Title = strings.TrimSpace(article.Title)
	article.Summary = strings.TrimSpace(article.Summary)
	article.Content = strings.TrimSpace(article.Content)
	// Truncate on rune boundaries: the previous byte slice Summary[:500]
	// could split a multi-byte UTF-8 character and store invalid text.
	if r := []rune(article.Summary); len(r) > 500 {
		article.Summary = string(r[:500])
	}
	return article, nil
}
// saveArticle persists one scraped article as a noticia, using an md5 of the
// canonical URL as a deterministic id so re-scrapes deduplicate. Returns
// true when a new row was inserted, false when it already existed.
func saveArticle(ctx context.Context, source URLSource, article *Article) (bool, error) {
	finalURL := article.URL
	if finalURL == "" {
		finalURL = source.URL
	}
	articleID := fmt.Sprintf("%x", md5.Sum([]byte(finalURL)))
	title := article.Title
	if title == "" {
		title = "Sin título"
	}
	summary := article.Summary
	if summary == "" && article.Content != "" {
		summary = article.Content
		// Rune-safe truncation: byte slicing could split a UTF-8 character.
		if r := []rune(summary); len(r) > 500 {
			summary = string(r[:500])
		}
	}
	pubDate := time.Now()
	if article.PubDate != nil {
		pubDate = *article.PubDate
	}
	// Single INSERT ... ON CONFLICT DO NOTHING with RowsAffected replaces
	// the previous SELECT-then-INSERT, which was racy (two workers could
	// both pass the existence check) and cost an extra round trip.
	tag, err := pool.Exec(ctx, `
	INSERT INTO noticias (
		id, titulo, resumen, url, fecha, imagen_url,
		fuente_nombre, categoria_id, pais_id
	) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
	ON CONFLICT (id) DO NOTHING
	`, articleID, title, summary, finalURL, pubDate, article.ImageURL,
		source.Nombre, source.CategoriaID, source.PaisID)
	if err != nil {
		return false, err
	}
	return tag.RowsAffected() > 0, nil
}
// processSource scrapes one URL source, persists the extracted article and
// records the outcome on the fuentes_url row.
func processSource(ctx context.Context, source URLSource) {
	// truncate bounds status messages to the column width; the previous
	// err.Error()[:200] sliced unconditionally and panicked whenever the
	// message was shorter than 200 bytes.
	truncate := func(s string, n int) string {
		if len(s) > n {
			return s[:n]
		}
		return s
	}
	logger.Printf("Processing: %s (%s)", source.Nombre, source.URL)
	article, err := extractArticle(source)
	if err != nil {
		logger.Printf("Error extracting article from %s: %v", source.URL, err)
		status := "ERROR"
		if strings.Contains(err.Error(), "HTTP") {
			status = "ERROR_HTTP"
		}
		updateSourceStatus(ctx, source.ID, status, truncate(err.Error(), 200), 0)
		return
	}
	if article.Title == "" {
		logger.Printf("No title found for %s", source.URL)
		updateSourceStatus(ctx, source.ID, "ERROR_PARSE", "No title extracted", 200)
		return
	}
	saved, err := saveArticle(ctx, source, article)
	if err != nil {
		logger.Printf("Error saving article: %v", err)
		updateSourceStatus(ctx, source.ID, "ERROR_DB", truncate(err.Error(), 200), 0)
		return
	}
	if saved {
		logger.Printf("Saved: %s", article.Title)
		updateSourceStatus(ctx, source.ID, "OK", "News created successfully", 200)
	} else {
		logger.Printf("Already exists: %s", article.Title)
		updateSourceStatus(ctx, source.ID, "OK", "News already exists", 200)
	}
}
// main runs the scraper worker: connect to Postgres, then every
// sleepInterval seconds scrape all active URL sources, pausing between
// sources to rate-limit remote hosts.
func main() {
loadConfig()
logger.Println("Starting Scraper Worker")
cfg := workers.LoadDBConfig()
if err := workers.Connect(cfg); err != nil {
logger.Fatalf("Failed to connect to database: %v", err)
}
pool = workers.GetPool()
// NOTE: os.Exit in the signal goroutine below bypasses deferred calls.
defer workers.Close()
logger.Println("Connected to PostgreSQL")
ctx := context.Background()
// Handle shutdown
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
go func() {
<-sigChan
logger.Println("Shutting down...")
os.Exit(0)
}()
logger.Printf("Config: sleep=%ds, batch=%d", sleepInterval, batchSize)
ticker := time.NewTicker(time.Duration(sleepInterval) * time.Second)
defer ticker.Stop()
for {
select {
case <-ticker.C:
sources, err := getActiveURLs(ctx)
if err != nil {
logger.Printf("Error fetching URLs: %v", err)
continue
}
if len(sources) == 0 {
logger.Println("No active URLs to process")
continue
}
logger.Printf("Processing %d sources", len(sources))
for _, source := range sources {
processSource(ctx, source)
time.Sleep(2 * time.Second) // Rate limiting
}
}
}
}

190
backend/cmd/server/main.go Normal file
View file

@ -0,0 +1,190 @@
package main
import (
"context"
"fmt"
"log"
"os"
"os/signal"
"syscall"
"github.com/gin-gonic/gin"
"github.com/rss2/backend/internal/cache"
"github.com/rss2/backend/internal/config"
"github.com/rss2/backend/internal/db"
"github.com/rss2/backend/internal/handlers"
"github.com/rss2/backend/internal/middleware"
"github.com/rss2/backend/internal/services"
)
// initDB performs idempotent, best-effort schema setup at server startup:
// auxiliary tables, a users.role column, and default config rows. Each step
// logs a warning on failure but never aborts the server.
func initDB() {
ctx := context.Background()
// Create the entity_aliases table if it does not exist.
_, err := db.GetPool().Exec(ctx, `
CREATE TABLE IF NOT EXISTS entity_aliases (
id SERIAL PRIMARY KEY,
canonical_name VARCHAR(255) NOT NULL,
alias VARCHAR(255) NOT NULL,
tipo VARCHAR(50) NOT NULL CHECK (tipo IN ('persona', 'organizacion', 'lugar', 'tema')),
created_at TIMESTAMP DEFAULT NOW(),
UNIQUE(alias, tipo)
)
`)
if err != nil {
log.Printf("Warning: Could not create entity_aliases table: %v", err)
} else {
log.Println("Table entity_aliases ready")
}
// Add the role column to users if it does not exist.
_, err = db.GetPool().Exec(ctx, `
ALTER TABLE users ADD COLUMN IF NOT EXISTS role VARCHAR(20) DEFAULT 'user'
`)
if err != nil {
log.Printf("Warning: Could not add role column: %v", err)
} else {
log.Println("Column role ready")
}
// Create the config key/value table if it does not exist.
_, err = db.GetPool().Exec(ctx, `
CREATE TABLE IF NOT EXISTS config (
key VARCHAR(100) PRIMARY KEY,
value TEXT,
updated_at TIMESTAMP DEFAULT NOW()
)
`)
if err != nil {
log.Printf("Warning: Could not create config table: %v", err)
} else {
log.Println("Table config ready")
}
// Seed default config rows; errors are deliberately ignored (best-effort)
// and ON CONFLICT DO NOTHING preserves any existing values.
db.GetPool().Exec(ctx, `
INSERT INTO config (key, value) VALUES ('translator_type', 'cpu')
ON CONFLICT (key) DO NOTHING
`)
db.GetPool().Exec(ctx, `
INSERT INTO config (key, value) VALUES ('translator_workers', '2')
ON CONFLICT (key) DO NOTHING
`)
db.GetPool().Exec(ctx, `
INSERT INTO config (key, value) VALUES ('translator_status', 'stopped')
ON CONFLICT (key) DO NOTHING
`)
}
// main wires the API server: connects to PostgreSQL (fatal on failure) and
// Redis (optional, warning only), runs best-effort schema setup, registers
// all HTTP routes, then serves until SIGINT/SIGTERM.
func main() {
	cfg := config.Load()
	if err := db.Connect(cfg.DatabaseURL); err != nil {
		log.Fatalf("Failed to connect to database: %v", err)
	}
	defer db.Close()
	log.Println("Connected to PostgreSQL")
	// Auto-setup DB tables (best effort; see initDB).
	initDB()
	// Redis is optional: a failed connection only degrades caching.
	if err := cache.Connect(cfg.RedisURL); err != nil {
		log.Printf("Warning: Failed to connect to Redis: %v", err)
	} else {
		defer cache.Close()
		log.Println("Connected to Redis")
	}
	services.Init(cfg)
	r := gin.Default()
	r.Use(middleware.CORSMiddleware())
	r.Use(middleware.LoggerMiddleware())
	// Unauthenticated liveness probe.
	r.GET("/health", func(c *gin.Context) {
		c.JSON(200, gin.H{"status": "ok"})
	})
	api := r.Group("/api")
	{
		// Serve static images downloaded by wiki_worker
		api.StaticFS("/wiki-images", gin.Dir("/app/data/wiki_images", false))
		api.POST("/auth/login", handlers.Login)
		api.POST("/auth/register", handlers.Register)
		api.GET("/auth/check-first-user", handlers.CheckFirstUser)
		// Public read access to news; deletion requires auth.
		news := api.Group("/news")
		{
			news.GET("", handlers.GetNews)
			news.GET("/:id", handlers.GetNewsByID)
			news.DELETE("/:id", middleware.AuthRequired(), handlers.DeleteNews)
		}
		// Feeds: reads are public, mutations require auth.
		feeds := api.Group("/feeds")
		{
			feeds.GET("", handlers.GetFeeds)
			feeds.GET("/export", handlers.ExportFeeds)
			feeds.GET("/:id", handlers.GetFeedByID)
			feeds.POST("", middleware.AuthRequired(), handlers.CreateFeed)
			feeds.POST("/import", middleware.AuthRequired(), handlers.ImportFeeds)
			feeds.PUT("/:id", middleware.AuthRequired(), handlers.UpdateFeed)
			feeds.DELETE("/:id", middleware.AuthRequired(), handlers.DeleteFeed)
			feeds.POST("/:id/toggle", middleware.AuthRequired(), handlers.ToggleFeedActive)
			feeds.POST("/:id/reactivate", middleware.AuthRequired(), handlers.ReactivateFeed)
		}
		api.GET("/search", handlers.SearchNews)
		api.GET("/entities", handlers.GetEntities)
		api.GET("/stats", handlers.GetStats)
		api.GET("/categories", handlers.GetCategories)
		api.GET("/countries", handlers.GetCountries)
		// Admin endpoints: require both a valid token and the admin role.
		admin := api.Group("/admin")
		admin.Use(middleware.AuthRequired(), middleware.AdminRequired())
		{
			admin.POST("/aliases", handlers.CreateAlias)
			admin.GET("/aliases/export", handlers.ExportAliases)
			admin.POST("/aliases/import", handlers.ImportAliases)
			admin.POST("/entities/retype", handlers.PatchEntityTipo)
			admin.GET("/backup", handlers.BackupDatabase)
			admin.GET("/backup/news", handlers.BackupNewsZipped)
			admin.GET("/users", handlers.GetUsers)
			admin.POST("/users/:id/promote", handlers.PromoteUser)
			admin.POST("/users/:id/demote", handlers.DemoteUser)
			admin.POST("/reset-db", handlers.ResetDatabase)
			admin.GET("/workers/status", handlers.GetWorkerStatus)
			admin.POST("/workers/config", handlers.SetWorkerConfig)
			admin.POST("/workers/start", handlers.StartWorkers)
			admin.POST("/workers/stop", handlers.StopWorkers)
		}
		auth := api.Group("/auth")
		auth.Use(middleware.AuthRequired())
		{
			auth.GET("/me", handlers.GetCurrentUser)
		}
	}
	// NOTE(review): the JWT secret is set AFTER routes are registered;
	// presumably AuthRequired() reads it lazily per-request — verify, since
	// a handler invoked before this line would see an unset secret.
	middleware.SetJWTSecret(cfg.SecretKey)
	port := cfg.ServerPort
	addr := fmt.Sprintf(":%s", port)
	go func() {
		log.Printf("Server starting on %s", addr)
		if err := r.Run(addr); err != nil {
			log.Fatalf("Failed to start server: %v", err)
		}
	}()
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	<-quit
	// NOTE(review): gin's Run has no Shutdown hook, so in-flight requests
	// are not drained here — main simply returns (firing the defers above).
	// TODO: wrap the engine in an http.Server and call Shutdown for a true
	// graceful stop.
	log.Println("Shutting down server...")
}

383
backend/cmd/topics/main.go Normal file
View file

@ -0,0 +1,383 @@
package main
import (
"context"
"log"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"time"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rss2/backend/internal/workers"
)
var (
	logger *log.Logger
	dbPool *pgxpool.Pool
	// Loop tuning defaults; overridable via TOPICS_SLEEP / TOPICS_BATCH
	// (see loadConfig).
	sleepSec = 10  // seconds to wait between polling cycles
	batchSz  = 500 // news rows classified per batch
)
// Topic is a classification target loaded from the topics table.
type Topic struct {
	ID       int64
	Weight   int      // score multiplier applied per matched keyword
	Keywords []string // lowercased, trimmed match terms (see loadTopics)
}
// Country is a geo-classification target loaded from the paises table,
// enriched with hard-coded name aliases (see loadCountries).
type Country struct {
	ID       int64
	Name     string
	Keywords []string // lowercased name plus any configured aliases
}
// init wires the worker's prefixed stdout logger before main runs.
func init() {
	logger = log.New(os.Stdout, "[TOPICS] ", log.LstdFlags)
}
// loadConfig reads loop tuning from the environment, keeping the package
// defaults when a variable is unset or not an integer.
func loadConfig() {
	sleepSec = getEnvInt("TOPICS_SLEEP", 10)
	batchSz = getEnvInt("TOPICS_BATCH", 500)
}
func getEnvInt(key string, defaultValue int) int {
if value := os.Getenv(key); value != "" {
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
}
return defaultValue
}
// ensureSchema creates the topics and news_topics tables and the
// noticias.topics_processed flag column if they do not already exist.
// It returns the first DDL error encountered.
func ensureSchema(ctx context.Context) error {
	_, err := dbPool.Exec(ctx, `
		CREATE TABLE IF NOT EXISTS topics (
			id SERIAL PRIMARY KEY,
			slug VARCHAR(50) UNIQUE NOT NULL,
			name VARCHAR(100) NOT NULL,
			weight INTEGER DEFAULT 1,
			keywords TEXT,
			group_name VARCHAR(50)
		);
	`)
	if err != nil {
		return err
	}
	// Join table linking news rows to matched topics with a match score.
	_, err = dbPool.Exec(ctx, `
		CREATE TABLE IF NOT EXISTS news_topics (
			noticia_id VARCHAR(32) REFERENCES noticias(id) ON DELETE CASCADE,
			topic_id INTEGER REFERENCES topics(id) ON DELETE CASCADE,
			score INTEGER DEFAULT 0,
			created_at TIMESTAMP DEFAULT NOW(),
			PRIMARY KEY (noticia_id, topic_id)
		);
	`)
	if err != nil {
		return err
	}
	// Flag used by fetchPendingNews to find unclassified rows.
	_, err = dbPool.Exec(ctx, `
		ALTER TABLE noticias ADD COLUMN IF NOT EXISTS topics_processed BOOLEAN DEFAULT FALSE;
	`)
	return err
}
// loadTopics fetches every topic and normalizes its comma-separated keyword
// list to lowercased, trimmed entries. Rows that fail to scan are skipped
// silently; a nil keywords column yields a Topic with no keywords.
func loadTopics(ctx context.Context) ([]Topic, error) {
	rows, err := dbPool.Query(ctx, "SELECT id, weight, keywords FROM topics")
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var topics []Topic
	for rows.Next() {
		var t Topic
		// Pointer scan target because the keywords column is nullable.
		var kwStr *string
		if err := rows.Scan(&t.ID, &t.Weight, &kwStr); err != nil {
			continue
		}
		if kwStr != nil {
			keywords := strings.Split(*kwStr, ",")
			for i := range keywords {
				keywords[i] = strings.ToLower(strings.TrimSpace(keywords[i]))
			}
			t.Keywords = keywords
		}
		topics = append(topics, t)
	}
	return topics, nil
}
// loadCountries fetches all countries from the paises table and builds each
// one's keyword list: the lowercased country name plus any hard-coded
// aliases below. Rows that fail to scan are skipped silently.
func loadCountries(ctx context.Context) ([]Country, error) {
	rows, err := dbPool.Query(ctx, "SELECT id, nombre FROM paises")
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	// Hand-curated Spanish-language aliases and demonyms, keyed by the exact
	// country name stored in the DB. Names not listed here match only on
	// their own lowercased name.
	aliases := map[string][]string{
		"Estados Unidos": {"eeuu", "ee.uu.", "usa", "estadounidense", "washington"},
		"Rusia":          {"ruso", "rusa", "moscú", "kremlin"},
		"China":          {"chino", "china", "pekin", "beijing"},
		"Ucrania":        {"ucraniano", "kiev", "kyiv"},
		"Israel":         {"israelí", "tel aviv", "jerusalén"},
		"España":         {"español", "madrid"},
		"Reino Unido":    {"uk", "londres", "británico"},
		"Francia":        {"francés", "parís"},
		"Alemania":       {"alemán", "berlín"},
		"Palestina":      {"palestino", "gaza", "cisjordania"},
		"Irán":           {"iraní", "teherán"},
	}
	var countries []Country
	for rows.Next() {
		var c Country
		if err := rows.Scan(&c.ID, &c.Name); err != nil {
			continue
		}
		// The country's own lowercased name is always a keyword; aliases may
		// duplicate it (e.g. "china"), which only affects the match count.
		c.Keywords = []string{strings.ToLower(c.Name)}
		if kw, ok := aliases[c.Name]; ok {
			c.Keywords = append(c.Keywords, kw...)
		}
		countries = append(countries, c)
	}
	return countries, nil
}
// NewsItem is the subset of a noticias row needed for classification.
// Titulo and Resumen are pointers because the columns are nullable.
type NewsItem struct {
	ID      string
	Titulo  *string
	Resumen *string
}
// fetchPendingNews returns up to `limit` news rows that have not yet been
// topic-classified, newest first. Rows that fail to scan are skipped.
func fetchPendingNews(ctx context.Context, limit int) ([]NewsItem, error) {
	rows, err := dbPool.Query(ctx, `
		SELECT id, titulo, resumen
		FROM noticias
		WHERE topics_processed = FALSE
		ORDER BY fecha DESC
		LIMIT $1
	`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var items []NewsItem
	for rows.Next() {
		var n NewsItem
		if err := rows.Scan(&n.ID, &n.Titulo, &n.Resumen); err != nil {
			continue
		}
		items = append(items, n)
	}
	return items, nil
}
// findTopics scores every topic against the lowercased text. Each keyword
// that appears as a substring counts once (regardless of how many times it
// occurs), and the topic's Weight scales the final score. Topics with no
// keyword hits are omitted from the result.
func findTopics(text string, topics []Topic) []struct {
	TopicID int64
	Score   int
} {
	lowered := strings.ToLower(text)
	var hits []struct {
		TopicID int64
		Score   int
	}
	for _, candidate := range topics {
		matched := 0
		for _, keyword := range candidate.Keywords {
			if strings.Contains(lowered, keyword) {
				matched++
			}
		}
		if matched == 0 {
			continue
		}
		hits = append(hits, struct {
			TopicID int64
			Score   int
		}{candidate.ID, candidate.Weight * matched})
	}
	return hits
}
// findBestCountry returns a pointer to the ID of the country whose keywords
// match the most distinct substrings of text, or nil when no country matches
// at all. Ties keep the earliest country in the slice.
func findBestCountry(text string, countries []Country) *int64 {
	lowered := strings.ToLower(text)
	var winner int64
	top := 0
	for _, candidate := range countries {
		hits := 0
		for _, keyword := range candidate.Keywords {
			if strings.Contains(lowered, keyword) {
				hits++
			}
		}
		// Strictly greater: earlier countries win ties, same as before.
		if hits > top {
			top = hits
			winner = candidate.ID
		}
	}
	if top == 0 {
		return nil
	}
	result := winner
	return &result
}
// processBatch classifies one batch of pending news: for each row it scores
// topics (via findTopics), picks the best-matching country (via
// findBestCountry), upserts the topic relations, updates noticias.pais_id,
// and finally flags every fetched row as processed — even rows with no
// matches, so they are not refetched forever. It returns the number of rows
// fetched (not the number of matches) and the first fatal error.
func processBatch(ctx context.Context, topics []Topic, countries []Country) (int, error) {
	items, err := fetchPendingNews(ctx, batchSz)
	if err != nil {
		return 0, err
	}
	if len(items) == 0 {
		return 0, nil
	}
	// Accumulators for the write phase below.
	type topicMatch struct {
		NoticiaID string
		TopicID   int64
		Score     int
	}
	type countryUpdate struct {
		PaisID    int64
		NoticiaID string
	}
	var topicMatches []topicMatch
	var countryUpdates []countryUpdate
	var processedIDs []string
	for _, item := range items {
		// Classification text is title + summary, skipping NULL columns.
		var text string
		if item.Titulo != nil {
			text += *item.Titulo
		}
		if item.Resumen != nil {
			text += " " + *item.Resumen
		}
		// Find topics
		matches := findTopics(text, topics)
		for _, m := range matches {
			topicMatches = append(topicMatches, topicMatch{item.ID, m.TopicID, m.Score})
		}
		// Find best country
		if countryID := findBestCountry(text, countries); countryID != nil {
			countryUpdates = append(countryUpdates, countryUpdate{*countryID, item.ID})
		}
		processedIDs = append(processedIDs, item.ID)
	}
	// Insert topic relations one by one; individual failures are logged and
	// skipped rather than aborting the batch.
	if len(topicMatches) > 0 {
		for _, tm := range topicMatches {
			_, err := dbPool.Exec(ctx, `
				INSERT INTO news_topics (noticia_id, topic_id, score)
				VALUES ($1, $2, $3)
				ON CONFLICT (noticia_id, topic_id) DO UPDATE SET score = EXCLUDED.score
			`, tm.NoticiaID, tm.TopicID, tm.Score)
			if err != nil {
				logger.Printf("Error inserting topic: %v", err)
			}
		}
	}
	// Update each row's country; failures are likewise logged and skipped.
	if len(countryUpdates) > 0 {
		for _, cu := range countryUpdates {
			_, err := dbPool.Exec(ctx, `
				UPDATE noticias SET pais_id = $1 WHERE id = $2
			`, cu.PaisID, cu.NoticiaID)
			if err != nil {
				logger.Printf("Error updating country: %v", err)
			}
		}
	}
	// Mark every fetched row processed in a single statement; this failure
	// IS fatal, because otherwise the same rows would be reprocessed.
	if len(processedIDs) > 0 {
		_, err := dbPool.Exec(ctx, `
			UPDATE noticias SET topics_processed = TRUE WHERE id = ANY($1)
		`, processedIDs)
		if err != nil {
			return 0, err
		}
	}
	return len(items), nil
}
// main runs the topics worker: it connects to the database, ensures the
// schema, then repeatedly classifies pending news into topics and countries
// until SIGINT/SIGTERM.
//
// Fix over the previous version: the loop combined a single-case select on
// time.After with ADDITIONAL time.Sleep calls, so idle and partial-batch
// iterations waited twice the configured interval. Waiting now happens in
// exactly one place per iteration, and a full batch skips the pause so a
// backlog drains at full speed.
func main() {
	loadConfig()
	logger.Println("Starting Topics Worker")
	cfg := workers.LoadDBConfig()
	if err := workers.Connect(cfg); err != nil {
		logger.Fatalf("Failed to connect to database: %v", err)
	}
	dbPool = workers.GetPool()
	defer workers.Close()
	ctx := context.Background()
	// Best-effort schema setup; the worker still runs if it fails.
	if err := ensureSchema(ctx); err != nil {
		logger.Printf("Error ensuring schema: %v", err)
	}
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		<-sigChan
		logger.Println("Shutting down...")
		os.Exit(0)
	}()
	logger.Printf("Config: sleep=%ds, batch=%d", sleepSec, batchSz)
	// Single, shared pause between polling attempts.
	pause := func() { time.Sleep(time.Duration(sleepSec) * time.Second) }
	for {
		topics, err := loadTopics(ctx)
		if err != nil {
			logger.Printf("Error loading topics: %v", err)
			pause()
			continue
		}
		if len(topics) == 0 {
			logger.Println("No topics found in DB")
			pause()
			continue
		}
		countries, err := loadCountries(ctx)
		if err != nil {
			logger.Printf("Error loading countries: %v", err)
			pause()
			continue
		}
		count, err := processBatch(ctx, topics, countries)
		if err != nil {
			logger.Printf("Error processing batch: %v", err)
			pause()
			continue
		}
		if count > 0 {
			logger.Printf("Processed %d news items", count)
		}
		// A full batch means more rows are likely pending: loop again
		// immediately; otherwise wait one interval before polling again.
		if count < batchSz {
			pause()
		}
	}
}

View file

@ -0,0 +1,267 @@
package main
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"net/url"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"
"time"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rss2/backend/internal/workers"
)
var (
	logger *log.Logger
	pool   *pgxpool.Pool
	// Loop tuning; sleepInterval is overridable via WIKI_SLEEP (see main).
	sleepInterval = 30                      // seconds to sleep when no tags are pending
	batchSize     = 50                      // tags fetched per polling query
	imagesDir     = "/app/data/wiki_images" // local destination for downloaded thumbnails
)
// WikiSummary mirrors the fields consumed from the Wikipedia REST
// /page/summary response (see fetchWikipediaInfo).
type WikiSummary struct {
	Type         string `json:"type"` // e.g. "disambiguation", filtered out by fetchWikipediaInfo
	Title        string `json:"title"`
	DisplayTitle string `json:"displaytitle"`
	Extract      string `json:"extract"` // plain-text summary stored as wiki_summary
	ContentUrls  struct {
		Desktop struct {
			Page string `json:"page"` // canonical desktop article URL
		} `json:"desktop"`
	} `json:"content_urls"`
	// Thumbnail is a pointer because the field is absent for pages
	// without an image.
	Thumbnail *struct {
		Source string `json:"source"`
		Width  int    `json:"width"`
		Height int    `json:"height"`
	} `json:"thumbnail"`
}
// Tag is an entity row from the tags table pending Wikipedia enrichment.
type Tag struct {
	ID    int64
	Valor string // entity name used as the Wikipedia lookup term
	Tipo  string // entity kind; only 'persona'/'organizacion' are fetched
}
// init wires the worker's prefixed stdout logger before main runs.
func init() {
	logger = log.New(os.Stdout, "[WIKI_WORKER] ", log.LstdFlags)
}
// getPendingTags returns up to batchSize person/organization tags that have
// not yet been checked against Wikipedia, most-referenced first (by usage
// count in tags_noticia, then newest ID). Rows that fail to scan are
// skipped silently.
func getPendingTags(ctx context.Context) ([]Tag, error) {
	rows, err := pool.Query(ctx, `
		SELECT t.id, t.valor, t.tipo
		FROM tags t
		LEFT JOIN (
			SELECT tag_id, COUNT(*) as cnt
			FROM tags_noticia
			GROUP BY tag_id
		) c ON c.tag_id = t.id
		WHERE t.tipo IN ('persona', 'organizacion')
		AND t.wiki_checked = FALSE
		ORDER BY COALESCE(c.cnt, 0) DESC, t.id DESC
		LIMIT $1
	`, batchSize)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var tags []Tag
	for rows.Next() {
		var t Tag
		if err := rows.Scan(&t.ID, &t.Valor, &t.Tipo); err == nil {
			tags = append(tags, t)
		}
	}
	return tags, nil
}
// downloadImage fetches imgURL (15s timeout, identifying User-Agent) and
// writes the body to destPath. On any failure after the file is created —
// a copy error or a failed close — the partial file is removed so callers
// never see a truncated image on disk. The previous version ignored the
// deferred Close error and left partial downloads behind.
func downloadImage(imgURL, destPath string) error {
	client := &http.Client{Timeout: 15 * time.Second}
	req, err := http.NewRequest("GET", imgURL, nil)
	if err != nil {
		return err
	}
	req.Header.Set("User-Agent", "RSS2-WikiWorker/1.0 (https://github.com/proyecto/rss2)")
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return fmt.Errorf("HTTP %d", resp.StatusCode)
	}
	out, err := os.Create(destPath)
	if err != nil {
		return err
	}
	if _, err = io.Copy(out, resp.Body); err != nil {
		out.Close()
		os.Remove(destPath) // drop the partial download
		return err
	}
	// Close errors matter on a write path (flush failures surface here).
	if err = out.Close(); err != nil {
		os.Remove(destPath)
		return err
	}
	return nil
}
// fetchWikipediaInfo queries the Spanish Wikipedia REST summary endpoint
// for the given entity name. It returns (nil, nil) for 404s and for
// disambiguation pages — "handled, nothing found" — so the caller can mark
// the tag checked without retrying, and a non-nil error for rate limiting
// (429) or any other failure so the tag will be retried later.
func fetchWikipediaInfo(valor string) (*WikiSummary, error) {
	// Normalize the value to be wiki-compatible
	title := strings.ReplaceAll(strings.TrimSpace(valor), " ", "_")
	encodedTitle := url.PathEscape(title)
	apiURL := fmt.Sprintf("https://es.wikipedia.org/api/rest_v1/page/summary/%s", encodedTitle)
	client := &http.Client{Timeout: 10 * time.Second}
	req, err := http.NewRequest("GET", apiURL, nil)
	if err != nil {
		return nil, err
	}
	// Per MediaWiki API policy: https://meta.wikimedia.org/wiki/User-Agent_policy
	req.Header.Set("User-Agent", "RSS2-WikiWorker/1.0 (pietrelinux@gmail.com)")
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode == 429 {
		return nil, fmt.Errorf("HTTP 429: Too Many Requests (Rate Limited)")
	}
	if resp.StatusCode == 404 {
		return nil, nil // Not found, but handled successfully without error
	}
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
	}
	var summary WikiSummary
	if err := json.NewDecoder(resp.Body).Decode(&summary); err != nil {
		return nil, err
	}
	// Filter out disambiguation pages
	if summary.Type == "disambiguation" {
		return nil, nil // Treat as not found to strictly avoid incorrect tooltips
	}
	return &summary, nil
}
// processTag enriches one tag with Wikipedia data: summary text, article
// URL, and (when available) a locally-downloaded thumbnail. Tags with no
// valid article are marked wiki_checked so they are not retried; fetch
// errors leave the tag unmarked so a later pass retries it.
func processTag(ctx context.Context, tag Tag) {
	logger.Printf("Procesando tag %d: %s", tag.ID, tag.Valor)
	summary, err := fetchWikipediaInfo(tag.Valor)
	if err != nil {
		// Transient failure (network, 429, ...): leave wiki_checked FALSE
		// so the tag is retried on a later cycle.
		logger.Printf("Error al consultar Wikipedia para %s: %v", tag.Valor, err)
		return
	}
	if summary == nil || summary.Extract == "" {
		// Not found or disambiguation: mark checked so we never retry.
		_, _ = pool.Exec(ctx, "UPDATE tags SET wiki_checked = TRUE WHERE id = $1", tag.ID)
		logger.Printf("No se encontraron resultados válidos en Wikipedia para: %s", tag.Valor)
		return
	}
	var localImagePath *string
	if summary.Thumbnail != nil && summary.Thumbnail.Source != "" {
		// Pick the file extension from the source URL; default to .jpg.
		ext := ".jpg"
		if strings.HasSuffix(strings.ToLower(summary.Thumbnail.Source), ".png") {
			ext = ".png"
		}
		fileName := fmt.Sprintf("wiki_%d%s", tag.ID, ext)
		destPath := filepath.Join(imagesDir, fileName)
		if err := downloadImage(summary.Thumbnail.Source, destPath); err != nil {
			logger.Printf("Error descargando imagen para %s: %v", tag.Valor, err)
			// Fall back to storing the external URL when the download fails.
			src := summary.Thumbnail.Source
			localImagePath = &src
		} else {
			// Path served by the API's /api/wiki-images static route.
			relativePath := "/api/wiki-images/" + fileName
			localImagePath = &relativePath
		}
	}
	wikiURL := summary.ContentUrls.Desktop.Page
	_, err = pool.Exec(ctx, `
		UPDATE tags
		SET wiki_summary = $1,
			wiki_url = $2,
			image_path = $3,
			wiki_checked = TRUE
		WHERE id = $4
	`, summary.Extract, wikiURL, localImagePath, tag.ID)
	if err != nil {
		logger.Printf("Error al actualizar la base de datos para tag %d: %v", tag.ID, err)
	} else {
		logger.Printf("Actualizado con éxito: %s (Imagen: %v)", tag.Valor, localImagePath != nil)
	}
}
// main runs the wiki worker: it connects to the database, then loops
// fetching batches of unchecked tags and enriching each from Wikipedia,
// with a fixed delay between requests to respect rate limits.
//
// Bug fix: WIKI_SLEEP was parsed with fmt.Sscanf and the function's first
// return value — the MATCH COUNT, always 1 on success — was assigned to
// sleepInterval, clobbering the value the user configured. The parsed
// integer itself is now used.
func main() {
	if val := os.Getenv("WIKI_SLEEP"); val != "" {
		var parsed int
		if n, err := fmt.Sscanf(val, "%d", &parsed); err == nil && n == 1 && parsed > 0 {
			sleepInterval = parsed
		}
	}
	logger.Println("Iniciando Wiki Worker...")
	if err := os.MkdirAll(imagesDir, 0755); err != nil {
		logger.Fatalf("Error creando directorio de imágenes: %v", err)
	}
	cfg := workers.LoadDBConfig()
	if err := workers.Connect(cfg); err != nil {
		logger.Fatalf("Failed to connect to database: %v", err)
	}
	pool = workers.GetPool()
	defer workers.Close()
	ctx := context.Background()
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		<-sigChan
		logger.Println("Cerrando gracefully...")
		workers.Close()
		os.Exit(0)
	}()
	logger.Printf("Configuración: sleep=%ds, batch=%d", sleepInterval, batchSize)
	for {
		tags, err := getPendingTags(ctx)
		if err != nil {
			logger.Printf("Error recuperando tags pendientes: %v", err)
			time.Sleep(10 * time.Second)
			continue
		}
		if len(tags) == 0 {
			logger.Printf("No hay tags pendientes. Durmiendo %d segundos...", sleepInterval)
			time.Sleep(time.Duration(sleepInterval) * time.Second)
			continue
		}
		logger.Printf("Recuperados %d tags para procesar...", len(tags))
		for _, tag := range tags {
			processTag(ctx, tag)
			time.Sleep(3 * time.Second) // Increased delay to avoid Wikipedia Rate Limits (429)
		}
	}
}

51
backend/go.mod Normal file
View file

@ -0,0 +1,51 @@
module github.com/rss2/backend
go 1.22
require (
github.com/PuerkitoBio/goquery v1.9.2
github.com/gin-gonic/gin v1.9.1
github.com/golang-jwt/jwt/v5 v5.0.0
github.com/google/uuid v1.6.0
github.com/jackc/pgx/v5 v5.4.3
github.com/mmcdole/gofeed v1.2.1
github.com/redis/go-redis/v9 v9.0.5
golang.org/x/crypto v0.26.0
)
require (
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/bytedance/sonic v1.9.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/gabriel-vasile/mimetype v1.4.2 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.14.0 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
github.com/jackc/puddle/v2 v2.2.1 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/cpuid/v2 v2.2.4 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/leodido/go-urn v1.2.4 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mmcdole/goxpp v1.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pelletier/go-toml/v2 v2.0.8 // indirect
github.com/rogpeppe/go-internal v1.14.1 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.11 // indirect
golang.org/x/arch v0.3.0 // indirect
golang.org/x/net v0.28.0 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/sys v0.26.0 // indirect
golang.org/x/text v0.17.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

156
backend/go.sum Normal file
View file

@ -0,0 +1,156 @@
github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE=
github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/bsm/ginkgo/v2 v2.7.0 h1:ItPMPH90RbmZJt5GtkcNvIRuGEdwlBItdNVoyzaNQao=
github.com/bsm/ginkgo/v2 v2.7.0/go.mod h1:AiKlXPm7ItEHNc/2+OkrNG4E0ITzojb9/xWzvQ9XZ9w=
github.com/bsm/gomega v1.26.0 h1:LhQm+AFcgV2M0WyKroMASzAzCAJVpAxQXv4SaI9a69Y=
github.com/bsm/gomega v1.26.0/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM=
github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s=
github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js=
github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/golang-jwt/jwt/v5 v5.0.0 h1:1n1XNM9hk7O9mnQoNBGolZvzebBQ7p93ULHRc28XJUE=
github.com/golang-jwt/jwt/v5 v5.0.0/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgx/v5 v5.4.3 h1:cxFyXhxlvAifxnkKKdlxv8XqUf59tDlYjnV5YYfsJJY=
github.com/jackc/pgx/v5 v5.4.3/go.mod h1:Ig06C2Vu0t5qXC60W8sqIthScaEnFvojjj9dSljmHRA=
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk=
github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mmcdole/gofeed v1.2.1 h1:tPbFN+mfOLcM1kDF1x2c/N68ChbdBatkppdzf/vDe1s=
github.com/mmcdole/gofeed v1.2.1/go.mod h1:2wVInNpgmC85q16QTTuwbuKxtKkHLCDDtf0dCmnrNr4=
github.com/mmcdole/goxpp v1.1.0 h1:WwslZNF7KNAXTFuzRtn/OKZxFLJAAyOA9w82mDz2ZGI=
github.com/mmcdole/goxpp v1.1.0/go.mod h1:v+25+lT2ViuQ7mVxcncQ8ch1URund48oH+jhjiwEgS8=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/redis/go-redis/v9 v9.0.5 h1:CuQcn5HIEeK7BgElubPP8CGtE0KakrnbBSTLjathl5o=
github.com/redis/go-redis/v9 v9.0.5/go.mod h1:WqMKv5vnQbRuZstUwxQI195wHy+t4PuXDOjzMvcuQHk=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=

72
backend/internal/cache/redis.go vendored Normal file
View file

@ -0,0 +1,72 @@
package cache
import (
"context"
"encoding/json"
"fmt"
"time"
"github.com/redis/go-redis/v9"
)
var Client *redis.Client
// Connect parses redisURL, replaces the package-level Client, and verifies
// connectivity with a PING bounded by a 5-second timeout.
// Returns a wrapped error if the URL is invalid or the server is unreachable.
func Connect(redisURL string) error {
	opt, err := redis.ParseURL(redisURL)
	if err != nil {
		return fmt.Errorf("failed to parse redis URL: %w", err)
	}
	Client = redis.NewClient(opt)
	// Bound the liveness check so application startup cannot hang forever.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	if err = Client.Ping(ctx).Err(); err != nil {
		return fmt.Errorf("failed to ping redis: %w", err)
	}
	return nil
}
// Close shuts down the shared Redis client if it was ever connected.
// Safe to call when Connect was never invoked (nil check).
func Close() {
	if Client != nil {
		Client.Close()
	}
}

// GetClient returns the package-level Redis client (nil before Connect).
func GetClient() *redis.Client {
	return Client
}
// SearchKey builds the cache key for a paginated search result set:
// "search:<query>:<lang>:<page>:<perPage>".
func SearchKey(q, language string, pageNum, pageSize int) string {
	key := fmt.Sprintf("search:%s:%s:%d:%d", q, language, pageNum, pageSize)
	return key
}

// NewsKey builds the cache key for a single news item in one language:
// "news:<id>:<lang>".
func NewsKey(id int64, language string) string {
	return fmt.Sprintf("news:%d:%s", id, language)
}

// FeedKey builds the cache key for a feed: "feed:<id>".
func FeedKey(id int64) string {
	return fmt.Sprintf("feed:%d", id)
}
// Set JSON-encodes value and stores it under key with the given expiration.
// Returns the marshal error or the Redis SET error, if any.
// NOTE(review): assumes Connect succeeded; Client is nil otherwise.
func Set(ctx context.Context, key string, value interface{}, expiration time.Duration) error {
	data, err := json.Marshal(value)
	if err != nil {
		return err
	}
	return Client.Set(ctx, key, data, expiration).Err()
}

// Get returns the raw (JSON) string stored under key; callers typically
// decode it with Unmarshal. Redis' "nil" error signals a cache miss.
func Get(ctx context.Context, key string) (string, error) {
	return Client.Get(ctx, key).Result()
}

// Unmarshal decodes cached JSON bytes into v (thin wrapper over encoding/json).
func Unmarshal(data []byte, v interface{}) error {
	return json.Unmarshal(data, v)
}

// Marshal encodes v as JSON bytes (thin wrapper over encoding/json).
func Marshal(v interface{}) ([]byte, error) {
	return json.Marshal(v)
}

View file

@ -0,0 +1,66 @@
package config
import (
"os"
"strconv"
"time"
)
// Config holds all runtime settings for the backend, populated from
// environment variables (with defaults) by Load.
type Config struct {
	ServerPort         string        // HTTP listen port (SERVER_PORT)
	DatabaseURL        string        // Postgres connection string (DATABASE_URL)
	RedisURL           string        // Redis connection URL (REDIS_URL)
	QdrantHost         string        // Qdrant vector-DB host (QDRANT_HOST)
	QdrantPort         int           // Qdrant port (QDRANT_PORT)
	SecretKey          string        // JWT signing secret (SECRET_KEY)
	JWTExpiration      time.Duration // token lifetime (JWT_EXPIRATION, Go duration syntax)
	TranslationURL     string        // translation service base URL (TRANSLATION_URL)
	OllamaURL          string        // Ollama LLM service URL (OLLAMA_URL)
	SpacyURL           string        // spaCy NER service URL (SPACY_URL)
	DefaultLang        string        // default content language code (DEFAULT_LANG)
	NewsPerPage        int           // default pagination size (NEWS_PER_PAGE)
	RateLimitPerMinute int           // request rate limit (RATE_LIMIT_PER_MINUTE)
}
// Load builds a Config from the environment, falling back to the defaults
// shown below for any unset variable. It never fails: malformed numeric or
// duration values silently fall back to their defaults (see getEnvInt /
// getEnvDuration).
func Load() *Config {
	return &Config{
		ServerPort:         getEnv("SERVER_PORT", "8080"),
		DatabaseURL:        getEnv("DATABASE_URL", "postgres://rss:rss@localhost:5432/rss"),
		RedisURL:           getEnv("REDIS_URL", "redis://localhost:6379"),
		QdrantHost:         getEnv("QDRANT_HOST", "localhost"),
		QdrantPort:         getEnvInt("QDRANT_PORT", 6333),
		// Insecure default secret: deployments must override SECRET_KEY.
		SecretKey:          getEnv("SECRET_KEY", "change-this-secret-key"),
		JWTExpiration:      getEnvDuration("JWT_EXPIRATION", 24*time.Hour),
		TranslationURL:     getEnv("TRANSLATION_URL", "http://libretranslate:7790"),
		OllamaURL:          getEnv("OLLAMA_URL", "http://ollama:11434"),
		SpacyURL:           getEnv("SPACY_URL", "http://spacy:8000"),
		DefaultLang:        getEnv("DEFAULT_LANG", "es"),
		NewsPerPage:        getEnvInt("NEWS_PER_PAGE", 30),
		RateLimitPerMinute: getEnvInt("RATE_LIMIT_PER_MINUTE", 60),
	}
}
func getEnv(key, defaultValue string) string {
if value := os.Getenv(key); value != "" {
return value
}
return defaultValue
}
func getEnvInt(key string, defaultValue int) int {
if value := os.Getenv(key); value != "" {
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
}
return defaultValue
}
func getEnvDuration(key string, defaultValue time.Duration) time.Duration {
if value := os.Getenv(key); value != "" {
if duration, err := time.ParseDuration(value); err == nil {
return duration
}
}
return defaultValue
}

View file

@ -0,0 +1,44 @@
package db
import (
"context"
"fmt"
"time"
"github.com/jackc/pgx/v5/pgxpool"
)
var Pool *pgxpool.Pool
// Connect initializes the package-level pgx connection pool from databaseURL,
// applies fixed pool sizing/lifetime settings, and verifies connectivity with
// a ping. Returns a wrapped error on parse, pool-creation, or ping failure.
func Connect(databaseURL string) error {
	config, err := pgxpool.ParseConfig(databaseURL)
	if err != nil {
		return fmt.Errorf("failed to parse database URL: %w", err)
	}
	// Fixed pool tuning; not currently configurable via environment.
	config.MaxConns = 25
	config.MinConns = 5
	config.MaxConnLifetime = time.Hour
	config.MaxConnIdleTime = 30 * time.Minute
	Pool, err = pgxpool.NewWithConfig(context.Background(), config)
	if err != nil {
		return fmt.Errorf("failed to create pool: %w", err)
	}
	if err = Pool.Ping(context.Background()); err != nil {
		return fmt.Errorf("failed to ping database: %w", err)
	}
	return nil
}
// Close releases the shared connection pool; safe to call when Connect was
// never invoked.
func Close() {
	if Pool != nil {
		Pool.Close()
	}
}

// GetPool returns the package-level pgx pool (nil before Connect).
func GetPool() *pgxpool.Pool {
	return Pool
}

View file

@ -0,0 +1,760 @@
package handlers
import (
"archive/zip"
"bytes"
"context"
"encoding/csv"
"fmt"
"net/http"
"os"
"os/exec"
"strconv"
"strings"
"time"
"github.com/gin-gonic/gin"
"github.com/rss2/backend/internal/db"
"github.com/rss2/backend/internal/models"
)
// CreateAlias registers one or more aliases for a canonical entity name and
// merges existing tag data for those aliases into the canonical tag.
// All work happens inside a single transaction:
//  1. upsert the canonical (valor, tipo) row in tags,
//  2. upsert each alias mapping in entity_aliases,
//  3. if an alias itself exists as a tag, move its tags_noticia mentions to
//     the canonical tag (skipping rows that would duplicate an existing
//     mention), delete the leftover duplicates, and drop the old tag row.
func CreateAlias(c *gin.Context) {
	var req models.EntityAliasRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request", "message": err.Error()})
		return
	}
	ctx := c.Request.Context()
	tx, err := db.GetPool().Begin(ctx)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction", "message": err.Error()})
		return
	}
	// Rollback is a no-op after a successful Commit.
	defer tx.Rollback(ctx)
	// 1. Ensure the canonical tag exists in tags table.
	// The self-assigning DO UPDATE makes RETURNING id yield the id even when
	// the row already existed (a plain DO NOTHING would return no row).
	var canonicalTagId int
	err = tx.QueryRow(ctx, `
		INSERT INTO tags (valor, tipo) VALUES ($1, $2)
		ON CONFLICT (valor, tipo) DO UPDATE SET valor = EXCLUDED.valor
		RETURNING id`, req.CanonicalName, req.Tipo).Scan(&canonicalTagId)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to ensure canonical tag", "message": err.Error()})
		return
	}
	for _, alias := range req.Aliases {
		alias = strings.TrimSpace(alias)
		if alias == "" {
			continue
		}
		// Insert the alias mapping into entity_aliases (re-pointing an
		// existing alias to the new canonical name on conflict).
		_, err = tx.Exec(ctx, `
			INSERT INTO entity_aliases (canonical_name, alias, tipo)
			VALUES ($1, $2, $3)
			ON CONFLICT (alias, tipo) DO UPDATE SET canonical_name = EXCLUDED.canonical_name`,
			req.CanonicalName, alias, req.Tipo)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to insert alias", "message": err.Error()})
			return
		}
		// 2. Check if the original alias string actually exists as a tag.
		var aliasTagId int
		err = tx.QueryRow(ctx, "SELECT id FROM tags WHERE valor = $1 AND tipo = $2", alias, req.Tipo).Scan(&aliasTagId)
		if err == nil && aliasTagId != 0 && aliasTagId != canonicalTagId {
			// 3. Move all mentions in tags_noticia to the canonical tag id;
			// the NOT EXISTS guard avoids creating a duplicate
			// (tag_id, noticia_id, traduccion_id) mention.
			_, err = tx.Exec(ctx, `
				UPDATE tags_noticia
				SET tag_id = $1
				WHERE tag_id = $2 AND NOT EXISTS (
					SELECT 1 FROM tags_noticia tn2
					WHERE tn2.tag_id = $1 AND tn2.noticia_id = tags_noticia.noticia_id AND tn2.traduccion_id = tags_noticia.traduccion_id
				)
			`, canonicalTagId, aliasTagId)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to reassign news mentions safely", "message": err.Error()})
				return
			}
			// Delete any remaining orphaned mentions of the alias that
			// couldn't be merged (duplicates skipped by the guard above).
			_, err = tx.Exec(ctx, "DELETE FROM tags_noticia WHERE tag_id = $1", aliasTagId)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete orphaned mentions", "message": err.Error()})
				return
			}
			// 4. Delete the original alias tag, now fully merged.
			_, err = tx.Exec(ctx, "DELETE FROM tags WHERE id = $1", aliasTagId)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete old tag", "message": err.Error()})
				return
			}
		}
	}
	if err := tx.Commit(ctx); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": err.Error()})
		return
	}
	c.JSON(http.StatusCreated, gin.H{
		"message":        "Aliases created and metrics merged successfully",
		"canonical_name": req.CanonicalName,
		"aliases_added":  req.Aliases,
		"tipo":           req.Tipo,
	})
}
// ExportAliases streams the entire entity_aliases table as a CSV download
// (columns: alias, canonical_name, tipo), ordered by tipo then canonical name.
// Fix: row scan errors were previously ignored, which could emit stale values
// from the prior iteration; such rows are now skipped.
func ExportAliases(c *gin.Context) {
	rows, err := db.GetPool().Query(c.Request.Context(),
		"SELECT alias, canonical_name, tipo FROM entity_aliases ORDER BY tipo, canonical_name")
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get aliases", "message": err.Error()})
		return
	}
	defer rows.Close()
	// Headers must be set before the first write to c.Writer.
	c.Header("Content-Type", "text/csv")
	c.Header("Content-Disposition", "attachment; filename=aliases.csv")
	c.Header("Cache-Control", "no-cache")
	writer := csv.NewWriter(c.Writer)
	writer.Write([]string{"alias", "canonical_name", "tipo"})
	for rows.Next() {
		var alias, canonical, tipo string
		if err := rows.Scan(&alias, &canonical, &tipo); err != nil {
			// Skip unscannable rows rather than writing garbage; the HTTP
			// status is already committed, so we cannot report an error body.
			continue
		}
		writer.Write([]string{alias, canonical, tipo})
	}
	writer.Flush()
}
// ImportAliases bulk-loads entity aliases from an uploaded CSV file
// (multipart field "file"; columns: alias, canonical_name, tipo; first row is
// a header and is skipped). Rows with too few columns or empty alias/canonical
// values are counted as skipped. All inserts run in one transaction; a failed
// insert is logged and skipped rather than aborting the import.
func ImportAliases(c *gin.Context) {
	file, err := c.FormFile("file")
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "No file uploaded"})
		return
	}
	src, err := file.Open()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to open file"})
		return
	}
	defer src.Close()
	reader := csv.NewReader(src)
	// Whole file is read into memory; fine for admin-sized CSVs.
	records, err := reader.ReadAll()
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Failed to parse CSV", "message": err.Error()})
		return
	}
	if len(records) < 2 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "CSV file is empty or has no data rows"})
		return
	}
	// NOTE(review): other handlers use c.Request.Context(); Background() here
	// means the import keeps running even if the client disconnects.
	ctx := context.Background()
	tx, err := db.GetPool().Begin(ctx)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction"})
		return
	}
	defer tx.Rollback(ctx)
	inserted := 0
	skipped := 0
	// records[0] is the header row.
	for i, record := range records[1:] {
		if len(record) < 3 {
			skipped++
			continue
		}
		alias := strings.TrimSpace(record[0])
		canonical := strings.TrimSpace(record[1])
		tipo := strings.TrimSpace(record[2])
		if alias == "" || canonical == "" {
			skipped++
			continue
		}
		// Upsert: an existing (alias, tipo) is re-pointed to the new canonical.
		_, err = tx.Exec(ctx,
			"INSERT INTO entity_aliases (alias, canonical_name, tipo) VALUES ($1, $2, $3) ON CONFLICT (alias, tipo) DO UPDATE SET canonical_name = $2",
			alias, canonical, tipo)
		if err != nil {
			fmt.Printf("Error inserting row %d: %v\n", i+1, err)
			skipped++
			continue
		}
		inserted++
	}
	if err := tx.Commit(ctx); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": err.Error()})
		return
	}
	c.JSON(http.StatusOK, gin.H{
		"message":  "Import completed",
		"inserted": inserted,
		"skipped":  skipped,
	})
}
// GetAdminStats returns simple dashboard counters (total users and aliases).
// Query errors are deliberately ignored; a failed count reports as 0.
func GetAdminStats(c *gin.Context) {
	var totalUsers, totalAliases int
	db.GetPool().QueryRow(c.Request.Context(), "SELECT COUNT(*) FROM users").Scan(&totalUsers)
	db.GetPool().QueryRow(c.Request.Context(), "SELECT COUNT(*) FROM entity_aliases").Scan(&totalAliases)
	c.JSON(http.StatusOK, gin.H{
		"total_users":   totalUsers,
		"total_aliases": totalAliases,
	})
}
// GetUsers lists all users (newest first) for the admin panel.
// Rows that fail to scan are silently dropped; the response always contains
// a (possibly empty) "users" array plus its length as "total".
func GetUsers(c *gin.Context) {
	rows, err := db.GetPool().Query(c.Request.Context(), `
		SELECT id, email, username, is_admin, created_at, updated_at
		FROM users ORDER BY created_at DESC`)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get users", "message": err.Error()})
		return
	}
	defer rows.Close()
	type UserRow struct {
		ID        int64  `json:"id"`
		Email     string `json:"email"`
		Username  string `json:"username"`
		IsAdmin   bool   `json:"is_admin"`
		// NOTE(review): timestamps scanned into string — relies on pgx
		// text conversion of timestamp columns; confirm against schema.
		CreatedAt string `json:"created_at"`
		UpdatedAt string `json:"updated_at"`
	}
	var users []UserRow
	for rows.Next() {
		var u UserRow
		if err := rows.Scan(&u.ID, &u.Email, &u.Username, &u.IsAdmin, &u.CreatedAt, &u.UpdatedAt); err != nil {
			continue
		}
		users = append(users, u)
	}
	// Ensure JSON "[]" rather than "null" when there are no users.
	if users == nil {
		users = []UserRow{}
	}
	c.JSON(http.StatusOK, gin.H{"users": users, "total": len(users)})
}
// setAdminFlag runs one of the fixed is_admin update queries against the user
// id taken from the URL parameter and writes the full HTTP response.
// query, failMsg and okMsg preserve the exact wording of the promote/demote
// endpoints.
func setAdminFlag(c *gin.Context, query, failMsg, okMsg string) {
	userID, convErr := strconv.Atoi(c.Param("id"))
	if convErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid user ID"})
		return
	}
	res, execErr := db.GetPool().Exec(c.Request.Context(), query, userID)
	if execErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": failMsg, "message": execErr.Error()})
		return
	}
	if res.RowsAffected() == 0 {
		c.JSON(http.StatusNotFound, gin.H{"error": "User not found"})
		return
	}
	c.JSON(http.StatusOK, gin.H{"message": okMsg})
}

// PromoteUser grants admin rights to the user identified by :id.
func PromoteUser(c *gin.Context) {
	setAdminFlag(c,
		"UPDATE users SET is_admin = true WHERE id = $1",
		"Failed to promote user",
		"User promoted to admin")
}

// DemoteUser revokes admin rights from the user identified by :id.
func DemoteUser(c *gin.Context) {
	setAdminFlag(c,
		"UPDATE users SET is_admin = false WHERE id = $1",
		"Failed to demote user",
		"User demoted from admin")
}
// ResetDatabase wipes all content tables inside a single transaction and
// reports the cleared table list.
// Fix: tables are now deleted child-first. The previous order deleted parents
// (noticias, tags) before the link table tags_noticia and before traducciones,
// which aborts the whole reset under plain (non-CASCADE) foreign keys.
// NOTE(review): the child-first ordering assumes the usual FK layout
// (tags_noticia -> tags/noticias/traducciones, traducciones -> noticias);
// verify against the actual schema.
func ResetDatabase(c *gin.Context) {
	ctx := c.Request.Context()
	// Link/child tables first, then their parents.
	tables := []string{
		"tags_noticia",
		"traducciones",
		"favoritos",
		"videos",
		"video_parrillas",
		"eventos",
		"search_history",
		"noticias",
		"tags",
		"entity_aliases",
		"feeds",
	}
	tx, err := db.GetPool().Begin(ctx)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction"})
		return
	}
	defer tx.Rollback(ctx)
	for _, table := range tables {
		// Table names come from the fixed list above, never from user input,
		// so string concatenation is safe here.
		_, err = tx.Exec(ctx, "DELETE FROM "+table)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete from " + table, "message": err.Error()})
			return
		}
	}
	if err := tx.Commit(ctx); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": err.Error()})
		return
	}
	c.JSON(http.StatusOK, gin.H{
		"message":        "Database reset successfully. All data has been deleted.",
		"tables_cleared": tables,
	})
}
// WorkerConfig is the request/response shape for translator-worker control
// endpoints: "type" is "cpu" or "gpu", "workers" the container replica count,
// "status" either "running" or "stopped".
type WorkerConfig struct {
	Type    string `json:"type"`
	Workers int    `json:"workers"`
	Status  string `json:"status"`
}
// GetWorkerStatus reports the translator worker configuration stored in the
// config table (type/workers/status, with defaults cpu/2/stopped when the
// keys are absent) and a best-effort count of running containers obtained by
// shelling out to `docker compose ps`.
func GetWorkerStatus(c *gin.Context) {
	var translatorType, translatorWorkers, translatorStatus string
	err := db.GetPool().QueryRow(c.Request.Context(), "SELECT value FROM config WHERE key = 'translator_type'").Scan(&translatorType)
	if err != nil {
		translatorType = "cpu"
	}
	err = db.GetPool().QueryRow(c.Request.Context(), "SELECT value FROM config WHERE key = 'translator_workers'").Scan(&translatorWorkers)
	if err != nil {
		translatorWorkers = "2"
	}
	err = db.GetPool().QueryRow(c.Request.Context(), "SELECT value FROM config WHERE key = 'translator_status'").Scan(&translatorStatus)
	if err != nil {
		translatorStatus = "stopped"
	}
	workers, _ := strconv.Atoi(translatorWorkers)
	// Check whether the containers are actually running.
	// NOTE(review): any non-empty `docker compose ps -q translator` output is
	// treated as "all configured workers running" — this is an approximation.
	runningCount := 0
	if translatorStatus == "running" {
		cmd := exec.Command("docker", "compose", "ps", "-q", "translator")
		output, _ := cmd.Output()
		if len(output) > 0 {
			runningCount = workers
		}
	}
	c.JSON(http.StatusOK, gin.H{
		"type":    translatorType,
		"workers": workers,
		"status":  translatorStatus,
		"running": runningCount,
	})
}
// SetWorkerConfig validates and persists the translator worker settings
// (type must be "cpu"/"gpu", workers 1..8) into the config table.
// It only stores configuration; StartWorkers/StopWorkers apply it.
func SetWorkerConfig(c *gin.Context) {
	var req WorkerConfig
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request", "message": err.Error()})
		return
	}
	if req.Type != "cpu" && req.Type != "gpu" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Type must be 'cpu' or 'gpu'"})
		return
	}
	if req.Workers < 1 || req.Workers > 8 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Workers must be between 1 and 8"})
		return
	}
	ctx := c.Request.Context()
	// NOTE(review): plain UPDATEs — if the config keys don't pre-exist, these
	// affect zero rows without error.
	_, err := db.GetPool().Exec(ctx, "UPDATE config SET value = $1, updated_at = NOW() WHERE key = 'translator_type'", req.Type)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update translator_type"})
		return
	}
	_, err = db.GetPool().Exec(ctx, "UPDATE config SET value = $1, updated_at = NOW() WHERE key = 'translator_workers'", strconv.Itoa(req.Workers))
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update translator_workers"})
		return
	}
	c.JSON(http.StatusOK, gin.H{
		"message": "Worker configuration updated",
		"type":    req.Type,
		"workers": req.Workers,
		"status":  req.Status,
	})
}
// StartWorkers (re)launches translator containers via `docker compose`.
// Effective type/worker-count come from the request body when provided,
// otherwise from the config table (defaults cpu/2); the count is clamped to
// 1..8. Any existing translator containers are stopped first, then the chosen
// service ("translator" or "translator-gpu") is scaled up, and the resulting
// state is persisted back to the config table.
func StartWorkers(c *gin.Context) {
	var req WorkerConfig
	// Body is optional; bind errors are intentionally ignored.
	c.ShouldBindJSON(&req)
	ctx := c.Request.Context()
	// Fetch current configuration.
	var translatorType, translatorWorkers string
	err := db.GetPool().QueryRow(ctx, "SELECT value FROM config WHERE key = 'translator_type'").Scan(&translatorType)
	if err != nil || translatorType == "" {
		translatorType = "cpu"
	}
	err = db.GetPool().QueryRow(ctx, "SELECT value FROM config WHERE key = 'translator_workers'").Scan(&translatorWorkers)
	if err != nil || translatorWorkers == "" {
		translatorWorkers = "2"
	}
	// Request body overrides stored configuration.
	if req.Type != "" {
		translatorType = req.Type
	}
	if req.Workers > 0 {
		translatorWorkers = strconv.Itoa(req.Workers)
	}
	workers, _ := strconv.Atoi(translatorWorkers)
	if workers < 1 {
		workers = 2
	}
	if workers > 8 {
		workers = 8
	}
	// Decide which compose service to start.
	serviceName := "translator"
	if translatorType == "gpu" {
		serviceName = "translator-gpu"
	}
	// Stop any existing translator containers (errors ignored: best effort).
	// NOTE(review): compose project path is hard-coded to /datos/rss2.
	stopCmd := exec.Command("docker", "compose", "stop", "translator", "translator-gpu")
	stopCmd.Dir = "/datos/rss2"
	stopCmd.Run()
	// Start with the requested number of workers.
	startCmd := exec.Command("docker", "compose", "up", "-d", "--scale", fmt.Sprintf("%s=%d", serviceName, workers), serviceName)
	startCmd.Dir = "/datos/rss2"
	output, err := startCmd.CombinedOutput()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to start workers",
			"details": string(output),
		})
		return
	}
	// Persist the new state (errors ignored: containers already started).
	db.GetPool().Exec(ctx, "UPDATE config SET value = 'running', updated_at = NOW() WHERE key = 'translator_status'")
	db.GetPool().Exec(ctx, "UPDATE config SET value = $1, updated_at = NOW() WHERE key = 'translator_type'", translatorType)
	db.GetPool().Exec(ctx, "UPDATE config SET value = $1, updated_at = NOW() WHERE key = 'translator_workers'", translatorWorkers)
	c.JSON(http.StatusOK, gin.H{
		"message": "Workers started successfully",
		"type":    translatorType,
		"workers": workers,
		"status":  "running",
	})
}
// StopWorkers stops both translator compose services (cpu and gpu variants)
// and records "stopped" in the config table.
func StopWorkers(c *gin.Context) {
	// Stop translator containers via docker compose.
	cmd := exec.Command("docker", "compose", "stop", "translator", "translator-gpu")
	cmd.Dir = "/datos/rss2"
	output, err := cmd.CombinedOutput()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to stop workers",
			"details": string(output),
		})
		return
	}
	// Persist state in the DB (error ignored: containers already stopped).
	db.GetPool().Exec(c.Request.Context(), "UPDATE config SET value = 'stopped', updated_at = NOW() WHERE key = 'translator_status'")
	c.JSON(http.StatusOK, gin.H{
		"message": "Workers stopped successfully",
		"status":  "stopped",
	})
}
// PatchEntityTipo changes the tipo of all tags matching a given valor,
// merging them into a single tag of the requested tipo. In one transaction it
// finds every tag with that valor and a different tipo, upserts the target
// (valor, new_tipo) tag, re-points each old tag's tags_noticia mentions to the
// target (skipping would-be duplicates), deletes leftover duplicate mentions,
// and drops the old tag rows.
func PatchEntityTipo(c *gin.Context) {
	var req struct {
		Valor   string `json:"valor" binding:"required"`
		NewTipo string `json:"new_tipo" binding:"required"`
	}
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request", "message": err.Error()})
		return
	}
	validTipos := map[string]bool{"persona": true, "organizacion": true, "lugar": true, "tema": true}
	if !validTipos[req.NewTipo] {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid tipo. Must be persona, organizacion, lugar or tema"})
		return
	}
	ctx := c.Request.Context()
	tx, err := db.GetPool().Begin(ctx)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction", "message": err.Error()})
		return
	}
	defer tx.Rollback(ctx)
	// Since we don't know the exact old Tipo, we find all tags with this
	// valor that ARE NOT already the new tipo.
	rows, err := tx.Query(ctx, "SELECT id, tipo FROM tags WHERE valor = $1 AND tipo != $2", req.Valor, req.NewTipo)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to fetch existing tags", "message": err.Error()})
		return
	}
	type OldTag struct {
		ID   int
		Tipo string
	}
	var tagsToMove []OldTag
	for rows.Next() {
		var ot OldTag
		if err := rows.Scan(&ot.ID, &ot.Tipo); err == nil {
			tagsToMove = append(tagsToMove, ot)
		}
	}
	// Close before issuing further statements on the same transaction.
	rows.Close()
	if len(tagsToMove) == 0 {
		c.JSON(http.StatusOK, gin.H{"message": "No entities found to update or already the requested tipo"})
		return
	}
	// Make sure the target tag (valor, new_tipo) exists; the self-assigning
	// DO UPDATE lets RETURNING id work for pre-existing rows.
	var targetTagId int
	err = tx.QueryRow(ctx, `
		INSERT INTO tags (valor, tipo) VALUES ($1, $2)
		ON CONFLICT (valor, tipo) DO UPDATE SET valor = EXCLUDED.valor
		RETURNING id`, req.Valor, req.NewTipo).Scan(&targetTagId)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to ensure target tag", "message": err.Error()})
		return
	}
	totalMoved := 0
	for _, old := range tagsToMove {
		if old.ID == targetTagId {
			continue
		}
		// Move valid tags_noticia references to the target tag id; the
		// NOT EXISTS guard prevents duplicate (tag, noticia, traduccion) rows.
		res, err := tx.Exec(ctx, `
			UPDATE tags_noticia
			SET tag_id = $1
			WHERE tag_id = $2 AND NOT EXISTS (
				SELECT 1 FROM tags_noticia tn2
				WHERE tn2.tag_id = $1 AND tn2.noticia_id = tags_noticia.noticia_id AND tn2.traduccion_id = tags_noticia.traduccion_id
			)
		`, targetTagId, old.ID)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to reassign news mentions", "message": err.Error()})
			return
		}
		totalMoved += int(res.RowsAffected())
		// Delete any remaining orphaned mentions (duplicates skipped above).
		_, err = tx.Exec(ctx, "DELETE FROM tags_noticia WHERE tag_id = $1", old.ID)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete orphaned mentions", "message": err.Error()})
			return
		}
		// Delete the old tag since it's now merged.
		_, err = tx.Exec(ctx, "DELETE FROM tags WHERE id = $1", old.ID)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete old tag", "message": err.Error()})
			return
		}
	}
	if err := tx.Commit(ctx); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": err.Error()})
		return
	}
	c.JSON(http.StatusOK, gin.H{
		"message":       "Entity tipo updated and merged successfully",
		"valor":         req.Valor,
		"new_tipo":      req.NewTipo,
		"tags_merged":   len(tagsToMove),
		"rows_affected": totalMoved,
	})
}
// BackupDatabase runs pg_dump and returns the SQL as a downloadable file.
// Connection parameters come from DB_HOST/DB_PORT/DB_NAME/DB_USER/DB_PASS
// env vars (defaults: db/5432/rss/rss); the password is passed through the
// child process env as PGPASSWORD, never on the command line.
// The full dump is buffered in memory before being sent.
func BackupDatabase(c *gin.Context) {
	dbHost := os.Getenv("DB_HOST")
	if dbHost == "" {
		dbHost = "db"
	}
	dbPort := os.Getenv("DB_PORT")
	if dbPort == "" {
		dbPort = "5432"
	}
	dbName := os.Getenv("DB_NAME")
	if dbName == "" {
		dbName = "rss"
	}
	dbUser := os.Getenv("DB_USER")
	if dbUser == "" {
		dbUser = "rss"
	}
	dbPass := os.Getenv("DB_PASS")
	cmd := exec.Command("pg_dump",
		"-h", dbHost,
		"-p", dbPort,
		"-U", dbUser,
		"-d", dbName,
		"--no-password",
	)
	cmd.Env = append(os.Environ(), fmt.Sprintf("PGPASSWORD=%s", dbPass))
	var out bytes.Buffer
	var stderr bytes.Buffer
	cmd.Stdout = &out
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "pg_dump failed",
			"details": stderr.String(),
		})
		return
	}
	filename := fmt.Sprintf("backup_%s.sql", time.Now().Format("2006-01-02_15-04-05"))
	c.Header("Content-Type", "application/octet-stream")
	c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=%s", filename))
	c.Header("Cache-Control", "no-cache")
	c.Data(http.StatusOK, "application/octet-stream", out.Bytes())
}
// BackupNewsZipped performs a pg_dump of the news-related tables only
// (noticias, traducciones, tags, tags_noticia) and returns the dump wrapped
// in a ZIP file. Connection settings mirror BackupDatabase (env vars with
// defaults, PGPASSWORD via child env). Dump and ZIP are built in memory.
func BackupNewsZipped(c *gin.Context) {
	dbHost := os.Getenv("DB_HOST")
	if dbHost == "" {
		dbHost = "db"
	}
	dbPort := os.Getenv("DB_PORT")
	if dbPort == "" {
		dbPort = "5432"
	}
	dbName := os.Getenv("DB_NAME")
	if dbName == "" {
		dbName = "rss"
	}
	dbUser := os.Getenv("DB_USER")
	if dbUser == "" {
		dbUser = "rss"
	}
	dbPass := os.Getenv("DB_PASS")
	// Tables to backup.
	tables := []string{"noticias", "traducciones", "tags", "tags_noticia"}
	args := []string{
		"-h", dbHost,
		"-p", dbPort,
		"-U", dbUser,
		"-d", dbName,
		"--no-password",
	}
	// One -t flag per table restricts the dump to the news tables.
	for _, table := range tables {
		args = append(args, "-t", table)
	}
	cmd := exec.Command("pg_dump", args...)
	cmd.Env = append(os.Environ(), fmt.Sprintf("PGPASSWORD=%s", dbPass))
	var sqlOut bytes.Buffer
	var stderr bytes.Buffer
	cmd.Stdout = &sqlOut
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "pg_dump failed",
			"details": stderr.String(),
		})
		return
	}
	// Create ZIP with a single SQL entry.
	buf := new(bytes.Buffer)
	zw := zip.NewWriter(buf)
	sqlFileName := fmt.Sprintf("backup_noticias_%s.sql", time.Now().Format("2006-01-02"))
	f, err := zw.Create(sqlFileName)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create ZIP entry", "message": err.Error()})
		return
	}
	_, err = f.Write(sqlOut.Bytes())
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to write to ZIP", "message": err.Error()})
		return
	}
	// Close flushes the central directory; the archive is invalid without it.
	if err := zw.Close(); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to close ZIP writer", "message": err.Error()})
		return
	}
	filename := fmt.Sprintf("backup_noticias_%s.zip", time.Now().Format("2006-01-02_15-04-05"))
	c.Header("Content-Type", "application/zip")
	c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=%s", filename))
	c.Header("Cache-Control", "no-cache")
	c.Data(http.StatusOK, "application/zip", buf.Bytes())
}

View file

@ -0,0 +1,183 @@
package handlers
import (
"net/http"
"time"
"github.com/gin-gonic/gin"
"github.com/golang-jwt/jwt/v5"
"github.com/rss2/backend/internal/config"
"github.com/rss2/backend/internal/db"
"github.com/rss2/backend/internal/models"
"golang.org/x/crypto/bcrypt"
)
var jwtSecret []byte
// CheckFirstUser tells the frontend whether registration would create the
// very first account (which Register then makes an admin automatically).
func CheckFirstUser(c *gin.Context) {
	var count int
	err := db.GetPool().QueryRow(c.Request.Context(), "SELECT COUNT(*) FROM users").Scan(&count)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to check users"})
		return
	}
	c.JSON(http.StatusOK, gin.H{"is_first_user": count == 0, "total_users": count})
}
// InitAuth sets the package-level HMAC secret used to sign and verify JWTs.
// Called from this package's init() with the configured SECRET_KEY.
func InitAuth(secret string) {
	jwtSecret = []byte(secret)
}

// Claims is the JWT payload issued by Login/Register: user identity plus the
// standard registered claims (expiry, issued-at).
type Claims struct {
	UserID   int64  `json:"user_id"`
	Email    string `json:"email"`
	Username string `json:"username"`
	IsAdmin  bool   `json:"is_admin"`
	jwt.RegisteredClaims
}
// Login authenticates by email + password (bcrypt) and returns a signed
// HS256 JWT plus the user record. Unknown email and wrong password both
// return the same "Invalid credentials" response to avoid user enumeration.
// NOTE(review): token lifetime is hardcoded to 24h here instead of using
// config.JWTExpiration — confirm which should win.
func Login(c *gin.Context) {
	var req models.LoginRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid request", Message: err.Error()})
		return
	}
	var user models.User
	err := db.GetPool().QueryRow(c.Request.Context(), `
		SELECT id, email, username, password_hash, is_admin, created_at, updated_at
		FROM users WHERE email = $1`, req.Email).Scan(
		&user.ID, &user.Email, &user.Username, &user.PasswordHash, &user.IsAdmin,
		&user.CreatedAt, &user.UpdatedAt,
	)
	if err != nil {
		c.JSON(http.StatusUnauthorized, models.ErrorResponse{Error: "Invalid credentials"})
		return
	}
	if err := bcrypt.CompareHashAndPassword([]byte(user.PasswordHash), []byte(req.Password)); err != nil {
		c.JSON(http.StatusUnauthorized, models.ErrorResponse{Error: "Invalid credentials"})
		return
	}
	expirationTime := time.Now().Add(24 * time.Hour)
	claims := &Claims{
		UserID:   user.ID,
		Email:    user.Email,
		Username: user.Username,
		IsAdmin:  user.IsAdmin,
		RegisteredClaims: jwt.RegisteredClaims{
			ExpiresAt: jwt.NewNumericDate(expirationTime),
			IssuedAt:  jwt.NewNumericDate(time.Now()),
		},
	}
	token := jwt.NewWithClaims(jwt.SigningMethodHS256, claims)
	tokenString, err := token.SignedString(jwtSecret)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to generate token"})
		return
	}
	c.JSON(http.StatusOK, models.AuthResponse{
		Token: tokenString,
		User:  user,
	})
}
// Register creates a new account (bcrypt-hashed password) and returns a
// signed JWT. The very first registered user is automatically made admin.
// NOTE(review): the first-user check (COUNT then INSERT) is not atomic — two
// concurrent first registrations could both become admin.
func Register(c *gin.Context) {
	var req models.RegisterRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid request", Message: err.Error()})
		return
	}
	hashedPassword, err := bcrypt.GenerateFromPassword([]byte(req.Password), bcrypt.DefaultCost)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to hash password"})
		return
	}
	// First user ever becomes admin (count error ignored: treated as >0).
	var userCount int
	db.GetPool().QueryRow(c.Request.Context(), "SELECT COUNT(*) FROM users").Scan(&userCount)
	isFirstUser := userCount == 0
	var userID int64
	err = db.GetPool().QueryRow(c.Request.Context(), `
		INSERT INTO users (email, username, password_hash, is_admin, created_at, updated_at)
		VALUES ($1, $2, $3, $4, NOW(), NOW())
		RETURNING id`,
		req.Email, req.Username, string(hashedPassword), isFirstUser,
	).Scan(&userID)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to create user", Message: err.Error()})
		return
	}
	// Re-read the row so the response carries DB-generated timestamps.
	var user models.User
	err = db.GetPool().QueryRow(c.Request.Context(), `
		SELECT id, email, username, is_admin, created_at, updated_at
		FROM users WHERE id = $1`, userID).Scan(
		&user.ID, &user.Email, &user.Username, &user.IsAdmin,
		&user.CreatedAt, &user.UpdatedAt,
	)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch user"})
		return
	}
	expirationTime := time.Now().Add(24 * time.Hour)
	claims := &Claims{
		UserID:   user.ID,
		Email:    user.Email,
		Username: user.Username,
		IsAdmin:  user.IsAdmin,
		RegisteredClaims: jwt.RegisteredClaims{
			ExpiresAt: jwt.NewNumericDate(expirationTime),
			IssuedAt:  jwt.NewNumericDate(time.Now()),
		},
	}
	token := jwt.NewWithClaims(jwt.SigningMethodHS256, claims)
	tokenString, err := token.SignedString(jwtSecret)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to generate token"})
		return
	}
	c.JSON(http.StatusCreated, models.AuthResponse{
		Token:       tokenString,
		User:        user,
		IsFirstUser: isFirstUser,
	})
}
// GetCurrentUser returns the fresh DB record for the authenticated user.
// It expects auth middleware to have stored *Claims under the "user" key;
// the DB lookup ensures deleted users are rejected even with a valid token.
func GetCurrentUser(c *gin.Context) {
	userVal, exists := c.Get("user")
	if !exists {
		c.JSON(http.StatusUnauthorized, models.ErrorResponse{Error: "Not authenticated"})
		return
	}
	// NOTE(review): unchecked type assertion — panics if middleware stored a
	// different type under "user".
	claims := userVal.(*Claims)
	var user models.User
	err := db.GetPool().QueryRow(c.Request.Context(), `
		SELECT id, email, username, is_admin, created_at, updated_at
		FROM users WHERE id = $1`, claims.UserID).Scan(
		&user.ID, &user.Email, &user.Username, &user.IsAdmin,
		&user.CreatedAt, &user.UpdatedAt,
	)
	if err != nil {
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "User not found"})
		return
	}
	c.JSON(http.StatusOK, user)
}
// init wires the JWT secret from configuration at package load time.
// NOTE(review): config.Load() reads environment variables during import,
// before main() runs — confirm the environment is fully populated then.
func init() {
	cfg := config.Load()
	InitAuth(cfg.SecretKey)
}

View file

@ -0,0 +1,540 @@
package handlers
import (
"context"
"encoding/csv"
"fmt"
"io"
"net/http"
"strconv"
"strings"
"github.com/gin-gonic/gin"
"github.com/rss2/backend/internal/db"
"github.com/rss2/backend/internal/models"
)
// FeedResponse is the JSON shape for a single RSS feed. Pointer fields are
// nullable columns and serialize as JSON null when absent.
type FeedResponse struct {
	ID          int64   `json:"id"`
	Nombre      string  `json:"nombre"`
	Descripcion *string `json:"descripcion"`
	URL         string  `json:"url"`
	CategoriaID *int64  `json:"categoria_id"`
	PaisID      *int64  `json:"pais_id"`
	Idioma      *string `json:"idioma"`
	Activo      bool    `json:"activo"`
	Fallos      *int64  `json:"fallos"`      // consecutive fetch failures
	LastError   *string `json:"last_error"`  // last fetch error message
	FuenteURLID *int64  `json:"fuente_url_id"`
}
// GetFeeds lists feeds with pagination (page/per_page, per_page capped at
// 100) and optional filters (activo, categoria_id, pais_id), joining category
// and country names plus a per-feed news count. The WHERE clause is assembled
// from fixed fragments with numbered placeholders — user input only ever
// travels through query parameters, so the fmt.Sprintf usage is injection-safe.
func GetFeeds(c *gin.Context) {
	page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
	perPage, _ := strconv.Atoi(c.DefaultQuery("per_page", "50"))
	activo := c.Query("activo")
	categoriaID := c.Query("categoria_id")
	paisID := c.Query("pais_id")
	if page < 1 {
		page = 1
	}
	if perPage < 1 || perPage > 100 {
		perPage = 50
	}
	offset := (page - 1) * perPage
	// Build the filter incrementally; argNum tracks the next $n placeholder.
	where := "1=1"
	args := []interface{}{}
	argNum := 1
	if activo != "" {
		where += fmt.Sprintf(" AND activo = $%d", argNum)
		args = append(args, activo == "true")
		argNum++
	}
	if categoriaID != "" {
		where += fmt.Sprintf(" AND categoria_id = $%d", argNum)
		args = append(args, categoriaID)
		argNum++
	}
	if paisID != "" {
		where += fmt.Sprintf(" AND pais_id = $%d", argNum)
		args = append(args, paisID)
		argNum++
	}
	var total int
	countQuery := fmt.Sprintf("SELECT COUNT(*) FROM feeds WHERE %s", where)
	err := db.GetPool().QueryRow(c.Request.Context(), countQuery, args...).Scan(&total)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to count feeds", Message: err.Error()})
		return
	}
	// NOTE(review): the correlated noticias_count subquery matches on
	// fuente_nombre (feed name), not a foreign key — renamed feeds lose their
	// count; confirm this is intended.
	sqlQuery := fmt.Sprintf(`
		SELECT f.id, f.nombre, f.descripcion, f.url,
		f.categoria_id, f.pais_id, f.idioma, f.activo, f.fallos, f.last_error,
		c.nombre AS categoria, p.nombre AS pais,
		(SELECT COUNT(*) FROM noticias n WHERE n.fuente_nombre = f.nombre) as noticias_count
		FROM feeds f
		LEFT JOIN categorias c ON c.id = f.categoria_id
		LEFT JOIN paises p ON p.id = f.pais_id
		WHERE %s
		ORDER BY p.nombre NULLS LAST, f.activo DESC, f.fallos ASC, c.nombre NULLS LAST, f.nombre
		LIMIT $%d OFFSET $%d
	`, where, argNum, argNum+1)
	args = append(args, perPage, offset)
	rows, err := db.GetPool().Query(c.Request.Context(), sqlQuery, args...)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch feeds", Message: err.Error()})
		return
	}
	defer rows.Close()
	type FeedWithStats struct {
		FeedResponse
		Categoria     *string `json:"categoria"`
		Pais          *string `json:"pais"`
		NoticiasCount int64   `json:"noticias_count"`
	}
	var feeds []FeedWithStats
	for rows.Next() {
		var f FeedWithStats
		err := rows.Scan(
			&f.ID, &f.Nombre, &f.Descripcion, &f.URL,
			&f.CategoriaID, &f.PaisID, &f.Idioma, &f.Activo, &f.Fallos, &f.LastError,
			&f.Categoria, &f.Pais, &f.NoticiasCount,
		)
		if err != nil {
			// Unscannable rows are dropped silently.
			continue
		}
		feeds = append(feeds, f)
	}
	// Ceiling division for the page count.
	totalPages := (total + perPage - 1) / perPage
	c.JSON(http.StatusOK, gin.H{
		"feeds":       feeds,
		"total":       total,
		"page":        page,
		"per_page":    perPage,
		"total_pages": totalPages,
	})
}
// GetFeedByID returns a single feed row by its numeric ID.
// Responds 400 on a malformed ID and 404 when no row matches.
func GetFeedByID(c *gin.Context) {
	feedID, parseErr := strconv.ParseInt(c.Param("id"), 10, 64)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
		return
	}
	var feed FeedResponse
	scanErr := db.GetPool().QueryRow(c.Request.Context(), `
		SELECT id, nombre, descripcion, url, categoria_id, pais_id, idioma, activo, fallos
		FROM feeds WHERE id = $1`, feedID).Scan(
		&feed.ID, &feed.Nombre, &feed.Descripcion, &feed.URL,
		&feed.CategoriaID, &feed.PaisID, &feed.Idioma, &feed.Activo, &feed.Fallos,
	)
	if scanErr != nil {
		// Any scan/query failure is reported as "not found" (matches the
		// original behavior; pgx.ErrNoRows is the common case here).
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
		return
	}
	c.JSON(http.StatusOK, feed)
}
// CreateFeedRequest is the JSON payload accepted by CreateFeed.
// Nombre and URL are mandatory (URL must be well-formed); the remaining
// fields are optional and stored as NULL when omitted.
type CreateFeedRequest struct {
	Nombre      string  `json:"nombre" binding:"required"`
	URL         string  `json:"url" binding:"required,url"`
	Descripcion *string `json:"descripcion"`
	CategoriaID *int64  `json:"categoria_id"`
	PaisID      *int64  `json:"pais_id"`
	Idioma      *string `json:"idioma"`
}
// CreateFeed inserts a new feed from a validated JSON payload and
// responds 201 with the generated row ID.
func CreateFeed(c *gin.Context) {
	var payload CreateFeedRequest
	if bindErr := c.ShouldBindJSON(&payload); bindErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid request", Message: bindErr.Error()})
		return
	}
	var newID int64
	insertErr := db.GetPool().QueryRow(c.Request.Context(), `
		INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma)
		VALUES ($1, $2, $3, $4, $5, $6)
		RETURNING id`,
		payload.Nombre, payload.Descripcion, payload.URL, payload.CategoriaID, payload.PaisID, payload.Idioma,
	).Scan(&newID)
	if insertErr != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to create feed", Message: insertErr.Error()})
		return
	}
	c.JSON(http.StatusCreated, gin.H{"id": newID, "message": "Feed created successfully"})
}
// UpdateFeedRequest is the JSON payload accepted by UpdateFeed.
// It mirrors CreateFeedRequest plus an optional Activo flag; when Activo
// is omitted UpdateFeed defaults the feed to active (see UpdateFeed).
type UpdateFeedRequest struct {
	Nombre      string  `json:"nombre" binding:"required"`
	URL         string  `json:"url" binding:"required,url"`
	Descripcion *string `json:"descripcion"`
	CategoriaID *int64  `json:"categoria_id"`
	PaisID      *int64  `json:"pais_id"`
	Idioma      *string `json:"idioma"`
	Activo      *bool   `json:"activo"`
}
// UpdateFeed overwrites every editable column of a feed identified by the
// path ID. Responds 400 on bad input, 404 when the row does not exist.
func UpdateFeed(c *gin.Context) {
	feedID, parseErr := strconv.ParseInt(c.Param("id"), 10, 64)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
		return
	}
	var payload UpdateFeedRequest
	if bindErr := c.ShouldBindJSON(&payload); bindErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid request", Message: bindErr.Error()})
		return
	}
	// Activo is optional in the payload; a missing value means "active".
	isActive := payload.Activo == nil || *payload.Activo
	res, execErr := db.GetPool().Exec(c.Request.Context(), `
		UPDATE feeds
		SET nombre = $1, descripcion = $2, url = $3,
			categoria_id = $4, pais_id = $5, idioma = $6, activo = $7
		WHERE id = $8`,
		payload.Nombre, payload.Descripcion, payload.URL,
		payload.CategoriaID, payload.PaisID, payload.Idioma, isActive, feedID,
	)
	if execErr != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to update feed", Message: execErr.Error()})
		return
	}
	if res.RowsAffected() == 0 {
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
		return
	}
	c.JSON(http.StatusOK, models.SuccessResponse{Message: "Feed updated successfully"})
}
// DeleteFeed removes a feed row by ID; 404 when nothing was deleted.
func DeleteFeed(c *gin.Context) {
	feedID, parseErr := strconv.ParseInt(c.Param("id"), 10, 64)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
		return
	}
	res, execErr := db.GetPool().Exec(c.Request.Context(), "DELETE FROM feeds WHERE id = $1", feedID)
	switch {
	case execErr != nil:
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to delete feed", Message: execErr.Error()})
	case res.RowsAffected() == 0:
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
	default:
		c.JSON(http.StatusOK, models.SuccessResponse{Message: "Feed deleted successfully"})
	}
}
// ToggleFeedActive flips the feed's activo flag in place.
// 400 on a malformed ID, 404 when the feed does not exist.
func ToggleFeedActive(c *gin.Context) {
	feedID, parseErr := strconv.ParseInt(c.Param("id"), 10, 64)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
		return
	}
	res, execErr := db.GetPool().Exec(c.Request.Context(), `
		UPDATE feeds SET activo = NOT activo WHERE id = $1`, feedID)
	switch {
	case execErr != nil:
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to toggle feed", Message: execErr.Error()})
	case res.RowsAffected() == 0:
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
	default:
		c.JSON(http.StatusOK, models.SuccessResponse{Message: "Feed toggled successfully"})
	}
}
// ReactivateFeed re-enables a feed and clears its failure counter so the
// ingestor picks it up again. 400 on bad ID, 404 when not found.
func ReactivateFeed(c *gin.Context) {
	feedID, parseErr := strconv.ParseInt(c.Param("id"), 10, 64)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
		return
	}
	res, execErr := db.GetPool().Exec(c.Request.Context(), `
		UPDATE feeds SET activo = TRUE, fallos = 0 WHERE id = $1`, feedID)
	switch {
	case execErr != nil:
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to reactivate feed", Message: execErr.Error()})
	case res.RowsAffected() == 0:
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
	default:
		c.JSON(http.StatusOK, models.SuccessResponse{Message: "Feed reactivated successfully"})
	}
}
// ExportFeeds streams the feeds table as a CSV attachment, honoring the
// same optional filters as the listing endpoint (activo, categoria_id,
// pais_id). Column order matches what ImportFeeds expects on re-import.
func ExportFeeds(c *gin.Context) {
	activo := c.Query("activo")
	categoriaID := c.Query("categoria_id")
	paisID := c.Query("pais_id")
	// Dynamic WHERE clause: placeholders are numbered as filters are added
	// so the clause and the args slice stay in lockstep.
	where := "1=1"
	args := []interface{}{}
	argNum := 1
	if activo != "" {
		where += fmt.Sprintf(" AND activo = $%d", argNum)
		// Anything other than the literal "true" is treated as false.
		args = append(args, activo == "true")
		argNum++
	}
	if categoriaID != "" {
		where += fmt.Sprintf(" AND categoria_id = $%d", argNum)
		args = append(args, categoriaID)
		argNum++
	}
	if paisID != "" {
		where += fmt.Sprintf(" AND pais_id = $%d", argNum)
		args = append(args, paisID)
		argNum++
	}
	query := fmt.Sprintf(`
		SELECT f.id, f.nombre, f.descripcion, f.url,
		       f.categoria_id, c.nombre AS categoria,
		       f.pais_id, p.nombre AS pais,
		       f.idioma, f.activo, f.fallos
		FROM feeds f
		LEFT JOIN categorias c ON c.id = f.categoria_id
		LEFT JOIN paises p ON p.id = f.pais_id
		WHERE %s
		ORDER BY f.id
	`, where)
	rows, err := db.GetPool().Query(c.Request.Context(), query, args...)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch feeds", Message: err.Error()})
		return
	}
	defer rows.Close()
	// From here on the response is a CSV stream; errors mid-stream can no
	// longer be reported as JSON.
	c.Header("Content-Type", "text/csv")
	c.Header("Content-Disposition", "attachment; filename=feeds_export.csv")
	writer := csv.NewWriter(c.Writer)
	defer writer.Flush()
	// Header row — ImportFeeds indexes records by these positions.
	writer.Write([]string{"id", "nombre", "descripcion", "url", "categoria_id", "categoria", "pais_id", "pais", "idioma", "activo", "fallos"})
	for rows.Next() {
		var id int64
		var nombre, url string
		var descripcion, idioma *string
		var categoriaID, paisID, fallos *int64
		var activo bool
		var categoria, pais *string
		err := rows.Scan(&id, &nombre, &descripcion, &url, &categoriaID, &categoria, &paisID, &pais, &idioma, &activo, &fallos)
		if err != nil {
			// Rows that fail to scan are skipped silently; the export
			// simply omits them. NOTE(review): csv writer errors and
			// rows.Err() are never checked — a truncated export is
			// indistinguishable from a complete one.
			continue
		}
		// Nullable columns are rendered as empty strings via the helpers.
		writer.Write([]string{
			fmt.Sprintf("%d", id),
			nombre,
			stringOrEmpty(descripcion),
			url,
			int64ToString(categoriaID),
			stringOrEmpty(categoria),
			int64ToString(paisID),
			stringOrEmpty(pais),
			stringOrEmpty(idioma),
			fmt.Sprintf("%t", activo),
			int64ToString(fallos),
		})
	}
}
// ImportFeeds ingests a CSV upload (same column layout as ExportFeeds:
// id, nombre, descripcion, url, categoria_id, categoria, pais_id, pais,
// idioma, activo, fallos). Rows are upserted by URL inside a single
// transaction: an existing URL is updated, otherwise a new feed is
// inserted. Returns per-row counters plus collected error messages.
func ImportFeeds(c *gin.Context) {
	file, err := c.FormFile("file")
	if err != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "No file provided"})
		return
	}
	f, err := file.Open()
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to open file", Message: err.Error()})
		return
	}
	defer f.Close()
	// The whole upload is buffered in memory before parsing.
	content, err := io.ReadAll(f)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to read file", Message: err.Error()})
		return
	}
	reader := csv.NewReader(strings.NewReader(string(content)))
	// Discard the header row; a file without one is rejected.
	_, err = reader.Read()
	if err != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid CSV format"})
		return
	}
	// imported counts successful upserts (inserts AND updates); skipped is
	// for rows missing mandatory fields; failed is for parse/SQL errors.
	imported := 0
	skipped := 0
	failed := 0
	errors := []string{}
	tx, err := db.GetPool().Begin(context.Background())
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to start transaction", Message: err.Error()})
		return
	}
	// Safety net: a rollback after a successful commit is a no-op error,
	// so this only takes effect on early returns/panics.
	defer tx.Rollback(context.Background())
	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			failed++
			continue
		}
		// Need at least columns 0..3 (id, nombre, descripcion, url).
		if len(record) < 4 {
			skipped++
			continue
		}
		nombre := strings.TrimSpace(record[1])
		url := strings.TrimSpace(record[3])
		if nombre == "" || url == "" {
			skipped++
			continue
		}
		var descripcion *string
		if len(record) > 2 && strings.TrimSpace(record[2]) != "" {
			descripcionStr := strings.TrimSpace(record[2])
			descripcion = &descripcionStr
		}
		// Numeric columns: unparsable values are silently treated as NULL.
		var categoriaID *int64
		if len(record) > 4 && strings.TrimSpace(record[4]) != "" {
			catID, err := strconv.ParseInt(strings.TrimSpace(record[4]), 10, 64)
			if err == nil {
				categoriaID = &catID
			}
		}
		var paisID *int64
		if len(record) > 6 && strings.TrimSpace(record[6]) != "" {
			pID, err := strconv.ParseInt(strings.TrimSpace(record[6]), 10, 64)
			if err == nil {
				paisID = &pID
			}
		}
		var idioma *string
		if len(record) > 8 && strings.TrimSpace(record[8]) != "" {
			lang := strings.TrimSpace(record[8])
			// Language codes are truncated to their 2-letter prefix.
			if len(lang) > 2 {
				lang = lang[:2]
			}
			idioma = &lang
		}
		// activo defaults to true; only the literal "true" (any case)
		// keeps it true when the column is present.
		activo := true
		if len(record) > 9 && strings.TrimSpace(record[9]) != "" {
			activo = strings.ToLower(strings.TrimSpace(record[9])) == "true"
		}
		var fallos int64
		if len(record) > 10 && strings.TrimSpace(record[10]) != "" {
			f, err := strconv.ParseInt(strings.TrimSpace(record[10]), 10, 64)
			if err == nil {
				fallos = f
			}
		}
		// Upsert keyed by URL: err == nil means the feed already exists.
		var existingID int64
		err = tx.QueryRow(context.Background(), "SELECT id FROM feeds WHERE url = $1", url).Scan(&existingID)
		if err == nil {
			_, err = tx.Exec(context.Background(), `
				UPDATE feeds SET nombre=$1, descripcion=$2, categoria_id=$3, pais_id=$4, idioma=$5, activo=$6, fallos=$7
				WHERE id=$8`,
				nombre, descripcion, categoriaID, paisID, idioma, activo, fallos, existingID,
			)
			if err != nil {
				failed++
				errors = append(errors, fmt.Sprintf("Error updating %s: %v", url, err))
				continue
			}
		} else {
			// NOTE(review): any lookup error (not just "no rows") falls
			// through to INSERT; a transient error could create duplicates
			// only if the URL lacks a unique constraint — confirm schema.
			_, err = tx.Exec(context.Background(), `
				INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma, activo, fallos)
				VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
				nombre, descripcion, url, categoriaID, paisID, idioma, activo, fallos,
			)
			if err != nil {
				failed++
				errors = append(errors, fmt.Sprintf("Error inserting %s: %v", url, err))
				continue
			}
		}
		imported++
	}
	if err := tx.Commit(context.Background()); err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to commit transaction", Message: err.Error()})
		return
	}
	c.JSON(http.StatusOK, gin.H{
		"imported": imported,
		"skipped":  skipped,
		"failed":   failed,
		"errors":   errors,
		"message":  fmt.Sprintf("Import completed. Imported: %d, Skipped: %d, Failed: %d", imported, skipped, failed),
	})
}
// stringOrEmpty dereferences s, mapping a nil pointer to the empty string.
func stringOrEmpty(s *string) string {
	if s != nil {
		return *s
	}
	return ""
}
// int64ToString renders *i in base 10, mapping a nil pointer to "".
func int64ToString(i *int64) string {
	if i != nil {
		return fmt.Sprintf("%d", *i)
	}
	return ""
}

View file

@ -0,0 +1,369 @@
package handlers
import (
"fmt"
"net/http"
"strconv"
"time"
"github.com/gin-gonic/gin"
"github.com/rss2/backend/internal/db"
"github.com/rss2/backend/internal/models"
)
// NewsResponse is the JSON shape returned by the news endpoints: the
// original article fields plus, when available, its translation into the
// configured target language and any extracted entities.
type NewsResponse struct {
	ID                string     `json:"id"`
	Titulo            string     `json:"titulo"`
	Resumen           string     `json:"resumen"`
	URL               string     `json:"url"`
	Fecha             *time.Time `json:"fecha"`
	ImagenURL         *string    `json:"imagen_url"`
	CategoriaID       *int64     `json:"categoria_id"`
	PaisID            *int64     `json:"pais_id"`
	FuenteNombre      string     `json:"fuente_nombre"`
	TitleTranslated   *string    `json:"title_translated"`   // nil when no translation exists
	SummaryTranslated *string    `json:"summary_translated"` // nil when no translation exists
	LangTranslated    *string    `json:"lang_translated"`    // translation target language code
	Entities          []Entity   `json:"entities,omitempty"` // only populated by GetNewsByID
}
// GetNews lists news with pagination and optional filters: q (ILIKE over
// title/summary), category_id, country_id, and translated_only (requires a
// finished Spanish translation that differs from the original title).
//
// Fix: newsList is now initialized to an empty slice so that a page with
// no rows serializes as "news": [] instead of "news": null, matching the
// empty-result behavior of GetEntities and of the total == 0 early return.
func GetNews(c *gin.Context) {
	page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
	perPage, _ := strconv.Atoi(c.DefaultQuery("per_page", "30"))
	query := c.Query("q")
	categoryID := c.Query("category_id")
	countryID := c.Query("country_id")
	translatedOnly := c.Query("translated_only") == "true"
	// Clamp pagination to sane bounds.
	if page < 1 {
		page = 1
	}
	if perPage < 1 || perPage > 100 {
		perPage = 30
	}
	offset := (page - 1) * perPage
	// Dynamic WHERE clause; placeholder numbering tracks the args slice.
	where := "1=1"
	args := []interface{}{}
	argNum := 1
	if query != "" {
		// Single argument reused for both title and summary match.
		where += fmt.Sprintf(" AND (n.titulo ILIKE $%d OR n.resumen ILIKE $%d)", argNum, argNum)
		args = append(args, "%"+query+"%")
		argNum++
	}
	if categoryID != "" {
		where += fmt.Sprintf(" AND n.categoria_id = $%d", argNum)
		args = append(args, categoryID)
		argNum++
	}
	if countryID != "" {
		where += fmt.Sprintf(" AND n.pais_id = $%d", argNum)
		args = append(args, countryID)
		argNum++
	}
	if translatedOnly {
		// Only articles whose Spanish translation finished and actually
		// differs from the original title.
		where += " AND t.status = 'done' AND t.titulo_trad IS NOT NULL AND t.titulo_trad != n.titulo"
	}
	var total int
	countQuery := fmt.Sprintf("SELECT COUNT(*) FROM noticias n LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es' WHERE %s", where)
	err := db.GetPool().QueryRow(c.Request.Context(), countQuery, args...).Scan(&total)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to count news", Message: err.Error()})
		return
	}
	if total == 0 {
		c.JSON(http.StatusOK, models.NewsListResponse{
			News:       []models.NewsWithTranslations{},
			Total:      0,
			Page:       page,
			PerPage:    perPage,
			TotalPages: 0,
		})
		return
	}
	sqlQuery := fmt.Sprintf(`
		SELECT n.id, n.titulo, COALESCE(n.resumen, ''), n.url, n.fecha, n.imagen_url,
			   n.categoria_id, n.pais_id, n.fuente_nombre,
			   t.titulo_trad,
			   t.resumen_trad,
			   t.lang_to as lang_trad
		FROM noticias n
		LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es'
		WHERE %s
		ORDER BY n.fecha DESC LIMIT $%d OFFSET $%d
	`, where, argNum, argNum+1)
	args = append(args, perPage, offset)
	rows, err := db.GetPool().Query(c.Request.Context(), sqlQuery, args...)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch news", Message: err.Error()})
		return
	}
	defer rows.Close()
	// Initialize to an empty slice so an all-failed page still returns [].
	newsList := []NewsResponse{}
	for rows.Next() {
		var n NewsResponse
		var imagenURL, fuenteNombre *string
		var categoriaID, paisID *int32
		err := rows.Scan(
			&n.ID, &n.Titulo, &n.Resumen, &n.URL, &n.Fecha, &imagenURL,
			&categoriaID, &paisID, &fuenteNombre,
			&n.TitleTranslated, &n.SummaryTranslated, &n.LangTranslated,
		)
		if err != nil {
			// Skip rows that fail to scan rather than aborting the page.
			continue
		}
		if imagenURL != nil {
			n.ImagenURL = imagenURL
		}
		if fuenteNombre != nil {
			n.FuenteNombre = *fuenteNombre
		}
		// DB columns are int4; widen to the int64 the API exposes.
		if categoriaID != nil {
			catID := int64(*categoriaID)
			n.CategoriaID = &catID
		}
		if paisID != nil {
			pID := int64(*paisID)
			n.PaisID = &pID
		}
		newsList = append(newsList, n)
	}
	totalPages := (total + perPage - 1) / perPage
	c.JSON(http.StatusOK, gin.H{
		"news":        newsList,
		"total":       total,
		"page":        page,
		"per_page":    perPage,
		"total_pages": totalPages,
	})
}
// GetNewsByID returns a single article with its Spanish translation (if
// any) and the person/organization entities tagged on its translations.
// Responds 404 on any lookup failure.
func GetNewsByID(c *gin.Context) {
	id := c.Param("id")
	sqlQuery := `
		SELECT n.id, n.titulo, COALESCE(n.resumen, ''), n.url, n.fecha, n.imagen_url,
			   n.categoria_id, n.pais_id, n.fuente_nombre,
			   t.titulo_trad,
			   t.resumen_trad,
			   t.lang_to as lang_trad
		FROM noticias n
		LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es'
		WHERE n.id = $1`
	var n NewsResponse
	var imagenURL, fuenteNombre *string
	var categoriaID, paisID *int32
	err := db.GetPool().QueryRow(c.Request.Context(), sqlQuery, id).Scan(
		&n.ID, &n.Titulo, &n.Resumen, &n.URL, &n.Fecha, &imagenURL,
		&categoriaID, &paisID, &fuenteNombre,
		&n.TitleTranslated, &n.SummaryTranslated, &n.LangTranslated,
	)
	if err != nil {
		// Any error (including genuine DB failures) maps to 404 here.
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "News not found"})
		return
	}
	if imagenURL != nil {
		n.ImagenURL = imagenURL
	}
	if fuenteNombre != nil {
		n.FuenteNombre = *fuenteNombre
	}
	// DB columns are int4; widen to the int64 the API exposes.
	if categoriaID != nil {
		catID := int64(*categoriaID)
		n.CategoriaID = &catID
	}
	if paisID != nil {
		pID := int64(*paisID)
		n.PaisID = &pID
	}
	// Fetch entities for this news
	// NOTE(review): cnt is hard-coded to 1 per mention (no GROUP BY), so
	// an entity tagged on several translations appears multiple times.
	entitiesQuery := `
		SELECT t.valor, t.tipo, 1 as cnt, t.wiki_summary, t.wiki_url, t.image_path
		FROM tags_noticia tn
		JOIN tags t ON tn.tag_id = t.id
		JOIN traducciones tr ON tn.traduccion_id = tr.id
		WHERE tr.noticia_id = $1 AND t.tipo IN ('persona', 'organizacion')
	`
	rows, err := db.GetPool().Query(c.Request.Context(), entitiesQuery, id)
	var entities []Entity
	if err == nil {
		// Entity lookup failures are best-effort: the article is still
		// returned, just with an empty entity list.
		defer rows.Close()
		for rows.Next() {
			var e Entity
			if err := rows.Scan(&e.Valor, &e.Tipo, &e.Count, &e.WikiSummary, &e.WikiURL, &e.ImagePath); err == nil {
				entities = append(entities, e)
			}
		}
	}
	// Guarantee "entities": [] rather than null in the JSON output.
	if entities == nil {
		entities = []Entity{}
	}
	n.Entities = entities
	c.JSON(http.StatusOK, n)
}
// DeleteNews removes a news row by its string ID; 404 when no row matched.
func DeleteNews(c *gin.Context) {
	newsID := c.Param("id")
	res, execErr := db.GetPool().Exec(c.Request.Context(), "DELETE FROM noticias WHERE id = $1", newsID)
	switch {
	case execErr != nil:
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to delete news", Message: execErr.Error()})
	case res.RowsAffected() == 0:
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "News not found"})
	default:
		c.JSON(http.StatusOK, models.SuccessResponse{Message: "News deleted successfully"})
	}
}
// Entity is a named entity (person, organization, …) extracted from news
// translations, with an occurrence count and optional Wikipedia metadata.
type Entity struct {
	Valor       string  `json:"valor"` // entity surface form or canonical name
	Tipo        string  `json:"tipo"`  // e.g. "persona", "organizacion"
	Count       int     `json:"count"` // number of tagged mentions
	WikiSummary *string `json:"wiki_summary"`
	WikiURL     *string `json:"wiki_url"`
	ImagePath   *string `json:"image_path"`
}
// EntityListResponse is the paginated envelope returned by GetEntities.
type EntityListResponse struct {
	Entities   []Entity `json:"entities"`
	Total      int      `json:"total"` // distinct canonical entities matching the filter
	Page       int      `json:"page"`
	PerPage    int      `json:"per_page"`
	TotalPages int      `json:"total_pages"`
}
// GetEntities returns a paginated, mention-count-ordered list of named
// entities of a given tipo (default "persona"), optionally filtered by
// country, category, and a name substring. Aliases are collapsed into
// their canonical name via the entity_aliases table.
func GetEntities(c *gin.Context) {
	countryID := c.Query("country_id")
	categoryID := c.Query("category_id")
	entityType := c.DefaultQuery("tipo", "persona")
	q := c.Query("q")
	pageStr := c.DefaultQuery("page", "1")
	perPageStr := c.DefaultQuery("per_page", "50")
	page, _ := strconv.Atoi(pageStr)
	perPage, _ := strconv.Atoi(perPageStr)
	// Clamp pagination to sane bounds.
	if page < 1 {
		page = 1
	}
	if perPage < 1 || perPage > 100 {
		perPage = 50
	}
	offset := (page - 1) * perPage
	// Placeholders are numbered from len(args)+1 so the clause and the
	// argument list cannot drift apart.
	where := "t.tipo = $1"
	args := []interface{}{entityType}
	if countryID != "" {
		where += fmt.Sprintf(" AND n.pais_id = $%d", len(args)+1)
		args = append(args, countryID)
	}
	if categoryID != "" {
		where += fmt.Sprintf(" AND n.categoria_id = $%d", len(args)+1)
		args = append(args, categoryID)
	}
	if q != "" {
		// Match against the canonical name when an alias mapping exists.
		where += fmt.Sprintf(" AND COALESCE(ea.canonical_name, t.valor) ILIKE $%d", len(args)+1)
		args = append(args, "%"+q+"%")
	}
	// 1. Get the total count of distinct canonical entities matching the filter
	countQuery := fmt.Sprintf(`
		SELECT COUNT(DISTINCT COALESCE(ea.canonical_name, t.valor))
		FROM tags_noticia tn
		JOIN tags t ON tn.tag_id = t.id
		JOIN traducciones tr ON tn.traduccion_id = tr.id
		JOIN noticias n ON tr.noticia_id = n.id
		LEFT JOIN entity_aliases ea ON LOWER(ea.alias) = LOWER(t.valor) AND ea.tipo = t.tipo
		WHERE %s
	`, where)
	var total int
	err := db.GetPool().QueryRow(c.Request.Context(), countQuery, args...).Scan(&total)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get entities count", Message: err.Error()})
		return
	}
	if total == 0 {
		c.JSON(http.StatusOK, EntityListResponse{
			Entities:   []Entity{},
			Total:      0,
			Page:       page,
			PerPage:    perPage,
			TotalPages: 0,
		})
		return
	}
	// 2. Fetch the paginated entities
	// After appending perPage and offset, they sit at positions
	// len(args)-1 and len(args) — hence the placeholder arithmetic below.
	args = append(args, perPage, offset)
	query := fmt.Sprintf(`
		SELECT COALESCE(ea.canonical_name, t.valor) as valor, t.tipo, COUNT(*)::int as cnt,
			   MAX(t.wiki_summary), MAX(t.wiki_url), MAX(t.image_path)
		FROM tags_noticia tn
		JOIN tags t ON tn.tag_id = t.id
		JOIN traducciones tr ON tn.traduccion_id = tr.id
		JOIN noticias n ON tr.noticia_id = n.id
		LEFT JOIN entity_aliases ea ON LOWER(ea.alias) = LOWER(t.valor) AND ea.tipo = t.tipo
		WHERE %s
		GROUP BY COALESCE(ea.canonical_name, t.valor), t.tipo
		ORDER BY cnt DESC
		LIMIT $%d OFFSET $%d
	`, where, len(args)-1, len(args))
	rows, err := db.GetPool().Query(c.Request.Context(), query, args...)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get entities", Message: err.Error()})
		return
	}
	defer rows.Close()
	var entities []Entity
	for rows.Next() {
		var e Entity
		if err := rows.Scan(&e.Valor, &e.Tipo, &e.Count, &e.WikiSummary, &e.WikiURL, &e.ImagePath); err != nil {
			// Skip unreadable rows rather than failing the whole page.
			continue
		}
		entities = append(entities, e)
	}
	// Guarantee "entities": [] rather than null in the JSON output.
	if entities == nil {
		entities = []Entity{}
	}
	totalPages := (total + perPage - 1) / perPage
	c.JSON(http.StatusOK, EntityListResponse{
		Entities:   entities,
		Total:      total,
		Page:       page,
		PerPage:    perPage,
		TotalPages: totalPages,
	})
}

View file

@ -0,0 +1,265 @@
package handlers
import (
"net/http"
"strconv"
"github.com/gin-gonic/gin"
"github.com/rss2/backend/internal/db"
"github.com/rss2/backend/internal/models"
"github.com/rss2/backend/internal/services"
)
// SearchNews performs a filtered keyword search over noticias joined with
// their translations. Filters: q (ILIKE over title/summary/content), lang
// (translation target, defaults to "es"), categoria_id, pais_id. With
// semantic=true the query is delegated to the semantic search service.
//
// Fixes over the previous version:
//   - the translations JOIN now binds its own language argument; before,
//     $argNum pointed at the LIMIT value and the OFFSET placeholder had no
//     argument at all, so the query could never bind correctly;
//   - categoria_id / pais_id placeholders are only appended when the value
//     parses as an integer, so the WHERE clause and the argument list can
//     no longer diverge on bad input;
//   - the SELECT returned f.nombre with no matching Scan target (16
//     columns vs 15 destinations), which made every row scan fail.
func SearchNews(c *gin.Context) {
	query := c.Query("q")
	page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
	perPage, _ := strconv.Atoi(c.DefaultQuery("per_page", "30"))
	lang := c.DefaultQuery("lang", "")
	categoriaID := c.Query("categoria_id")
	paisID := c.Query("pais_id")
	useSemantic := c.Query("semantic") == "true"
	if query == "" && categoriaID == "" && paisID == "" && lang == "" {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "At least one filter is required (q, categoria_id, pais_id, or lang)"})
		return
	}
	if page < 1 {
		page = 1
	}
	if perPage < 1 || perPage > 100 {
		perPage = 30
	}
	// Default to Spanish if no lang specified
	if lang == "" {
		lang = "es"
	}
	ctx := c.Request.Context()
	if useSemantic {
		results, err := services.SemanticSearch(ctx, query, lang, page, perPage)
		if err != nil {
			c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Semantic search failed", Message: err.Error()})
			return
		}
		c.JSON(http.StatusOK, results)
		return
	}
	offset := (page - 1) * perPage
	// Build dynamic query; placeholders track the args slice exactly.
	args := []interface{}{}
	argNum := 1
	whereClause := "WHERE 1=1"
	if query != "" {
		whereClause += " AND (n.titulo ILIKE $" + strconv.Itoa(argNum) + " OR n.resumen ILIKE $" + strconv.Itoa(argNum) + " OR n.contenido ILIKE $" + strconv.Itoa(argNum) + ")"
		args = append(args, "%"+query+"%")
		argNum++
	}
	if lang != "" {
		whereClause += " AND t.lang_to = $" + strconv.Itoa(argNum)
		args = append(args, lang)
		argNum++
	}
	if categoriaID != "" {
		// Only add the clause when the value parses, keeping the
		// placeholder count in sync with the argument count.
		if catID, err := strconv.ParseInt(categoriaID, 10, 64); err == nil {
			whereClause += " AND n.categoria_id = $" + strconv.Itoa(argNum)
			args = append(args, catID)
			argNum++
		}
	}
	if paisID != "" {
		if pID, err := strconv.ParseInt(paisID, 10, 64); err == nil {
			whereClause += " AND n.pais_id = $" + strconv.Itoa(argNum)
			args = append(args, pID)
			argNum++
		}
	}
	// The LEFT JOIN condition needs the target language as its own bound
	// argument, shared by the data query and the count query.
	joinLangArg := strconv.Itoa(argNum)
	args = append(args, lang)
	argNum++
	// Snapshot the arguments before LIMIT/OFFSET for the count query.
	countArgs := append([]interface{}{}, args...)
	args = append(args, perPage, offset)
	sqlQuery := `
		SELECT n.id, n.titulo, n.resumen, n.contenido, n.url, n.imagen,
			   n.feed_id, n.lang, n.categoria_id, n.pais_id, n.created_at, n.updated_at,
			   COALESCE(t.titulo_trad, '') as titulo_trad,
			   COALESCE(t.resumen_trad, '') as resumen_trad,
			   t.lang_to as lang_trad,
			   f.nombre as fuente_nombre
		FROM noticias n
		LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = $` + joinLangArg + `
		LEFT JOIN feeds f ON f.id = n.feed_id
		` + whereClause + `
		ORDER BY n.created_at DESC
		LIMIT $` + strconv.Itoa(argNum) + ` OFFSET $` + strconv.Itoa(argNum+1)
	rows, err := db.GetPool().Query(ctx, sqlQuery, args...)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Search failed", Message: err.Error()})
		return
	}
	defer rows.Close()
	var newsList []models.NewsWithTranslations
	for rows.Next() {
		var n models.NewsWithTranslations
		var imagen *string
		// fuente_nombre is selected but has no field on the model; it is
		// scanned into a throwaway so column and target counts match.
		var fuenteNombre *string
		err := rows.Scan(
			&n.ID, &n.Title, &n.Summary, &n.Content, &n.URL, &imagen,
			&n.FeedID, &n.Lang, &n.CategoryID, &n.CountryID, &n.CreatedAt, &n.UpdatedAt,
			&n.TitleTranslated, &n.SummaryTranslated, &n.LangTranslated,
			&fuenteNombre,
		)
		if err != nil {
			continue
		}
		if imagen != nil {
			n.ImageURL = imagen
		}
		newsList = append(newsList, n)
	}
	// Get total count with the same filters (no LIMIT/OFFSET).
	var total int
	err = db.GetPool().QueryRow(ctx, `
		SELECT COUNT(*) FROM noticias n
		LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = $`+joinLangArg+`
		`+whereClause, countArgs...).Scan(&total)
	if err != nil {
		// Best-effort: fall back to the page size when counting fails.
		total = len(newsList)
	}
	totalPages := (total + perPage - 1) / perPage
	response := models.NewsListResponse{
		News:       newsList,
		Total:      total,
		Page:       page,
		PerPage:    perPage,
		TotalPages: totalPages,
	}
	c.JSON(http.StatusOK, response)
}
// GetStats aggregates dashboard counters (totals, recent-activity windows,
// translated count) plus the ten busiest categories and countries.
// Top-list failures are best-effort: the base stats are still returned.
func GetStats(c *gin.Context) {
	var stats models.Stats
	// All scalar counters in one round trip via scalar subqueries.
	err := db.GetPool().QueryRow(c.Request.Context(), `
		SELECT
			(SELECT COUNT(*) FROM noticias) as total_news,
			(SELECT COUNT(*) FROM feeds WHERE activo = true) as total_feeds,
			(SELECT COUNT(*) FROM users) as total_users,
			(SELECT COUNT(*) FROM noticias WHERE fecha::date = CURRENT_DATE) as news_today,
			(SELECT COUNT(*) FROM noticias WHERE fecha >= DATE_TRUNC('week', CURRENT_DATE)) as news_this_week,
			(SELECT COUNT(*) FROM noticias WHERE fecha >= DATE_TRUNC('month', CURRENT_DATE)) as news_this_month,
			(SELECT COUNT(DISTINCT noticia_id) FROM traducciones WHERE status = 'done') as total_translated
	`).Scan(
		&stats.TotalNews, &stats.TotalFeeds, &stats.TotalUsers,
		&stats.NewsToday, &stats.NewsThisWeek, &stats.NewsThisMonth,
		&stats.TotalTranslated,
	)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get stats", Message: err.Error()})
		return
	}
	// Top 10 categories by article count (query errors silently skipped).
	rows, err := db.GetPool().Query(c.Request.Context(), `
		SELECT c.id, c.nombre, COUNT(n.id) as count
		FROM categorias c
		LEFT JOIN noticias n ON n.categoria_id = c.id
		GROUP BY c.id, c.nombre
		ORDER BY count DESC
		LIMIT 10
	`)
	if err == nil {
		defer rows.Close()
		for rows.Next() {
			var cs models.CategoryStat
			// Scan errors are ignored; a failed row appends zero values.
			rows.Scan(&cs.CategoryID, &cs.CategoryName, &cs.Count)
			stats.TopCategories = append(stats.TopCategories, cs)
		}
	}
	// Top 10 countries by article count. Note: rows is reassigned; each
	// defer closes the rows value it captured at registration time.
	rows, err = db.GetPool().Query(c.Request.Context(), `
		SELECT p.id, p.nombre, p.flag_emoji, COUNT(n.id) as count
		FROM paises p
		LEFT JOIN noticias n ON n.pais_id = p.id
		GROUP BY p.id, p.nombre, p.flag_emoji
		ORDER BY count DESC
		LIMIT 10
	`)
	if err == nil {
		defer rows.Close()
		for rows.Next() {
			var cs models.CountryStat
			rows.Scan(&cs.CountryID, &cs.CountryName, &cs.FlagEmoji, &cs.Count)
			stats.TopCountries = append(stats.TopCountries, cs)
		}
	}
	c.JSON(http.StatusOK, stats)
}
// GetCategories lists every category ordered alphabetically by name.
func GetCategories(c *gin.Context) {
	type Category struct {
		ID     int64  `json:"id"`
		Nombre string `json:"nombre"`
	}
	rows, queryErr := db.GetPool().Query(c.Request.Context(), `
		SELECT id, nombre FROM categorias ORDER BY nombre`)
	if queryErr != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get categories", Message: queryErr.Error()})
		return
	}
	defer rows.Close()
	var categories []Category
	for rows.Next() {
		var entry Category
		rows.Scan(&entry.ID, &entry.Nombre)
		categories = append(categories, entry)
	}
	c.JSON(http.StatusOK, categories)
}
// GetCountries lists every country with its continent name, ordered by
// country name.
//
// Fix: continente comes from a LEFT JOIN, so it can be NULL; the previous
// version scanned it into a plain string, Scan failed, the error was
// ignored, and a zero-valued row was appended anyway. The column is now
// COALESCEd to '' and the scan error is checked.
func GetCountries(c *gin.Context) {
	rows, err := db.GetPool().Query(c.Request.Context(), `
		SELECT p.id, p.nombre, COALESCE(c.nombre, '') as continente
		FROM paises p
		LEFT JOIN continentes c ON c.id = p.continente_id
		ORDER BY p.nombre`)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get countries", Message: err.Error()})
		return
	}
	defer rows.Close()
	type Country struct {
		ID         int64  `json:"id"`
		Nombre     string `json:"nombre"`
		Continente string `json:"continente"` // "" when the country has no continent row
	}
	var countries []Country
	for rows.Next() {
		var country Country
		if err := rows.Scan(&country.ID, &country.Nombre, &country.Continente); err != nil {
			// Skip unreadable rows instead of appending zero values.
			continue
		}
		countries = append(countries, country)
	}
	c.JSON(http.StatusOK, countries)
}

View file

@ -0,0 +1,108 @@
package middleware
import (
"net/http"
"strings"
"github.com/gin-gonic/gin"
"github.com/golang-jwt/jwt/v5"
)
// jwtSecret is the HMAC key used to sign/verify tokens; set once at startup.
var jwtSecret []byte

// SetJWTSecret installs the HMAC secret used by AuthRequired to validate
// tokens. Must be called before any request is served.
func SetJWTSecret(secret string) {
	jwtSecret = []byte(secret)
}
// Claims is the JWT payload carried by authenticated requests: the user's
// identity plus the admin flag, on top of the standard registered claims.
type Claims struct {
	UserID   int64  `json:"user_id"`
	Email    string `json:"email"`
	Username string `json:"username"`
	IsAdmin  bool   `json:"is_admin"` // checked by AdminRequired
	jwt.RegisteredClaims
}
// AuthRequired validates the "Authorization: Bearer <jwt>" header and, on
// success, stores the parsed *Claims under the "user" context key.
// Responds 401 and aborts the chain on any failure.
//
// Fix: the parser now restricts accepted signing methods to HS256. Without
// jwt.WithValidMethods, a token declaring a different algorithm would be
// checked against the same HMAC secret (algorithm-confusion risk).
func AuthRequired() gin.HandlerFunc {
	return func(c *gin.Context) {
		authHeader := c.GetHeader("Authorization")
		if authHeader == "" {
			c.JSON(http.StatusUnauthorized, gin.H{"error": "Authorization header required"})
			c.Abort()
			return
		}
		tokenString := strings.TrimPrefix(authHeader, "Bearer ")
		// TrimPrefix returns the input unchanged when the prefix is absent.
		if tokenString == authHeader {
			c.JSON(http.StatusUnauthorized, gin.H{"error": "Bearer token required"})
			c.Abort()
			return
		}
		claims := &Claims{}
		token, err := jwt.ParseWithClaims(tokenString, claims, func(token *jwt.Token) (interface{}, error) {
			return jwtSecret, nil
		}, jwt.WithValidMethods([]string{"HS256"}))
		if err != nil || !token.Valid {
			c.JSON(http.StatusUnauthorized, gin.H{"error": "Invalid token"})
			c.Abort()
			return
		}
		c.Set("user", claims)
		c.Next()
	}
}
// AdminRequired gates a route to admin users. It must run after
// AuthRequired (it reads the "user" context key set there); responds 401
// when unauthenticated and 403 when the user lacks the admin flag.
//
// Fix: the type assertion now uses the comma-ok form — the previous bare
// userVal.(*Claims) would panic the handler if the key held another type.
func AdminRequired() gin.HandlerFunc {
	return func(c *gin.Context) {
		userVal, exists := c.Get("user")
		if !exists {
			c.JSON(http.StatusUnauthorized, gin.H{"error": "Not authenticated"})
			c.Abort()
			return
		}
		claims, ok := userVal.(*Claims)
		if !ok {
			c.JSON(http.StatusUnauthorized, gin.H{"error": "Not authenticated"})
			c.Abort()
			return
		}
		if !claims.IsAdmin {
			c.JSON(http.StatusForbidden, gin.H{"error": "Admin access required"})
			c.Abort()
			return
		}
		c.Next()
	}
}
// CORSMiddleware sets permissive CORS headers and short-circuits OPTIONS
// preflight requests with 204.
//
// Fix: the previous version sent Access-Control-Allow-Origin "*" together
// with Access-Control-Allow-Credentials "true" — a combination browsers
// reject for credentialed requests. We now echo the caller's Origin (and
// set Vary: Origin for caches) when one is present, and only advertise
// credentials support in that case; origin-less requests keep "*".
func CORSMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		origin := c.Request.Header.Get("Origin")
		if origin != "" {
			c.Writer.Header().Set("Access-Control-Allow-Origin", origin)
			c.Writer.Header().Set("Access-Control-Allow-Credentials", "true")
			c.Writer.Header().Set("Vary", "Origin")
		} else {
			c.Writer.Header().Set("Access-Control-Allow-Origin", "*")
		}
		c.Writer.Header().Set("Access-Control-Allow-Headers", "Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization, accept, origin, Cache-Control, X-Requested-With")
		c.Writer.Header().Set("Access-Control-Allow-Methods", "POST, OPTIONS, GET, PUT, DELETE, PATCH")
		if c.Request.Method == "OPTIONS" {
			c.AbortWithStatus(204)
			return
		}
		c.Next()
	}
}
// LoggerMiddleware is a placeholder request logger: it runs the handler
// chain and inspects the response status afterwards.
// NOTE(review): the >= 400 branch is currently empty — no logging actually
// happens yet; wire up a real logger or drop this from the chain.
func LoggerMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		c.Next()
		status := c.Writer.Status()
		if status >= 400 {
			// Log error responses
		}
	}
}
// RateLimitMiddleware is a stub: it accepts the intended requests-per-minute
// budget but enforces nothing — every request passes straight through and
// the parameter is unused.
// NOTE(review): implement (e.g. a token bucket keyed by client IP) or
// remove it from the chain to avoid a false sense of protection.
func RateLimitMiddleware(requestsPerMinute int) gin.HandlerFunc {
	return func(c *gin.Context) {
		c.Next()
	}
}

View file

@ -0,0 +1,17 @@
package models
import "time"
// EntityAlias maps an alternative entity spelling (Alias) to its canonical
// name, scoped by entity type; used to merge mentions in entity listings.
type EntityAlias struct {
	ID            int       `json:"id"`
	CanonicalName string    `json:"canonical_name"`
	Alias         string    `json:"alias"`
	Tipo          string    `json:"tipo"`
	CreatedAt     time.Time `json:"created_at"`
}

// EntityAliasRequest is the payload for creating alias mappings: one
// canonical name, one or more aliases, and a constrained entity type.
type EntityAliasRequest struct {
	CanonicalName string   `json:"canonical_name" binding:"required"`
	Aliases       []string `json:"aliases" binding:"required,min=1"`
	Tipo          string   `json:"tipo" binding:"required,oneof=persona organizacion lugar tema"`
}

View file

@ -0,0 +1,171 @@
package models
import (
"time"
)
// News is a raw ingested article as stored in the noticias table.
type News struct {
	ID          int64      `json:"id"`
	Title       string     `json:"title"`
	Summary     string     `json:"summary"`
	Content     string     `json:"content"`
	URL         string     `json:"url"`
	ImageURL    *string    `json:"image_url"`
	PublishedAt *time.Time `json:"published_at"`
	Lang        string     `json:"lang"` // source language of the article
	FeedID      int64      `json:"feed_id"`
	CreatedAt   time.Time  `json:"created_at"`
	UpdatedAt   time.Time  `json:"updated_at"`
}

// NewsWithTranslations is News augmented with classification (category,
// country) and the translated title/summary/content for one target
// language; translation fields are nil when no translation exists.
// Note: PublishedAt is a *string here (pre-formatted), unlike News.
type NewsWithTranslations struct {
	ID                int64     `json:"id"`
	Title             string    `json:"title"`
	Summary           string    `json:"summary"`
	Content           string    `json:"content"`
	URL               string    `json:"url"`
	ImageURL          *string   `json:"image_url"`
	PublishedAt       *string   `json:"published_at"`
	Lang              string    `json:"lang"`
	FeedID            int64     `json:"feed_id"`
	CategoryID        *int64    `json:"category_id"`
	CountryID         *int64    `json:"country_id"`
	CreatedAt         time.Time `json:"created_at"`
	UpdatedAt         time.Time `json:"updated_at"`
	TitleTranslated   *string   `json:"title_translated"`
	SummaryTranslated *string   `json:"summary_translated"`
	ContentTranslated *string   `json:"content_translated"`
	LangTranslated    *string   `json:"lang_translated"` // translation target language code
}
// Feed is an RSS source definition with its classification and fetch state.
type Feed struct {
	ID          int64      `json:"id"`
	Title       string     `json:"title"`
	URL         string     `json:"url"` // feed URL, unique per source
	SiteURL     *string    `json:"site_url"`
	Description *string    `json:"description"`
	ImageURL    *string    `json:"image_url"`
	Language    *string    `json:"language"`
	CategoryID  *int64     `json:"category_id"`
	CountryID   *int64     `json:"country_id"`
	Active      bool       `json:"active"` // inactive feeds are skipped by the ingestor
	LastFetched *time.Time `json:"last_fetched"`
	CreatedAt   time.Time  `json:"created_at"`
	UpdatedAt   time.Time  `json:"updated_at"`
}

// Category is a news category; ParentID allows a simple hierarchy.
type Category struct {
	ID       int64  `json:"id"`
	Name     string `json:"name"`
	Color    string `json:"color"`
	Icon     string `json:"icon"`
	ParentID *int64 `json:"parent_id"`
}

// Country is a country reference row used to classify feeds and news.
type Country struct {
	ID        int64  `json:"id"`
	Name      string `json:"name"`
	Code      string `json:"code"`
	Continent string `json:"continent"`
	FlagEmoji string `json:"flag_emoji"`
}
// Translation is one translation job/result for a news item into LangTo.
type Translation struct {
	ID        int64     `json:"id"`
	NewsID    int64     `json:"news_id"`
	LangFrom  string    `json:"lang_from"`
	LangTo    string    `json:"lang_to"`
	Title     string    `json:"title"`
	Summary   string    `json:"summary"`
	Status    string    `json:"status"` // e.g. "done" (see stats/search queries)
	Error     *string   `json:"error"`
	CreatedAt time.Time `json:"created_at"`
	UpdatedAt time.Time `json:"updated_at"`
}

// User is an account row; PasswordHash is never serialized (json:"-").
type User struct {
	ID           int64     `json:"id"`
	Email        string    `json:"email"`
	Username     string    `json:"username"`
	PasswordHash string    `json:"-"`
	IsAdmin      bool      `json:"is_admin"`
	CreatedAt    time.Time `json:"created_at"`
	UpdatedAt    time.Time `json:"updated_at"`
}

// SearchHistory records one user search with its filters and hit count.
type SearchHistory struct {
	ID           int64     `json:"id"`
	UserID       int64     `json:"user_id"`
	Query        string    `json:"query"`
	CategoryID   *int64    `json:"category_id"`
	CountryID    *int64    `json:"country_id"`
	ResultsCount int       `json:"results_count"`
	SearchedAt   time.Time `json:"searched_at"`
}
// NewsListResponse is one paginated page of news items.
type NewsListResponse struct {
	News       []NewsWithTranslations `json:"news"`
	Total      int                    `json:"total"` // total matching rows, not just this page
	Page       int                    `json:"page"`
	PerPage    int                    `json:"per_page"`
	TotalPages int                    `json:"total_pages"`
}

// FeedListResponse is one paginated page of feeds.
type FeedListResponse struct {
	Feeds      []Feed `json:"feeds"`
	Total      int    `json:"total"`
	Page       int    `json:"page"`
	PerPage    int    `json:"per_page"`
	TotalPages int    `json:"total_pages"`
}

// Stats aggregates site-wide counters, e.g. for a dashboard endpoint.
type Stats struct {
	TotalNews       int64          `json:"total_news"`
	TotalFeeds      int64          `json:"total_feeds"`
	TotalUsers      int64          `json:"total_users"`
	TotalTranslated int64          `json:"total_translated"`
	NewsToday       int64          `json:"news_today"`
	NewsThisWeek    int64          `json:"news_this_week"`
	NewsThisMonth   int64          `json:"news_this_month"`
	TopCategories   []CategoryStat `json:"top_categories"`
	TopCountries    []CountryStat  `json:"top_countries"`
}

// CategoryStat is one row of the per-category news counters.
type CategoryStat struct {
	CategoryID   int64  `json:"category_id"`
	CategoryName string `json:"category_name"`
	Count        int64  `json:"count"`
}

// CountryStat is one row of the per-country news counters.
type CountryStat struct {
	CountryID   int64  `json:"country_id"`
	CountryName string `json:"country_name"`
	FlagEmoji   string `json:"flag_emoji"`
	Count       int64  `json:"count"`
}
// LoginRequest is the login payload; field validation is declared via the
// binding tags (required email, password of at least 6 characters).
type LoginRequest struct {
	Email    string `json:"email" binding:"required,email"`
	Password string `json:"password" binding:"required,min=6"`
}

// RegisterRequest is the account-creation payload.
type RegisterRequest struct {
	Email    string `json:"email" binding:"required,email"`
	Username string `json:"username" binding:"required,min=3,max=50"`
	Password string `json:"password" binding:"required,min=6"`
}

// AuthResponse is returned after a successful login or registration.
type AuthResponse struct {
	Token       string `json:"token"` // auth token for subsequent calls — format (JWT?) not visible here
	User        User   `json:"user"`
	IsFirstUser bool   `json:"is_first_user,omitempty"` // presumably true when this account is the first ever created; confirm in the handler
}

// ErrorResponse is the generic error envelope returned by API handlers.
type ErrorResponse struct {
	Error   string `json:"error"`
	Message string `json:"message,omitempty"`
}

// SuccessResponse is the generic success envelope returned by API handlers.
type SuccessResponse struct {
	Message string `json:"message"`
}

View file

@ -0,0 +1,170 @@
package services

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"time"

	"github.com/rss2/backend/internal/config"
	"github.com/rss2/backend/internal/models"
)

// cfg is the process-wide configuration shared by all service clients in
// this package; it is set once via Init.
var (
	cfg *config.Config
)

// Init stores the configuration used by this package. It must be called
// before any other function here: cfg starts out nil, and the HTTP helpers
// dereference it (cfg.TranslationURL etc.) without a nil check.
func Init(c *config.Config) {
	cfg = c
}

// TranslationRequest is the payload sent to the external translation service.
type TranslationRequest struct {
	SourceLang string   `json:"source_lang"`
	TargetLang string   `json:"target_lang"`
	Texts      []string `json:"texts"`
}

// TranslationResponse is the translation service's reply.
type TranslationResponse struct {
	Translations []string `json:"translations"`
}
// Translate sends texts to the external translation service and returns the
// translated strings.
//
// A nil slice with nil error is returned when texts is empty. The request
// now honours ctx for cancellation/deadline in addition to the 30s client
// timeout; the previous implementation accepted ctx but never used it.
func Translate(ctx context.Context, sourceLang, targetLang string, texts []string) ([]string, error) {
	if len(texts) == 0 {
		return nil, nil
	}
	reqBody := TranslationRequest{
		SourceLang: sourceLang,
		TargetLang: targetLang,
		Texts:      texts,
	}
	body, err := json.Marshal(reqBody)
	if err != nil {
		return nil, err
	}
	// Build the request with the caller's context so it can be cancelled.
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, cfg.TranslationURL+"/translate", bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	httpClient := &http.Client{Timeout: 30 * time.Second}
	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("translation request failed: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("translation service returned status %d", resp.StatusCode)
	}
	var result TranslationResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, err
	}
	return result.Translations, nil
}
// EmbeddingRequest is the payload for the Ollama /api/embeddings endpoint.
type EmbeddingRequest struct {
	Model string   `json:"model"`
	Input []string `json:"input"`
}

// EmbeddingResponse carries one embedding vector per input string.
type EmbeddingResponse struct {
	Embeddings [][]float64 `json:"embeddings"`
}
// GetEmbeddings requests embedding vectors for texts from the Ollama
// embeddings endpoint, one vector per input string.
//
// A nil slice with nil error is returned when texts is empty. The request
// now honours ctx for cancellation/deadline in addition to the 60s client
// timeout; the previous implementation accepted ctx but never used it.
func GetEmbeddings(ctx context.Context, texts []string) ([][]float64, error) {
	if len(texts) == 0 {
		return nil, nil
	}
	reqBody := EmbeddingRequest{
		Model: "mxbai-embed-large", // embedding model served by Ollama
		Input: texts,
	}
	body, err := json.Marshal(reqBody)
	if err != nil {
		return nil, err
	}
	// Build the request with the caller's context so it can be cancelled.
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, cfg.OllamaURL+"/api/embeddings", bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	httpClient := &http.Client{Timeout: 60 * time.Second}
	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("embeddings request failed: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("embeddings service returned status %d", resp.StatusCode)
	}
	var result EmbeddingResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, err
	}
	return result.Embeddings, nil
}
// NERRequest is the payload for the spaCy service's /ner endpoint.
type NERRequest struct {
	Text string `json:"text"`
}

// NERResponse lists the entities found in the submitted text.
type NERResponse struct {
	Entities []Entity `json:"entities"`
}

// Entity is a single named entity with its label and character offsets
// within the original text.
type Entity struct {
	Text  string `json:"text"`
	Label string `json:"label"`
	Start int    `json:"start"`
	End   int    `json:"end"`
}
// ExtractEntities runs named-entity recognition on text via the spaCy
// service and returns the entities found.
//
// The request now honours ctx for cancellation/deadline in addition to the
// 30s client timeout; the previous implementation accepted ctx but never
// used it.
func ExtractEntities(ctx context.Context, text string) ([]Entity, error) {
	reqBody := NERRequest{Text: text}
	body, err := json.Marshal(reqBody)
	if err != nil {
		return nil, err
	}
	// Build the request with the caller's context so it can be cancelled.
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, cfg.SpacyURL+"/ner", bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	httpClient := &http.Client{Timeout: 30 * time.Second}
	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("NER request failed: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("NER service returned status %d", resp.StatusCode)
	}
	var result NERResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, err
	}
	return result.Entities, nil
}
// SemanticSearch is a placeholder for vector-based news search. It embeds
// the query (which also validates that the embeddings service is reachable)
// but always returns an empty result page: the actual vector lookup is not
// implemented yet. lang is currently unused; page/perPage are only echoed
// back in the response.
func SemanticSearch(ctx context.Context, query, lang string, page, perPage int) (*models.NewsListResponse, error) {
	embeddings, err := GetEmbeddings(ctx, []string{query})
	if err != nil {
		return nil, err
	}
	if len(embeddings) == 0 {
		return &models.NewsListResponse{}, nil
	}
	// TODO: query the vector store with embeddings[0] and populate News.
	return &models.NewsListResponse{
		News:       []models.NewsWithTranslations{},
		Total:      0,
		Page:       page,
		PerPage:    perPage,
		TotalPages: 0,
	}, nil
}

View file

@ -0,0 +1,83 @@
package workers
import (
	"context"
	"fmt"
	"net/url"
	"os"
	"strconv"
	"time"

	"github.com/jackc/pgx/v5/pgxpool"
)
// pool is the process-wide pgx connection pool, initialised by Connect.
var pool *pgxpool.Pool

// Config holds PostgreSQL connection parameters, typically filled from the
// environment via LoadDBConfig.
type Config struct {
	Host     string
	Port     int
	DBName   string
	User     string
	Password string
}
// LoadDBConfig builds a Config from the DB_* environment variables, using
// local-development defaults for any variable that is unset or empty.
func LoadDBConfig() *Config {
	cfg := Config{
		Host:     getEnv("DB_HOST", "localhost"),
		Port:     getEnvInt("DB_PORT", 5432),
		DBName:   getEnv("DB_NAME", "rss"),
		User:     getEnv("DB_USER", "rss"),
		Password: getEnv("DB_PASS", "rss"),
	}
	return &cfg
}
// Connect opens the package-level pgx pool against the database described by
// cfg and verifies connectivity with a ping.
//
// Fix: the previous implementation interpolated the raw password into the
// DSN string, producing an invalid URL whenever the password contained
// reserved characters such as '@', '/' or ':'. Credentials are now
// URL-escaped via net/url.
func Connect(cfg *Config) error {
	dsn := url.URL{
		Scheme:   "postgres",
		User:     url.UserPassword(cfg.User, cfg.Password),
		Host:     fmt.Sprintf("%s:%d", cfg.Host, cfg.Port),
		Path:     cfg.DBName,
		RawQuery: "sslmode=disable",
	}
	poolConfig, err := pgxpool.ParseConfig(dsn.String())
	if err != nil {
		return fmt.Errorf("failed to parse config: %w", err)
	}
	// Pool sizing/lifetime tuning (unchanged from the original).
	poolConfig.MaxConns = 25
	poolConfig.MinConns = 5
	poolConfig.MaxConnLifetime = time.Hour
	poolConfig.MaxConnIdleTime = 30 * time.Minute
	pool, err = pgxpool.NewWithConfig(context.Background(), poolConfig)
	if err != nil {
		return fmt.Errorf("failed to create pool: %w", err)
	}
	if err = pool.Ping(context.Background()); err != nil {
		return fmt.Errorf("failed to ping database: %w", err)
	}
	return nil
}
// GetPool returns the shared connection pool; nil until Connect has
// succeeded.
func GetPool() *pgxpool.Pool {
	return pool
}
// Close shuts down the shared pool. Safe to call when Connect was never
// run (pool still nil).
func Close() {
	if pool == nil {
		return
	}
	pool.Close()
}
func getEnv(key, defaultValue string) string {
if value := os.Getenv(key); value != "" {
return value
}
return defaultValue
}
func getEnvInt(key string, defaultValue int) int {
if value := os.Getenv(key); value != "" {
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
}
return defaultValue
}

BIN
backend/server Executable file

Binary file not shown.

188
cache.py
View file

@ -1,188 +0,0 @@
"""
Redis cache module for high-traffic endpoints.
Provides caching decorator and invalidation utilities.
"""
import redis
import json
import logging
import hashlib
import time
from functools import wraps
from config import REDIS_HOST, REDIS_PORT, REDIS_PASSWORD, REDIS_TTL_DEFAULT
logger = logging.getLogger(__name__)
# Module-level singleton state for the Redis client.
_redis_client = None  # cached redis.Redis instance, or None when unavailable
_redis_last_fail = 0  # time.time() of the last failed connect; drives the 60s backoff
def get_redis():
    """Return the shared Redis client, or None while in failure backoff.

    The client is created lazily and reused across calls.  After a failed
    connection attempt, further attempts are suppressed for 60 seconds so a
    down Redis does not slow down every request.
    """
    global _redis_client, _redis_last_fail

    if _redis_client is not None:
        return _redis_client

    # Still inside the backoff window after a recent failure?
    now = time.time()
    if now - _redis_last_fail < 60:
        return None

    options = {
        'host': REDIS_HOST,
        'port': REDIS_PORT,
        'decode_responses': True,
        'socket_connect_timeout': 1,  # fail fast on an unreachable server
        'socket_timeout': 1,
    }
    if REDIS_PASSWORD:
        options['password'] = REDIS_PASSWORD

    try:
        client = redis.Redis(**options)
        client.ping()
    except Exception as e:
        logger.warning(f"Redis connection failed: {e}. Caching disabled for 60s.")
        _redis_client = None
        _redis_last_fail = now
        return None

    _redis_client = client
    _redis_last_fail = 0
    return _redis_client
def cached(ttl_seconds=None, prefix="cache"):
    """
    Decorator for caching function results in Redis.

    Falls back to calling the function directly when Redis is unavailable,
    and again if a Redis/JSON error occurs mid-request, so a broken cache
    never breaks the endpoint.

    Args:
        ttl_seconds: Time to live in seconds (default from config)
        prefix: Key prefix for cache entries

    NOTE(review): a non-JSON Flask Response is cached as its text body; on a
    cache hit that body round-trips through json.loads/jsonify, so the
    response type may differ from the original — confirm whether any cached
    endpoint returns non-JSON.
    """
    if ttl_seconds is None:
        ttl_seconds = REDIS_TTL_DEFAULT
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            r = get_redis()
            if r is None:
                # Redis unavailable, call function directly
                return func(*args, **kwargs)
            # Build cache key from function name and arguments.
            # md5 gives a deterministic hash across processes.
            key_data = f"{args}:{sorted(kwargs.items())}"
            # Mix in Flask request args if available to prevent collisions
            # on filtered routes sharing the same view function.
            try:
                from flask import request
                if request:
                    key_data += f":args:{sorted(request.args.items())}"
            except Exception:
                pass
            key_hash = hashlib.md5(key_data.encode('utf-8')).hexdigest()
            cache_key = f"cache:{prefix}:{func.__name__}:{key_hash}"
            try:
                # Try to get from cache.
                cached_value = r.get(cache_key)
                if cached_value is not None:
                    try:
                        data = json.loads(cached_value)
                        # Cached JSON payload: rebuild a JSON response.
                        from flask import jsonify
                        return jsonify(data)
                    except (json.JSONDecodeError, ImportError):
                        return cached_value
                # Cache miss - call function and cache result.
                result = func(*args, **kwargs)
                # Flask Response objects cannot be JSON-serialised directly;
                # cache their payload instead.
                cache_data = result
                try:
                    from flask import Response
                    if isinstance(result, Response):
                        if result.is_json:
                            cache_data = result.get_json()
                        else:
                            cache_data = result.get_data(as_text=True)
                except Exception:
                    # Fix: was `except (ImportError, Exception)` — the tuple
                    # was redundant since Exception already covers ImportError.
                    pass
                r.setex(cache_key, ttl_seconds, json.dumps(cache_data, default=str))
                return result
            except (redis.RedisError, json.JSONDecodeError) as e:
                logger.warning(f"Cache error for {func.__name__}: {e}")
                return func(*args, **kwargs)
        return wrapper
    return decorator
def invalidate_pattern(pattern):
    """Delete every cache key matching ``cache:<pattern>``.

    Uses SCAN (non-blocking, batches of 100) rather than KEYS.  Silently
    does nothing when Redis is unavailable.

    Args:
        pattern: Pattern to match (e.g. "home:*" or "stats:*")
    """
    client = get_redis()
    if client is None:
        return
    try:
        deleted = 0
        cursor = 0
        while True:
            cursor, batch = client.scan(cursor, match=f"cache:{pattern}", count=100)
            if batch:
                client.delete(*batch)
                deleted += len(batch)
            if cursor == 0:
                break
        if deleted > 0:
            logger.info(f"Invalidated {deleted} cache keys matching '{pattern}'")
    except redis.RedisError as e:
        logger.warning(f"Cache invalidation failed: {e}")
def cache_get(key):
    """Fetch and JSON-decode the value at ``cache:<key>``.

    Returns None on a miss, on a Redis error, or when the stored value is
    not valid JSON.
    """
    client = get_redis()
    if client is None:
        return None
    try:
        raw = client.get(f"cache:{key}")
        if not raw:
            return None
        return json.loads(raw)
    except (redis.RedisError, json.JSONDecodeError):
        return None
def cache_set(key, value, ttl_seconds=None):
    """Store value (JSON-encoded) at ``cache:<key>`` with a TTL.

    Returns True on success, False when Redis is unavailable or errors.
    """
    if ttl_seconds is None:
        ttl_seconds = REDIS_TTL_DEFAULT
    client = get_redis()
    if client is None:
        return False
    try:
        client.setex(f"cache:{key}", ttl_seconds, json.dumps(value, default=str))
    except redis.RedisError:
        return False
    return True
def cache_del(key):
    """Delete ``cache:<key>``; True on success, False on error/unavailable."""
    client = get_redis()
    if client is None:
        return False
    try:
        client.delete(f"cache:{key}")
    except redis.RedisError:
        return False
    return True

View file

@ -1,69 +0,0 @@
import os
from dotenv import load_dotenv

# Read a local .env file (if present) before pulling values from the environment.
load_dotenv()

# Default DB connection (kept for callers that predate the read/write split).
DB_CONFIG = {
    "dbname": os.getenv("DB_NAME", "rss"),
    "user": os.getenv("DB_USER", "rss"),
    "password": os.getenv("DB_PASS", ""),
    "host": os.getenv("DB_HOST", "localhost"),
    "port": int(os.getenv("DB_PORT", 5432)),
}
# Write DB (primary) - for workers/ingestion
DB_WRITE_CONFIG = {
    "dbname": os.getenv("DB_NAME", "rss"),
    "user": os.getenv("DB_USER", "rss"),
    "password": os.getenv("DB_PASS", ""),
    "host": os.getenv("DB_WRITE_HOST", os.getenv("DB_HOST", "localhost")),
    "port": int(os.getenv("DB_PORT", 5432)),
}
# Read DB (replica) - for web queries
DB_READ_CONFIG = {
    "dbname": os.getenv("DB_NAME", "rss"),
    "user": os.getenv("DB_USER", "rss"),
    "password": os.getenv("DB_PASS", ""),
    "host": os.getenv("DB_READ_HOST", os.getenv("DB_HOST", "localhost")),
    "port": int(os.getenv("DB_PORT", 5432)),
}
# Redis Cache
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", None)  # None = no authentication (kept for compatibility)
REDIS_TTL_DEFAULT = int(os.getenv("REDIS_TTL_DEFAULT", 60))
SECRET_KEY = os.getenv("SECRET_KEY", "CAMBIA_ESTA_CLAVE_POR_ALGO_LARGO_Y_ALEATORIO")
DEFAULT_LANG = os.getenv("DEFAULT_LANG", "es")
DEFAULT_TRANSLATION_LANG = os.getenv("DEFAULT_TRANSLATION_LANG", "es")
WEB_TRANSLATED_DEFAULT = int(os.getenv("WEB_TRANSLATED_DEFAULT", "1"))
# Pagination configuration
NEWS_PER_PAGE_DEFAULT = 30  # reduced from 50 for better performance
# RSS ingestion tuning
RSS_MAX_WORKERS = int(os.getenv("RSS_MAX_WORKERS", "3"))  # reduced from 10 to 3
RSS_FEED_TIMEOUT = int(os.getenv("RSS_FEED_TIMEOUT", "60"))  # increased timeout
RSS_MAX_FAILURES = int(os.getenv("RSS_MAX_FAILURES", "5"))
# Translation pipeline tuning
TARGET_LANGS = os.getenv("TARGET_LANGS", "es")
TRANSLATOR_BATCH = int(os.getenv("TRANSLATOR_BATCH", "2"))  # reduced from 4 to 2
ENQUEUE = int(os.getenv("ENQUEUE", "50"))  # reduced from 200 to 50
TRANSLATOR_SLEEP_IDLE = float(os.getenv("TRANSLATOR_SLEEP_IDLE", "10"))  # increased from 5 to 10
MAX_SRC_TOKENS = int(os.getenv("MAX_SRC_TOKENS", "512"))
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "256"))
NUM_BEAMS_TITLE = int(os.getenv("NUM_BEAMS_TITLE", "1"))  # reduced beams to save CPU
NUM_BEAMS_BODY = int(os.getenv("NUM_BEAMS_BODY", "1"))
UNIVERSAL_MODEL = os.getenv("UNIVERSAL_MODEL", "facebook/nllb-200-1.3B")
DEVICE = os.getenv("DEVICE", "cpu")
# Runtime environment passthroughs (consumed by tokenizers/torch, not this app).
TOKENIZERS_PARALLELISM = os.getenv("TOKENIZERS_PARALLELISM", "false")
PYTHONUNBUFFERED = os.getenv("PYTHONUNBUFFERED", "1")
PYTORCH_CUDA_ALLOC_CONF = os.getenv("PYTORCH_CUDA_ALLOC_CONF", "")

76
db.py
View file

@ -1,76 +0,0 @@
import os
import psycopg2
from contextlib import contextmanager

# Database configuration, read once at import time from the environment.
DB_HOST = os.environ.get("DB_HOST", "db")
DB_NAME = os.environ.get("DB_NAME", "rss")
DB_USER = os.environ.get("DB_USER", "rss")
DB_PASS = os.environ.get("DB_PASS", "x")
DB_PORT = os.environ.get("DB_PORT", "5432")
# Read/write split: reads prefer the replica, writes always hit the primary.
DB_READ_HOST = os.environ.get("DB_READ_HOST", "db-replica")
DB_WRITE_HOST = os.environ.get("DB_WRITE_HOST", "db")
@contextmanager
def get_conn():
    """Yield a connection to the default host (primary/write), closing it on exit."""
    conn = None
    params = dict(
        host=DB_HOST,
        database=DB_NAME,
        user=DB_USER,
        password=DB_PASS,
        port=DB_PORT,
    )
    try:
        conn = psycopg2.connect(**params)
        yield conn
    finally:
        if conn is not None:
            conn.close()
@contextmanager
def get_read_conn():
    """Yield a read-only connection, preferring the replica.

    Falls back to the primary when the replica cannot be reached at
    connection time (5s connect timeout on the replica attempt).
    """
    base = dict(
        database=DB_NAME,
        user=DB_USER,
        password=DB_PASS,
        port=DB_PORT,
    )
    conn = None
    try:
        try:
            # Prefer the replica for read traffic.
            conn = psycopg2.connect(host=DB_READ_HOST, connect_timeout=5, **base)
        except (psycopg2.OperationalError, psycopg2.InterfaceError) as e:
            # Replica down at connect time: degrade to the primary.
            print(f"Warning: Replica unreachable ({e}), falling back to Primary for read.")
            conn = psycopg2.connect(host=DB_WRITE_HOST, **base)
        yield conn
    finally:
        if conn is not None:
            conn.close()
@contextmanager
def get_write_conn():
    """Yield a connection to the primary (write) database, closing it on exit."""
    conn = None
    params = dict(
        host=DB_WRITE_HOST,
        database=DB_NAME,
        user=DB_USER,
        password=DB_PASS,
        port=DB_PORT,
    )
    try:
        conn = psycopg2.connect(**params)
        yield conn
    finally:
        if conn is not None:
            conn.close()

47
deploy-clean.sh Executable file
View file

@ -0,0 +1,47 @@
#!/bin/bash
# Clean (re)deployment script for RSS2: wipes the data volumes,
# re-applies the database schema and restarts the service stack.
# WARNING: destructive — deletes the Postgres and Redis data directories.
echo "=== RSS2 Clean Deployment Script ==="
echo ""
# 1. Stop containers (and remove their anonymous volumes).
echo "1. Deteniendo contenedores..."
docker compose down -v 2>/dev/null
# 2. Remove named data volumes (ignore errors if they do not exist).
echo "2. Eliminando volúmenes de datos..."
docker volume rm rss2_db 2>/dev/null || true
docker volume rm rss2_redis 2>/dev/null || true
# If the Docker volumes have permission issues, also clear the host bind dirs.
echo " Intentando limpiar /data/..."
sudo rm -rf /datos/rss2/data/pgdata 2>/dev/null || true
sudo rm -rf /datos/rss2/data/redis-data 2>/dev/null || true
# 3. Start the database container first.
echo "3. Iniciando base de datos..."
docker compose up -d db
# 4. Wait for it to come up (fixed sleep; the healthcheck is consulted below).
echo "4. Esperando a que la base de datos esté lista..."
sleep 10
# Only touch the schema once the container reports healthy.
if docker compose ps db | grep -q "healthy"; then
echo " ✓ Base de datos iniciada correctamente"
# 5. Apply the full schema inside the container (show last lines only).
echo "5. Ejecutando script de inicialización..."
docker compose exec -T db psql -U rss -d rss -f /docker-entrypoint-initdb.d/00-complete-schema.sql 2>&1 | tail -5
# 6. Bring up the rest of the stack.
echo "6. Iniciando servicios..."
docker compose up -d redis backend-go rss2_frontend nginx rss-ingestor-go
echo ""
echo "=== Despliegue completado ==="
echo "Accede a: http://localhost:8001"
else
echo " ✗ Error: La base de datos no está healthy"
docker compose logs db
fi

View file

@ -12,47 +12,16 @@ services:
LC_ALL: C.UTF-8
TZ: Europe/Madrid
PGDATA: /var/lib/postgresql/data/18/main
command:
[
"postgres",
"-c",
"max_connections=200",
"-c",
"shared_buffers=4GB",
"-c",
"effective_cache_size=12GB",
"-c",
"work_mem=16MB",
"-c",
"maintenance_work_mem=512MB",
"-c",
"autovacuum_max_workers=3",
"-c",
"autovacuum_vacuum_scale_factor=0.02",
"-c",
"autovacuum_vacuum_cost_limit=1000",
# Parallel Query Optimization (Adjusted)
"-c",
"max_worker_processes=8",
"-c",
"max_parallel_workers=6",
"-c",
"max_parallel_workers_per_gather=2",
# Streaming Replication
"-c",
"wal_level=replica",
"-c",
"max_wal_senders=5",
"-c",
"wal_keep_size=1GB",
"-c",
"hot_standby=on"
]
volumes:
- ./pgdata:/var/lib/postgresql/data
- ./init-db:/docker-entrypoint-initdb.d:ro
- ./data/pgdata:/var/lib/postgresql/data
- ./init-db:/docker-entrypoint-initdb.d:rw
- ./docker-entrypoint-db.sh:/docker-entrypoint-db.sh:ro
entrypoint: ["bash", "/docker-entrypoint-db.sh"]
networks:
- backend
backend:
aliases:
- db
- rss2_db
restart: unless-stopped
healthcheck:
test: [ "CMD-SHELL", "pg_isready -h 127.0.0.1 -p 5432 -U $$POSTGRES_USER -d $$POSTGRES_DB || exit 1" ]
@ -67,40 +36,6 @@ services:
reservations:
memory: 4G
db-replica:
build:
context: .
dockerfile: Dockerfile.replica
container_name: rss2_db_replica
shm_size: 2gb
environment:
POSTGRES_DB: ${POSTGRES_DB:-rss}
POSTGRES_USER: ${POSTGRES_USER:-rss}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
PGDATA: /var/lib/postgresql/data
TZ: Europe/Madrid
command: [ "postgres", "-c", "max_connections=200", "-c", "shared_buffers=256MB", "-c", "effective_cache_size=2GB", "-c", "hot_standby=on", "-c", "max_worker_processes=16", "-c", "hot_standby_feedback=on", "-c", "max_standby_streaming_delay=300s" ]
volumes:
- ./pgdata-replica:/var/lib/postgresql/data
networks:
- backend
depends_on:
db:
condition: service_healthy
restart: unless-stopped
healthcheck:
test: [ "CMD-SHELL", "pg_isready -h 127.0.0.1 -p 5432 -U rss -d rss || exit 1" ]
interval: 5s
timeout: 5s
retries: 30
start_period: 30s
deploy:
resources:
limits:
memory: 4G
reservations:
memory: 2G
redis:
image: redis:7-alpine
container_name: rss2_redis
@ -110,11 +45,14 @@ services:
command: >
redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru --requirepass ${REDIS_PASSWORD}
volumes:
- ./redis-data:/data
- ./data/redis-data:/data
- /etc/timezone:/etc/timezone:ro
- /etc/localtime:/etc/localtime:ro
networks:
- backend
backend:
aliases:
- redis
- rss2_redis
restart: unless-stopped
healthcheck:
test: [ "CMD", "redis-cli", "--no-auth-warning", "-a", "${REDIS_PASSWORD}", "ping" ]
@ -156,73 +94,80 @@ services:
reservations:
memory: 512M
rss-tasks:
build: .
container_name: rss2_tasks_py
command: bash -lc "python -m scheduler"
langdetect:
build:
context: .
dockerfile: Dockerfile
container_name: rss2_langdetect_py
command: bash -lc "python -m workers.langdetect_worker"
environment:
DB_HOST: db
DB_PORT: 5432
DB_NAME: ${DB_NAME:-rss}
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
REDIS_HOST: redis
REDIS_PORT: 6379
REDIS_PASSWORD: ${REDIS_PASSWORD}
LANG_DETECT_SLEEP: 60
LANG_DETECT_BATCH: 1000
TZ: Europe/Madrid
volumes:
- ./workers:/app/workers
networks:
- backend
depends_on:
db:
condition: service_healthy
redis:
restart: unless-stopped
deploy:
resources:
limits:
cpus: '0.5'
memory: 512M
# ==================================================================================
# SCRAPER WORKER (Go) - Extrae artículos de URLs
# ==================================================================================
scraper:
build:
context: .
dockerfile: Dockerfile.scraper
container_name: rss2_scraper
environment:
DB_HOST: db
DB_PORT: 5432
DB_NAME: ${DB_NAME:-rss}
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
SCRAPER_SLEEP: 60
SCRAPER_BATCH: 10
TZ: Europe/Madrid
networks:
- backend
depends_on:
db:
condition: service_healthy
restart: unless-stopped
deploy:
resources:
limits:
cpus: '1'
memory: 1G
memory: 512M
url-worker:
# ==================================================================================
# DISCOVERY WORKER (Go) - Descubre RSS feeds
# ==================================================================================
discovery:
build:
context: .
dockerfile: Dockerfile.url_worker
container_name: rss2_url_worker
command: bash -lc "python -m workers.url_worker_daemon"
dockerfile: Dockerfile.discovery
container_name: rss2_discovery
environment:
DB_HOST: db
DB_PORT: 5432
DB_NAME: ${DB_NAME:-rss}
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
DB_READ_HOST: db
DB_WRITE_HOST: db
TZ: Europe/Madrid
networks:
- backend
depends_on:
db:
condition: service_healthy
restart: unless-stopped
deploy:
resources:
limits:
cpus: '2'
memory: 2G
url-discovery-worker:
build: .
container_name: rss2_url_discovery
command: bash -lc "python -m workers.url_discovery_worker"
environment:
DB_HOST: db
DB_PORT: 5432
DB_NAME: ${DB_NAME:-rss}
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
URL_DISCOVERY_INTERVAL_MIN: 15
URL_DISCOVERY_BATCH_SIZE: 10
DISCOVERY_INTERVAL: 900
DISCOVERY_BATCH: 10
MAX_FEEDS_PER_URL: 5
TZ: Europe/Madrid
networks:
@ -235,104 +180,109 @@ services:
resources:
limits:
cpus: '1'
memory: 1G
memory: 512M
rss2_web:
build: .
container_name: rss2_web
command: bash -lc "gunicorn --config gunicorn_config.py app:app"
volumes:
# SEGURIDAD: Código en read-only donde sea posible
- ./app.py:/app/app.py:ro
- ./routers:/app/routers:ro
- ./models:/app/models:ro
- ./utils:/app/utils:ro
- ./templates:/app/templates:ro
- ./static:/app/static:ro
- ./config.py:/app/config.py:ro
- ./db.py:/app/db.py:ro
- ./cache.py:/app/cache.py:ro
- ./gunicorn_config.py:/app/gunicorn_config.py:ro
# Directorios escribibles
- ./hf_cache:/app/hf_cache
- ./data:/app/data
# ==================================================================================
# WIKI WORKER (Go) - Wikipedia info and thumbnails
# ==================================================================================
wiki-worker:
build:
context: .
dockerfile: Dockerfile.wiki
container_name: rss2_wiki_worker
environment:
DB_HOST: db
DB_PORT: 5432
DB_NAME: ${DB_NAME:-rss}
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
DB_READ_HOST: db
DB_WRITE_HOST: db
REDIS_HOST: redis
REDIS_PORT: 6379
REDIS_PASSWORD: ${REDIS_PASSWORD}
QDRANT_HOST: qdrant
QDRANT_PORT: 6333
QDRANT_COLLECTION_NAME: ${QDRANT_COLLECTION_NAME:-news_vectors}
EMB_MODEL: ${EMB_MODEL:-sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2}
SECRET_KEY: ${SECRET_KEY}
GUNICORN_WORKERS: 8
ALLTALK_URL: http://host.docker.internal:7851
WIKI_SLEEP: 10
TZ: Europe/Madrid
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
- ./data/wiki_images:/app/data/wiki_images
networks:
- frontend
- backend
depends_on:
db:
condition: service_healthy
# db-replica:
# condition: service_healthy
redis:
condition: service_healthy
qdrant:
condition: service_started
restart: unless-stopped
deploy:
resources:
limits:
cpus: '8'
memory: 8G
reservations:
memory: 4G
devices:
- driver: nvidia
count: 1
capabilities: [ gpu ]
cpus: '0.5'
memory: 256M
# ==================================================================================
# BACKEND GO (API REST)
# ==================================================================================
backend-go:
build:
context: ./backend
dockerfile: Dockerfile
container_name: rss2_backend_go
environment:
TZ: Europe/Madrid
DATABASE_URL: postgres://${POSTGRES_USER:-rss}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-rss}?sslmode=disable
REDIS_URL: redis://:${REDIS_PASSWORD:-rss_redis_pass_2024}@redis:6379
SECRET_KEY: ${SECRET_KEY:-change_this_to_a_long_random_string}
SERVER_PORT: "8080"
volumes:
- ./data/wiki_images:/app/data/wiki_images
networks:
- backend
- frontend
depends_on:
db:
condition: service_healthy
redis:
condition: service_healthy
restart: unless-stopped
# ==================================================================================
# FRONTEND REACT
# ==================================================================================
rss2_frontend:
build:
context: ./frontend
dockerfile: Dockerfile
container_name: rss2_frontend
environment:
TZ: Europe/Madrid
VITE_API_URL: /api
networks:
- frontend
depends_on:
- backend-go
restart: unless-stopped
# ==================================================================================
# NGINX (Puerto 8001 - sirve React + proxy API)
# ==================================================================================
nginx:
image: nginx:alpine
container_name: rss2_nginx
environment:
TZ: Europe/Madrid
ports:
# ÚNICO puerto expuesto públicamente
- "8001:80"
volumes:
- ./nginx.conf:/etc/nginx/nginx.conf:ro
- ./static:/app/static:ro
- /etc/timezone:/etc/timezone:ro
- /etc/localtime:/etc/localtime:ro
networks:
- frontend
depends_on:
- rss2_web
- rss2_frontend
- backend-go
restart: unless-stopped
deploy:
resources:
limits:
cpus: '2'
memory: 512M
# ==================================================================================
# TRANSLATOR CPU (CTranslate2) - Scale with: docker compose up -d --scale translator=3
# ==================================================================================
translator:
build:
context: .
dockerfile: Dockerfile
dockerfile: Dockerfile.translator
image: rss2-translator:latest
container_name: rss2_translator_py
command: bash -lc "python -m workers.translation_worker"
command: bash -lc "python -m workers.ctranslator_worker"
security_opt:
- seccomp=unconfined
environment:
DB_HOST: db
DB_PORT: 5432
@ -340,41 +290,36 @@ services:
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
TARGET_LANGS: es
TRANSLATOR_BATCH: 128
ENQUEUE: 300
# CTranslate2 configuration
TRANSLATOR_BATCH: 32
CT2_MODEL_PATH: /app/models/nllb-ct2
CT2_DEVICE: cuda
CT2_COMPUTE_TYPE: int8_float16
CT2_DEVICE: cpu
CT2_COMPUTE_TYPE: int8
UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M
HF_HOME: /app/hf_cache
TZ: Europe/Madrid
TRANSLATOR_ID: ${TRANSLATOR_ID:-}
volumes:
- ./workers:/app/workers
- ./hf_cache:/app/hf_cache
- ./models:/app/models
networks:
- backend
deploy:
resources:
limits:
memory: 8G
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [ gpu ]
profiles:
- cpu-only
depends_on:
db:
condition: service_healthy
restart: unless-stopped
translator2:
# ==================================================================================
# TRANSLATION SCHEDULER - Creates translation jobs
# ==================================================================================
translation-scheduler:
build:
context: .
dockerfile: Dockerfile
image: rss2-translator2:latest
container_name: rss2_translator_py2
command: bash -lc "python -m workers.translation_worker"
dockerfile: Dockerfile.scheduler
image: rss2-scheduler:latest
container_name: rss2_translation_scheduler
environment:
DB_HOST: db
DB_PORT: 5432
@ -382,40 +327,35 @@ services:
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
TARGET_LANGS: es
TRANSLATOR_BATCH: 128
ENQUEUE: 300
CT2_MODEL_PATH: /app/models/nllb-ct2
CT2_DEVICE: cuda
CT2_COMPUTE_TYPE: int8_float16
UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M
HF_HOME: /app/hf_cache
SCHEDULER_BATCH: 1000
SCHEDULER_SLEEP: 30
TZ: Europe/Madrid
volumes:
- ./hf_cache:/app/hf_cache
- ./models:/app/models
- ./workers:/app/workers
networks:
- backend
deploy:
resources:
limits:
memory: 8G
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [ gpu ]
cpus: '0.5'
memory: 256M
depends_on:
db:
condition: service_healthy
restart: unless-stopped
translator3:
# ==================================================================================
# TRANSLATOR GPU (CTranslate2 with CUDA)
# ==================================================================================
translator-gpu:
build:
context: .
dockerfile: Dockerfile
image: rss2-translator3:latest
container_name: rss2_translator_py3
command: bash -lc "python -m workers.translation_worker"
dockerfile: Dockerfile.translator-gpu
image: rss2-translator-gpu:latest
container_name: rss2_translator_gpu
command: bash -lc "python -m workers.ctranslator_worker"
security_opt:
- seccomp=unconfined
environment:
DB_HOST: db
DB_PORT: 5432
@ -423,14 +363,15 @@ services:
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
TARGET_LANGS: es
TRANSLATOR_BATCH: 128
ENQUEUE: 300
TRANSLATOR_BATCH: 64
CT2_MODEL_PATH: /app/models/nllb-ct2
CT2_DEVICE: cuda
CT2_COMPUTE_TYPE: int8_float16
CT2_COMPUTE_TYPE: float16
UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M
HF_HOME: /app/hf_cache
TZ: Europe/Madrid
volumes:
- ./workers:/app/workers
- ./hf_cache:/app/hf_cache
- ./models:/app/models
networks:
@ -438,7 +379,7 @@ services:
deploy:
resources:
limits:
memory: 8G
memory: 4G
reservations:
devices:
- driver: nvidia
@ -470,6 +411,7 @@ services:
HF_HOME: /app/hf_cache
TZ: Europe/Madrid
volumes:
- ./workers:/app/workers
- ./hf_cache:/app/hf_cache
networks:
- backend
@ -487,19 +429,53 @@ services:
condition: service_healthy
restart: unless-stopped
related:
# ==================================================================================
# TOPICS WORKER (Go) - Matching temas y países
# ==================================================================================
topics:
build:
context: .
dockerfile: Dockerfile
container_name: rss2_related_py
command: bash -lc "python -m workers.related_worker"
dockerfile: Dockerfile.topics
container_name: rss2_topics
environment:
DB_HOST: db
DB_PORT: 5432
DB_NAME: ${DB_NAME:-rss}
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
RELATED_WINDOW_H: 168
TOPICS_SLEEP: 10
TOPICS_BATCH: 500
TZ: Europe/Madrid
networks:
- backend
depends_on:
db:
condition: service_healthy
restart: unless-stopped
deploy:
resources:
limits:
cpus: '1'
memory: 512M
# ==================================================================================
# RELATED WORKER (Go) - Noticias relacionadas
# ==================================================================================
related:
build:
context: .
dockerfile: Dockerfile.related
container_name: rss2_related
environment:
DB_HOST: db
DB_PORT: 5432
DB_NAME: ${DB_NAME:-rss}
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
RELATED_SLEEP: 10
RELATED_BATCH: 200
RELATED_TOPK: 10
EMB_MODEL: mxbai-embed-large
TZ: Europe/Madrid
networks:
- backend
@ -513,6 +489,99 @@ services:
cpus: '1'
memory: 1G
qdrant:
image: qdrant/qdrant:latest
container_name: rss2_qdrant
environment:
TZ: Europe/Madrid
QDRANT__SERVICE__GRPC_PORT: 6334
volumes:
- ./data/qdrant_storage:/qdrant/storage
- /etc/timezone:/etc/timezone:ro
- /etc/localtime:/etc/localtime:ro
networks:
- backend
restart: unless-stopped
deploy:
resources:
limits:
cpus: '4'
memory: 4G
reservations:
memory: 2G
# ==================================================================================
# QDRANT WORKER (Go) - Vectorización y búsqueda semántica
# ==================================================================================
qdrant-worker:
build:
context: .
dockerfile: Dockerfile.qdrant
container_name: rss2_qdrant_worker
environment:
DB_HOST: db
DB_PORT: 5432
DB_NAME: ${DB_NAME:-rss}
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
QDRANT_HOST: qdrant
QDRANT_PORT: 6333
QDRANT_COLLECTION: news_vectors
OLLAMA_URL: http://ollama:11434
QDRANT_SLEEP: 30
QDRANT_BATCH: 100
TZ: Europe/Madrid
networks:
- backend
depends_on:
db:
condition: service_healthy
qdrant:
condition: service_started
restart: unless-stopped
deploy:
resources:
limits:
cpus: '1'
memory: 1G
# ==================================================================================
# NER WORKER (Python) - Extracción de entidades
# ==================================================================================
ner:
build:
context: .
dockerfile: Dockerfile
container_name: rss2_ner
command: bash -lc "python -m workers.ner_worker"
environment:
DB_HOST: db
DB_PORT: 5432
DB_NAME: ${DB_NAME:-rss}
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
NER_LANG: es
NER_BATCH: 64
HF_HOME: /app/hf_cache
TZ: Europe/Madrid
volumes:
- ./workers:/app/workers
- ./hf_cache:/app/hf_cache
networks:
- backend
depends_on:
db:
condition: service_healthy
restart: unless-stopped
deploy:
resources:
limits:
cpus: '2'
memory: 2G
# ==================================================================================
# CLUSTER WORKER (Python) - Agrupación de noticias
# ==================================================================================
cluster:
build:
context: .
@ -528,34 +597,8 @@ services:
EVENT_DIST_THRESHOLD: 0.35
EMB_MODEL: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
TZ: Europe/Madrid
networks:
- backend
depends_on:
db:
condition: service_healthy
restart: unless-stopped
deploy:
resources:
limits:
cpus: '2'
memory: 2G
ner:
build: .
container_name: rss2_ner
command: bash -lc "python -m workers.ner_worker"
environment:
DB_HOST: db
DB_PORT: 5432
DB_NAME: ${DB_NAME:-rss}
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
NER_LANG: es
NER_BATCH: 64
HF_HOME: /app/hf_cache
TZ: Europe/Madrid
volumes:
- ./hf_cache:/app/hf_cache
- ./workers:/app/workers
networks:
- backend
depends_on:
@ -568,33 +611,13 @@ services:
cpus: '2'
memory: 2G
topics:
# ==================================================================================
# LLM CATEGORIZER (Python) - Categorización con Ollama
# ==================================================================================
llm-categorizer:
build:
context: .
dockerfile: Dockerfile
container_name: rss2_topics_worker
command: bash -lc "python -m workers.topics_worker"
environment:
DB_HOST: db
DB_PORT: 5432
DB_NAME: ${DB_NAME:-rss}
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
TZ: Europe/Madrid
networks:
- backend
depends_on:
db:
condition: service_healthy
restart: unless-stopped
deploy:
resources:
limits:
cpus: '1'
memory: 1G
llm-categorizer:
build: .
container_name: rss2_llm_categorizer
command: bash -lc "python -m workers.simple_categorizer_worker"
environment:
@ -606,6 +629,8 @@ services:
CATEGORIZER_BATCH_SIZE: 10
CATEGORIZER_SLEEP_IDLE: 5
TZ: Europe/Madrid
volumes:
- ./workers:/app/workers
networks:
- backend
depends_on:
@ -618,72 +643,6 @@ services:
cpus: '2'
memory: 1G
qdrant:
image: qdrant/qdrant:latest
container_name: rss2_qdrant
environment:
TZ: Europe/Madrid
QDRANT__SERVICE__GRPC_PORT: 6334
# SEGURIDAD: Puertos NO expuestos - solo acceso interno
# ports:
# - "6333:6333"
# - "6334:6334"
volumes:
- ./qdrant_storage:/qdrant/storage
- /etc/timezone:/etc/timezone:ro
- /etc/localtime:/etc/localtime:ro
networks:
- backend
restart: unless-stopped
deploy:
resources:
limits:
cpus: '4'
memory: 4G
reservations:
memory: 2G
qdrant-worker:
build:
context: .
dockerfile: Dockerfile
container_name: rss2_qdrant_worker
command: bash -lc "python -m workers.qdrant_worker"
environment:
DB_HOST: db
DB_PORT: 5432
DB_NAME: ${DB_NAME:-rss}
DB_USER: ${DB_USER:-rss}
DB_PASS: ${DB_PASS}
DB_READ_HOST: db
DB_WRITE_HOST: db
QDRANT_HOST: qdrant
QDRANT_PORT: 6333
QDRANT_COLLECTION_NAME: ${QDRANT_COLLECTION_NAME:-news_vectors}
EMB_MODEL: ${EMB_MODEL:-sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2}
EMB_DEVICE: cpu
QDRANT_BATCH_SIZE: ${QDRANT_BATCH_SIZE:-100}
QDRANT_SLEEP_IDLE: ${QDRANT_SLEEP_IDLE:-30}
HF_HOME: /app/hf_cache
TZ: Europe/Madrid
volumes:
- ./hf_cache:/app/hf_cache
networks:
- backend
depends_on:
db:
condition: service_healthy
# db-replica:
# condition: service_healthy
qdrant:
condition: service_started
restart: unless-stopped
deploy:
resources:
limits:
cpus: '2'
memory: 4G
# ==================================================================================
# MONITORING STACK - SECURED
# ==================================================================================

42
docker-entrypoint-db.sh Executable file
View file

@ -0,0 +1,42 @@
#!/bin/bash
set -e

# RSS2 custom PostgreSQL entrypoint wrapper.
# Decides whether the data directory is fresh, healthy, or damaged before
# delegating to the official docker-entrypoint.sh with tuned server flags.
PGDATA_DIR="/var/lib/postgresql/data/18/main"

echo "RSS2: Checking database integrity..."

if [ ! -f "$PGDATA_DIR/PG_VERSION" ]; then
    # No version marker at all: brand-new volume. The official entrypoint
    # will run initdb for us.
    echo "RSS2: New database - will be initialized by docker-entrypoint"
elif [ ! -r "$PGDATA_DIR/PG_VERSION" ] || [ ! -s "$PGDATA_DIR/PG_VERSION" ]; then
    # PG_VERSION exists but is empty or unreadable: cluster metadata is
    # damaged (e.g. an interrupted initdb). Remove only the data files so
    # the official entrypoint re-initializes a fresh cluster.
    #
    # BUGFIX: the previous check ran `pg_isready -h localhost` here, but the
    # server has not been started yet at this point of the entrypoint, so
    # pg_isready ALWAYS failed — which wiped a perfectly healthy data
    # directory on every container (re)start.
    echo "RSS2: Database appears corrupted - removing old data files for fresh initialization..."
    rm -rf "$PGDATA_DIR"/*
    echo "RSS2: Data files removed - docker-entrypoint will initialize fresh database"
else
    echo "RSS2: Database is healthy"
fi

# Delegate to the official image entrypoint with performance/replication
# tuning. "$@" is appended so any extra flags from the compose `command`
# still take effect (and override earlier -c settings if repeated).
exec docker-entrypoint.sh \
    postgres \
    -c max_connections=200 \
    -c shared_buffers=4GB \
    -c effective_cache_size=12GB \
    -c work_mem=16MB \
    -c maintenance_work_mem=512MB \
    -c autovacuum_max_workers=3 \
    -c autovacuum_vacuum_scale_factor=0.02 \
    -c autovacuum_vacuum_cost_limit=1000 \
    -c max_worker_processes=8 \
    -c max_parallel_workers=6 \
    -c max_parallel_workers_per_gather=2 \
    -c wal_level=replica \
    -c max_wal_senders=5 \
    -c wal_keep_size=1GB \
    -c hot_standby=on \
    "$@"

View file

@ -1,391 +0,0 @@
# Sistema de Descubrimiento y Gestión de Feeds RSS
Este documento describe el sistema mejorado de descubrimiento automático y gestión de feeds RSS implementado en RSS2.
## 📋 Visión General
El sistema ahora incluye dos mecanismos para gestionar feeds RSS:
1. **Gestión Manual Mejorada**: Interfaz web para descubrir y añadir feeds desde cualquier URL
2. **Worker Automático**: Proceso en segundo plano que descubre feeds desde URLs almacenadas
## 🎯 Componentes del Sistema
### 1. Utilidad de Descubrimiento (`utils/feed_discovery.py`)
Módulo Python que proporciona funciones para:
- **`discover_feeds(url)`**: Descubre automáticamente todos los feeds RSS/Atom desde una URL
- **`validate_feed(feed_url)`**: Valida un feed y extrae su información básica
- **`get_feed_metadata(feed_url)`**: Obtiene metadatos detallados de un feed
```python
from utils.feed_discovery import discover_feeds
# Descubrir feeds desde una URL
feeds = discover_feeds('https://elpais.com')
# Retorna: [{'url': '...', 'title': '...', 'valid': True, ...}, ...]
```
### 2. Router de Feeds Mejorado (`routers/feeds.py`)
Nuevos endpoints añadidos:
#### Interfaz Web
- **`GET/POST /feeds/discover`**: Interfaz para descubrir feeds desde una URL
- Muestra todos los feeds encontrados
- Permite seleccionar cuáles añadir
- Aplica configuración global (categoría, país, idioma)
- **`POST /feeds/discover_and_add`**: Añade múltiples feeds seleccionados
- Extrae automáticamente título y descripción
- Evita duplicados
- Muestra estadísticas de feeds añadidos
#### API JSON
- **`POST /feeds/api/discover`**: API para descubrir feeds
```json
{
"url": "https://example.com"
}
```
Retorna:
```json
{
"feeds": [...],
"count": 5
}
```
- **`POST /feeds/api/validate`**: API para validar un feed específico
```json
{
"url": "https://example.com/rss"
}
```
### 3. Worker de Descubrimiento (`workers/url_discovery_worker.py`)
Worker automático que:
1. **Procesa URLs de la tabla `fuentes_url`**
- Prioriza URLs nunca procesadas
- Reintenta URLs con errores
- Actualiza URLs antiguas
2. **Descubre y Crea Feeds Automáticamente**
- Encuentra todos los feeds RSS en cada URL
- Valida cada feed encontrado
- Crea entradas en la tabla `feeds`
- Evita duplicados
3. **Gestión de Estado**
- Actualiza `last_check`, `last_status`, `status_message`
- Maneja errores gracefully
- Registra estadísticas detalladas
#### Configuración del Worker
Variables de entorno:
```bash
# Intervalo de ejecución (minutos)
URL_DISCOVERY_INTERVAL_MIN=15
# Número de URLs a procesar por ciclo
URL_DISCOVERY_BATCH_SIZE=10
# Máximo de feeds a crear por URL
MAX_FEEDS_PER_URL=5
```
#### Estados de URLs en `fuentes_url`
| Estado | Descripción |
|--------|-------------|
| `success` | Feeds creados exitosamente |
| `existing` | Feeds encontrados pero ya existían |
| `no_feeds` | No se encontraron feeds RSS |
| `no_valid_feeds` | Se encontraron feeds pero ninguno válido |
| `error` | Error al procesar la URL |
## 🚀 Uso del Sistema
### Método 1: Interfaz Web Manual
1. **Navega a `/feeds/discover`**
2. **Ingresa una URL** (ej: `https://elpais.com`)
3. **Haz clic en "Buscar Feeds"**
4. El sistema mostrará todos los feeds encontrados con:
- Estado de validación
- Título y descripción
- Número de entradas
- Tipo de feed (RSS/Atom)
5. **Configura opciones globales**:
- Categoría
- País
- Idioma
6. **Selecciona los feeds deseados** y haz clic en "Añadir Feeds Seleccionados"
### Método 2: Worker Automático
1. **Añade URLs a la tabla `fuentes_url`**:
```sql
INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma, active)
VALUES ('El País', 'https://elpais.com', 1, 1, 'es', TRUE);
```
2. **El worker procesará automáticamente**:
- Cada 15 minutos (configurable)
- Descubrirá todos los feeds
- Creará entradas en `feeds`
- Actualizará el estado
3. **Monitorea el progreso**:
```sql
SELECT nombre, url, last_check, last_status, status_message
FROM fuentes_url
ORDER BY last_check DESC;
```
### Método 3: Interfaz de URLs (Existente)
Usa la interfaz web existente en `/urls/add_source` para añadir URLs que serán procesadas por el worker.
## 🔄 Flujo de Trabajo Completo
```
┌─────────────────┐
│ Usuario añade │
│ URL del sitio │
└────────┬────────┘
v
┌─────────────────────────┐
│ URL guardada en │
│ tabla fuentes_url │
└────────┬────────────────┘
v
┌─────────────────────────┐
│ Worker ejecuta cada │
│ 15 minutos │
└────────┬────────────────┘
v
┌─────────────────────────┐
│ Descubre feeds RSS │
│ usando feedfinder2 │
└────────┬────────────────┘
v
┌─────────────────────────┐
│ Valida cada feed │
│ encontrado │
└────────┬────────────────┘
v
┌─────────────────────────┐
│ Crea entradas en │
│ tabla feeds │
└────────┬────────────────┘
v
┌─────────────────────────┐
│ Ingestor Go procesa │
│ feeds cada 15 minutos │
└────────┬────────────────┘
v
┌─────────────────────────┐
│ Noticias descargadas │
│ y procesadas │
└─────────────────────────┘
```
## 📊 Tablas de Base de Datos
### `fuentes_url`
Almacena URLs de sitios web para descubrimiento automático:
```sql
CREATE TABLE fuentes_url (
id SERIAL PRIMARY KEY,
nombre VARCHAR(255) NOT NULL,
url TEXT NOT NULL UNIQUE,
categoria_id INTEGER REFERENCES categorias(id),
pais_id INTEGER REFERENCES paises(id),
idioma CHAR(2) DEFAULT 'es',
last_check TIMESTAMP,
last_status VARCHAR(50),
status_message TEXT,
last_http_code INTEGER,
active BOOLEAN DEFAULT TRUE
);
```
### `feeds`
Almacena feeds RSS descubiertos y validados:
```sql
CREATE TABLE feeds (
id SERIAL PRIMARY KEY,
nombre VARCHAR(255),
descripcion TEXT,
url TEXT NOT NULL UNIQUE,
categoria_id INTEGER REFERENCES categorias(id),
pais_id INTEGER REFERENCES paises(id),
idioma CHAR(2),
activo BOOLEAN DEFAULT TRUE,
fallos INTEGER DEFAULT 0,
last_etag TEXT,
last_modified TEXT,
last_error TEXT
);
```
## ⚙️ Configuración del Sistema
### Variables de Entorno
Añade al archivo `.env`:
```bash
# RSS Ingestor
RSS_POKE_INTERVAL_MIN=15 # Intervalo de ingesta (minutos)
RSS_MAX_FAILURES=10 # Fallos máximos antes de desactivar feed
RSS_FEED_TIMEOUT=60 # Timeout para descargar feeds (segundos)
# URL Discovery Worker
URL_DISCOVERY_INTERVAL_MIN=15 # Intervalo de descubrimiento (minutos)
URL_DISCOVERY_BATCH_SIZE=10 # URLs a procesar por ciclo
MAX_FEEDS_PER_URL=5 # Máximo de feeds por URL
```
### Docker Compose
El worker ya está configurado en `docker-compose.yml`:
```yaml
url-discovery-worker:
build: .
container_name: rss2_url_discovery
command: bash -lc "python -m workers.url_discovery_worker"
environment:
DB_HOST: db
URL_DISCOVERY_INTERVAL_MIN: 15
URL_DISCOVERY_BATCH_SIZE: 10
MAX_FEEDS_PER_URL: 5
depends_on:
db:
condition: service_healthy
restart: unless-stopped
```
## 🔧 Comandos Útiles
### Ver logs del worker de descubrimiento
```bash
docker logs -f rss2_url_discovery
```
### Reiniciar el worker
```bash
docker restart rss2_url_discovery
```
### Ejecutar manualmente el worker (testing)
```bash
docker exec -it rss2_url_discovery python -m workers.url_discovery_worker
```
### Ver estadísticas de descubrimiento
```sql
-- URLs procesadas recientemente
SELECT nombre, url, last_check, last_status, status_message
FROM fuentes_url
WHERE last_check > NOW() - INTERVAL '1 day'
ORDER BY last_check DESC;
-- Feeds creados recientemente
SELECT nombre, url, fecha_creacion
FROM feeds
WHERE fecha_creacion > NOW() - INTERVAL '1 day'
ORDER BY fecha_creacion DESC;
```
## 🛠️ Troubleshooting
### El worker no encuentra feeds
1. Verifica que la URL sea accesible:
```bash
curl -I https://example.com
```
2. Verifica los logs del worker:
```bash
docker logs rss2_url_discovery
```
3. Prueba manualmente el descubrimiento:
```python
from utils.feed_discovery import discover_feeds
feeds = discover_feeds('https://example.com')
print(feeds)
```
### Feeds duplicados
El sistema previene duplicados usando `ON CONFLICT (url) DO NOTHING`. Si un feed ya existe, simplemente se omite.
### Worker consume muchos recursos
Ajusta las configuraciones:
```bash
# Reduce el batch size
URL_DISCOVERY_BATCH_SIZE=5
# Aumenta el intervalo
URL_DISCOVERY_INTERVAL_MIN=30
# Reduce feeds por URL
MAX_FEEDS_PER_URL=3
```
## 📝 Mejores Prácticas
1. **Añade URLs de sitios, no feeds directos**
- ✅ `https://elpais.com`
- ❌ `https://elpais.com/rss/feed.xml`
2. **Configura categoría y país correctamente**
- Facilita la organización
- Mejora la experiencia del usuario
3. **Monitorea el estado de las URLs**
- Revisa periódicamente `fuentes_url`
- Desactiva URLs que fallan consistentemente
4. **Limita el número de feeds por URL**
- Evita sobrecarga de feeds similares
- Mantén `MAX_FEEDS_PER_URL` entre 3-5
## 🎉 Ventajas del Sistema
**Automatización completa**: Solo añade URLs, el sistema hace el resto
**Descubrimiento inteligente**: Encuentra todos los feeds disponibles
**Validación automática**: Solo crea feeds válidos y funcionales
**Sin duplicados**: Gestión inteligente de feeds existentes
**Escalable**: Procesa múltiples URLs en lotes
**Resiliente**: Manejo robusto de errores y reintentos
**Monitoreable**: Logs detallados y estados claros
## 📚 Referencias
- **feedfinder2**: https://github.com/dfm/feedfinder2
- **feedparser**: https://feedparser.readthedocs.io/
- **Tabla fuentes_url**: `/init-db/01.schema.sql`
- **Worker**: `/workers/url_discovery_worker.py`
- **Utilidades**: `/utils/feed_discovery.py`

View file

@ -1,370 +0,0 @@
# Sistema de Categorización Automática con LLM
## Descripción
Este sistema utiliza **ExLlamaV2** con un modelo de lenguaje local (LLM) para categorizar automáticamente las noticias del feed RSS.
### ¿Qué hace?
1. **Recopila 10 noticias** sin categorizar de la base de datos
2. **Envía al LLM local** con un prompt especializado
3. **El LLM discrimina/categoriza** cada noticia en una de las categorías predefinidas
4. **Actualiza la base de datos** con las categorías asignadas
### Ventajas
- ✅ **100% Local**: No envía datos a APIs externas
- ✅ **Optimizado para RTX 3060 12GB**: Modelos cuantizados eficientes
- ✅ **Categorización inteligente**: Entiende contexto, no solo keywords
- ✅ **Escalable**: Procesa lotes de 10 noticias automáticamente
- ✅ **Integrado**: Se ejecuta como un worker más del sistema
---
## Instalación
### Paso 1: Descargar el Modelo
El sistema necesita un modelo LLM compatible. Recomendamos **Mistral-7B-Instruct GPTQ** para RTX 3060 12GB.
```bash
# Ejecutar el script de descarga
./scripts/download_llm_model.sh
```
El script te mostrará opciones:
1. **Mistral-7B-Instruct-v0.2 (GPTQ)** - RECOMENDADO
2. Mistral-7B-Instruct-v0.2 (EXL2)
3. OpenHermes-2.5-Mistral-7B (GPTQ)
4. Neural-Chat-7B (GPTQ)
**Tiempo estimado de descarga**: 10-30 minutos (según conexión)
**Espacio en disco**: ~4.5 GB
### Paso 2: Verificar la instalación
```bash
# Verificar que el modelo se descargó correctamente
ls -lh models/llm/
# Deberías ver archivos como:
# - model.safetensors o *.safetensors
# - config.json
# - tokenizer.json
# - etc.
```
### Paso 3: Probar el sistema (opcional)
Antes de levantar el contenedor, puedes probar que funciona:
```bash
# Instalar dependencias localmente (solo para prueba)
pip3 install exllamav2 torch
# Ejecutar script de prueba
python3 scripts/test_llm_categorizer.py
```
---
## Uso
### Iniciar el servicio
```bash
# Construir y levantar el contenedor
docker compose up -d llm-categorizer
# Ver logs en tiempo real
docker compose logs -f llm-categorizer
```
### Verificar funcionamiento
```bash
# Ver estado del contenedor
docker compose ps llm-categorizer
# Ver últimas 50 líneas de log
docker compose logs --tail=50 llm-categorizer
# Ver categorías asignadas en la base de datos
docker exec -it rss2_db psql -U rss -d rss -c \
"SELECT llm_categoria, COUNT(*) FROM noticias WHERE llm_processed = TRUE GROUP BY llm_categoria;"
```
### Detener el servicio
```bash
docker compose stop llm-categorizer
```
---
## Configuración
### Variables de Entorno
Puedes ajustar el comportamiento editando `docker-compose.yml`:
```yaml
environment:
# Número de noticias a procesar por lote (default: 10)
LLM_BATCH_SIZE: 10
# Tiempo de espera cuando no hay noticias (segundos, default: 30)
LLM_SLEEP_IDLE: 30
# Longitud máxima de contexto (default: 4096)
LLM_MAX_SEQ_LEN: 4096
# Modo de caché: FP16 o Q4 (default: FP16)
# Q4 usa menos VRAM pero puede ser más lento
LLM_CACHE_MODE: FP16
# Distribución de GPU: "auto" para single GPU
LLM_GPU_SPLIT: auto
```
### Categorías
Las categorías están definidas en `workers/llm_categorizer_worker.py`:
```python
CATEGORIES = [
"Política",
"Economía",
"Tecnología",
"Ciencia",
"Salud",
"Deportes",
"Entretenimiento",
"Internacional",
"Nacional",
"Sociedad",
"Cultura",
"Medio Ambiente",
"Educación",
"Seguridad",
"Otros"
]
```
Para modificarlas, edita el archivo y reconstruye el contenedor:
```bash
docker compose up -d --build llm-categorizer
```
---
## Base de Datos
### Nuevas columnas en `noticias`
El worker añade automáticamente estas columnas:
- `llm_categoria` (VARCHAR): Categoría asignada
- `llm_confianza` (FLOAT): Nivel de confianza (0.0 - 1.0)
- `llm_processed` (BOOLEAN): Si ya fue procesada
- `llm_processed_at` (TIMESTAMP): Fecha de procesamiento
### Consultas útiles
```sql
-- Ver distribución de categorías
SELECT llm_categoria, COUNT(*) as total, AVG(llm_confianza) as confianza_media
FROM noticias
WHERE llm_processed = TRUE
GROUP BY llm_categoria
ORDER BY total DESC;
-- Ver noticias de una categoría específica
SELECT id, titulo, llm_categoria, llm_confianza, fecha
FROM noticias
WHERE llm_categoria = 'Tecnología'
AND llm_processed = TRUE
ORDER BY fecha DESC
LIMIT 20;
-- Ver noticias con baja confianza (revisar manualmente)
SELECT id, titulo, llm_categoria, llm_confianza
FROM noticias
WHERE llm_processed = TRUE
AND llm_confianza < 0.6
ORDER BY llm_confianza ASC
LIMIT 20;
-- Resetear procesamiento (para reprocesar)
UPDATE noticias SET llm_processed = FALSE WHERE llm_categoria = 'Otros';
```
---
## Monitorización
### Prometheus/Grafana
El worker está integrado con el stack de monitorización. Puedes ver:
- Uso de GPU (VRAM)
- Tiempo de procesamiento por lote
- Tasa de categorización
Accede a Grafana: http://localhost:3001
### Logs
```bash
# Ver logs en tiempo real
docker compose logs -f llm-categorizer
# Buscar errores
docker compose logs llm-categorizer | grep ERROR
# Ver estadísticas de categorización
docker compose logs llm-categorizer | grep "Distribución"
```
---
## Troubleshooting
### Error: "Out of memory"
**Causa**: El modelo es demasiado grande para tu GPU.
**Solución**:
1. Usa un modelo más pequeño (ej: EXL2 con menor bpw)
2. Reduce el batch size: `LLM_BATCH_SIZE: 5`
3. Usa cache Q4 en lugar de FP16: `LLM_CACHE_MODE: Q4`
```yaml
environment:
LLM_BATCH_SIZE: 5
LLM_CACHE_MODE: Q4
```
### Error: "Model not found"
**Causa**: El modelo no se descargó correctamente.
**Solución**:
```bash
# Verificar directorio
ls -la models/llm/
# Debería contener config.json y archivos .safetensors
# Si está vacío, ejecutar de nuevo:
./scripts/download_llm_model.sh
```
### El worker no procesa noticias
**Causa**: Posiblemente ya están todas procesadas.
**Solución**:
```bash
# Verificar cuántas noticias faltan
docker exec -it rss2_db psql -U rss -d rss -c \
"SELECT COUNT(*) FROM noticias WHERE llm_processed = FALSE;"
# Si es 0, resetear algunas para probar
docker exec -it rss2_db psql -U rss -d rss -c \
"UPDATE noticias SET llm_processed = FALSE WHERE id IN (SELECT id FROM noticias ORDER BY fecha DESC LIMIT 20);"
```
### Categorización incorrecta
**Causa**: El prompt puede necesitar ajustes o el modelo no es adecuado.
**Soluciones**:
1. Ajustar el prompt en `workers/llm_categorizer_worker.py` (método `_build_prompt`)
2. Probar un modelo diferente (ej: OpenHermes es mejor generalista)
3. Ajustar la temperatura (más baja = más determinista):
```python
self.settings.temperature = 0.05 # Muy determinista
```
---
## Rendimiento
### RTX 3060 12GB
- **Modelo recomendado**: Mistral-7B-Instruct GPTQ 4-bit
- **VRAM utilizada**: ~6-7 GB
- **Tiempo por noticia**: ~2-5 segundos
- **Throughput**: ~120-300 noticias/hora
### Optimizaciones
Para mejorar el rendimiento:
1. **Aumentar batch size** (si sobra VRAM):
```yaml
LLM_BATCH_SIZE: 20
```
2. **Cache Q4** (menos VRAM, ligeramente más lento):
```yaml
LLM_CACHE_MODE: Q4
```
3. **Modelo EXL2 optimizado**:
- Usar Mistral EXL2 4.0bpw
- Es más rápido que GPTQ en ExLlamaV2
---
## Integración con la Web
Para mostrar las categorías en la interfaz web, modifica `routers/search.py` o crea una nueva vista:
```python
# Ejemplo de endpoint para estadísticas
@app.route('/api/categories/stats')
def category_stats():
query = """
SELECT llm_categoria, COUNT(*) as total
FROM noticias
WHERE llm_processed = TRUE
GROUP BY llm_categoria
ORDER BY total DESC
"""
# ... ejecutar query y devolver JSON
```
---
## Roadmap
Posibles mejoras futuras:
- [ ] Subcategorías automáticas
- [ ] Detección de temas trending
- [ ] Resúmenes automáticos por categoría
- [ ] Alertas personalizadas por categoría
- [ ] API REST para categorización bajo demanda
- [ ] Fine-tuning del modelo con feedback de usuario
---
## Soporte
Para problemas o preguntas:
1. Revisar logs: `docker compose logs llm-categorizer`
2. Verificar GPU: `nvidia-smi`
3. Consultar documentación de ExLlamaV2: https://github.com/turboderp/exllamav2
---
## Licencia
Este componente se distribuye bajo la misma licencia que el proyecto principal RSS2.
Los modelos LLM tienen sus propias licencias (generalmente Apache 2.0 o MIT para los recomendados).

View file

@ -1,223 +0,0 @@
# 🎬 Sistema de Parrillas de Videos Automatizados
## 📋 Descripción
Este sistema permite generar videos automáticos de noticias filtradas según diferentes criterios:
- **Por País**: Noticias de Bulgaria, España, Estados Unidos, etc.
- **Por Categoría**: Ciencia, Tecnología, Deportes, Política, etc.
- **Por Entidad**: Personas u organizaciones específicas (ej: "Donald Trump", "OpenAI")
- **Por Continente**: Europa, Asia, América, etc.
## 🎯 Características
### ✅ Sistema Implementado
1. **Base de Datos**
- Tabla `video_parrillas`: Configuraciones de parrillas
- Tabla `video_generados`: Registro de videos creados
- Tabla `video_noticias`: Relación entre videos y noticias
2. **API REST**
- `GET /parrillas/` - Listado de parrillas
- `GET /parrillas/<id>` - Detalle de parrilla
- `POST /parrillas/nueva` - Crear parrilla
- `GET /parrillas/api/<id>/preview` - Preview de noticias
- `POST /parrillas/api/<id>/generar` - Generar video
- `POST /parrillas/api/<id>/toggle` - Activar/desactivar
- `DELETE /parrillas/api/<id>` - Eliminar parrilla
3. **Generador de Videos**
- Script: `generar_videos_noticias.py`
- Integración con AllTalk TTS
- Generación automática de subtítulos SRT
- Soporte para múltiples idiomas
## 🚀 Uso Rápido
### 1. Crear una Parrilla
```bash
# Acceder a la interfaz web
http://localhost:8001/parrillas/
# O usar SQL directo
docker-compose exec -T db psql -U rss -d rss -c "
INSERT INTO video_parrillas (nombre, descripcion, tipo_filtro, pais_id, max_noticias, frecuencia, activo)
VALUES ('Noticias de Bulgaria', 'Resumen diario de noticias de Bulgaria', 'pais',
(SELECT id FROM paises WHERE nombre = 'Bulgaria'), 5, 'daily', true);
"
```
### 2. Generar Video Manualmente
```bash
# Generar video para parrilla con ID 1
docker-compose exec web python generar_videos_noticias.py 1
```
### 3. Generación Automática (Diaria)
```bash
# Procesar todas las parrillas activas con frecuencia 'daily'
docker-compose exec web python generar_videos_noticias.py
```
## 📝 Ejemplos de Parrillas
### Ejemplo 1: Noticias de Ciencia en Europa
```sql
INSERT INTO video_parrillas (
nombre, descripcion, tipo_filtro,
categoria_id, continente_id,
max_noticias, duracion_maxima, idioma_voz,
frecuencia, activo
) VALUES (
'Ciencia en Europa',
'Las últimas noticias científicas de Europa',
'categoria',
(SELECT id FROM categorias WHERE nombre ILIKE '%ciencia%'),
(SELECT id FROM continentes WHERE nombre = 'Europa'),
7, 300, 'es',
'daily', true
);
```
### Ejemplo 2: Noticias sobre una Persona
```sql
INSERT INTO video_parrillas (
nombre, descripcion, tipo_filtro,
entidad_nombre, entidad_tipo,
max_noticias, idioma_voz,
frecuencia, activo
) VALUES (
'Donald Trump en las Noticias',
'Todas las menciones de Donald Trump',
'entidad',
'Donald Trump', 'persona',
10, 'es',
'daily', true
);
```
### Ejemplo 3: Noticias de Tecnología
```sql
INSERT INTO video_parrillas (
nombre, descripcion, tipo_filtro,
categoria_id,
max_noticias, idioma_voz,
include_subtitles, template,
frecuencia, activo
) VALUES (
'Tech News Daily',
'Resumen diario de tecnología',
'categoria',
(SELECT id FROM categorias WHERE nombre ILIKE '%tecnolog%'),
8, 'es',
true, 'modern',
'daily', true
);
```
## 🔧 Configuración Avanzada
### Opciones de Parrilla
| Campo | Tipo | Descripción |
|-------|------|-------------|
| `nombre` | string | Nombre único de la parrilla |
| `descripcion` | text | Descripción detallada |
| `tipo_filtro` | enum | 'pais', 'categoria', 'entidad', 'continente', 'custom' |
| `pais_id` | int | ID del país (si tipo_filtro='pais') |
| `categoria_id` | int | ID de categoría |
| `continente_id` | int | ID de continente |
| `entidad_nombre` | string | Nombre de persona/organización |
| `entidad_tipo` | string | 'persona' o 'organizacion' |
| `max_noticias` | int | Máximo de noticias por video (default: 5) |
| `duracion_maxima` | int | Duración máxima en segundos (default: 180) |
| `idioma_voz` | string | Idioma del TTS ('es', 'en', etc.) |
| `template` | string | 'standard', 'modern', 'minimal' |
| `include_images` | bool | Incluir imágenes en el video |
| `include_subtitles` | bool | Generar subtítulos SRT |
| `frecuencia` | string | 'daily', 'weekly', 'manual' |
### Configuración de AllTalk
El sistema utiliza AllTalk para generar la narración con voz. Configurar en docker-compose.yml:
```yaml
environment:
ALLTALK_URL: http://alltalk:7851
```
## 📊 Estructura de Archivos Generados
```
data/
videos/
<video_id>/
script.txt # Libreto completo del video
audio.wav # Audio generado con TTS
subtitles.srt # Subtítulos (si enabled)
metadata.json # Metadata del video
```
## 🔄 Workflow de Generación
1. **Consulta de Noticias**: Filtra noticias según criterios de la parrilla
2. **Construcción de Script**: Genera libreto narrativo
3. **Síntesis de Voz**: Envía texto a AllTalk TTS
4. **Generación de Subtítulos**: Crea archivo SRT con timestamps
5. **Registro en BD**: Guarda paths y metadata en `video_generados`
6. **Relación de Noticias**: Vincula noticias incluidas en `video_noticias`
## 🎨 Próximas Mejoras
- [ ] Integración con generador de videos (combinar audio + imágenes)
- [ ] Templates visuales personalizados
- [ ] Transiciones entre noticias
- [ ] Música de fondo
- [ ] Logo/branding personalizado
- [ ] Exportación directa a YouTube/TikTok
- [ ] Programación automática con cron
- [ ] Dashboard de analíticas de videos
- [ ] Sistema de thumbnails automáticos
## 🐛 Troubleshooting
### Error: "No hay noticias disponibles"
- Verificar que existan noticias traducidas (`traducciones.status = 'done'`)
- Ajustar filtros de la parrilla
- Verificar rango de fechas (por defecto últimas 24h)
### Error en AllTalk TTS
- Verificar que el servicio AllTalk esté corriendo
- Revisar URL en variable de entorno `ALLTALK_URL`
- Comprobar logs: `docker-compose logs alltalk`
### Video no se genera
- Revisar estado en tabla `video_generados`
- Ver columna `error_message` si `status = 'error'`
- Verificar permisos en directorio `/app/data/videos`
## 📞 Soporte
Para problemas o sugerencias, revisar los logs:
```bash
# Logs del generador
docker-compose exec web python generar_videos_noticias.py <id> 2>&1 | tee video_generation.log
# Ver videos en cola
docker-compose exec -T db psql -U rss -d rss -c "
SELECT id, parrilla_id, titulo, status, fecha_generacion
FROM video_generados
ORDER BY fecha_generacion DESC LIMIT 10;
"
```
## 📄 Licencia
Este módulo es parte del sistema RSS2 News Aggregator.

View file

@ -1,426 +0,0 @@
# 📖 PROCESO COMPLETO: Descubrimiento y Gestión de Feeds RSS
## 🎯 Problema Resuelto
**Pregunta:** ¿Cómo asigno país y categoría a los feeds descubiertos automáticamente?
**Respuesta:** El sistema ahora usa un flujo inteligente de 3 niveles:
1. **Auto-aprobación** (feeds con categoría/país)
2. **Revisión manual** (feeds sin metadata completa)
3. **Análisis automático** (sugerencias inteligentes)
---
## 🔄 FLUJO COMPLETO DEL SISTEMA
### Paso 1: Añadir URL Fuente
Tienes 2 opciones para añadir URLs:
#### Opción A: Con Categoría y País (AUTO-APROBACIÓN)
```sql
INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma, active)
VALUES ('El País', 'https://elpais.com', 1, 44, 'es', TRUE);
-- ^ ^
-- categoria_id pais_id
```
**Resultado**: Feeds se crean **AUTOMÁTICAMENTE** y se activan
- Worker descubre feeds
- Hereda categoría (1) y país (44) del padre
- Crea feeds en tabla `feeds` directamente
- Ingestor empieza a descargar noticias
#### Opción B: Sin Categoría o País (REQUIERE REVISIÓN)
```sql
INSERT INTO fuentes_url (nombre, url, active)
VALUES ('BBC News', 'https://www.bbc.com/news', TRUE);
-- Sin categoria_id ni pais_id
```
⚠️ **Resultado**: Feeds van a **REVISIÓN MANUAL**
- Worker descubre feeds
- Analiza automáticamente:
- Detecta país desde dominio (.com → Reino Unido)
- Detecta idioma (en)
- Sugiere categoría ("Internacional")
- Crea feeds en tabla `feeds_pending`
- **ESPERA APROBACIÓN MANUAL** antes de activar
---
### Paso 2: Worker Descubre Feeds (cada 15 min)
El worker `url_discovery_worker` ejecuta automáticamente:
```
1. Lee fuentes_url activas
2. Para cada URL:
a. Descubre todos los feeds RSS
b. Valida cada feed
c. Analiza metadata:
- Idioma del feed
- País (desde dominio: .es, .uk, .fr, etc.)
- Categoría sugerida (keywords en título/descripción)
d. DECIDE EL FLUJO:
┌─────────────────────────────────────┐
│ ¿Parent tiene categoria_id Y pais_id? │
└──────────┬──────────────────────────┘
┌────────┴────────┐
│ SÍ │ NO
▼ ▼
┌──────────────┐ ┌─────────────────┐
│ AUTO-APROBAR │ │ REQUIERE REVISIÓN│
└───────┬──────┘ └─────────┬───────┘
│ │
▼ ▼
tabla: feeds tabla: feeds_pending
activo: TRUE reviewed: FALSE
✅ Listo para ⏳ Espera aprobación
ingestor
```
---
### Paso 3A: Feeds AUTO-APROBADOS
Si la URL padre tiene `categoria_id` y `pais_id`:
```sql
-- Ejemplo: URL con metadata completa
fuentes_url:
id=1, url='https://elpais.com',
categoria_id=1 (Noticias),
pais_id=44 (España)
↓ Worker descubre 3 feeds:
- https://elpais.com/rss/portada.xml
- https://elpais.com/rss/internacional.xml
- https://elpais.com/rss/deportes.xml
↓ Se crean DIRECTAMENTE en tabla feeds:
INSERT INTO feeds (nombre, url, categoria_id, pais_id, activo)
VALUES
('El País - Portada', 'https://elpais.com/rss/portada.xml', 1, 44, TRUE),
('El País - Internacional', 'https://elpais.com/rss/internacional.xml', 1, 44, TRUE),
('El País - Deportes', 'https://elpais.com/rss/deportes.xml', 1, 44, TRUE);
✅ Feeds están ACTIVOS inmediatamente
✅ Ingestor Go los procesa en siguiente ciclo (15 min)
✅ Noticias empiezan a llegar
```
---
### Paso 3B: Feeds PENDIENTES (requieren revisión)
Si la URL padre NO tiene `categoria_id` o `pais_id`:
```sql
-- Ejemplo: URL sin metadata
fuentes_url:
id=2, url='https://www.bbc.com/news',
categoria_id=NULL,
pais_id=NULL
↓ Worker descubre 2 feeds y ANALIZA automáticamente:
Feed 1: https://www.bbc.com/news/world/rss.xml
- Título: "BBC News - World"
- Idioma detectado: 'en'
- País detectado: 'Reino Unido' (desde .com + idioma inglés)
- Categoría sugerida: 'Internacional' (keyword "world")
Feed 2: https://www.bbc.com/sport/rss.xml
- Título: "BBC Sport"
- Idioma detectado: 'en'
- País detectado: 'Reino Unido'
- Categoría sugerida: 'Deportes' (keyword "sport")
↓ Se crean en tabla feeds_pending:
INSERT INTO feeds_pending (
fuente_url_id, feed_url, feed_title,
feed_language, detected_country_id, suggested_categoria_id,
reviewed, approved, notes
) VALUES (
2,
'https://www.bbc.com/news/world/rss.xml',
'BBC News - World',
'en',
74, -- Reino Unido (ID detectado)
2, -- Internacional (ID sugerido)
FALSE, FALSE,
'Country from domain: Reino Unido | Suggested category: Internacional (confidence: 85%)'
);
⏳ Feeds están PENDIENTES
⏳ NO están activos aún
⏳ Requieren revisión manual en /feeds/pending
```
---
## 📊 Tabla Comparativa
| Aspecto | Auto-Aprobación | Revisión Manual |
|---------|----------------|-----------------|
| **Requisito** | URL padre con `categoria_id` Y `pais_id` | URL padre sin uno o ambos |
| **Tabla destino** | `feeds` (directa) | `feeds_pending` (temporal) |
| **Estado inicial** | `activo = TRUE` | `reviewed = FALSE, approved = FALSE` |
| **Análisis automático** | Hereda valores del padre | Detecta país, sugiere categoría |
| **Intervención manual** | ❌ No necesaria | ✅ Requerida |
| **Tiempo hasta activación** | Inmediato | Después de aprobación |
| **Ingestor procesa** | Sí (próximo ciclo) | No (hasta aprobar) |
---
## 🛠️ Interfaces de Gestión
### 1. Añadir URL con Metadata (Auto-aprobación)
**Ruta:** `/urls/add_source`
```
Formulario:
┌─────────────────────────────────────┐
│ Nombre: El País │
│ URL: https://elpais.com │
│ Categoría: [Noticias ▼] ← IMPORTANTE
│ País: [España ▼] ← IMPORTANTE
│ Idioma: es │
│ │
│ [Añadir Fuente] │
└─────────────────────────────────────┘
Resultado: Feeds se crearán AUTOMÁTICAMENTE
```
### 2. Revisar Feeds Pendientes (Nueva interfaz)
**Ruta:** `/feeds/pending` (próximamente)
```
Feeds Pendientes de Revisión
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Feed: BBC News - World
URL: https://www.bbc.com/news/world/rss.xml
Fuente: BBC News (https://www.bbc.com/news)
Análisis Automático:
├─ Idioma: English (en)
├─ País detectado: Reino Unido (.com domain + language)
└─ Categoría sugerida: Internacional (85% confianza)
Keywords: "world", "international", "global"
┌─────────────────────────────────────┐
│ Categoría: [Internacional ▼] │ ← Pre-seleccionada
│ País: [Reino Unido ▼] │ ← Pre-seleccionado
│ Idioma: [en] │ ← Auto-detectado
│ │
│ [✓ Aprobar Feed] [✗ Rechazar] │
└─────────────────────────────────────┘
```
### 3. Descubrir Feeds Manualmente
**Ruta:** `/feeds/discover`
```
Perfecto para cuando quieres control total:
1. Ingresar URL
2. Ver todos los feeds encontrados
3. Seleccionar cuáles añadir
4. Asignar categoría/país globalmente
5. Feeds se crean directamente (no van a pending)
```
---
## 💡 RECOMENDACIONES DE USO
### Estrategia 1: Auto-aprobación Total
**Para fuentes conocidas y confiables:**
```sql
-- Añadir fuentes con metadata completa
INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma) VALUES
('El País', 'https://elpais.com', 1, 44, 'es'),
('Le Monde', 'https://lemonde.fr', 1, 60, 'fr'),
('The Guardian', 'https://theguardian.com', 1, 74, 'en');
-- Worker creará feeds automáticamente
-- Sin intervención manual necesaria
```
### Estrategia 2: Revisión Manual
**Para fuentes nuevas o desconocidas:**
```sql
-- Añadir sin metadata
INSERT INTO fuentes_url (nombre, url) VALUES
('Sitio Desconocido', 'https://ejemplo.com');
-- Worker crea feeds en feeds_pending
-- Revisar en /feeds/pending
-- Aprobar/rechazar manualmente
```
### Estrategia 3: Híbrida (Recomendada)
**Combinar ambas:**
- URLs conocidas → Con categoría/país
- URLs nuevas → Sin metadata (revisión)
- Usar análisis automático como guía
- Ajustar manualmente si es necesario
---
## 🔍 Análisis Automático Explicado
### Detección de País
```python
# 1. Desde dominio (TLD)
.es → España
.uk, .co.uk → Reino Unido
.fr → Francia
.de → Alemania
.mx → México
.ar → Argentina
# 2. Desde idioma (si no hay dominio claro)
es → España (país principal)
en → Reino Unido
fr → Francia
pt → Portugal
# 3. Desde subdominios
es.example.com → España
uk.example.com → Reino Unido
```
### Sugerencia de Categoría
```python
# Análisis de keywords en título + descripción
Keywords encontrados → Categoría sugerida (% confianza)
"política", "gobierno", "elecciones" → Política (75%)
"economía", "bolsa", "mercado" → Economía (82%)
"tecnología", "software", "digital" → Tecnología (90%)
"deportes", "fútbol", "liga" → Deportes (95%)
"internacional", "mundo", "global" → Internacional (70%)
```
---
## 📝 Ejemplos Completos
### Ejemplo 1: Periódico Español (Auto-aprobación)
```sql
-- 1. Añadir fuente con metadata
INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma)
VALUES ('El Mundo', 'https://elmundo.es', 1, 44, 'es');
-- 2. Worker ejecuta (15 min después):
-- - Descubre: elmundo.es/rss/portada.xml
-- - Descubre: elmundo.es/rss/deportes.xml
-- - Hereda: categoria_id=1, pais_id=44
-- - Crea en feeds directamente
-- 3. Resultado en tabla feeds:
SELECT id, nombre, url, categoria_id, pais_id, activo
FROM feeds
WHERE fuente_nombre LIKE '%El Mundo%';
-- id | nombre | url | cat | pais | activo
-- 1 | El Mundo - Portada | elmundo.es/rss/portada.xml | 1 | 44 | TRUE
-- 2 | El Mundo - Deportes | elmundo.es/rss/deportes.xml | 1 | 44 | TRUE
-- ✅ Feeds activos, ingestor procesando
```
### Ejemplo 2: Sitio Internacional (Revisión Manual)
```sql
-- 1. Añadir fuente SIN metadata
INSERT INTO fuentes_url (nombre, url)
VALUES ('Reuters', 'https://www.reuters.com');
-- 2. Worker ejecuta (15 min después):
-- - Descubre: reuters.com/rssfeed/worldNews
-- - Analiza: idioma=en, país=Reino Unido (dominio+idioma)
-- - Sugiere: categoría=Internacional (keyword "world")
-- - Crea en feeds_pending
-- 3. Resultado en tabla feeds_pending:
SELECT feed_title, detected_country_id, suggested_categoria_id, notes
FROM feeds_pending
WHERE fuente_url_id = 3;
-- feed_title | detected_country_id | suggested_cat | notes
-- Reuters World News | 74 (Reino Unido) | 2 (Int.) | "Country from domain..."
-- ⏳ Requiere aprobación en /feeds/pending
```
---
## ✅ CHECKLIST: Añadir Nueva Fuente
**Para auto-aprobación (recomendado si sabes país/categoría):**
- [ ] Ir a `/urls/add_source`
- [ ] Ingresar nombre descriptivo
- [ ] Ingresar URL del sitio (NO del feed RSS)
- [ ] **IMPORTANTE:** Seleccionar categoría
- [ ] **IMPORTANTE:** Seleccionar país
- [ ] Ingresar idioma (opcional, se detecta)
- [ ] Guardar
- [ ] Esperar 15 minutos (máximo)
- [ ] Ver feeds en `/feeds/` (activos automáticamente)
**Para revisión manual (si no estás seguro):**
- [ ] Ir a `/urls/add_source`
- [ ] Ingresar nombre y URL
- [ ] Dejar categoría/país vacíos
- [ ] Guardar
- [ ] Esperar 15 minutos
- [ ] Ir a `/feeds/pending`
- [ ] Revisar sugerencias automáticas
- [ ] Ajustar categoría/país si necesario
- [ ] Aprobar feeds
- [ ] Feeds se activan inmediatamente
---
## 🎓 Resumen Ejecutivo
**3 Niveles de Automatización:**
| Nivel | Descripción | Cuándo Usar |
|-------|-------------|-------------|
| **Nivel 1: Totalmente Manual** | Descubrir en `/feeds/discover` | Control total, pocas URLs |
| **Nivel 2: Auto-aprobación** | URL con cat/país → feeds activos | URLs confiables, muchas fuentes |
| **Nivel 3: Revisión Asistida** | URL sin cat/país → análisis → aprobar | URLs nuevas, verificación |
**Flujo Recomendado:**
1. Añade URL con categoría/país si la conoces
2. Si no, déjalo vacío y revisa sugerencias automáticas
3. Worker descubre y analiza todo automáticamente
4. Tú solo apruebas/ajustas lo necesario
**Resultado:** Gestión eficiente de cientos de fuentes RSS con mínima intervención manual.
---
**📅 Fecha de última actualización:** 2026-01-07
**📌 Versión del sistema:** 2.0 - Análisis Inteligente de Feeds

View file

@ -1,164 +0,0 @@
# Problema de Traducciones Repetitivas - Análisis y Solución
## 📋 Descripción del Problema
Se detectaron traducciones con texto extremadamente repetitivo, como:
- "la línea de la línea de la línea de la línea..."
- "de Internet de Internet de Internet..."
- "de la la la la..."
### Ejemplo Real Encontrado:
```
La red de conexión de Internet de Internet de la India (WIS) se encuentra
en la línea de Internet de Internet de la India (WIS) y en la línea de
Internet de Internet de la India (WIS) se encuentra en...
```
## 🔍 Causas Identificadas
1. **Repetition Penalty Insuficiente**: El modelo estaba configurado con `repetition_penalty=1.2`, demasiado bajo para prevenir bucles.
2. **N-gram Blocking Inadecuado**: `no_repeat_ngram_size=4` permitía repeticiones de frases de 3 palabras.
3. **Falta de Validación Post-Traducción**: No había verificación de calidad después de traducir.
4. **Textos Fuente Corruptos**: Algunos RSS feeds contienen HTML mal formado o texto corrupto que confunde al modelo.
## ✅ Soluciones Implementadas
### 1. Mejoras en el Translation Worker (`workers/translation_worker.py`)
#### A. Parámetros de Traducción Mejorados
```python
# ANTES:
repetition_penalty=1.2
no_repeat_ngram_size=4
# AHORA:
repetition_penalty=2.5 # Penalización mucho más agresiva
no_repeat_ngram_size=3 # Bloquea repeticiones de 3-gramas
```
#### B. Función de Validación de Calidad
Nueva función `_is_repetitive_output()` que detecta:
- Palabras repetidas 4+ veces consecutivas
- Frases de 2 palabras repetidas 3+ veces
- Patrones específicos conocidos: "de la la", "la línea de la línea", etc.
- Baja diversidad de vocabulario (< 25% palabras únicas)
#### C. Validación Post-Traducción
```python
# Rechazar traducciones repetitivas automáticamente
if _is_repetitive_output(ttr) or _is_repetitive_output(btr):
LOG.warning(f"Rejecting repetitive translation for tr_id={i['tr_id']}")
errors.append(("Repetitive output detected", i["tr_id"]))
continue
```
### 2. Script de Limpieza Automática
Creado `scripts/clean_repetitive_translations.py` que:
- Escanea todas las traducciones completadas
- Detecta patrones repetitivos
- Marca traducciones defectuosas como 'pending' para re-traducción
- Genera reportes de calidad
**Uso:**
```bash
docker exec rss2_web python3 scripts/clean_repetitive_translations.py
```
### 3. Limpieza Inicial Ejecutada
Se identificaron y marcaron **3,093 traducciones defectuosas** para re-traducción:
```sql
UPDATE traducciones
SET status='pending',
titulo_trad=NULL,
resumen_trad=NULL,
error='Repetitive output - retranslating with improved settings'
WHERE status='done'
AND (resumen_trad LIKE '%la línea de la línea%'
OR resumen_trad LIKE '%de la la %'
OR resumen_trad LIKE '%de Internet de Internet%');
```
## 🚀 Próximos Pasos
### 1. Reiniciar el Translation Worker
```bash
docker restart rss2_translation_worker
```
### 2. Monitorear Re-traducciones
Las 3,093 noticias marcadas se re-traducirán automáticamente con la nueva configuración mejorada.
### 3. Ejecutar Limpieza Periódica
Agregar al cron o scheduler:
```bash
# Cada día a las 3 AM
0 3 * * * docker exec rss2_web python3 scripts/clean_repetitive_translations.py
```
### 4. Monitoreo de Calidad
Verificar logs del translation worker para ver rechazos:
```bash
docker logs -f rss2_translation_worker | grep "Rejecting repetitive"
```
## 📊 Métricas de Calidad
### Antes de la Solución:
- ~3,093 traducciones defectuosas detectadas
- ~X% de tasa de error (calculado sobre total de traducciones)
### Después de la Solución:
- Validación automática en tiempo real
- Rechazo inmediato de outputs repetitivos
- Re-traducción automática con mejores parámetros
## 🔧 Configuración Adicional Recomendada
### Variables de Entorno (.env)
```bash
# Aumentar batch size para mejor contexto
TRANSLATOR_BATCH=64 # Actual: 128 (OK)
# Ajustar beams para mejor calidad
NUM_BEAMS_TITLE=3
NUM_BEAMS_BODY=3
# Tokens máximos
MAX_NEW_TOKENS_TITLE=128
MAX_NEW_TOKENS_BODY=512
```
## 📝 Notas Técnicas
### ¿Por qué ocurre este problema?
Los modelos de traducción neuronal (como NLLB) pueden entrar en "bucles de repetición" cuando:
1. El texto fuente está corrupto o mal formado
2. El contexto es muy largo y pierde coherencia
3. La penalización por repetición es insuficiente
4. Hay patrones ambiguos en el texto fuente
### Prevención a Largo Plazo
1. **Validación de Entrada**: Limpiar HTML y texto corrupto antes de traducir
2. **Chunking Inteligente**: Dividir textos largos en segmentos coherentes
3. **Monitoreo Continuo**: Ejecutar script de limpieza regularmente
4. **Logs Detallados**: Analizar qué tipos de textos causan problemas
## 🎯 Resultados Esperados
Con estas mejoras, se espera:
- ✅ Eliminación del 99%+ de traducciones repetitivas
- ✅ Mejor calidad general de traducciones
- ✅ Detección automática de problemas
- ✅ Re-traducción automática de contenido defectuoso
---
**Fecha de Implementación**: 2026-01-28
**Estado**: ✅ Implementado y Activo

18
feeds.csv Normal file
View file

@ -0,0 +1,18 @@
id,nombre,descripcion,url,categoria_id,categoria,pais_id,pais,idioma,activo,fallos
19,8am Daily Dari,روزنامه ۸صبح افغانستان به زبان دری.,https://8am.af/feed,7,Internacional,1,Afganistán,fa,False,30
20,8am Daily Pashto,د ۸صبح ورځپاڼې پښتو خپرونه.,https://8am.af/ps/feed,7,Internacional,1,Afganistán,ps,False,30
1,Afghanistan News.Net Noticias,Cobertura continua de noticias generales sobre Afganistán y su entorno regional.,https://feeds.afghanistannews.net/rss/6e1d5c8e1f98f17c,7,Internacional,1,Afganistán,en,True,0
36,Arezo TV Dari,آرزو تلویزیون خبر و گزارش به دری.,https://arezo.tv/fa/feed,7,Internacional,1,Afganistán,fa,False,30
37,Arezo TV Pashto,د آرزو تلویزیون پښتو خبرونه.,https://arezo.tv/ps/feed,7,Internacional,1,Afganistán,ps,False,30
4,Ariana News Dari,خبرها و تحلیل‌ها از افغانستان به زبان دری.,https://ariananews.af/feed,7,Internacional,1,Afganistán,fa,True,0
28,Avapress Dari,خبرگزاری صدای افغان (آوا) به زبان دری.,https://avapress.com/fa/rss,7,Internacional,1,Afganistán,fa,False,30
29,Avapress Pashto,د افغان غږ خبري آژانس په پښتو ژبه.,https://avapress.com/ps/rss,7,Internacional,1,Afganistán,ps,False,30
23,Bakhtar News Agency Dari,آژانس خبری باختر به زبان دری.,https://bakhtarnews.af/fa/feed,7,Internacional,1,Afganistán,fa,False,5
24,Bakhtar News Agency Pashto,د باختر خبري اژانس پښتو پاڼه.,https://bakhtarnews.af/ps/feed,7,Internacional,1,Afganistán,ps,False,5
38,Barya News Dari,باریانیوز رسانه خبری افغانستان.,https://barya.news/feed,7,Internacional,1,Afganistán,fa,False,30
39,Barya News Pashto,باریانیوز پښتو خپرونې.,https://barya.news/ps/feed,7,Internacional,1,Afganistán,ps,False,30
47,Chaprast News Dari,چپرست نیوز خبرهای افغانستان.,https://chaprast.com/feed,7,Internacional,1,Afganistán,fa,False,30
30,Ensaf News Dari,رسانه تحلیلی به زبان دری.,https://www.ensafnews.com/fa/feed,7,Internacional,1,Afganistán,fa,False,30
27,Hamshahri Afghanistan Dari,نسخه افغانستان همشهری به زبان دری.,https://hamshahri.af/feed,7,Internacional,1,Afganistán,fa,False,30
44,Jomhor News Dari,جمهور نیوز خبرگزاری مستقل دری.,https://jomhornews.com/fa/rss,7,Internacional,1,Afganistán,fa,False,30
45,Jomhor News Pashto,جمهور نیوز پښتو.,https://jomhornews.com/ps/rss,7,Internacional,1,Afganistán,ps,False,30
1 id nombre descripcion url categoria_id categoria pais_id pais idioma activo fallos
2 19 8am Daily – Dari روزنامه ۸صبح افغانستان به زبان دری. https://8am.af/feed 7 Internacional 1 Afganistán fa False 30
3 20 8am Daily – Pashto د ۸صبح ورځپاڼې پښتو خپرونه. https://8am.af/ps/feed 7 Internacional 1 Afganistán ps False 30
4 1 Afghanistan News.Net – Noticias Cobertura continua de noticias generales sobre Afganistán y su entorno regional. https://feeds.afghanistannews.net/rss/6e1d5c8e1f98f17c 7 Internacional 1 Afganistán en True 0
5 36 Arezo TV – Dari آرزو تلویزیون – خبر و گزارش به دری. https://arezo.tv/fa/feed 7 Internacional 1 Afganistán fa False 30
6 37 Arezo TV – Pashto د آرزو تلویزیون پښتو خبرونه. https://arezo.tv/ps/feed 7 Internacional 1 Afganistán ps False 30
7 4 Ariana News – Dari خبرها و تحلیل‌ها از افغانستان به زبان دری. https://ariananews.af/feed 7 Internacional 1 Afganistán fa True 0
8 28 Avapress – Dari خبرگزاری صدای افغان (آوا) به زبان دری. https://avapress.com/fa/rss 7 Internacional 1 Afganistán fa False 30
9 29 Avapress – Pashto د افغان غږ خبري آژانس په پښتو ژبه. https://avapress.com/ps/rss 7 Internacional 1 Afganistán ps False 30
10 23 Bakhtar News Agency – Dari آژانس خبری باختر به زبان دری. https://bakhtarnews.af/fa/feed 7 Internacional 1 Afganistán fa False 5
11 24 Bakhtar News Agency – Pashto د باختر خبري اژانس پښتو پاڼه. https://bakhtarnews.af/ps/feed 7 Internacional 1 Afganistán ps False 5
12 38 Barya News – Dari باریانیوز – رسانه خبری افغانستان. https://barya.news/feed 7 Internacional 1 Afganistán fa False 30
13 39 Barya News – Pashto باریانیوز پښتو خپرونې. https://barya.news/ps/feed 7 Internacional 1 Afganistán ps False 30
14 47 Chaprast News – Dari چپرست نیوز – خبرهای افغانستان. https://chaprast.com/feed 7 Internacional 1 Afganistán fa False 30
15 30 Ensaf News – Dari رسانه تحلیلی به زبان دری. https://www.ensafnews.com/fa/feed 7 Internacional 1 Afganistán fa False 30
16 27 Hamshahri Afghanistan – Dari نسخه افغانستان همشهری به زبان دری. https://hamshahri.af/feed 7 Internacional 1 Afganistán fa False 30
17 44 Jomhor News – Dari جمهور نیوز – خبرگزاری مستقل دری. https://jomhornews.com/fa/rss 7 Internacional 1 Afganistán fa False 30
18 45 Jomhor News – Pashto جمهور نیوز پښتو. https://jomhornews.com/ps/rss 7 Internacional 1 Afganistán ps False 30

19
frontend/Dockerfile Normal file
View file

@ -0,0 +1,19 @@
# syntax=docker/dockerfile:1

# ---- Build stage: compile the frontend bundle ----
FROM node:20-alpine AS builder

WORKDIR /app

# Copy only the manifests first so the dependency layer stays cached
# until package.json / package-lock.json actually change.
COPY package*.json ./
# `npm ci` installs exactly what the lockfile pins (reproducible builds)
# and fails fast if package.json and the lockfile disagree — unlike
# `npm install`, which may silently update the lockfile.
RUN npm ci

COPY . .
RUN npm run build

# ---- Runtime stage: serve the static bundle with nginx ----
FROM nginx:alpine

COPY --from=builder /app/dist /usr/share/nginx/html
COPY nginx.conf /etc/nginx/nginx.conf

# Documentation only: nginx listens on 80 inside the container.
EXPOSE 80
CMD ["nginx", "-g", "daemon off;"]

13
frontend/index.html Normal file
View file

@ -0,0 +1,13 @@
<!doctype html>
<!-- Single-page-app shell: the bundle at /src/main.tsx renders into #root. -->
<html lang="es">
  <head>
    <meta charset="UTF-8" />
    <link rel="icon" type="image/svg+xml" href="/vite.svg" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>RSS2 - Noticias del Mundo</title>
  </head>
  <body>
    <!-- Application mount point — populated at runtime by /src/main.tsx -->
    <div id="root"></div>
    <script type="module" src="/src/main.tsx"></script>
  </body>
</html>

33
frontend/nginx.conf Normal file
View file

@ -0,0 +1,33 @@
# Minimal nginx config: serves the built SPA and proxies /api to the Go backend.
events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Docker's embedded DNS server; cache lookups for 10s.
    # NOTE(review): `resolver` only affects hostnames held in variables —
    # the literal hostname in `proxy_pass` below is resolved once at nginx
    # startup. Confirm whether runtime re-resolution is actually intended.
    resolver 127.0.0.11 valid=10s;

    server {
        listen 80;
        server_name localhost;

        # Static files produced by the frontend build stage.
        root /usr/share/nginx/html;
        index index.html;

        # SPA fallback: unknown paths serve index.html so client-side
        # routing can take over.
        location / {
            try_files $uri $uri/ /index.html;
        }

        # Proxy API calls to the Go backend (docker-compose service name).
        location /api {
            proxy_pass http://backend-go:8080;
            proxy_http_version 1.1;
            # WebSocket upgrade support.
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection 'upgrade';
            proxy_set_header Host $host;
            # Bypass any cache for upgraded (WebSocket) connections.
            proxy_cache_bypass $http_upgrade;
            # Forward client identity/scheme to the backend.
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }
    }
}

1
frontend/node_modules/.bin/autoprefixer generated vendored Symbolic link
View file

@ -0,0 +1 @@
../autoprefixer/bin/autoprefixer

1
frontend/node_modules/.bin/baseline-browser-mapping generated vendored Symbolic link
View file

@ -0,0 +1 @@
../baseline-browser-mapping/dist/cli.cjs

1
frontend/node_modules/.bin/browserslist generated vendored Symbolic link
View file

@ -0,0 +1 @@
../browserslist/cli.js

1
frontend/node_modules/.bin/cssesc generated vendored Symbolic link
View file

@ -0,0 +1 @@
../cssesc/bin/cssesc

1
frontend/node_modules/.bin/esbuild generated vendored Symbolic link
View file

@ -0,0 +1 @@
../esbuild/bin/esbuild

1
frontend/node_modules/.bin/jiti generated vendored Symbolic link
View file

@ -0,0 +1 @@
../jiti/bin/jiti.js

1
frontend/node_modules/.bin/jsesc generated vendored Symbolic link
View file

@ -0,0 +1 @@
../jsesc/bin/jsesc

1
frontend/node_modules/.bin/json5 generated vendored Symbolic link
View file

@ -0,0 +1 @@
../json5/lib/cli.js

1
frontend/node_modules/.bin/loose-envify generated vendored Symbolic link
View file

@ -0,0 +1 @@
../loose-envify/cli.js

1
frontend/node_modules/.bin/nanoid generated vendored Symbolic link
View file

@ -0,0 +1 @@
../nanoid/bin/nanoid.cjs

1
frontend/node_modules/.bin/parser generated vendored Symbolic link
View file

@ -0,0 +1 @@
../@babel/parser/bin/babel-parser.js

1
frontend/node_modules/.bin/resolve generated vendored Symbolic link
View file

@ -0,0 +1 @@
../resolve/bin/resolve

1
frontend/node_modules/.bin/rollup generated vendored Symbolic link
View file

@ -0,0 +1 @@
../rollup/dist/bin/rollup

1
frontend/node_modules/.bin/semver generated vendored Symbolic link
View file

@ -0,0 +1 @@
../semver/bin/semver.js

1
frontend/node_modules/.bin/sucrase generated vendored Symbolic link
View file

@ -0,0 +1 @@
../sucrase/bin/sucrase

1
frontend/node_modules/.bin/sucrase-node generated vendored Symbolic link
View file

@ -0,0 +1 @@
../sucrase/bin/sucrase-node

1
frontend/node_modules/.bin/tailwind generated vendored Symbolic link
View file

@ -0,0 +1 @@
../tailwindcss/lib/cli.js

1
frontend/node_modules/.bin/tailwindcss generated vendored Symbolic link
View file

@ -0,0 +1 @@
../tailwindcss/lib/cli.js

1
frontend/node_modules/.bin/tsc generated vendored Symbolic link
View file

@ -0,0 +1 @@
../typescript/bin/tsc

1
frontend/node_modules/.bin/tsserver generated vendored Symbolic link
View file

@ -0,0 +1 @@
../typescript/bin/tsserver

1
frontend/node_modules/.bin/update-browserslist-db generated vendored Symbolic link
View file

@ -0,0 +1 @@
../update-browserslist-db/cli.js

1
frontend/node_modules/.bin/vite generated vendored Symbolic link
View file

@ -0,0 +1 @@
../vite/bin/vite.js

2284
frontend/node_modules/.package-lock.json generated vendored Normal file

File diff suppressed because it is too large Load diff

128
frontend/node_modules/@alloc/quick-lru/index.d.ts generated vendored Normal file
View file

@ -0,0 +1,128 @@
declare namespace QuickLRU {
	interface Options<KeyType, ValueType> {
		/**
		The maximum number of milliseconds an item should remain in the cache.

		@default Infinity

		By default, `maxAge` will be `Infinity`, which means that items will never expire.
		Lazy expiration upon the next write or read call.

		Individual expiration of an item can be specified by the `set(key, value, maxAge)` method.
		*/
		readonly maxAge?: number;

		/**
		The maximum number of items before evicting the least recently used items.
		*/
		readonly maxSize: number;

		/**
		Called right before an item is evicted from the cache.

		Useful for side effects or for items like object URLs that need explicit cleanup (`revokeObjectURL`).
		*/
		onEviction?: (key: KeyType, value: ValueType) => void;
	}
}

declare class QuickLRU<KeyType, ValueType>
	implements Iterable<[KeyType, ValueType]> {
	/**
	The stored item count.
	*/
	readonly size: number;

	/**
	Simple ["Least Recently Used" (LRU) cache](https://en.m.wikipedia.org/wiki/Cache_replacement_policies#Least_Recently_Used_.28LRU.29).

	The instance is [`iterable`](https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Iteration_protocols) so you can use it directly in a [`for…of`](https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Statements/for...of) loop.

	@example
	```
	import QuickLRU = require('quick-lru');

	const lru = new QuickLRU({maxSize: 1000});

	lru.set('🦄', '🌈');

	lru.has('🦄');
	//=> true

	lru.get('🦄');
	//=> '🌈'
	```
	*/
	constructor(options: QuickLRU.Options<KeyType, ValueType>);

	[Symbol.iterator](): IterableIterator<[KeyType, ValueType]>;

	/**
	Set an item. Returns the instance.

	Individual expiration of an item can be specified with the `maxAge` option. If not specified, the global `maxAge` value will be used in case it is specified in the constructor, otherwise the item will never expire.

	NOTE(review): the bundled `index.js` implementation of `set()` returns
	`undefined`, not the instance — confirm before chaining on the return value.

	@returns The list instance.
	*/
	set(key: KeyType, value: ValueType, options?: {maxAge?: number}): this;

	/**
	Get an item.

	@returns The stored item or `undefined`.
	*/
	get(key: KeyType): ValueType | undefined;

	/**
	Check if an item exists.
	*/
	has(key: KeyType): boolean;

	/**
	Get an item without marking it as recently used.

	@returns The stored item or `undefined`.
	*/
	peek(key: KeyType): ValueType | undefined;

	/**
	Delete an item.

	@returns `true` if the item is removed or `false` if the item doesn't exist.
	*/
	delete(key: KeyType): boolean;

	/**
	Delete all items.
	*/
	clear(): void;

	/**
	Update the `maxSize` in-place, discarding items as necessary. Insertion order is mostly preserved, though this is not a strong guarantee.

	Useful for on-the-fly tuning of cache sizes in live systems.
	*/
	resize(maxSize: number): void;

	/**
	Iterable for all the keys.
	*/
	keys(): IterableIterator<KeyType>;

	/**
	Iterable for all the values.
	*/
	values(): IterableIterator<ValueType>;

	/**
	Iterable for all entries, starting with the oldest (ascending in recency).
	*/
	entriesAscending(): IterableIterator<[KeyType, ValueType]>;

	/**
	Iterable for all entries, starting with the newest (descending in recency).
	*/
	entriesDescending(): IterableIterator<[KeyType, ValueType]>;
}

export = QuickLRU;

263
frontend/node_modules/@alloc/quick-lru/index.js generated vendored Normal file
View file

@ -0,0 +1,263 @@
'use strict';
class QuickLRU {
constructor(options = {}) {
if (!(options.maxSize && options.maxSize > 0)) {
throw new TypeError('`maxSize` must be a number greater than 0');
}
if (typeof options.maxAge === 'number' && options.maxAge === 0) {
throw new TypeError('`maxAge` must be a number greater than 0');
}
this.maxSize = options.maxSize;
this.maxAge = options.maxAge || Infinity;
this.onEviction = options.onEviction;
this.cache = new Map();
this.oldCache = new Map();
this._size = 0;
}
_emitEvictions(cache) {
if (typeof this.onEviction !== 'function') {
return;
}
for (const [key, item] of cache) {
this.onEviction(key, item.value);
}
}
_deleteIfExpired(key, item) {
if (typeof item.expiry === 'number' && item.expiry <= Date.now()) {
if (typeof this.onEviction === 'function') {
this.onEviction(key, item.value);
}
return this.delete(key);
}
return false;
}
_getOrDeleteIfExpired(key, item) {
const deleted = this._deleteIfExpired(key, item);
if (deleted === false) {
return item.value;
}
}
_getItemValue(key, item) {
return item.expiry ? this._getOrDeleteIfExpired(key, item) : item.value;
}
_peek(key, cache) {
const item = cache.get(key);
return this._getItemValue(key, item);
}
_set(key, value) {
this.cache.set(key, value);
this._size++;
if (this._size >= this.maxSize) {
this._size = 0;
this._emitEvictions(this.oldCache);
this.oldCache = this.cache;
this.cache = new Map();
}
}
_moveToRecent(key, item) {
this.oldCache.delete(key);
this._set(key, item);
}
* _entriesAscending() {
for (const item of this.oldCache) {
const [key, value] = item;
if (!this.cache.has(key)) {
const deleted = this._deleteIfExpired(key, value);
if (deleted === false) {
yield item;
}
}
}
for (const item of this.cache) {
const [key, value] = item;
const deleted = this._deleteIfExpired(key, value);
if (deleted === false) {
yield item;
}
}
}
get(key) {
if (this.cache.has(key)) {
const item = this.cache.get(key);
return this._getItemValue(key, item);
}
if (this.oldCache.has(key)) {
const item = this.oldCache.get(key);
if (this._deleteIfExpired(key, item) === false) {
this._moveToRecent(key, item);
return item.value;
}
}
}
set(key, value, {maxAge = this.maxAge === Infinity ? undefined : Date.now() + this.maxAge} = {}) {
if (this.cache.has(key)) {
this.cache.set(key, {
value,
maxAge
});
} else {
this._set(key, {value, expiry: maxAge});
}
}
has(key) {
if (this.cache.has(key)) {
return !this._deleteIfExpired(key, this.cache.get(key));
}
if (this.oldCache.has(key)) {
return !this._deleteIfExpired(key, this.oldCache.get(key));
}
return false;
}
peek(key) {
if (this.cache.has(key)) {
return this._peek(key, this.cache);
}
if (this.oldCache.has(key)) {
return this._peek(key, this.oldCache);
}
}
delete(key) {
const deleted = this.cache.delete(key);
if (deleted) {
this._size--;
}
return this.oldCache.delete(key) || deleted;
}
clear() {
this.cache.clear();
this.oldCache.clear();
this._size = 0;
}
resize(newSize) {
if (!(newSize && newSize > 0)) {
throw new TypeError('`maxSize` must be a number greater than 0');
}
const items = [...this._entriesAscending()];
const removeCount = items.length - newSize;
if (removeCount < 0) {
this.cache = new Map(items);
this.oldCache = new Map();
this._size = items.length;
} else {
if (removeCount > 0) {
this._emitEvictions(items.slice(0, removeCount));
}
this.oldCache = new Map(items.slice(removeCount));
this.cache = new Map();
this._size = 0;
}
this.maxSize = newSize;
}
* keys() {
for (const [key] of this) {
yield key;
}
}
* values() {
for (const [, value] of this) {
yield value;
}
}
* [Symbol.iterator]() {
	// Yield all live entries as [key, value] pairs: the recent generation
	// first, then the old generation. Expired entries are deleted lazily as
	// they are encountered and skipped.
	for (const item of this.cache) {
		// `value` is the internal wrapper object ({value, expiry} — see
		// `set`); the caller receives the unwrapped `.value`.
		const [key, value] = item;
		const deleted = this._deleteIfExpired(key, value);
		if (deleted === false) {
			yield item;
		}
	}
	for (const item of this.oldCache) {
		const [key, value] = item;
		// Skip old-generation keys shadowed by (already yielded from) the
		// recent generation.
		if (!this.cache.has(key)) {
			const deleted = this._deleteIfExpired(key, value);
			if (deleted === false) {
				yield [key, value.value];
			}
		}
	}
}
* entriesDescending() {
	// Yield [key, value] pairs newest-to-oldest: the recent generation in
	// reverse insertion order, then the old generation in reverse, skipping
	// old keys shadowed by the recent generation. Expired entries are
	// deleted lazily and skipped.
	let items = [...this.cache];
	for (let i = items.length - 1; i >= 0; --i) {
		const item = items[i];
		// `value` is the internal wrapper object ({value, expiry} — see
		// `set`); yield the unwrapped `.value`.
		const [key, value] = item;
		const deleted = this._deleteIfExpired(key, value);
		if (deleted === false) {
			yield [key, value.value];
		}
	}
	items = [...this.oldCache];
	for (let i = items.length - 1; i >= 0; --i) {
		const item = items[i];
		const [key, value] = item;
		if (!this.cache.has(key)) {
			const deleted = this._deleteIfExpired(key, value);
			if (deleted === false) {
				yield [key, value.value];
			}
		}
	}
}
* entriesAscending() {
for (const [key, value] of this._entriesAscending()) {
yield [key, value.value];
}
}
get size() {
	// Number of distinct live keys across both generations, capped at
	// `maxSize`. `_size` mirrors the recent generation's entry count (see
	// `delete`, `clear` and `resize`).
	if (!this._size) {
		// Recent generation is empty, so no old keys can be shadowed by it;
		// the old generation's size is the whole count.
		return this.oldCache.size;
	}
	// Count old-generation keys not shadowed by the recent generation.
	let oldCacheSize = 0;
	for (const key of this.oldCache.keys()) {
		if (!this.cache.has(key)) {
			oldCacheSize++;
		}
	}
	return Math.min(this._size + oldCacheSize, this.maxSize);
}
}
// CommonJS entry point: `const QuickLRU = require('quick-lru')`.
module.exports = QuickLRU;

9
frontend/node_modules/@alloc/quick-lru/license generated vendored Normal file
View file

@ -0,0 +1,9 @@
MIT License
Copyright (c) Sindre Sorhus <sindresorhus@gmail.com> (sindresorhus.com)
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

43
frontend/node_modules/@alloc/quick-lru/package.json generated vendored Normal file
View file

@ -0,0 +1,43 @@
{
"name": "@alloc/quick-lru",
"version": "5.2.0",
"description": "Simple “Least Recently Used” (LRU) cache",
"license": "MIT",
"repository": "sindresorhus/quick-lru",
"funding": "https://github.com/sponsors/sindresorhus",
"author": {
"name": "Sindre Sorhus",
"email": "sindresorhus@gmail.com",
"url": "https://sindresorhus.com"
},
"engines": {
"node": ">=10"
},
"scripts": {
"test": "xo && nyc ava && tsd"
},
"files": [
"index.js",
"index.d.ts"
],
"keywords": [
"lru",
"quick",
"cache",
"caching",
"least",
"recently",
"used",
"fast",
"map",
"hash",
"buffer"
],
"devDependencies": {
"ava": "^2.0.0",
"coveralls": "^3.0.3",
"nyc": "^15.0.0",
"tsd": "^0.11.0",
"xo": "^0.26.0"
}
}

139
frontend/node_modules/@alloc/quick-lru/readme.md generated vendored Normal file
View file

@ -0,0 +1,139 @@
# quick-lru [![Build Status](https://travis-ci.org/sindresorhus/quick-lru.svg?branch=master)](https://travis-ci.org/sindresorhus/quick-lru) [![Coverage Status](https://coveralls.io/repos/github/sindresorhus/quick-lru/badge.svg?branch=master)](https://coveralls.io/github/sindresorhus/quick-lru?branch=master)
> Simple [“Least Recently Used” (LRU) cache](https://en.m.wikipedia.org/wiki/Cache_replacement_policies#Least_Recently_Used_.28LRU.29)
Useful when you need to cache something and limit memory usage.
Inspired by the [`hashlru` algorithm](https://github.com/dominictarr/hashlru#algorithm), but instead uses [`Map`](https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/Map) to support keys of any type, not just strings, and values can be `undefined`.
## Install
```
$ npm install quick-lru
```
## Usage
```js
const QuickLRU = require('quick-lru');
const lru = new QuickLRU({maxSize: 1000});
lru.set('🦄', '🌈');
lru.has('🦄');
//=> true
lru.get('🦄');
//=> '🌈'
```
## API
### new QuickLRU(options?)
Returns a new instance.
### options
Type: `object`
#### maxSize
*Required*\
Type: `number`
The maximum number of items before evicting the least recently used items.
#### maxAge
Type: `number`\
Default: `Infinity`
The maximum number of milliseconds an item should remain in cache.
By default, `maxAge` will be `Infinity`, which means that items will never expire.
Lazy expiration happens upon the next `write` or `read` call.
Individual expiration of an item can be specified by the `set(key, value, options)` method.
#### onEviction
*Optional*\
Type: `(key, value) => void`
Called right before an item is evicted from the cache.
Useful for side effects or for items like object URLs that need explicit cleanup (`revokeObjectURL`).
### Instance
The instance is [`iterable`](https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Iteration_protocols) so you can use it directly in a [`for…of`](https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Statements/for...of) loop.
Both `key` and `value` can be of any type.
#### .set(key, value, options?)
Set an item. Returns the instance.
Individual expiration of an item can be specified with the `maxAge` option. If not specified, the global `maxAge` value will be used if it was specified on the constructor; otherwise the item will never expire.
#### .get(key)
Get an item.
#### .has(key)
Check if an item exists.
#### .peek(key)
Get an item without marking it as recently used.
#### .delete(key)
Delete an item.
Returns `true` if the item is removed or `false` if the item doesn't exist.
#### .clear()
Delete all items.
#### .resize(maxSize)
Update the `maxSize`, discarding items as necessary. Insertion order is mostly preserved, though this is not a strong guarantee.
Useful for on-the-fly tuning of cache sizes in live systems.
#### .keys()
Iterable for all the keys.
#### .values()
Iterable for all the values.
#### .entriesAscending()
Iterable for all entries, starting with the oldest (ascending in recency).
#### .entriesDescending()
Iterable for all entries, starting with the newest (descending in recency).
#### .size
The stored item count.
---
<div align="center">
<b>
<a href="https://tidelift.com/subscription/pkg/npm-quick-lru?utm_source=npm-quick-lru&utm_medium=referral&utm_campaign=readme">Get professional support for this package with a Tidelift subscription</a>
</b>
<br>
<sub>
Tidelift helps make open source sustainable for maintainers while giving companies<br>assurances about security, maintenance, and licensing for their dependencies.
</sub>
</div>

22
frontend/node_modules/@babel/code-frame/LICENSE generated vendored Normal file
View file

@ -0,0 +1,22 @@
MIT License
Copyright (c) 2014-present Sebastian McKenzie and other contributors
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

19
frontend/node_modules/@babel/code-frame/README.md generated vendored Normal file
View file

@ -0,0 +1,19 @@
# @babel/code-frame
> Generate errors that contain a code frame that point to source locations.
See our website [@babel/code-frame](https://babeljs.io/docs/babel-code-frame) for more information.
## Install
Using npm:
```sh
npm install --save-dev @babel/code-frame
```
or using yarn:
```sh
yarn add @babel/code-frame --dev
```

217
frontend/node_modules/@babel/code-frame/lib/index.js generated vendored Normal file
View file

@ -0,0 +1,217 @@
'use strict';
Object.defineProperty(exports, '__esModule', { value: true });
var picocolors = require('picocolors');
var jsTokens = require('js-tokens');
var helperValidatorIdentifier = require('@babel/helper-validator-identifier');
function isColorSupported() {
  // FORCE_COLOR=0 (or "false") explicitly disables color; otherwise defer to
  // picocolors' own terminal-capability detection.
  const forcedOff =
    typeof process === "object" &&
    (process.env.FORCE_COLOR === "0" || process.env.FORCE_COLOR === "false");
  return forcedOff ? false : picocolors.isColorSupported;
}
// Right-to-left function composition: compose(outer, inner)(v) === outer(inner(v)).
const compose = (outer, inner) => value => outer(inner(value));

// Build the token-category -> color-function table used by `highlight` and
// the frame renderer. `colors` is a picocolors-style palette (cyan, bold, ...).
function buildDefs(colors) {
  return {
    keyword: colors.cyan,
    capitalized: colors.yellow,
    jsxIdentifier: colors.yellow,
    punctuator: colors.yellow,
    number: colors.magenta,
    string: colors.green,
    regex: colors.magenta,
    comment: colors.gray,
    // Bold white-on-red for invalid tokens.
    invalid: v => colors.white(colors.bgRed(colors.bold(v))),
    gutter: colors.gray,
    marker: v => colors.red(colors.bold(v)),
    message: v => colors.red(colors.bold(v)),
    reset: colors.reset
  };
}
// Palettes built once up front: `defsOn` with ANSI color functions, `defsOff`
// with picocolors' non-coloring equivalents (same keys, no escape codes).
const defsOn = buildDefs(picocolors.createColors(true));
const defsOff = buildDefs(picocolors.createColors(false));
// Select one of the two prebuilt palettes by flag.
function getDefs(enabled) {
  return enabled ? defsOn : defsOff;
}
// Identifiers that act as keywords only in certain positions.
const sometimesKeywords = new Set(["as", "async", "from", "get", "of", "set"]);
// Newline variants, including Unicode line/paragraph separators.
const NEWLINE$1 = /\r\n|[\n\r\u2028\u2029]/;
// A single bracket/paren/brace character.
const BRACKET = /^[()[\]{}]$/;
// Assigned below (generator over js-tokens output); declared here so
// `highlight` can reference it.
let tokenize;
// Tag-name shape: a letter followed by word characters or hyphens
// (case-insensitive via /i).
const JSX_TAG = /^[a-z][\w-]*$/i;
// Refine a raw js-tokens token type into the highlight category used by
// `buildDefs` (keyword / jsxIdentifier / capitalized / bracket / ...).
// `offset` is the token's index in `text`, used to detect a preceding JSX "<".
const getTokenType = function (token, offset, text) {
  const {type, value} = token;
  if (type === "name") {
    const isReservedWord =
      helperValidatorIdentifier.isKeyword(value) ||
      helperValidatorIdentifier.isStrictReservedWord(value, true) ||
      sometimesKeywords.has(value);
    if (isReservedWord) {
      return "keyword";
    }
    // A tag-shaped name right after "<" or "</" is a JSX identifier.
    const afterTagOpener = text[offset - 1] === "<" || text.slice(offset - 2, offset) === "</";
    if (JSX_TAG.test(value) && afterTagOpener) {
      return "jsxIdentifier";
    }
    const firstChar = String.fromCodePoint(value.codePointAt(0));
    if (firstChar !== firstChar.toLowerCase()) {
      return "capitalized";
    }
  }
  if (type === "punctuator" && BRACKET.test(value)) {
    return "bracket";
  }
  // "@" and "#" come back from js-tokens as invalid; render them as punctuators.
  if (type === "invalid" && (value === "@" || value === "#")) {
    return "punctuator";
  }
  return type;
};
// Lex `text` with js-tokens' shared regex. `exec` advances `lastIndex` on
// that regex between calls, so each loop iteration yields the next token.
tokenize = function* (text) {
  let match;
  while (match = jsTokens.default.exec(text)) {
    const token = jsTokens.matchToToken(match);
    yield {
      // Refine the raw js-tokens type into a highlight category.
      type: getTokenType(token, match.index, text),
      value: token.value
    };
  }
};
// Syntax-color JS source for terminal output using the "on" palette.
// Returns the input unchanged (token-wise) for categories without a color.
function highlight(text) {
  if (text === "") {
    return "";
  }
  const defs = getDefs(true);
  const pieces = [];
  for (const {type, value} of tokenize(text)) {
    if (type in defs) {
      // Color each line of the token separately so escape codes never span
      // a newline.
      const colorize = defs[type];
      pieces.push(value.split(NEWLINE$1).map(line => colorize(line)).join("\n"));
    } else {
      pieces.push(value);
    }
  }
  return pieces.join("");
}
// Ensures the lineNumber/colNumber deprecation warning (see `index`) fires
// at most once per process.
let deprecationWarningShown = false;
// Newline variants, including Unicode line/paragraph separators.
const NEWLINE = /\r\n|[\n\r\u2028\u2029]/;
/**
 * Compute which source lines to print and where to draw markers.
 *
 * @param {{start?: {line?, column?}, end?: {line?, column?}}} loc
 *   Marked region; a missing start defaults to line -1 / column 0, and a
 *   missing end collapses the region onto the start.
 * @param {string[]} source  The split source lines.
 * @param {{linesAbove?: number, linesBelow?: number}|undefined} opts
 *   Context sizes (defaults: 2 above, 3 below).
 * @param {number} startLineBaseZero  Offset applied to `loc` line numbers.
 * @returns {{start: number, end: number, markerLines: Object}}
 *   `start`/`end` are slice bounds into `source`; `markerLines` maps a line
 *   number to `true` (mark whole line) or `[column, length]`.
 */
function getMarkerLines(loc, source, opts, startLineBaseZero) {
  const startLoc = {column: 0, line: -1, ...loc.start};
  const endLoc = {...startLoc, ...loc.end};
  const {linesAbove = 2, linesBelow = 3} = opts || {};
  const startLine = startLoc.line - startLineBaseZero;
  const startColumn = startLoc.column;
  const endLine = endLoc.line - startLineBaseZero;
  const endColumn = endLoc.column;
  // Unknown positions (line -1) widen the window to the whole source.
  const start = startLine === -1 ? 0 : Math.max(startLine - (linesAbove + 1), 0);
  const end = endLine === -1 ? source.length : Math.min(source.length, endLine + linesBelow);
  const lineDiff = endLine - startLine;
  const markerLines = {};
  if (lineDiff === 0) {
    // Single-line region: zero-width columns get a [column, 0] marker, a
    // column of 0 marks the whole line.
    if (startColumn === endColumn) {
      markerLines[startLine] = startColumn ? [startColumn, 0] : true;
    } else {
      markerLines[startLine] = [startColumn, endColumn - startColumn];
    }
  } else {
    for (let i = 0; i <= lineDiff; i++) {
      const lineNumber = i + startLine;
      if (!startColumn) {
        markerLines[lineNumber] = true;
      } else if (i === 0) {
        // First line: mark from startColumn to the end of that line.
        const sourceLength = source[lineNumber - 1].length;
        markerLines[lineNumber] = [startColumn, sourceLength - startColumn + 1];
      } else if (i === lineDiff) {
        markerLines[lineNumber] = [0, endColumn];
      } else {
        // NOTE(review): `lineNumber - i` is constant (== startLine), so every
        // middle line uses the first marked line's length — kept as-is to
        // match upstream behavior; confirm against @babel/code-frame source.
        const sourceLength = source[lineNumber - i].length;
        markerLines[lineNumber] = [0, sourceLength];
      }
    }
  }
  return {
    start,
    end,
    markerLines
  };
}
/**
 * Render a code frame for `rawLines` with the region described by `loc`
 * ({start: {line, column?}, end?}) marked in the gutter.
 *
 * Options (all optional): `linesAbove`/`linesBelow` (context size, consumed
 * by `getMarkerLines`), `startLine` (1-based number of the first raw line),
 * `highlightCode`/`forceColor` (syntax coloring), `message` (text attached
 * to the marker, or placed above the frame when no column info is present).
 */
function codeFrameColumns(rawLines, loc, opts = {}) {
  const shouldHighlight = opts.forceColor || isColorSupported() && opts.highlightCode;
  const startLineBaseZero = (opts.startLine || 1) - 1;
  const defs = getDefs(shouldHighlight);
  const lines = rawLines.split(NEWLINE);
  const {
    start,
    end,
    markerLines
  } = getMarkerLines(loc, lines, opts, startLineBaseZero);
  const hasColumns = loc.start && typeof loc.start.column === "number";
  // Width of the widest printed line number, used to pad the gutter.
  const numberMaxWidth = String(end + startLineBaseZero).length;
  const highlightedLines = shouldHighlight ? highlight(rawLines) : rawLines;
  let frame = highlightedLines.split(NEWLINE, end).slice(start, end).map((line, index) => {
    // 1-based line index within the sliced window (matches markerLines keys).
    const number = start + 1 + index;
    const paddedNumber = ` ${number + startLineBaseZero}`.slice(-numberMaxWidth);
    const gutter = ` ${paddedNumber} |`;
    const hasMarker = markerLines[number];
    const lastMarkerLine = !markerLines[number + 1];
    if (hasMarker) {
      let markerLine = "";
      if (Array.isArray(hasMarker)) {
        // hasMarker is [startColumn, markerLength]: draw "^" under the span,
        // preserving tabs in the leading spacing so the carets line up.
        const markerSpacing = line.slice(0, Math.max(hasMarker[0] - 1, 0)).replace(/[^\t]/g, " ");
        const numberOfMarkers = hasMarker[1] || 1;
        markerLine = ["\n ", defs.gutter(gutter.replace(/\d/g, " ")), " ", markerSpacing, defs.marker("^").repeat(numberOfMarkers)].join("");
        if (lastMarkerLine && opts.message) {
          markerLine += " " + defs.message(opts.message);
        }
      }
      return [defs.marker(">"), defs.gutter(gutter), line.length > 0 ? ` ${line}` : "", markerLine].join("");
    } else {
      return ` ${defs.gutter(gutter)}${line.length > 0 ? ` ${line}` : ""}`;
    }
  }).join("\n");
  if (opts.message && !hasColumns) {
    // Without column info the message can't sit under a caret marker; print
    // it above the frame instead.
    frame = `${" ".repeat(numberMaxWidth + 1)}${opts.message}\n${frame}`;
  }
  if (shouldHighlight) {
    return defs.reset(frame);
  } else {
    return frame;
  }
}
/**
 * Deprecated entry point: build a code frame from a (lineNumber, colNumber)
 * pair instead of a location object. Warns once per process, then delegates
 * to `codeFrameColumns`.
 *
 * @param {string} rawLines  Source text to frame.
 * @param {number} lineNumber  1-based line to mark.
 * @param {number} colNumber  Column to mark (clamped to >= 0).
 * @param {Object} [opts]  Same options as `codeFrameColumns`.
 * @returns {string} The rendered code frame.
 */
function index (rawLines, lineNumber, colNumber, opts = {}) {
  if (!deprecationWarningShown) {
    deprecationWarningShown = true;
    const message = "Passing lineNumber and colNumber is deprecated to @babel/code-frame. Please use `codeFrameColumns`.";
    if (process.emitWarning) {
      process.emitWarning(message, "DeprecationWarning");
    } else {
      // Bug fix: log the error whose `name` was set to "DeprecationWarning".
      // Previously a fresh `new Error(message)` (with the default "Error"
      // name) was logged and the configured one was discarded unused.
      const deprecationError = new Error(message);
      deprecationError.name = "DeprecationWarning";
      console.warn(deprecationError);
    }
  }
  colNumber = Math.max(colNumber, 0);
  const location = {
    start: {
      column: colNumber,
      line: lineNumber
    }
  };
  return codeFrameColumns(rawLines, location, opts);
}
// Public API: `codeFrameColumns` and `highlight` as named exports, plus the
// deprecated positional `index` as the default export.
exports.codeFrameColumns = codeFrameColumns;
exports.default = index;
exports.highlight = highlight;
//# sourceMappingURL=index.js.map

File diff suppressed because one or more lines are too long

32
frontend/node_modules/@babel/code-frame/package.json generated vendored Normal file
View file

@ -0,0 +1,32 @@
{
"name": "@babel/code-frame",
"version": "7.29.0",
"description": "Generate errors that contain a code frame that point to source locations.",
"author": "The Babel Team (https://babel.dev/team)",
"homepage": "https://babel.dev/docs/en/next/babel-code-frame",
"bugs": "https://github.com/babel/babel/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen",
"license": "MIT",
"publishConfig": {
"access": "public"
},
"repository": {
"type": "git",
"url": "https://github.com/babel/babel.git",
"directory": "packages/babel-code-frame"
},
"main": "./lib/index.js",
"dependencies": {
"@babel/helper-validator-identifier": "^7.28.5",
"js-tokens": "^4.0.0",
"picocolors": "^1.1.1"
},
"devDependencies": {
"charcodes": "^0.2.0",
"import-meta-resolve": "^4.1.0",
"strip-ansi": "^4.0.0"
},
"engines": {
"node": ">=6.9.0"
},
"type": "commonjs"
}

22
frontend/node_modules/@babel/compat-data/LICENSE generated vendored Normal file
View file

@ -0,0 +1,22 @@
MIT License
Copyright (c) 2014-present Sebastian McKenzie and other contributors
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

19
frontend/node_modules/@babel/compat-data/README.md generated vendored Normal file
View file

@ -0,0 +1,19 @@
# @babel/compat-data
> The compat-data to determine required Babel plugins
See our website [@babel/compat-data](https://babeljs.io/docs/babel-compat-data) for more information.
## Install
Using npm:
```sh
npm install --save @babel/compat-data
```
or using yarn:
```sh
yarn add @babel/compat-data
```

View file

@ -0,0 +1,2 @@
// Todo (Babel 8): remove this file as Babel 8 drop support of core-js 2
module.exports = require("./data/corejs2-built-ins.json");

View file

@ -0,0 +1,2 @@
// Todo (Babel 8): remove this file now that it is included in babel-plugin-polyfill-corejs3
module.exports = require("./data/corejs3-shipped-proposals.json");

View file

@ -0,0 +1,2 @@
// Todo (Babel 8): remove this file, in Babel 8 users import the .json directly
module.exports = require("./data/native-modules.json");

View file

@ -0,0 +1,2 @@
// Todo (Babel 8): remove this file, in Babel 8 users import the .json directly
module.exports = require("./data/overlapping-plugins.json");

40
frontend/node_modules/@babel/compat-data/package.json generated vendored Normal file
View file

@ -0,0 +1,40 @@
{
"name": "@babel/compat-data",
"version": "7.29.0",
"author": "The Babel Team (https://babel.dev/team)",
"license": "MIT",
"description": "The compat-data to determine required Babel plugins",
"repository": {
"type": "git",
"url": "https://github.com/babel/babel.git",
"directory": "packages/babel-compat-data"
},
"publishConfig": {
"access": "public"
},
"exports": {
"./plugins": "./plugins.js",
"./native-modules": "./native-modules.js",
"./corejs2-built-ins": "./corejs2-built-ins.js",
"./corejs3-shipped-proposals": "./corejs3-shipped-proposals.js",
"./overlapping-plugins": "./overlapping-plugins.js",
"./plugin-bugfixes": "./plugin-bugfixes.js"
},
"scripts": {
"build-data": "./scripts/download-compat-table.sh && node ./scripts/build-data.mjs && node ./scripts/build-modules-support.mjs && node ./scripts/build-bugfixes-targets.mjs"
},
"keywords": [
"babel",
"compat-table",
"compat-data"
],
"devDependencies": {
"@mdn/browser-compat-data": "^6.0.8",
"core-js-compat": "^3.48.0",
"electron-to-chromium": "^1.5.278"
},
"engines": {
"node": ">=6.9.0"
},
"type": "commonjs"
}

View file

@ -0,0 +1,2 @@
// Todo (Babel 8): remove this file, in Babel 8 users import the .json directly
module.exports = require("./data/plugin-bugfixes.json");

2
frontend/node_modules/@babel/compat-data/plugins.js generated vendored Normal file
View file

@ -0,0 +1,2 @@
// Todo (Babel 8): remove this file, in Babel 8 users import the .json directly
module.exports = require("./data/plugins.json");

22
frontend/node_modules/@babel/core/LICENSE generated vendored Normal file
View file

@ -0,0 +1,22 @@
MIT License
Copyright (c) 2014-present Sebastian McKenzie and other contributors
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

19
frontend/node_modules/@babel/core/README.md generated vendored Normal file
View file

@ -0,0 +1,19 @@
# @babel/core
> Babel compiler core.
See our website [@babel/core](https://babeljs.io/docs/babel-core) for more information or the [issues](https://github.com/babel/babel/issues?utf8=%E2%9C%93&q=is%3Aissue+label%3A%22pkg%3A%20core%22+is%3Aopen) associated with this package.
## Install
Using npm:
```sh
npm install --save-dev @babel/core
```
or using yarn:
```sh
yarn add @babel/core --dev
```

Some files were not shown because too many files have changed in this diff Show more