go integration and wikipedia
This commit is contained in:
parent
47a252e339
commit
ee90335b92
7828 changed files with 1307913 additions and 20807 deletions
|
|
@ -22,9 +22,13 @@ ALLTALK_URL=http://host.docker.internal:7851
|
|||
|
||||
# AI Models & Workers
|
||||
RSS_MAX_WORKERS=3
|
||||
# Translation Pipeline
|
||||
TARGET_LANGS=es
|
||||
TRANSLATOR_BATCH=128
|
||||
ENQUEUE=300
|
||||
TRANSLATOR_BATCH=16
|
||||
SCHEDULER_BATCH=2000
|
||||
SCHEDULER_SLEEP=30
|
||||
LANG_DETECT_BATCH=1000
|
||||
LANG_DETECT_SLEEP=60
|
||||
|
||||
# RSS Ingestor Configuration
|
||||
RSS_POKE_INTERVAL_MIN=15
|
||||
|
|
|
|||
79
Dockerfile
79
Dockerfile
|
|
@ -1,67 +1,50 @@
|
|||
FROM python:3.11-slim
|
||||
|
||||
# CUDA o CPU
|
||||
ARG TORCH_CUDA=cu121
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# --------------------------------------------------------
|
||||
# Dependencias del sistema
|
||||
# --------------------------------------------------------
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libpq-dev \
|
||||
gcc \
|
||||
git \
|
||||
libcairo2 \
|
||||
libpango-1.0-0 \
|
||||
libpangocairo-1.0-0 \
|
||||
libgdk-pixbuf-2.0-0 \
|
||||
libffi-dev \
|
||||
shared-mime-info \
|
||||
libpq-dev gcc git curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
||||
TOKENIZERS_PARALLELISM=false \
|
||||
HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
|
||||
HF_HOME=/root/.cache/huggingface
|
||||
|
||||
# --------------------------------------------------------
|
||||
# Instalación de requirements
|
||||
# --------------------------------------------------------
|
||||
COPY requirements.txt .
|
||||
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel
|
||||
RUN pip install --no-cache-dir --upgrade pip
|
||||
|
||||
# Instalar PyTorch según GPU/CPU
|
||||
RUN if [ "$TORCH_CUDA" = "cu121" ]; then \
|
||||
pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cu121 \
|
||||
torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 ; \
|
||||
else \
|
||||
pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu \
|
||||
torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 ; \
|
||||
fi
|
||||
RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu121
|
||||
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
RUN pip install --no-cache-dir \
|
||||
ctranslate2 \
|
||||
sentencepiece \
|
||||
transformers==4.44.0 \
|
||||
protobuf==3.20.3 \
|
||||
"numpy<2" \
|
||||
psycopg2-binary \
|
||||
redis \
|
||||
requests \
|
||||
beautifulsoup4 \
|
||||
lxml \
|
||||
langdetect \
|
||||
nltk \
|
||||
scikit-learn \
|
||||
pandas \
|
||||
sentence-transformers \
|
||||
spacy
|
||||
|
||||
# Instalar ctranslate2 con soporte CUDA
|
||||
RUN if [ "$TORCH_CUDA" = "cu121" ]; then \
|
||||
pip install --no-cache-dir ctranslate2 ; \
|
||||
else \
|
||||
pip install --no-cache-dir ctranslate2 ; \
|
||||
fi
|
||||
RUN python -m spacy download es_core_news_lg
|
||||
|
||||
# Descargar modelo spaCy ES
|
||||
RUN python -m spacy download es_core_news_md || true
|
||||
COPY workers/ ./workers/
|
||||
COPY init-db/ ./init-db/
|
||||
COPY migrations/ ./migrations/
|
||||
COPY entity_config.json .
|
||||
|
||||
# --------------------------------------------------------
|
||||
# Copiar TODO el proyecto rss2/
|
||||
# --------------------------------------------------------
|
||||
COPY . .
|
||||
|
||||
# --------------------------------------------------------
|
||||
# Puede descargar modelos NLLB o Sentence-BERT si existe
|
||||
# --------------------------------------------------------
|
||||
RUN python download_models.py || true
|
||||
|
||||
EXPOSE 8000
|
||||
ENV DB_HOST=db
|
||||
ENV DB_PORT=5432
|
||||
ENV DB_NAME=rss
|
||||
ENV DB_USER=rss
|
||||
ENV DB_PASS=x
|
||||
|
||||
CMD ["python", "-m", "workers.embeddings_worker"]
|
||||
|
|
|
|||
31
Dockerfile.discovery
Normal file
31
Dockerfile.discovery
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
FROM golang:1.22-alpine AS builder
|
||||
|
||||
ENV GOTOOLCHAIN=auto
|
||||
|
||||
RUN apk add --no-cache git
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY backend/go.mod backend/go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY backend/ ./
|
||||
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/discovery ./cmd/discovery
|
||||
|
||||
FROM alpine:3.19
|
||||
|
||||
RUN apk add --no-cache ca-certificates tzdata
|
||||
|
||||
COPY --from=builder /bin/discovery /bin/discovery
|
||||
|
||||
ENV DB_HOST=db \
|
||||
DB_PORT=5432 \
|
||||
DB_NAME=rss \
|
||||
DB_USER=rss \
|
||||
DB_PASS=rss \
|
||||
DISCOVERY_INTERVAL=900 \
|
||||
DISCOVERY_BATCH=10 \
|
||||
MAX_FEEDS_PER_URL=5
|
||||
|
||||
ENTRYPOINT ["/bin/discovery"]
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
|
||||
|
||||
# Evitar prompts interactivos
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
# Instalar dependencias del sistema
|
||||
RUN apt-get update && apt-get install -y \
|
||||
python3.10 \
|
||||
python3-pip \
|
||||
git \
|
||||
wget \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Crear directorio de trabajo
|
||||
WORKDIR /app
|
||||
|
||||
# Actualizar pip
|
||||
RUN pip3 install --upgrade pip setuptools wheel
|
||||
|
||||
# Instalar dependencias de PyTorch (CUDA 12.1)
|
||||
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
|
||||
|
||||
# Instalar ExLlamaV2
|
||||
RUN pip3 install exllamav2
|
||||
|
||||
# Instalar otras dependencias
|
||||
RUN pip3 install \
|
||||
psycopg2-binary \
|
||||
huggingface-hub \
|
||||
sentencepiece \
|
||||
ninja
|
||||
|
||||
# Instalar python-is-python3 para compatibilidad
|
||||
RUN apt-get update && apt-get install -y python-is-python3 && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copiar código del worker
|
||||
COPY workers/llm_categorizer_worker.py /app/workers/llm_categorizer_worker.py
|
||||
COPY workers/__init__.py /app/workers/__init__.py
|
||||
|
||||
# Crear directorios para modelos y cache
|
||||
RUN mkdir -p /app/models/llm /app/hf_cache
|
||||
|
||||
# Variables de entorno
|
||||
ENV HF_HOME=/app/hf_cache
|
||||
ENV TRANSFORMERS_CACHE=/app/hf_cache
|
||||
|
||||
# Healthcheck opcional
|
||||
HEALTHCHECK --interval=60s --timeout=10s --start-period=120s \
|
||||
CMD python3 -c "import sys; sys.exit(0)" || exit 1
|
||||
|
||||
# Comando por defecto
|
||||
CMD ["python3", "-m", "workers.llm_categorizer_worker"]
|
||||
34
Dockerfile.qdrant
Normal file
34
Dockerfile.qdrant
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
FROM golang:1.22-alpine AS builder
|
||||
|
||||
ENV GOTOOLCHAIN=auto
|
||||
|
||||
RUN apk add --no-cache git
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY backend/go.mod backend/go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY backend/ ./
|
||||
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/qdrant-worker ./cmd/qdrant
|
||||
|
||||
FROM alpine:3.19
|
||||
|
||||
RUN apk add --no-cache ca-certificates tzdata
|
||||
|
||||
COPY --from=builder /bin/qdrant-worker /bin/qdrant-worker
|
||||
|
||||
ENV DB_HOST=db \
|
||||
DB_PORT=5432 \
|
||||
DB_NAME=rss \
|
||||
DB_USER=rss \
|
||||
DB_PASS=rss \
|
||||
QDRANT_HOST=qdrant \
|
||||
QDRANT_PORT=6333 \
|
||||
QDRANT_COLLECTION=news_vectors \
|
||||
OLLAMA_URL=http://ollama:11434 \
|
||||
QDRANT_SLEEP=30 \
|
||||
QDRANT_BATCH=100
|
||||
|
||||
ENTRYPOINT ["/bin/qdrant-worker"]
|
||||
32
Dockerfile.related
Normal file
32
Dockerfile.related
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
FROM golang:1.22-alpine AS builder
|
||||
|
||||
ENV GOTOOLCHAIN=auto
|
||||
|
||||
RUN apk add --no-cache git
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY backend/go.mod backend/go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY backend/ ./
|
||||
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/related ./cmd/related
|
||||
|
||||
FROM alpine:3.19
|
||||
|
||||
RUN apk add --no-cache ca-certificates tzdata
|
||||
|
||||
COPY --from=builder /bin/related /bin/related
|
||||
|
||||
ENV DB_HOST=db \
|
||||
DB_PORT=5432 \
|
||||
DB_NAME=rss \
|
||||
DB_USER=rss \
|
||||
DB_PASS=rss \
|
||||
RELATED_SLEEP=10 \
|
||||
RELATED_BATCH=200 \
|
||||
RELATED_TOPK=10 \
|
||||
EMB_MODEL=mxbai-embed-large
|
||||
|
||||
ENTRYPOINT ["/bin/related"]
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
FROM postgres:18-alpine
|
||||
|
||||
# Copy initialization script
|
||||
COPY init-replica/init-replica.sh /docker-entrypoint-initdb.d/
|
||||
|
||||
# Make script executable
|
||||
RUN chmod +x /docker-entrypoint-initdb.d/init-replica.sh
|
||||
|
||||
# Set environment for replication
|
||||
ENV PRIMARY_HOST=db
|
||||
ENV REPLICATION_USER=replicator
|
||||
ENV REPLICATION_PASSWORD=replica_password
|
||||
23
Dockerfile.scheduler
Normal file
23
Dockerfile.scheduler
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libpq-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir --upgrade pip
|
||||
RUN pip install --no-cache-dir psycopg2-binary langdetect
|
||||
|
||||
COPY workers/translation_scheduler.py ./workers/
|
||||
|
||||
ENV DB_HOST=db
|
||||
ENV DB_PORT=5432
|
||||
ENV DB_NAME=rss
|
||||
ENV DB_USER=rss
|
||||
ENV DB_PASS=x
|
||||
|
||||
CMD ["python", "workers/translation_scheduler.py"]
|
||||
32
Dockerfile.scraper
Normal file
32
Dockerfile.scraper
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
FROM golang:1.22-alpine AS builder
|
||||
|
||||
ENV GOTOOLCHAIN=auto
|
||||
|
||||
RUN apk add --no-cache git
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY backend/go.mod backend/go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY backend/ ./
|
||||
|
||||
RUN go mod tidy
|
||||
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/scraper ./cmd/scraper
|
||||
|
||||
FROM alpine:3.19
|
||||
|
||||
RUN apk add --no-cache ca-certificates tzdata
|
||||
|
||||
COPY --from=builder /bin/scraper /bin/scraper
|
||||
|
||||
ENV DB_HOST=db \
|
||||
DB_PORT=5432 \
|
||||
DB_NAME=rss \
|
||||
DB_USER=rss \
|
||||
DB_PASS=rss \
|
||||
SCRAPER_SLEEP=60 \
|
||||
SCRAPER_BATCH=10
|
||||
|
||||
ENTRYPOINT ["/bin/scraper"]
|
||||
30
Dockerfile.topics
Normal file
30
Dockerfile.topics
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
FROM golang:1.22-alpine AS builder
|
||||
|
||||
ENV GOTOOLCHAIN=auto
|
||||
|
||||
RUN apk add --no-cache git
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY backend/go.mod backend/go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY backend/ ./
|
||||
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/topics ./cmd/topics
|
||||
|
||||
FROM alpine:3.19
|
||||
|
||||
RUN apk add --no-cache ca-certificates tzdata
|
||||
|
||||
COPY --from=builder /bin/topics /bin/topics
|
||||
|
||||
ENV DB_HOST=db \
|
||||
DB_PORT=5432 \
|
||||
DB_NAME=rss \
|
||||
DB_USER=rss \
|
||||
DB_PASS=rss \
|
||||
TOPICS_SLEEP=10 \
|
||||
TOPICS_BATCH=500
|
||||
|
||||
ENTRYPOINT ["/bin/topics"]
|
||||
43
Dockerfile.translator
Normal file
43
Dockerfile.translator
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
FROM python:3.11-slim-bookworm
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
patchelf libpq-dev gcc git curl wget \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
||||
TOKENIZERS_PARALLELISM=false \
|
||||
HF_HOME=/root/.cache/huggingface
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir --upgrade pip
|
||||
|
||||
RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cpu
|
||||
|
||||
RUN pip install --no-cache-dir \
|
||||
ctranslate2==3.24.0 \
|
||||
sentencepiece \
|
||||
transformers==4.36.0 \
|
||||
protobuf==3.20.3 \
|
||||
"numpy<2" \
|
||||
psycopg2-binary \
|
||||
langdetect
|
||||
|
||||
# === ARREGLAR EL EXECUTABLE STACK ===
|
||||
RUN find /usr/local/lib/python3.11/site-packages/ctranslate2* \
|
||||
-name "libctranslate2-*.so.*" -o -name "libctranslate2.so*" | \
|
||||
xargs -I {} patchelf --clear-execstack {} || true
|
||||
|
||||
COPY workers/ ./workers/
|
||||
COPY init-db/ ./init-db/
|
||||
COPY migrations/ ./migrations/
|
||||
|
||||
ENV DB_HOST=db
|
||||
ENV DB_PORT=5432
|
||||
ENV DB_NAME=rss
|
||||
ENV DB_USER=rss
|
||||
ENV DB_PASS=x
|
||||
|
||||
CMD ["python", "-m", "workers.ctranslator_worker"]
|
||||
48
Dockerfile.translator-gpu
Normal file
48
Dockerfile.translator-gpu
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
FROM python:3.11-slim-bookworm
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
patchelf libpq-dev gcc git curl wget \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
||||
TOKENIZERS_PARALLELISM=false \
|
||||
HF_HOME=/root/.cache/huggingface
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir --upgrade pip
|
||||
|
||||
# Install PyTorch with CUDA support (cu118 for broader compatibility)
|
||||
RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu118
|
||||
|
||||
RUN pip install --no-cache-dir \
|
||||
ctranslate2==3.24.0 \
|
||||
sentencepiece \
|
||||
transformers==4.36.0 \
|
||||
protobuf==3.20.3 \
|
||||
"numpy<2" \
|
||||
psycopg2-binary \
|
||||
langdetect
|
||||
|
||||
# Fix executable stack
|
||||
RUN find /usr/local/lib/python3.11/site-packages/ctranslate2* \
|
||||
-name "libctranslate2-*.so.*" -o -name "libctranslate2.so*" | \
|
||||
xargs -I {} patchelf --clear-execstack {} || true
|
||||
|
||||
COPY workers/ ./workers/
|
||||
COPY init-db/ ./init-db/
|
||||
COPY migrations/ ./migrations/
|
||||
|
||||
ENV DB_HOST=db
|
||||
ENV DB_PORT=5432
|
||||
ENV DB_NAME=rss
|
||||
ENV DB_USER=rss
|
||||
ENV DB_PASS=x
|
||||
|
||||
# GPU Configuration - Override with: docker run --gpus all
|
||||
ENV CT2_DEVICE=cuda
|
||||
ENV CT2_COMPUTE_TYPE=float16
|
||||
|
||||
CMD ["python", "-m", "workers.ctranslator_worker"]
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
FROM python:3.10-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies for lxml and general build
|
||||
RUN apt-get update && apt-get install -y \
|
||||
gcc \
|
||||
libxml2-dev \
|
||||
libxslt-dev \
|
||||
python3-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install python dependencies
|
||||
RUN pip install --no-cache-dir \
|
||||
psycopg2-binary \
|
||||
requests \
|
||||
newspaper3k \
|
||||
lxml_html_clean \
|
||||
python-dotenv
|
||||
|
||||
# Copy application code
|
||||
COPY . /app
|
||||
|
||||
# Set environment
|
||||
ENV PYTHONPATH=/app
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
# Run the worker daemon
|
||||
CMD ["python", "-m", "workers.url_worker_daemon"]
|
||||
31
Dockerfile.wiki
Normal file
31
Dockerfile.wiki
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
FROM golang:alpine AS builder
|
||||
|
||||
ENV GOTOOLCHAIN=auto
|
||||
|
||||
RUN apk add --no-cache git
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY backend/go.mod backend/go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY backend/ ./
|
||||
|
||||
RUN go mod tidy
|
||||
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/wiki_worker ./cmd/wiki_worker
|
||||
|
||||
FROM alpine:3.19
|
||||
|
||||
RUN apk add --no-cache ca-certificates tzdata
|
||||
|
||||
COPY --from=builder /bin/wiki_worker /bin/wiki_worker
|
||||
|
||||
ENV DB_HOST=db \
|
||||
DB_PORT=5432 \
|
||||
DB_NAME=rss \
|
||||
DB_USER=rss \
|
||||
DB_PASS=rss \
|
||||
WIKI_SLEEP=10
|
||||
|
||||
ENTRYPOINT ["/bin/wiki_worker"]
|
||||
89
Makefile
Normal file
89
Makefile
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
# RSS2 Workers Makefile
|
||||
|
||||
.PHONY: all build clean deps ingestor scraper discovery topics related qdrant server
|
||||
|
||||
# Binary output directory
|
||||
BIN_DIR := bin
|
||||
|
||||
# Main binaries
|
||||
SERVER := $(BIN_DIR)/server
|
||||
INGESTOR := $(BIN_DIR)/rss-ingestor
|
||||
SCRAPER := $(BIN_DIR)/scraper
|
||||
DISCOVERY := $(BIN_DIR)/discovery
|
||||
TOPICS := $(BIN_DIR)/topics
|
||||
RELATED := $(BIN_DIR)/related
|
||||
QDRANT := $(BIN_DIR)/qdrant-worker
|
||||
|
||||
all: deps build
|
||||
|
||||
deps:
|
||||
cd backend && go mod download
|
||||
cd backend && go mod tidy
|
||||
|
||||
# Build all workers
|
||||
build: ingestor scraper discovery topics related qdrant server
|
||||
|
||||
# Ingestor
|
||||
ingestor:
|
||||
cd rss-ingestor-go && go build -o ../$(INGESTOR) .
|
||||
|
||||
# Server
|
||||
server:
|
||||
cd backend && go build -o $(SERVER) ./cmd/server
|
||||
|
||||
# Workers
|
||||
scraper:
|
||||
cd backend && go build -o $(SCRAPER) ./cmd/scraper
|
||||
|
||||
discovery:
|
||||
cd backend && go build -o $(DISCOVERY) ./cmd/discovery
|
||||
|
||||
topics:
|
||||
cd backend && go build -o $(TOPICS) ./cmd/topics
|
||||
|
||||
related:
|
||||
cd backend && go build -o $(RELATED) ./cmd/related
|
||||
|
||||
qdrant:
|
||||
cd backend && go build -o $(QDRANT) ./cmd/qdrant
|
||||
|
||||
# Clean
|
||||
clean:
|
||||
rm -rf $(BIN_DIR)
|
||||
cd backend && go clean
|
||||
|
||||
# Run workers locally (requires DB and services)
|
||||
run-scraper:
|
||||
DB_HOST=localhost DB_PORT=5432 DB_NAME=rss DB_USER=rss DB_PASS=rss $(SCRAPER)
|
||||
|
||||
run-discovery:
|
||||
DB_HOST=localhost DB_PORT=5432 DB_NAME=rss DB_USER=rss DB_PASS=rss $(DISCOVERY)
|
||||
|
||||
run-topics:
|
||||
DB_HOST=localhost DB_PORT=5432 DB_NAME=rss DB_USER=rss DB_PASS=rss $(TOPICS)
|
||||
|
||||
run-related:
|
||||
DB_HOST=localhost DB_PORT=5432 DB_NAME=rss DB_USER=rss DB_PASS=rss RELATED_SLEEP=10 $(RELATED)
|
||||
|
||||
run-qdrant:
|
||||
DB_HOST=localhost DB_PORT=5432 DB_NAME=rss DB_USER=rss DB_PASS=rss \
|
||||
QDRANT_HOST=localhost QDRANT_PORT=6333 OLLAMA_URL=http://localhost:11434 $(QDRANT)
|
||||
|
||||
# Docker builds
|
||||
docker-build:
|
||||
docker build -t rss2-ingestor -f rss-ingestor-go/Dockerfile ./rss-ingestor-go
|
||||
docker build -t rss2-server -f backend/Dockerfile ./backend
|
||||
docker build -t rss2-scraper -f Dockerfile.scraper ./backend
|
||||
docker build -t rss2-discovery -f Dockerfile.discovery ./backend
|
||||
docker build -t rss2-topics -f Dockerfile.topics ./backend
|
||||
docker build -t rss2-related -f Dockerfile.related ./backend
|
||||
docker build -t rss2-qdrant -f Dockerfile.qdrant ./backend
|
||||
docker build -t rss2-langdetect -f Dockerfile .
|
||||
docker build -t rss2-scheduler -f Dockerfile.scheduler .
|
||||
docker build -t rss2-translator -f Dockerfile.translator .
|
||||
docker build -t rss2-translator-gpu -f Dockerfile.translator-gpu .
|
||||
docker build -t rss2-embeddings -f Dockerfile.embeddings_worker .
|
||||
docker build -t rss2-ner -f Dockerfile .
|
||||
docker build -t rss2-llm-categorizer -f Dockerfile.llm_worker .
|
||||
docker build -t rss2-frontend -f frontend/Dockerfile ./frontend
|
||||
docker build -t rss2-nginx -f Dockerfile.nginx .
|
||||
175
README.md
175
README.md
|
|
@ -1,135 +1,124 @@
|
|||
# RSS2 - Plataforma de Inteligencia de Noticias con IA 🚀
|
||||
# RSS2 - AI-Powered News Intelligence Platform
|
||||
|
||||
RSS2 es una plataforma avanzada de agregación, traducción, análisis y vectorización de noticias diseñada para transformar flujos masivos de información en inteligencia accionable. Utiliza una arquitectura de **microservicios híbrida (Go + Python)** con modelos de **Inteligencia Artificial** de vanguardia para ofrecer búsqueda semántica, clasificación inteligente y automatización de contenidos.
|
||||
RSS2 es una plataforma avanzada de agregación, traducción, análisis y vectorización de noticias, diseñada para transformar flujos masivos de información en inteligencia accionable. Utiliza una arquitectura híbrida de microservicios (Go + Python) integrada con modelos de inteligencia artificial de última generación para ofrecer búsqueda semántica, clasificación inteligente y automatización de contenidos.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Características Principales
|
||||
## 🚀 Capacidades Principales
|
||||
|
||||
* 🤖 **Categorización Inteligente (LLM)**: Clasificación de noticias mediante **Mistral-7B** local (ExLlamaV2/GPTQ), procesando lotes de alta velocidad.
|
||||
* 🔍 **Búsqueda Semántica**: Motor vectorial **Qdrant** para encontrar noticias por contexto y significado, no solo por palabras clave.
|
||||
* 🌍 **Traducción Neuronal de Alta Calidad**: Integración con **NLLB-200** para traducir noticias de múltiples idiomas al español con validación post-proceso para evitar repeticiones.
|
||||
* 📊 **Inteligencia de Entidades**: Extracción automática y normalización de Personas, Organizaciones y Lugares para análisis de tendencias.
|
||||
* 📺 **Automatización de Video**: Generación automática de noticias en formato video y gestión de "parrillas" de programación.
|
||||
* 📄 **Exportación Inteligente**: Generación de informes en **PDF** con diseño profesional y limpieza de ruido de red.
|
||||
* 🔔 **Notificaciones en Tiempo Real**: API de monitoreo para detectar eventos importantes al instante.
|
||||
* ⭐ **Gestión de Favoritos**: Sistema robusto para guardar y organizar noticias, compatible con usuarios y sesiones temporales.
|
||||
* **Enriquecimiento con Wikipedia**: Sistema automatizado que detecta personas y organizaciones, descarga sus biografías e imágenes oficiales de Wikipedia para mostrarlas en tooltips interactivos con avatares circulares.
|
||||
* **Categorización Inteligente (LLM)**: Clasificación de noticias mediante una instancia local de Mistral-7B / Llama-3 (vía Ollama), procesando contenido en tiempo real.
|
||||
* **Búsqueda Semántica**: Motor vectorial Qdrant para descubrir noticias por contexto y significado, yendo más allá de las palabras clave tradicionales.
|
||||
* **Traducción Neuronal de Alta Calidad**: Integración de NLLB-200 (vía CTranslate2) para traducir noticias de múltiples idiomas al español con precisión profesional.
|
||||
* **Inteligencia de Entidades (NER)**: Extracción y normalización automática de Personas, Organizaciones y Lugares para análisis de tendencias y mapeo de relaciones.
|
||||
* **Búsqueda de Noticias Relacionadas**: Algoritmos de similitud que agrupan noticias sobre el mismo tema automáticamente.
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Arquitectura de Servicios (Docker)
|
||||
|
||||
El sistema está orquestado mediante Docker Compose, garantizando aislamiento y escalabilidad.
|
||||
El sistema se orquestra mediante Docker Compose y se divide en capas especializadas:
|
||||
|
||||
### 🌐 Core & Acceso (Frontend)
|
||||
### Capa de Acceso y API
|
||||
| Servicio | Tecnología | Descripción |
|
||||
|----------|------------|-------------|
|
||||
|---------|------------|-------------|
|
||||
| **`nginx`** | Nginx Alpine | Gateway y Proxy Inverso (Puerto **8001**). |
|
||||
| **`rss2_web`** | Flask + Gunicorn | API principal e Interfaz Web de usuario. |
|
||||
| **`rss2_frontend`** | React + Vite | Interfaz web de usuario moderna y responsiva. |
|
||||
| **`backend-go`** | Go + Gin | API REST principal y gestión de lógica de negocio. |
|
||||
|
||||
### 📥 Ingesta y Descubrimiento (Backend)
|
||||
### Ingesta y Descubrimiento (Go)
|
||||
| Servicio | Tecnología | Descripción |
|
||||
|----------|------------|-------------|
|
||||
| **`rss-ingestor-go`** | **Go** | Crawler de ultra-alto rendimiento (Cientos de feeds/min). |
|
||||
| **`url-worker`** | Python | Scraper profundo con limpieza de HTML via `newspaper3k`. |
|
||||
| **`url-discovery`** | Python | Agente autónomo para el descubrimiento de nuevos feeds. |
|
||||
|---------|------------|-------------|
|
||||
| **`rss-ingestor-go`** | Go | Crawler de alto rendimiento para feeds RSS. |
|
||||
| **`scraper`** | Go | Scraper profundo con sanitización de HTML y extracción de texto. |
|
||||
| **`discovery`** | Go | Agente autónomo para descubrir nuevos feeds a partir de URLs. |
|
||||
|
||||
### 🧠 Procesamiento de IA (Background Workers)
|
||||
| Servicio | Modelo / Función | Descripción |
|
||||
|----------|-------------------|-------------|
|
||||
| **`llm-categorizer`** | **Mistral-7B** | Categorización contextual avanzada (15 categorías). |
|
||||
| **`translator`** (x3) | **NLLB-200** | Traducción neural masiva escalada horizontalmente. |
|
||||
| **`embeddings`** | **S-Transformers** | Conversión de texto a vectores para búsqueda semántica. |
|
||||
| **`ner`** | **Spacy/BERT** | Extracción de entidades (Personas, Lugares, Orgs). |
|
||||
| **`cluster` & `related`**| Algoritmos Propios | Agrupación de eventos y detección de noticias relacionadas. |
|
||||
### Procesamiento de Datos e IA (Go & Python)
|
||||
| Servicio | Tecnología | Descripción |
|
||||
|---------|------------|-------------|
|
||||
| **`translator`** | NLLB-200 (CPU) | Traducción neuronal optimizada con CTranslate2. |
|
||||
| **`translator-gpu`**| NLLB-200 (GPU) | Traducción acelerada por hardware (CUDA). |
|
||||
| **`wiki-worker`** | Go | **[NUEVO]** Integración con Wikipedia y gestión de imágenes locales. |
|
||||
| **`embeddings`** | S-Transformers | Generación de vectores para búsqueda semántica. |
|
||||
| **`ner`** | Spacy / BERT | Reconocimiento de entidades nombradas (NER). |
|
||||
| **`llm-categorizer`**| Ollama / Mistral | Clasificación avanzada mediante modelos de lenguaje. |
|
||||
| **`topics`** | Go | Matcher automático de países y temas predefinidos. |
|
||||
| **`related`** | Go | Motor de detección de noticias relacionadas. |
|
||||
|
||||
### 💾 Almacenamiento y Datos
|
||||
| Servicio | Rol | Descripción |
|
||||
|----------|-----|-------------|
|
||||
| **`db`** | **PostgreSQL 18** | Almacenamiento relacional principal y metadatos. |
|
||||
| **`qdrant`** | **Vector DB** | Motor de búsqueda por similitud de alta velocidad. |
|
||||
| **`redis`** | **Redis 7** | Gestión de colas de tareas (Celery-style) y caché. |
|
||||
### Capa de Almacenamiento
|
||||
| Servicio | Tecnología | Descripción |
|
||||
|---------|------------|-------------|
|
||||
| **`db`** | PostgreSQL 18 | Base de datos relacional principal. |
|
||||
| **`qdrant`** | Qdrant | Base de datos vectorial para búsqueda por similitud. |
|
||||
| **`redis`** | Redis 7 | Colas de mensajes y caché de alto desempeño. |
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Guía de Inicio Rápido
|
||||
## ⚙️ Guía de Configuración
|
||||
|
||||
### 1. Preparación
|
||||
### 1. Requisitos de Hardware
|
||||
* **Modo Básico (CPU)**: 4+ Cores CPU, 8GB RAM.
|
||||
* **Modo Avanzado (IA)**: NVIDIA GPU con 8GB+ VRAM (mínimo recomendado para LLM y Traducción GPU).
|
||||
|
||||
### 2. Instalación Rápida
|
||||
```bash
|
||||
git clone <repo>
|
||||
git clone <repo_url>
|
||||
cd rss2
|
||||
./generate_secure_credentials.sh # Genera .env seguro y contraseñas robustas
|
||||
cp .env.example .env
|
||||
# Edita .env con tus credenciales
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### 2. Configuración de Modelos (IA)
|
||||
Para activar la categorización inteligente y traducción, descarga los modelos:
|
||||
```bash
|
||||
./scripts/download_llm_model.sh # Recomendado: Mistral-7B GPTQ
|
||||
python3 scripts/download_models.py # Modelos NLLB y Embeddings
|
||||
```
|
||||
### 3. Escalado de Workers (¡Importante!)
|
||||
Para aumentar la velocidad de procesamiento (especialmente la traducción), puedes escalar los workers:
|
||||
|
||||
### 3. Arranque del Sistema
|
||||
```bash
|
||||
./start_docker.sh # Script de inicio con verificación de dependencias
|
||||
# Ejecutar 4 traductores en paralelo
|
||||
docker compose up -d --scale translator=4
|
||||
|
||||
# Si usas GPU y tienes capacidad
|
||||
docker compose up -d --scale translator-gpu=2
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📖 Documentación Especializada
|
||||
## 🛡️ Administración y Mantenimiento
|
||||
|
||||
Consulte nuestras guías detalladas para configuraciones específicas:
|
||||
### Copias de Seguridad (Backups)
|
||||
Desde el panel de Administración (`/admin/settings`), puedes realizar:
|
||||
* **Backup Completo**: Volcado SQL de toda la base de datos.
|
||||
* **Backup de Noticias (ZIP)**: **[NUEVO]** Genera un archivo comprimido que incluye las tablas de noticias, traducciones y todas sus etiquetas. Ideal para migraciones de contenido.
|
||||
|
||||
* 📘 **[QUICKSTART_LLM.md](QUICKSTART_LLM.md)**: Guía rápida para el categorizador Mistral-7B.
|
||||
* 🚀 **[DEPLOY.md](DEPLOY.md)**: Guía detallada de despliegue en nuevos servidores.
|
||||
* 📊 **[TRANSLATION_FIX_SUMMARY.md](TRANSLATION_FIX_SUMMARY.md)**: Resumen de mejoras en calidad de traducción.
|
||||
* 🛡️ **[SECURITY_GUIDE.md](SECURITY_GUIDE.md)**: Manual avanzado de seguridad y endurecimiento.
|
||||
* 🏗️ **[QDRANT_SETUP.md](QDRANT_SETUP.md)**: Configuración y migración de la base de datos vectorial.
|
||||
* 📑 **[FUNCIONES_DE_ARCHIVOS.md](FUNCIONES_DE_ARCHIVOS.md)**: Inventario detallado de la lógica del proyecto.
|
||||
### Variables de Entorno Clave (`.env`)
|
||||
| Variable | Descripción |
|
||||
|----------|-------------|
|
||||
| `WIKI_SLEEP` | Tiempo de espera entre peticiones a Wikipedia (evita bloqueos). |
|
||||
| `SCHEDULER_BATCH`| Cantidad de noticias a enviar a traducir por ciclo. |
|
||||
| `TARGET_LANGS` | Idiomas destino (ej: `es`). |
|
||||
| `OLLAMA_HOST` | Dirección del servidor Ollama para categorización. |
|
||||
|
||||
---
|
||||
|
||||
## 💻 Requisitos de Hardware
|
||||
## 📖 Documentación de la API (Campos Wikipedia)
|
||||
|
||||
Para un rendimiento óptimo, se recomienda:
|
||||
* **GPU**: NVIDIA (mínimo 12GB VRAM para Mistral-7B y traducción simultánea).
|
||||
* **Drivers**: NVIDIA Container Toolkit instalado.
|
||||
* **AllTalk TTS**: Instancia activa (puerto 7851) para la generación de audio en videos.
|
||||
Las respuestas de noticias ahora incluyen el objeto `entities` enriquecido:
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Operaciones y Mantenimiento
|
||||
|
||||
### Verificación de Calidad de Traducción
|
||||
El sistema incluye herramientas para asegurar la calidad de los datos:
|
||||
```bash
|
||||
# Monitorear calidad en tiempo real
|
||||
docker exec rss2_web python3 scripts/monitor_translation_quality.py --watch
|
||||
|
||||
# Limpiar automáticamente traducciones defectuosas
|
||||
docker exec rss2_web python3 scripts/clean_repetitive_translations.py
|
||||
```
|
||||
|
||||
### Gestión de Contenidos
|
||||
```bash
|
||||
# Generar videos de noticias destacadas
|
||||
docker exec rss2_web python3 scripts/generar_videos_noticias.py
|
||||
|
||||
# Iniciar migración a Qdrant (Vectores)
|
||||
docker exec rss2_web python3 scripts/migrate_to_qdrant.py
|
||||
```
|
||||
|
||||
### Diagnóstico de Ingesta (Feeds)
|
||||
```bash
|
||||
docker exec rss2_web python3 scripts/diagnose_rss.py --url <FEED_URL>
|
||||
```json
|
||||
{
|
||||
"id": 67449,
|
||||
"titulo": "...",
|
||||
"entities": [
|
||||
{
|
||||
"valor": "Apple",
|
||||
"tipo": "organizacion",
|
||||
"wiki_summary": "Apple Inc. es una empresa estadounidense...",
|
||||
"wiki_url": "https://es.wikipedia.org/wiki/Apple",
|
||||
"image_path": "/api/wiki-images/wiki_5723.png"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📊 Observabilidad
|
||||
Acceso a métricas de rendimiento (Solo vía Localhost/Tunel):
|
||||
* **Grafana**: [http://localhost:3001](http://localhost:3001) (Admin/Pass en `.env`)
|
||||
* **Proxy Nginx**: [http://localhost:8001](http://localhost:8001)
|
||||
|
||||
---
|
||||
|
||||
**RSS2** - *Transformando noticias en inteligencia con IA Local.*
|
||||
**RSS2** - *Transformando noticias en inteligencia con IA localizada.*
|
||||
|
|
|
|||
61
app.py
61
app.py
|
|
@ -1,61 +0,0 @@
|
|||
from flask import Flask
|
||||
|
||||
from config import SECRET_KEY
|
||||
from utils import safe_html, format_date, country_flag
|
||||
|
||||
from routers.home import home_bp
|
||||
from routers.feeds import feeds_bp
|
||||
from routers.urls import urls_bp
|
||||
from routers.noticia import noticia_bp
|
||||
from routers.backup import backup_bp
|
||||
from routers.config import config_bp
|
||||
from routers.favoritos import favoritos_bp
|
||||
from routers.search import search_bp
|
||||
from routers.rss import rss_bp
|
||||
from routers.stats import stats_bp
|
||||
from routers.pdf import pdf_bp
|
||||
from routers.notifications import notifications_bp
|
||||
from routers.auth import auth_bp
|
||||
from routers.account import account_bp
|
||||
from routers.parrillas import parrillas_bp
|
||||
|
||||
|
||||
def create_app() -> Flask:
|
||||
app = Flask(__name__)
|
||||
app.config["SECRET_KEY"] = SECRET_KEY
|
||||
|
||||
app.jinja_env.filters["safe_html"] = safe_html
|
||||
app.jinja_env.filters["format_date"] = format_date
|
||||
app.jinja_env.filters["country_flag"] = country_flag
|
||||
|
||||
app.register_blueprint(home_bp)
|
||||
app.register_blueprint(feeds_bp)
|
||||
app.register_blueprint(urls_bp)
|
||||
app.register_blueprint(noticia_bp)
|
||||
app.register_blueprint(backup_bp)
|
||||
app.register_blueprint(config_bp)
|
||||
app.register_blueprint(favoritos_bp)
|
||||
app.register_blueprint(search_bp)
|
||||
app.register_blueprint(rss_bp)
|
||||
app.register_blueprint(stats_bp)
|
||||
app.register_blueprint(pdf_bp)
|
||||
app.register_blueprint(notifications_bp)
|
||||
|
||||
from routers.conflicts import conflicts_bp
|
||||
from routers.topics import topics_bp
|
||||
|
||||
app.register_blueprint(conflicts_bp)
|
||||
app.register_blueprint(topics_bp)
|
||||
app.register_blueprint(auth_bp)
|
||||
app.register_blueprint(account_bp)
|
||||
app.register_blueprint(parrillas_bp)
|
||||
|
||||
|
||||
return app
|
||||
|
||||
|
||||
app = create_app()
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(host="0.0.0.0", port=8001, debug=True)
|
||||
|
||||
24
backend/Dockerfile
Normal file
24
backend/Dockerfile
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
FROM golang:1.23 AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && apt-get install -y gcc musl-dev git
|
||||
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o /server ./cmd/server
|
||||
|
||||
FROM alpine:3.19
|
||||
|
||||
RUN apk add --no-cache ca-certificates tzdata postgresql-client
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY --from=builder /server .
|
||||
|
||||
EXPOSE 8080
|
||||
|
||||
CMD ["./server"]
|
||||
468
backend/cmd/discovery/main.go
Normal file
468
backend/cmd/discovery/main.go
Normal file
|
|
@ -0,0 +1,468 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
"github.com/mmcdole/gofeed"
|
||||
"github.com/rss2/backend/internal/workers"
|
||||
)
|
||||
|
||||
var (
|
||||
logger *log.Logger
|
||||
pool *workers.Config
|
||||
dbPool *pgxpool.Pool
|
||||
sleepSec = 900 // 15 minutes
|
||||
batchSize = 10
|
||||
)
|
||||
|
||||
type URLSource struct {
|
||||
ID int64
|
||||
Nombre string
|
||||
URL string
|
||||
CategoriaID *int64
|
||||
PaisID *int64
|
||||
Idioma *string
|
||||
}
|
||||
|
||||
func init() {
|
||||
logger = log.New(os.Stdout, "[DISCOVERY] ", log.LstdFlags)
|
||||
}
|
||||
|
||||
func loadConfig() {
|
||||
sleepSec = getEnvInt("DISCOVERY_INTERVAL", 900)
|
||||
batchSize = getEnvInt("DISCOVERY_BATCH", 10)
|
||||
}
|
||||
|
||||
func getEnvInt(key string, defaultValue int) int {
|
||||
if value := os.Getenv(key); value != "" {
|
||||
if intVal, err := strconv.Atoi(value); err == nil {
|
||||
return intVal
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
func getPendingURLs(ctx context.Context) ([]URLSource, error) {
|
||||
rows, err := dbPool.Query(ctx, `
|
||||
SELECT id, nombre, url, categoria_id, pais_id, idioma
|
||||
FROM fuentes_url
|
||||
WHERE active = TRUE
|
||||
ORDER BY
|
||||
CASE
|
||||
WHEN last_check IS NULL THEN 1
|
||||
WHEN last_status = 'error' THEN 2
|
||||
WHEN last_status = 'no_feeds' THEN 3
|
||||
ELSE 4
|
||||
END,
|
||||
last_check ASC NULLS FIRST
|
||||
LIMIT $1
|
||||
`, batchSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var sources []URLSource
|
||||
for rows.Next() {
|
||||
var s URLSource
|
||||
if err := rows.Scan(&s.ID, &s.Nombre, &s.URL, &s.CategoriaID, &s.PaisID, &s.Idioma); err != nil {
|
||||
continue
|
||||
}
|
||||
sources = append(sources, s)
|
||||
}
|
||||
return sources, nil
|
||||
}
|
||||
|
||||
func updateURLStatus(ctx context.Context, urlID int64, status, message string, httpCode int) error {
|
||||
_, err := dbPool.Exec(ctx, `
|
||||
UPDATE fuentes_url
|
||||
SET last_check = NOW(),
|
||||
last_status = $1,
|
||||
status_message = $2,
|
||||
last_http_code = $3
|
||||
WHERE id = $4
|
||||
`, status, message, httpCode, urlID)
|
||||
return err
|
||||
}
|
||||
|
||||
func discoverFeeds(pageURL string) ([]string, error) {
|
||||
client := &http.Client{
|
||||
Timeout: 15 * time.Second,
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("GET", pageURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; RSS2Bot/1.0)")
|
||||
req.Header.Set("Accept", "application/rss+xml, application/atom+xml, application/xml, text/xml, text/html")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Try to parse as feed first
|
||||
parser := gofeed.NewParser()
|
||||
feed, err := parser.Parse(resp.Body)
|
||||
if err == nil && feed != nil && len(feed.Items) > 0 {
|
||||
// It's a valid feed
|
||||
return []string{pageURL}, nil
|
||||
}
|
||||
|
||||
// If not a feed, try to find feeds in HTML
|
||||
return findFeedLinksInHTML(pageURL)
|
||||
}
|
||||
|
||||
func findFeedLinksInHTML(baseURL string) ([]string, error) {
|
||||
// Simple feed link finder - returns empty for now
|
||||
// In production, use goquery to parse HTML and find RSS/Atom links
|
||||
return []string{}, nil
|
||||
}
|
||||
|
||||
func parseFeed(feedURL string) (*gofeed.Feed, error) {
|
||||
client := &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("GET", feedURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; RSS2Bot/1.0)")
|
||||
req.Header.Set("Accept", "application/rss+xml, application/atom+xml, application/xml, text/xml")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
parser := gofeed.NewParser()
|
||||
return parser.Parse(resp.Body)
|
||||
}
|
||||
|
||||
func getFeedMetadata(feedURL string) (title, description, language string, entryCount int, err error) {
|
||||
feed, err := parseFeed(feedURL)
|
||||
if err != nil {
|
||||
return "", "", "", 0, err
|
||||
}
|
||||
|
||||
title = feed.Title
|
||||
if title == "" {
|
||||
title = "Feed sin título"
|
||||
}
|
||||
|
||||
description = feed.Description
|
||||
if len(description) > 500 {
|
||||
description = description[:500]
|
||||
}
|
||||
|
||||
language = feed.Language
|
||||
entryCount = len(feed.Items)
|
||||
|
||||
return title, description, language, entryCount, nil
|
||||
}
|
||||
|
||||
func analyzeFeed(title, url, description string) (country, category string) {
|
||||
// Simple heuristics - in production use ML or API
|
||||
lowerTitle := strings.ToLower(title)
|
||||
lowerDesc := strings.ToLower(description)
|
||||
combined := lowerTitle + " " + lowerDesc
|
||||
|
||||
// Detect country
|
||||
countries := map[string][]string{
|
||||
"España": {"españa", "español", "madrid", "barcelona"},
|
||||
"Argentina": {"argentino", "buenos aires"},
|
||||
"México": {"méxico", "mexicano", "cdmx", "ciudad de méxico"},
|
||||
"Colombia": {"colombiano", "bogotá"},
|
||||
"Chile": {"chileno", "santiago"},
|
||||
"Perú": {"peruano", "lima"},
|
||||
"EE.UU.": {"estados unidos", "washington", "trump", "biden"},
|
||||
"Reino Unido": {"reino unido", "londres", "uk"},
|
||||
"Francia": {"francia", "parís"},
|
||||
"Alemania": {"alemania", "berlín"},
|
||||
}
|
||||
|
||||
for country, keywords := range countries {
|
||||
for _, kw := range keywords {
|
||||
if strings.Contains(combined, kw) {
|
||||
return country, ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return "", ""
|
||||
}
|
||||
|
||||
func getCountryIDByName(ctx context.Context, countryName string) (*int64, error) {
|
||||
var id int64
|
||||
err := dbPool.QueryRow(ctx, "SELECT id FROM paises WHERE LOWER(nombre) = LOWER($1)", countryName).Scan(&id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &id, nil
|
||||
}
|
||||
|
||||
func getCategoryIDByName(ctx context.Context, categoryName string) (*int64, error) {
|
||||
var id int64
|
||||
err := dbPool.QueryRow(ctx, "SELECT id FROM categorias WHERE LOWER(nombre) = LOWER($1)", categoryName).Scan(&id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &id, nil
|
||||
}
|
||||
|
||||
func createPendingFeed(ctx context.Context, fuenteURLID int64, feedURL string, metadata map[string]interface{}) error {
|
||||
feedTitle := metadata["title"].(string)
|
||||
if feedTitle == "" {
|
||||
feedTitle = "Feed sin título"
|
||||
}
|
||||
|
||||
description := ""
|
||||
if d, ok := metadata["description"].(string); ok {
|
||||
description = d
|
||||
}
|
||||
|
||||
language := ""
|
||||
if l, ok := metadata["language"].(string); ok {
|
||||
language = l
|
||||
}
|
||||
|
||||
entryCount := 0
|
||||
if c, ok := metadata["entry_count"].(int); ok {
|
||||
entryCount = c
|
||||
}
|
||||
|
||||
detectedCountry := ""
|
||||
if dc, ok := metadata["detected_country"].(string); ok {
|
||||
detectedCountry = dc
|
||||
}
|
||||
|
||||
var detectedCountryID *int64
|
||||
if detectedCountry != "" {
|
||||
if cid, err := getCountryIDByName(ctx, detectedCountry); err == nil {
|
||||
detectedCountryID = cid
|
||||
}
|
||||
}
|
||||
|
||||
suggestedCategory := ""
|
||||
if sc, ok := metadata["suggested_category"].(string); ok {
|
||||
suggestedCategory = sc
|
||||
}
|
||||
|
||||
var suggestedCategoryID *int64
|
||||
if suggestedCategory != "" {
|
||||
if caid, err := getCategoryIDByName(ctx, suggestedCategory); err == nil {
|
||||
suggestedCategoryID = caid
|
||||
}
|
||||
}
|
||||
|
||||
_, err := dbPool.Exec(ctx, `
|
||||
INSERT INTO feeds_pending (
|
||||
fuente_url_id, feed_url, feed_title, feed_description,
|
||||
feed_language, feed_type, entry_count,
|
||||
detected_country_id, suggested_categoria_id,
|
||||
discovered_at
|
||||
)
|
||||
VALUES ($1, $2, $3, $4, $5, 'rss', $6, $7, $8, NOW())
|
||||
ON CONFLICT (feed_url) DO UPDATE
|
||||
SET feed_title = EXCLUDED.feed_title,
|
||||
discovered_at = NOW()
|
||||
`, fuenteURLID, feedURL, feedTitle, description, language, entryCount, detectedCountryID, suggestedCategoryID)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func createFeedDirectly(ctx context.Context, feedURL string, fuenteURLID *int64, categoriaID, paisID *int64, idioma *string) (bool, error) {
|
||||
title, description, language, _, err := getFeedMetadata(feedURL)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if language == "" && idioma != nil {
|
||||
language = *idioma
|
||||
}
|
||||
|
||||
var feedID int64
|
||||
err = dbPool.QueryRow(ctx, `
|
||||
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma, fuente_url_id, activo)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, TRUE)
|
||||
ON CONFLICT (url) DO NOTHING
|
||||
RETURNING id
|
||||
`, title, description, feedURL, categoriaID, paisID, language, fuenteURLID).Scan(&feedID)
|
||||
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return feedID > 0, nil
|
||||
}
|
||||
|
||||
func processURLSource(ctx context.Context, source URLSource) {
|
||||
logger.Printf("Processing: %s (%s)", source.Nombre, source.URL)
|
||||
|
||||
// Try to find feeds on this URL
|
||||
feeds, err := discoverFeeds(source.URL)
|
||||
if err != nil {
|
||||
logger.Printf("Error discovering feeds: %v", err)
|
||||
updateURLStatus(ctx, source.ID, "error", err.Error()[:200], 0)
|
||||
return
|
||||
}
|
||||
|
||||
if len(feeds) == 0 {
|
||||
logger.Printf("No feeds found for: %s", source.URL)
|
||||
updateURLStatus(ctx, source.ID, "no_feeds", "No feeds found", 200)
|
||||
return
|
||||
}
|
||||
|
||||
logger.Printf("Found %d feeds for %s", len(feeds), source.URL)
|
||||
|
||||
maxFeeds := getEnvInt("MAX_FEEDS_PER_URL", 5)
|
||||
if len(feeds) > maxFeeds {
|
||||
feeds = feeds[:maxFeeds]
|
||||
}
|
||||
|
||||
autoApprove := source.CategoriaID != nil && source.PaisID != nil
|
||||
|
||||
created := 0
|
||||
pending := 0
|
||||
existing := 0
|
||||
errors := 0
|
||||
|
||||
for _, feedURL := range feeds {
|
||||
// Get feed metadata
|
||||
title, description, language, entryCount, err := getFeedMetadata(feedURL)
|
||||
if err != nil {
|
||||
logger.Printf("Error parsing feed %s: %v", feedURL, err)
|
||||
errors++
|
||||
continue
|
||||
}
|
||||
|
||||
// Analyze for country/category
|
||||
detectedCountry, suggestedCategory := analyzeFeed(title, feedURL, description)
|
||||
|
||||
metadata := map[string]interface{}{
|
||||
"title": title,
|
||||
"description": description,
|
||||
"language": language,
|
||||
"entry_count": entryCount,
|
||||
"detected_country": detectedCountry,
|
||||
"suggested_category": suggestedCategory,
|
||||
}
|
||||
|
||||
if !autoApprove {
|
||||
// Create pending feed for review
|
||||
if err := createPendingFeed(ctx, source.ID, feedURL, metadata); err != nil {
|
||||
logger.Printf("Error creating pending feed: %v", err)
|
||||
errors++
|
||||
} else {
|
||||
pending++
|
||||
}
|
||||
} else {
|
||||
// Create feed directly
|
||||
createdFeed, err := createFeedDirectly(ctx, feedURL, &source.ID, source.CategoriaID, source.PaisID, source.Idioma)
|
||||
if err != nil {
|
||||
logger.Printf("Error creating feed: %v", err)
|
||||
errors++
|
||||
} else if createdFeed {
|
||||
created++
|
||||
} else {
|
||||
existing++
|
||||
}
|
||||
}
|
||||
|
||||
time.Sleep(1 * time.Second) // Rate limiting
|
||||
}
|
||||
|
||||
// Update status
|
||||
var status string
|
||||
var message string
|
||||
if created > 0 || pending > 0 {
|
||||
status = "success"
|
||||
parts := []string{}
|
||||
if created > 0 {
|
||||
parts = append(parts, fmt.Sprintf("%d created", created))
|
||||
}
|
||||
if pending > 0 {
|
||||
parts = append(parts, fmt.Sprintf("%d pending", pending))
|
||||
}
|
||||
message = strings.Join(parts, ", ")
|
||||
} else if existing > 0 {
|
||||
status = "existing"
|
||||
message = fmt.Sprintf("%d already existed", existing)
|
||||
} else {
|
||||
status = "error"
|
||||
message = fmt.Sprintf("%d errors", errors)
|
||||
}
|
||||
|
||||
updateURLStatus(ctx, source.ID, status, message, 200)
|
||||
logger.Printf("Processed %s: created=%d, pending=%d, existing=%d, errors=%d",
|
||||
source.URL, created, pending, existing, errors)
|
||||
}
|
||||
|
||||
func main() {
|
||||
loadConfig()
|
||||
logger.Println("Starting RSS Discovery Worker")
|
||||
|
||||
cfg := workers.LoadDBConfig()
|
||||
if err := workers.Connect(cfg); err != nil {
|
||||
logger.Fatalf("Failed to connect to database: %v", err)
|
||||
}
|
||||
dbPool = workers.GetPool()
|
||||
defer workers.Close()
|
||||
|
||||
logger.Println("Connected to PostgreSQL")
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
go func() {
|
||||
<-sigChan
|
||||
logger.Println("Shutting down...")
|
||||
os.Exit(0)
|
||||
}()
|
||||
|
||||
logger.Printf("Config: interval=%ds, batch=%d", sleepSec, batchSize)
|
||||
|
||||
ticker := time.NewTicker(time.Duration(sleepSec) * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
sources, err := getPendingURLs(ctx)
|
||||
if err != nil {
|
||||
logger.Printf("Error fetching URLs: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if len(sources) == 0 {
|
||||
logger.Println("No pending URLs to process")
|
||||
continue
|
||||
}
|
||||
|
||||
logger.Printf("Processing %d sources", len(sources))
|
||||
|
||||
for _, source := range sources {
|
||||
processURLSource(ctx, source)
|
||||
time.Sleep(2 * time.Second)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
391
backend/cmd/qdrant/main.go
Normal file
391
backend/cmd/qdrant/main.go
Normal file
|
|
@ -0,0 +1,391 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strconv"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
"github.com/rss2/backend/internal/workers"
|
||||
)
|
||||
|
||||
var (
|
||||
logger *log.Logger
|
||||
dbPool *pgxpool.Pool
|
||||
qdrantURL string
|
||||
ollamaURL string
|
||||
collection = "news_vectors"
|
||||
sleepSec = 30
|
||||
batchSize = 100
|
||||
)
|
||||
|
||||
func init() {
|
||||
logger = log.New(os.Stdout, "[QDRANT] ", log.LstdFlags)
|
||||
}
|
||||
|
||||
func loadConfig() {
|
||||
sleepSec = getEnvInt("QDRANT_SLEEP", 30)
|
||||
batchSize = getEnvInt("QDRANT_BATCH", 100)
|
||||
qdrantHost := getEnv("QDRANT_HOST", "localhost")
|
||||
qdrantPort := getEnvInt("QDRANT_PORT", 6333)
|
||||
qdrantURL = fmt.Sprintf("http://%s:%d", qdrantHost, qdrantPort)
|
||||
ollamaURL = getEnv("OLLAMA_URL", "http://ollama:11434")
|
||||
collection = getEnv("QDRANT_COLLECTION", "news_vectors")
|
||||
}
|
||||
|
||||
func getEnv(key, defaultValue string) string {
|
||||
if value := os.Getenv(key); value != "" {
|
||||
return value
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
func getEnvInt(key string, defaultValue int) int {
|
||||
if value := os.Getenv(key); value != "" {
|
||||
if intVal, err := strconv.Atoi(value); err == nil {
|
||||
return intVal
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
type Translation struct {
|
||||
ID int64
|
||||
NoticiaID int64
|
||||
Lang string
|
||||
Titulo string
|
||||
Resumen string
|
||||
URL string
|
||||
Fecha *time.Time
|
||||
FuenteNombre string
|
||||
CategoriaID *int64
|
||||
PaisID *int64
|
||||
}
|
||||
|
||||
func getPendingTranslations(ctx context.Context) ([]Translation, error) {
|
||||
rows, err := dbPool.Query(ctx, `
|
||||
SELECT
|
||||
t.id as traduccion_id,
|
||||
t.noticia_id,
|
||||
TRIM(t.lang_to) as lang,
|
||||
t.titulo_trad as titulo,
|
||||
t.resumen_trad as resumen,
|
||||
n.url,
|
||||
n.fecha,
|
||||
n.fuente_nombre,
|
||||
n.categoria_id,
|
||||
n.pais_id
|
||||
FROM traducciones t
|
||||
INNER JOIN noticias n ON t.noticia_id = n.id
|
||||
WHERE t.vectorized = FALSE
|
||||
AND t.status = 'done'
|
||||
ORDER BY t.created_at ASC
|
||||
LIMIT $1
|
||||
`, batchSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var translations []Translation
|
||||
for rows.Next() {
|
||||
var t Translation
|
||||
if err := rows.Scan(
|
||||
&t.ID, &t.NoticiaID, &t.Lang, &t.Titulo, &t.Resumen,
|
||||
&t.URL, &t.Fecha, &t.FuenteNombre, &t.CategoriaID, &t.PaisID,
|
||||
); err != nil {
|
||||
continue
|
||||
}
|
||||
translations = append(translations, t)
|
||||
}
|
||||
return translations, nil
|
||||
}
|
||||
|
||||
type EmbeddingRequest struct {
|
||||
Model string `json:"model"`
|
||||
Input string `json:"input"`
|
||||
}
|
||||
|
||||
type EmbeddingResponse struct {
|
||||
Embedding []float64 `json:"embedding"`
|
||||
}
|
||||
|
||||
func generateEmbedding(text string) ([]float64, error) {
|
||||
reqBody := EmbeddingRequest{
|
||||
Model: "mxbai-embed-large",
|
||||
Input: text,
|
||||
}
|
||||
|
||||
body, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 60 * time.Second}
|
||||
resp, err := client.Post(ollamaURL+"/api/embeddings", "application/json", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("Ollama returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var result EmbeddingResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return result.Embedding, nil
|
||||
}
|
||||
|
||||
type QdrantPoint struct {
|
||||
ID interface{} `json:"id"`
|
||||
Vector []float64 `json:"vector"`
|
||||
Payload map[string]interface{} `json:"payload"`
|
||||
}
|
||||
|
||||
type QdrantUpsertRequest struct {
|
||||
Points []QdrantPoint `json:"points"`
|
||||
}
|
||||
|
||||
func ensureCollection() error {
|
||||
req, err := http.NewRequest("GET", qdrantURL+"/collections/"+collection, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == 200 {
|
||||
logger.Printf("Collection %s already exists", collection)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get embedding dimension
|
||||
emb, err := generateEmbedding("test")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get embedding dimension: %w", err)
|
||||
}
|
||||
dimension := len(emb)
|
||||
|
||||
// Create collection
|
||||
createReq := map[string]interface{}{
|
||||
"name": collection,
|
||||
"vectors": map[string]interface{}{
|
||||
"size": dimension,
|
||||
"distance": "Cosine",
|
||||
},
|
||||
}
|
||||
|
||||
body, _ := json.Marshal(createReq)
|
||||
resp2, err := http.Post(qdrantURL+"/collections", "application/json", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp2.Body.Close()
|
||||
|
||||
logger.Printf("Created collection %s with dimension %d", collection, dimension)
|
||||
return nil
|
||||
}
|
||||
|
||||
func uploadToQdrant(translations []Translation, embeddings [][]float64) error {
|
||||
points := make([]QdrantPoint, 0, len(translations))
|
||||
|
||||
for i, t := range translations {
|
||||
if embeddings[i] == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
pointID := uuid.New().String()
|
||||
|
||||
payload := map[string]interface{}{
|
||||
"news_id": t.NoticiaID,
|
||||
"traduccion_id": t.ID,
|
||||
"titulo": t.Titulo,
|
||||
"resumen": t.Resumen,
|
||||
"url": t.URL,
|
||||
"fuente_nombre": t.FuenteNombre,
|
||||
"lang": t.Lang,
|
||||
}
|
||||
|
||||
if t.Fecha != nil {
|
||||
payload["fecha"] = t.Fecha.Format(time.RFC3339)
|
||||
}
|
||||
if t.CategoriaID != nil {
|
||||
payload["categoria_id"] = *t.CategoriaID
|
||||
}
|
||||
if t.PaisID != nil {
|
||||
payload["pais_id"] = *t.PaisID
|
||||
}
|
||||
|
||||
points = append(points, QdrantPoint{
|
||||
ID: pointID,
|
||||
Vector: embeddings[i],
|
||||
Payload: payload,
|
||||
})
|
||||
}
|
||||
|
||||
if len(points) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
reqBody := QdrantUpsertRequest{Points: points}
|
||||
body, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/collections/%s/points", qdrantURL, collection)
|
||||
resp, err := http.Post(url, "application/json", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 && resp.StatusCode != 202 {
|
||||
respBody, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("Qdrant returned status %d: %s", resp.StatusCode, string(respBody))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func updateTranslationStatus(ctx context.Context, translations []Translation, pointIDs []string) error {
|
||||
for i, t := range translations {
|
||||
if i >= len(pointIDs) || pointIDs[i] == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
_, err := dbPool.Exec(ctx, `
|
||||
UPDATE traducciones
|
||||
SET
|
||||
vectorized = TRUE,
|
||||
vectorization_date = NOW(),
|
||||
qdrant_point_id = $1
|
||||
WHERE id = $2
|
||||
`, pointIDs[i], t.ID)
|
||||
|
||||
if err != nil {
|
||||
logger.Printf("Error updating translation %d: %v", t.ID, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getStats(ctx context.Context) (total, vectorized, pending int, err error) {
|
||||
err = dbPool.QueryRow(ctx, `
|
||||
SELECT
|
||||
COUNT(*) as total,
|
||||
COUNT(*) FILTER (WHERE vectorized = TRUE) as vectorized,
|
||||
COUNT(*) FILTER (WHERE vectorized = FALSE AND status = 'done') as pending
|
||||
FROM traducciones
|
||||
WHERE lang_to = 'es'
|
||||
`).Scan(&total, &vectorized, &pending)
|
||||
|
||||
return total, vectorized, pending, err
|
||||
}
|
||||
|
||||
func main() {
|
||||
loadConfig()
|
||||
logger.Println("Starting Qdrant Vectorization Worker")
|
||||
|
||||
cfg := workers.LoadDBConfig()
|
||||
if err := workers.Connect(cfg); err != nil {
|
||||
logger.Fatalf("Failed to connect to database: %v", err)
|
||||
}
|
||||
dbPool = workers.GetPool()
|
||||
defer workers.Close()
|
||||
|
||||
logger.Println("Connected to PostgreSQL")
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
if err := ensureCollection(); err != nil {
|
||||
logger.Printf("Warning: Could not ensure collection: %v", err)
|
||||
}
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
go func() {
|
||||
<-sigChan
|
||||
logger.Println("Shutting down...")
|
||||
os.Exit(0)
|
||||
}()
|
||||
|
||||
logger.Printf("Config: qdrant=%s, ollama=%s, collection=%s, sleep=%ds, batch=%d",
|
||||
qdrantURL, ollamaURL, collection, sleepSec, batchSize)
|
||||
|
||||
totalProcessed := 0
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-time.After(time.Duration(sleepSec) * time.Second):
|
||||
translations, err := getPendingTranslations(ctx)
|
||||
if err != nil {
|
||||
logger.Printf("Error fetching pending translations: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if len(translations) == 0 {
|
||||
logger.Println("No pending translations to process")
|
||||
continue
|
||||
}
|
||||
|
||||
logger.Printf("Processing %d translations...", len(translations))
|
||||
|
||||
// Generate embeddings
|
||||
embeddings := make([][]float64, len(translations))
|
||||
for i, t := range translations {
|
||||
text := fmt.Sprintf("%s %s", t.Titulo, t.Resumen)
|
||||
emb, err := generateEmbedding(text)
|
||||
if err != nil {
|
||||
logger.Printf("Error generating embedding for %d: %v", t.ID, err)
|
||||
continue
|
||||
}
|
||||
embeddings[i] = emb
|
||||
}
|
||||
|
||||
// Upload to Qdrant
|
||||
if err := uploadToQdrant(translations, embeddings); err != nil {
|
||||
logger.Printf("Error uploading to Qdrant: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Update DB status
|
||||
pointIDs := make([]string, len(translations))
|
||||
for i := range translations {
|
||||
pointIDs[i] = uuid.New().String()
|
||||
}
|
||||
|
||||
if err := updateTranslationStatus(ctx, translations, pointIDs); err != nil {
|
||||
logger.Printf("Error updating status: %v", err)
|
||||
}
|
||||
|
||||
totalProcessed += len(translations)
|
||||
logger.Printf("Processed %d translations (total: %d)", len(translations), totalProcessed)
|
||||
|
||||
total, vectorized, pending, err := getStats(ctx)
|
||||
if err == nil {
|
||||
logger.Printf("Stats: total=%d, vectorized=%d, pending=%d", total, vectorized, pending)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
384
backend/cmd/related/main.go
Normal file
384
backend/cmd/related/main.go
Normal file
|
|
@ -0,0 +1,384 @@
|
|||
package main
|
||||
|
||||
import (
	"context"
	"log"
	"math"
	"os"
	"os/signal"
	"strconv"
	"syscall"
	"time"

	"github.com/jackc/pgx/v5/pgxpool"
	"github.com/rss2/backend/internal/workers"
)
|
||||
|
||||
var (
|
||||
logger *log.Logger
|
||||
dbPool *pgxpool.Pool
|
||||
sleepSec = 10
|
||||
topK = 10
|
||||
batchSz = 200
|
||||
minScore = 0.0
|
||||
)
|
||||
|
||||
func init() {
|
||||
logger = log.New(os.Stdout, "[RELATED] ", log.LstdFlags)
|
||||
}
|
||||
|
||||
func loadConfig() {
|
||||
sleepSec = getEnvInt("RELATED_SLEEP", 10)
|
||||
topK = getEnvInt("RELATED_TOPK", 10)
|
||||
batchSz = getEnvInt("RELATED_BATCH", 200)
|
||||
minScore = getEnvFloat("RELATED_MIN_SCORE", 0.0)
|
||||
}
|
||||
|
||||
func getEnvInt(key string, defaultValue int) int {
|
||||
if value := os.Getenv(key); value != "" {
|
||||
if intVal, err := strconv.Atoi(value); err == nil {
|
||||
return intVal
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
func getEnvFloat(key string, defaultValue float64) float64 {
|
||||
if value := os.Getenv(key); value != "" {
|
||||
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
|
||||
return floatVal
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
type Translation struct {
|
||||
ID int64
|
||||
Titulo string
|
||||
Resumen string
|
||||
Embedding []float64
|
||||
}
|
||||
|
||||
func ensureSchema(ctx context.Context) error {
|
||||
_, err := dbPool.Exec(ctx, `
|
||||
CREATE TABLE IF NOT EXISTS related_noticias (
|
||||
traduccion_id INTEGER REFERENCES traducciones(id) ON DELETE CASCADE,
|
||||
related_traduccion_id INTEGER REFERENCES traducciones(id) ON DELETE CASCADE,
|
||||
score FLOAT NOT NULL DEFAULT 0,
|
||||
created_at TIMESTAMP DEFAULT NOW(),
|
||||
PRIMARY KEY (traduccion_id, related_traduccion_id)
|
||||
);
|
||||
`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Ensure traduccion_embeddings table exists
|
||||
_, err = dbPool.Exec(ctx, `
|
||||
CREATE TABLE IF NOT EXISTS traduccion_embeddings (
|
||||
id SERIAL PRIMARY KEY,
|
||||
traduccion_id INTEGER NOT NULL REFERENCES traducciones(id) ON DELETE CASCADE,
|
||||
model TEXT NOT NULL,
|
||||
dim INTEGER NOT NULL,
|
||||
embedding DOUBLE PRECISION[] NOT NULL,
|
||||
created_at TIMESTAMP DEFAULT NOW(),
|
||||
UNIQUE (traduccion_id, model)
|
||||
);
|
||||
`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = dbPool.Exec(ctx, `
|
||||
CREATE INDEX IF NOT EXISTS idx_tr_emb_model ON traduccion_embeddings(model);
|
||||
`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = dbPool.Exec(ctx, `
|
||||
CREATE INDEX IF NOT EXISTS idx_tr_emb_traduccion_id ON traduccion_embeddings(traduccion_id);
|
||||
`)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func fetchAllEmbeddings(ctx context.Context, model string) ([]Translation, error) {
|
||||
rows, err := dbPool.Query(ctx, `
|
||||
SELECT e.traduccion_id,
|
||||
COALESCE(NULLIF(t.titulo_trad,''), ''),
|
||||
COALESCE(NULLIF(t.resumen_trad,''), ''),
|
||||
e.embedding
|
||||
FROM traduccion_embeddings e
|
||||
JOIN traducciones t ON t.id = e.traduccion_id
|
||||
WHERE e.model = $1
|
||||
AND t.status = 'done'
|
||||
AND t.lang_to = 'es'
|
||||
`, model)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var translations []Translation
|
||||
for rows.Next() {
|
||||
var t Translation
|
||||
if err := rows.Scan(&t.ID, &t.Titulo, &t.Resumen, &t.Embedding); err != nil {
|
||||
continue
|
||||
}
|
||||
translations = append(translations, t)
|
||||
}
|
||||
return translations, nil
|
||||
}
|
||||
|
||||
func fetchPendingIDs(ctx context.Context, model string, limit int) ([]int64, error) {
|
||||
rows, err := dbPool.Query(ctx, `
|
||||
SELECT t.id
|
||||
FROM traducciones t
|
||||
JOIN traduccion_embeddings e ON e.traduccion_id = t.id AND e.model = $1
|
||||
LEFT JOIN related_noticias r ON r.traduccion_id = t.id
|
||||
WHERE t.lang_to = 'es'
|
||||
AND t.status = 'done'
|
||||
GROUP BY t.id
|
||||
HAVING COUNT(r.related_traduccion_id) = 0
|
||||
ORDER BY t.id DESC
|
||||
LIMIT $2
|
||||
`, model, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var ids []int64
|
||||
for rows.Next() {
|
||||
var id int64
|
||||
if err := rows.Scan(&id); err != nil {
|
||||
continue
|
||||
}
|
||||
ids = append(ids, id)
|
||||
}
|
||||
return ids, nil
|
||||
}
|
||||
|
||||
func cosineSimilarity(a, b []float64) float64 {
|
||||
if len(a) != len(b) || len(a) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
var dotProduct, normA, normB float64
|
||||
for i := range a {
|
||||
dotProduct += a[i] * b[i]
|
||||
normA += a[i] * a[i]
|
||||
normB += b[i] * b[i]
|
||||
}
|
||||
|
||||
normA = sqrt(normA)
|
||||
normB = sqrt(normB)
|
||||
|
||||
if normA == 0 || normB == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
return dotProduct / (normA * normB)
|
||||
}
|
||||
|
||||
// sqrt returns the square root of x via Newton's method, or 0 for x <= 0.
//
// The previous version ran a fixed 20 iterations from z = x. Newton from that
// starting point roughly halves z each step until it nears the root, so large
// inputs (e.g. x = 1e300 needs ~500 halvings to reach 1e150) came back wildly
// wrong. This version iterates until the float value stops changing, with a
// generous cap to guarantee termination even if two adjacent floats oscillate.
func sqrt(x float64) float64 {
	if x <= 0 {
		return 0
	}
	z := x
	for i := 0; i < 1000; i++ {
		next := (z + x/z) / 2
		if next == z {
			break // converged to a floating-point fixed point
		}
		z = next
	}
	return z
}
|
||||
|
||||
func findTopK(query Embedding, candidates []Translation, k int, minScore float64) []struct {
|
||||
ID int64
|
||||
Score float64
|
||||
} {
|
||||
type sim struct {
|
||||
id int64
|
||||
score float64
|
||||
}
|
||||
|
||||
var similarities []sim
|
||||
|
||||
for _, c := range candidates {
|
||||
if int64(c.ID) == query.ID {
|
||||
continue
|
||||
}
|
||||
|
||||
score := cosineSimilarity(query.Embedding, c.Embedding)
|
||||
if score <= minScore {
|
||||
continue
|
||||
}
|
||||
|
||||
similarities = append(similarities, sim{int64(c.ID), score})
|
||||
}
|
||||
|
||||
// Sort by score descending
|
||||
for i := 0; i < len(similarities)-1; i++ {
|
||||
for j := i + 1; j < len(similarities); j++ {
|
||||
if similarities[j].score > similarities[i].score {
|
||||
similarities[i], similarities[j] = similarities[j], similarities[i]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(similarities) > k {
|
||||
similarities = similarities[:k]
|
||||
}
|
||||
|
||||
result := make([]struct {
|
||||
ID int64
|
||||
Score float64
|
||||
}, len(similarities))
|
||||
for i, s := range similarities {
|
||||
result[i] = struct {
|
||||
ID int64
|
||||
Score float64
|
||||
}{s.id, s.score}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// Embedding pairs a translation id with its stored dense embedding vector
// (as read from traduccion_embeddings.embedding).
type Embedding struct {
	ID        int64     // traducciones.id this vector belongs to
	Embedding []float64 // raw vector; dimension set by the embedding model
}
|
||||
|
||||
func findEmbeddingByID(embeddings []Embedding, id int64) *Embedding {
|
||||
for i := range embeddings {
|
||||
if embeddings[i].ID == id {
|
||||
return &embeddings[i]
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func insertRelated(ctx context.Context, traduccionID int64, related []struct {
|
||||
ID int64
|
||||
Score float64
|
||||
}) error {
|
||||
if len(related) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, r := range related {
|
||||
if r.Score <= 0 {
|
||||
continue
|
||||
}
|
||||
_, err := dbPool.Exec(ctx, `
|
||||
INSERT INTO related_noticias (traduccion_id, related_traduccion_id, score)
|
||||
VALUES ($1, $2, $3)
|
||||
ON CONFLICT (traduccion_id, related_traduccion_id)
|
||||
DO UPDATE SET score = EXCLUDED.score
|
||||
`, traduccionID, r.ID, r.Score)
|
||||
if err != nil {
|
||||
logger.Printf("Error inserting related: %v", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func processBatch(ctx context.Context, model string) (int, error) {
|
||||
// Fetch all embeddings once
|
||||
allTranslations, err := fetchAllEmbeddings(ctx, model)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if len(allTranslations) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Convert to Embedding format for easier lookup
|
||||
var allEmbeddings []Embedding
|
||||
for _, t := range allTranslations {
|
||||
if t.Embedding != nil {
|
||||
allEmbeddings = append(allEmbeddings, Embedding{ID: t.ID, Embedding: t.Embedding})
|
||||
}
|
||||
}
|
||||
|
||||
// Get pending IDs
|
||||
pendingIDs, err := fetchPendingIDs(ctx, model, batchSz)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if len(pendingIDs) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
processed := 0
|
||||
|
||||
for _, tradID := range pendingIDs {
|
||||
emb := findEmbeddingByID(allEmbeddings, tradID)
|
||||
if emb == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
topRelated := findTopK(*emb, allTranslations, topK, minScore)
|
||||
|
||||
if err := insertRelated(ctx, tradID, topRelated); err != nil {
|
||||
logger.Printf("Error inserting related for %d: %v", tradID, err)
|
||||
continue
|
||||
}
|
||||
|
||||
processed++
|
||||
}
|
||||
|
||||
return processed, nil
|
||||
}
|
||||
|
||||
// main runs the related-news worker: connect to Postgres, ensure the schema,
// then periodically link each unprocessed translation to its most similar
// peers by embedding cosine similarity.
func main() {
	loadConfig()
	logger.Println("Starting Related News Worker")

	cfg := workers.LoadDBConfig()
	if err := workers.Connect(cfg); err != nil {
		logger.Fatalf("Failed to connect to database: %v", err)
	}
	dbPool = workers.GetPool()
	defer workers.Close()

	ctx := context.Background()

	// Ensure schema (non-fatal: the worker keeps running and later writes
	// will surface the problem again).
	if err := ensureSchema(ctx); err != nil {
		logger.Printf("Error ensuring schema: %v", err)
	}

	// Model name must match what the embedding worker stored in
	// traduccion_embeddings.model.
	model := os.Getenv("EMB_MODEL")
	if model == "" {
		model = "mxbai-embed-large"
	}

	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)

	go func() {
		<-sigChan
		logger.Println("Shutting down...")
		// NOTE(review): os.Exit skips the deferred workers.Close above —
		// relies on the pool being cleaned up by process teardown.
		os.Exit(0)
	}()

	logger.Printf("Config: sleep=%ds, topK=%d, batch=%d, model=%s", sleepSec, topK, batchSz, model)

	// Poll loop: one batch per sleep interval.
	for {
		select {
		case <-time.After(time.Duration(sleepSec) * time.Second):
			count, err := processBatch(ctx, model)
			if err != nil {
				logger.Printf("Error processing batch: %v", err)
				continue
			}

			if count > 0 {
				logger.Printf("Generated related news for %d translations", count)
			}
		}
	}
}
|
||||
330
backend/cmd/scraper/main.go
Normal file
330
backend/cmd/scraper/main.go
Normal file
|
|
@ -0,0 +1,330 @@
|
|||
package main
|
||||
|
||||
import (
	"context"
	"crypto/md5"
	"fmt"
	"log"
	"net/http"
	"os"
	"os/signal"
	"strconv"
	"strings"
	"syscall"
	"time"
	"unicode/utf8"

	"github.com/PuerkitoBio/goquery"
	"github.com/jackc/pgx/v5/pgxpool"
	"github.com/rss2/backend/internal/workers"
)
|
||||
|
||||
var (
	logger *log.Logger
	// NOTE(review): dbPool is typed *workers.Config here, not a connection
	// pool, and does not appear to be used in this file — presumably a
	// leftover; confirm and remove.
	dbPool        *workers.Config
	pool          *pgxpool.Pool // shared connection pool used by all queries
	sleepInterval = 60          // seconds between scrape cycles (SCRAPER_SLEEP)
	batchSize     = 10          // batch size knob (SCRAPER_BATCH), set in loadConfig
)
|
||||
|
||||
// URLSource is one fuentes_url row: a single page to scrape directly
// (as opposed to an RSS feed handled elsewhere).
type URLSource struct {
	ID          int64
	Nombre      string  // human-readable source name; copied to noticias.fuente_nombre
	URL         string  // page to fetch
	CategoriaID *int64  // optional category to assign to the scraped article
	PaisID      *int64  // optional country to assign to the scraped article
	Idioma      *string // optional source language code
	Active      bool    // mirrors fuentes_url.activo
}
|
||||
|
||||
// Article holds the fields extracted from a scraped HTML page before it is
// persisted as a noticias row.
type Article struct {
	Title    string
	Summary  string // meta/og description, truncated to at most 500 bytes
	Content  string // text of the first matching content selector, may be empty
	URL      string
	ImageURL string     // og:image content, may be empty
	PubDate  *time.Time // nil when the page exposes no publication date
}
|
||||
|
||||
func init() {
|
||||
logger = log.New(os.Stdout, "[SCRAPER] ", log.LstdFlags)
|
||||
logger.SetOutput(os.Stdout)
|
||||
}
|
||||
|
||||
// loadConfig refreshes the worker tunables from the environment, keeping the
// compiled-in defaults when the variables are unset or not valid integers.
func loadConfig() {
	sleepInterval = getEnvInt("SCRAPER_SLEEP", 60) // seconds between cycles
	batchSize = getEnvInt("SCRAPER_BATCH", 10)
}
|
||||
|
||||
func getEnvInt(key string, defaultValue int) int {
|
||||
if value := os.Getenv(key); value != "" {
|
||||
if intVal, err := strconv.Atoi(value); err == nil {
|
||||
return intVal
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
func getActiveURLs(ctx context.Context) ([]URLSource, error) {
|
||||
rows, err := pool.Query(ctx, `
|
||||
SELECT id, nombre, url, categoria_id, pais_id, idioma, activo
|
||||
FROM fuentes_url
|
||||
WHERE activo = true
|
||||
`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var sources []URLSource
|
||||
for rows.Next() {
|
||||
var s URLSource
|
||||
err := rows.Scan(&s.ID, &s.Nombre, &s.URL, &s.CategoriaID, &s.PaisID, &s.Idioma, &s.Active)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
sources = append(sources, s)
|
||||
}
|
||||
return sources, nil
|
||||
}
|
||||
|
||||
func updateSourceStatus(ctx context.Context, sourceID int64, status, message string, httpCode int) error {
|
||||
_, err := pool.Exec(ctx, `
|
||||
UPDATE fuentes_url
|
||||
SET last_check = NOW(),
|
||||
last_status = $1,
|
||||
status_message = $2,
|
||||
last_http_code = $3
|
||||
WHERE id = $4
|
||||
`, status, message, httpCode, sourceID)
|
||||
return err
|
||||
}
|
||||
|
||||
func extractArticle(source URLSource) (*Article, error) {
|
||||
client := &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("GET", source.URL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
|
||||
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
article := &Article{
|
||||
URL: source.URL,
|
||||
}
|
||||
|
||||
// Extract title
|
||||
article.Title = doc.Find("meta[property='og:title']").First().AttrOr("content", "")
|
||||
if article.Title == "" {
|
||||
article.Title = doc.Find("meta[name='title']").First().AttrOr("content", "")
|
||||
}
|
||||
if article.Title == "" {
|
||||
article.Title = doc.Find("h1").First().Text()
|
||||
}
|
||||
if article.Title == "" {
|
||||
article.Title = doc.Find("title").First().Text()
|
||||
}
|
||||
|
||||
// Extract description/summary
|
||||
article.Summary = doc.Find("meta[property='og:description']").First().AttrOr("content", "")
|
||||
if article.Summary == "" {
|
||||
article.Summary = doc.Find("meta[name='description']").First().AttrOr("content", "")
|
||||
}
|
||||
|
||||
// Extract image
|
||||
article.ImageURL = doc.Find("meta[property='og:image']").First().AttrOr("content", "")
|
||||
|
||||
// Extract main content - try common selectors
|
||||
contentSelectors := []string{
|
||||
"article",
|
||||
"[role='main']",
|
||||
"main",
|
||||
".article-content",
|
||||
".post-content",
|
||||
".entry-content",
|
||||
".content",
|
||||
"#content",
|
||||
}
|
||||
|
||||
for _, sel := range contentSelectors {
|
||||
content := doc.Find(sel).First()
|
||||
if content.Length() > 0 {
|
||||
article.Content = content.Text()
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up
|
||||
article.Title = strings.TrimSpace(article.Title)
|
||||
article.Summary = strings.TrimSpace(article.Summary)
|
||||
article.Content = strings.TrimSpace(article.Content)
|
||||
|
||||
// Truncate summary if too long
|
||||
if len(article.Summary) > 500 {
|
||||
article.Summary = article.Summary[:500]
|
||||
}
|
||||
|
||||
return article, nil
|
||||
}
|
||||
|
||||
func saveArticle(ctx context.Context, source URLSource, article *Article) (bool, error) {
|
||||
finalURL := article.URL
|
||||
if finalURL == "" {
|
||||
finalURL = source.URL
|
||||
}
|
||||
|
||||
// Generate ID from URL
|
||||
articleID := fmt.Sprintf("%x", md5.Sum([]byte(finalURL)))
|
||||
|
||||
// Check if exists
|
||||
var exists bool
|
||||
err := pool.QueryRow(ctx, "SELECT EXISTS(SELECT 1 FROM noticias WHERE id = $1)", articleID).Scan(&exists)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if exists {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
title := article.Title
|
||||
if title == "" {
|
||||
title = "Sin título"
|
||||
}
|
||||
|
||||
summary := article.Summary
|
||||
if summary == "" && article.Content != "" {
|
||||
summary = article.Content
|
||||
if len(summary) > 500 {
|
||||
summary = summary[:500]
|
||||
}
|
||||
}
|
||||
|
||||
pubDate := time.Now()
|
||||
if article.PubDate != nil {
|
||||
pubDate = *article.PubDate
|
||||
}
|
||||
|
||||
_, err = pool.Exec(ctx, `
|
||||
INSERT INTO noticias (
|
||||
id, titulo, resumen, url, fecha, imagen_url,
|
||||
fuente_nombre, categoria_id, pais_id
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
|
||||
ON CONFLICT (id) DO NOTHING
|
||||
`, articleID, title, summary, finalURL, pubDate, article.ImageURL,
|
||||
source.Nombre, source.CategoriaID, source.PaisID)
|
||||
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func processSource(ctx context.Context, source URLSource) {
|
||||
logger.Printf("Processing: %s (%s)", source.Nombre, source.URL)
|
||||
|
||||
article, err := extractArticle(source)
|
||||
if err != nil {
|
||||
logger.Printf("Error extracting article from %s: %v", source.URL, err)
|
||||
status := "ERROR"
|
||||
if strings.Contains(err.Error(), "HTTP") {
|
||||
status = "ERROR_HTTP"
|
||||
}
|
||||
updateSourceStatus(ctx, source.ID, status, err.Error()[:200], 0)
|
||||
return
|
||||
}
|
||||
|
||||
if article.Title == "" {
|
||||
logger.Printf("No title found for %s", source.URL)
|
||||
updateSourceStatus(ctx, source.ID, "ERROR_PARSE", "No title extracted", 200)
|
||||
return
|
||||
}
|
||||
|
||||
saved, err := saveArticle(ctx, source, article)
|
||||
if err != nil {
|
||||
logger.Printf("Error saving article: %v", err)
|
||||
updateSourceStatus(ctx, source.ID, "ERROR_DB", err.Error()[:200], 0)
|
||||
return
|
||||
}
|
||||
|
||||
if saved {
|
||||
logger.Printf("Saved: %s", article.Title)
|
||||
updateSourceStatus(ctx, source.ID, "OK", "News created successfully", 200)
|
||||
} else {
|
||||
logger.Printf("Already exists: %s", article.Title)
|
||||
updateSourceStatus(ctx, source.ID, "OK", "News already exists", 200)
|
||||
}
|
||||
}
|
||||
|
||||
// main runs the scraper worker: connect to Postgres, then on a fixed ticker
// fetch every active URL source and scrape it, with a small per-source delay.
func main() {
	loadConfig()
	logger.Println("Starting Scraper Worker")

	cfg := workers.LoadDBConfig()
	if err := workers.Connect(cfg); err != nil {
		logger.Fatalf("Failed to connect to database: %v", err)
	}
	pool = workers.GetPool()
	defer workers.Close()

	logger.Println("Connected to PostgreSQL")

	ctx := context.Background()

	// Handle shutdown signals.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)

	go func() {
		<-sigChan
		logger.Println("Shutting down...")
		// NOTE(review): os.Exit skips the deferred workers.Close above.
		os.Exit(0)
	}()

	logger.Printf("Config: sleep=%ds, batch=%d", sleepInterval, batchSize)

	ticker := time.NewTicker(time.Duration(sleepInterval) * time.Second)
	defer ticker.Stop()

	// One full pass over all active sources per tick. Note a pass may take
	// longer than the tick interval; ticks that fire meanwhile are coalesced.
	for {
		select {
		case <-ticker.C:
			sources, err := getActiveURLs(ctx)
			if err != nil {
				logger.Printf("Error fetching URLs: %v", err)
				continue
			}

			if len(sources) == 0 {
				logger.Println("No active URLs to process")
				continue
			}

			logger.Printf("Processing %d sources", len(sources))

			for _, source := range sources {
				processSource(ctx, source)
				time.Sleep(2 * time.Second) // Rate limiting
			}
		}
	}
}
|
||||
190
backend/cmd/server/main.go
Normal file
190
backend/cmd/server/main.go
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/rss2/backend/internal/cache"
|
||||
"github.com/rss2/backend/internal/config"
|
||||
"github.com/rss2/backend/internal/db"
|
||||
"github.com/rss2/backend/internal/handlers"
|
||||
"github.com/rss2/backend/internal/middleware"
|
||||
"github.com/rss2/backend/internal/services"
|
||||
)
|
||||
|
||||
// initDB performs idempotent, best-effort schema setup for the API server:
// auxiliary tables, a users.role column, and default config rows. Failures
// are logged as warnings and never abort startup.
func initDB() {
	ctx := context.Background()

	// Alias table mapping alternative entity spellings to a canonical name.
	_, err := db.GetPool().Exec(ctx, `
		CREATE TABLE IF NOT EXISTS entity_aliases (
			id SERIAL PRIMARY KEY,
			canonical_name VARCHAR(255) NOT NULL,
			alias VARCHAR(255) NOT NULL,
			tipo VARCHAR(50) NOT NULL CHECK (tipo IN ('persona', 'organizacion', 'lugar', 'tema')),
			created_at TIMESTAMP DEFAULT NOW(),
			UNIQUE(alias, tipo)
		)
	`)
	if err != nil {
		log.Printf("Warning: Could not create entity_aliases table: %v", err)
	} else {
		log.Println("Table entity_aliases ready")
	}

	// Role column for the admin/user split; defaults every user to 'user'.
	_, err = db.GetPool().Exec(ctx, `
		ALTER TABLE users ADD COLUMN IF NOT EXISTS role VARCHAR(20) DEFAULT 'user'
	`)
	if err != nil {
		log.Printf("Warning: Could not add role column: %v", err)
	} else {
		log.Println("Column role ready")
	}

	// Simple key/value configuration store.
	_, err = db.GetPool().Exec(ctx, `
		CREATE TABLE IF NOT EXISTS config (
			key VARCHAR(100) PRIMARY KEY,
			value TEXT,
			updated_at TIMESTAMP DEFAULT NOW()
		)
	`)
	if err != nil {
		log.Printf("Warning: Could not create config table: %v", err)
	} else {
		log.Println("Table config ready")
	}

	// Seed translator defaults; ON CONFLICT keeps any operator-set values.
	// Errors here are deliberately ignored (best-effort seeding).
	db.GetPool().Exec(ctx, `
		INSERT INTO config (key, value) VALUES ('translator_type', 'cpu')
		ON CONFLICT (key) DO NOTHING
	`)
	db.GetPool().Exec(ctx, `
		INSERT INTO config (key, value) VALUES ('translator_workers', '2')
		ON CONFLICT (key) DO NOTHING
	`)
	db.GetPool().Exec(ctx, `
		INSERT INTO config (key, value) VALUES ('translator_status', 'stopped')
		ON CONFLICT (key) DO NOTHING
	`)
}
|
||||
|
||||
// main boots the HTTP API server: connect to Postgres (required) and Redis
// (optional), run idempotent schema setup, wire up all Gin routes, then serve
// until SIGINT/SIGTERM.
func main() {
	cfg := config.Load()

	if err := db.Connect(cfg.DatabaseURL); err != nil {
		log.Fatalf("Failed to connect to database: %v", err)
	}
	defer db.Close()
	log.Println("Connected to PostgreSQL")

	// Auto-setup DB tables (best-effort, logs warnings).
	initDB()

	// Redis is optional: the server runs without cache if it is unreachable.
	if err := cache.Connect(cfg.RedisURL); err != nil {
		log.Printf("Warning: Failed to connect to Redis: %v", err)
	} else {
		defer cache.Close()
		log.Println("Connected to Redis")
	}

	services.Init(cfg)

	r := gin.Default()

	r.Use(middleware.CORSMiddleware())
	r.Use(middleware.LoggerMiddleware())

	// Liveness probe.
	r.GET("/health", func(c *gin.Context) {
		c.JSON(200, gin.H{"status": "ok"})
	})

	api := r.Group("/api")
	{
		// Serve static images downloaded by wiki_worker.
		api.StaticFS("/wiki-images", gin.Dir("/app/data/wiki_images", false))

		// Public auth endpoints.
		api.POST("/auth/login", handlers.Login)
		api.POST("/auth/register", handlers.Register)
		api.GET("/auth/check-first-user", handlers.CheckFirstUser)

		// News: reads are public, delete requires auth.
		news := api.Group("/news")
		{
			news.GET("", handlers.GetNews)
			news.GET("/:id", handlers.GetNewsByID)
			news.DELETE("/:id", middleware.AuthRequired(), handlers.DeleteNews)
		}

		// Feeds: reads public, mutations require auth.
		feeds := api.Group("/feeds")
		{
			feeds.GET("", handlers.GetFeeds)
			feeds.GET("/export", handlers.ExportFeeds)
			feeds.GET("/:id", handlers.GetFeedByID)
			feeds.POST("", middleware.AuthRequired(), handlers.CreateFeed)
			feeds.POST("/import", middleware.AuthRequired(), handlers.ImportFeeds)
			feeds.PUT("/:id", middleware.AuthRequired(), handlers.UpdateFeed)
			feeds.DELETE("/:id", middleware.AuthRequired(), handlers.DeleteFeed)
			feeds.POST("/:id/toggle", middleware.AuthRequired(), handlers.ToggleFeedActive)
			feeds.POST("/:id/reactivate", middleware.AuthRequired(), handlers.ReactivateFeed)
		}

		api.GET("/search", handlers.SearchNews)

		api.GET("/entities", handlers.GetEntities)

		api.GET("/stats", handlers.GetStats)

		api.GET("/categories", handlers.GetCategories)
		api.GET("/countries", handlers.GetCountries)

		// Admin endpoints: require both a valid token and the admin role.
		admin := api.Group("/admin")
		admin.Use(middleware.AuthRequired(), middleware.AdminRequired())
		{
			admin.POST("/aliases", handlers.CreateAlias)
			admin.GET("/aliases/export", handlers.ExportAliases)
			admin.POST("/aliases/import", handlers.ImportAliases)
			admin.POST("/entities/retype", handlers.PatchEntityTipo)
			admin.GET("/backup", handlers.BackupDatabase)
			admin.GET("/backup/news", handlers.BackupNewsZipped)
			admin.GET("/users", handlers.GetUsers)
			admin.POST("/users/:id/promote", handlers.PromoteUser)
			admin.POST("/users/:id/demote", handlers.DemoteUser)
			admin.POST("/reset-db", handlers.ResetDatabase)
			admin.GET("/workers/status", handlers.GetWorkerStatus)
			admin.POST("/workers/config", handlers.SetWorkerConfig)
			admin.POST("/workers/start", handlers.StartWorkers)
			admin.POST("/workers/stop", handlers.StopWorkers)
		}

		// Authenticated-but-not-admin endpoints.
		auth := api.Group("/auth")
		auth.Use(middleware.AuthRequired())
		{
			auth.GET("/me", handlers.GetCurrentUser)
		}
	}

	// NOTE(review): the JWT secret is installed after the routes are
	// declared but before the server starts serving below.
	middleware.SetJWTSecret(cfg.SecretKey)

	port := cfg.ServerPort
	addr := fmt.Sprintf(":%s", port)

	go func() {
		log.Printf("Server starting on %s", addr)
		if err := r.Run(addr); err != nil {
			log.Fatalf("Failed to start server: %v", err)
		}
	}()

	// Block until SIGINT/SIGTERM; deferred closers run on return.
	// NOTE(review): no graceful HTTP shutdown — in-flight requests are cut.
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	<-quit

	log.Println("Shutting down server...")
}
|
||||
383
backend/cmd/topics/main.go
Normal file
383
backend/cmd/topics/main.go
Normal file
|
|
@ -0,0 +1,383 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
"github.com/rss2/backend/internal/workers"
|
||||
)
|
||||
|
||||
var (
	logger   *log.Logger
	dbPool   *pgxpool.Pool // shared pool, assigned in main via workers.GetPool
	sleepSec = 10          // seconds between batches (TOPICS_SLEEP)
	batchSz  = 500         // news rows per batch (TOPICS_BATCH)
)
|
||||
|
||||
// Topic is a keyword-scored tag loaded from the topics table.
type Topic struct {
	ID       int64
	Weight   int      // multiplier applied to the keyword hit count
	Keywords []string // lower-cased, trimmed match terms parsed from topics.keywords
}
|
||||
|
||||
// Country is a paises row plus the lower-cased keywords (name + hardcoded
// aliases) used to detect the country in news text.
type Country struct {
	ID       int64
	Name     string
	Keywords []string
}
||||
|
||||
// init creates the package logger used by every worker function.
func init() {
	logger = log.New(os.Stdout, "[TOPICS] ", log.LstdFlags)
}
|
||||
|
||||
// loadConfig refreshes the worker tunables from the environment, keeping the
// compiled-in defaults when the variables are unset or not valid integers.
func loadConfig() {
	sleepSec = getEnvInt("TOPICS_SLEEP", 10) // seconds between batches
	batchSz = getEnvInt("TOPICS_BATCH", 500)
}
|
||||
|
||||
func getEnvInt(key string, defaultValue int) int {
|
||||
if value := os.Getenv(key); value != "" {
|
||||
if intVal, err := strconv.Atoi(value); err == nil {
|
||||
return intVal
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
// ensureSchema creates the topic tables and tagging flag if they do not yet
// exist: topics (definitions), news_topics (news<->topic relation with score),
// and the noticias.topics_processed marker this worker uses as its queue.
func ensureSchema(ctx context.Context) error {
	_, err := dbPool.Exec(ctx, `
		CREATE TABLE IF NOT EXISTS topics (
			id SERIAL PRIMARY KEY,
			slug VARCHAR(50) UNIQUE NOT NULL,
			name VARCHAR(100) NOT NULL,
			weight INTEGER DEFAULT 1,
			keywords TEXT,
			group_name VARCHAR(50)
		);
	`)
	if err != nil {
		return err
	}

	// Relation rows cascade away when either side is deleted.
	_, err = dbPool.Exec(ctx, `
		CREATE TABLE IF NOT EXISTS news_topics (
			noticia_id VARCHAR(32) REFERENCES noticias(id) ON DELETE CASCADE,
			topic_id INTEGER REFERENCES topics(id) ON DELETE CASCADE,
			score INTEGER DEFAULT 0,
			created_at TIMESTAMP DEFAULT NOW(),
			PRIMARY KEY (noticia_id, topic_id)
		);
	`)
	if err != nil {
		return err
	}

	// Processing flag; fetchPendingNews selects rows where it is FALSE.
	_, err = dbPool.Exec(ctx, `
		ALTER TABLE noticias ADD COLUMN IF NOT EXISTS topics_processed BOOLEAN DEFAULT FALSE;
	`)
	return err
}
|
||||
|
||||
func loadTopics(ctx context.Context) ([]Topic, error) {
|
||||
rows, err := dbPool.Query(ctx, "SELECT id, weight, keywords FROM topics")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var topics []Topic
|
||||
for rows.Next() {
|
||||
var t Topic
|
||||
var kwStr *string
|
||||
if err := rows.Scan(&t.ID, &t.Weight, &kwStr); err != nil {
|
||||
continue
|
||||
}
|
||||
if kwStr != nil {
|
||||
keywords := strings.Split(*kwStr, ",")
|
||||
for i := range keywords {
|
||||
keywords[i] = strings.ToLower(strings.TrimSpace(keywords[i]))
|
||||
}
|
||||
t.Keywords = keywords
|
||||
}
|
||||
topics = append(topics, t)
|
||||
}
|
||||
return topics, nil
|
||||
}
|
||||
|
||||
// loadCountries reads every paises row and attaches the detection keywords:
// the lower-cased country name plus a hardcoded Spanish alias list for the
// most commonly mentioned countries. Rows that fail to scan are skipped.
func loadCountries(ctx context.Context) ([]Country, error) {
	rows, err := dbPool.Query(ctx, "SELECT id, nombre FROM paises")
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Demonyms/capitals keyed by the exact paises.nombre value; only
	// countries listed here get aliases beyond their own name.
	aliases := map[string][]string{
		"Estados Unidos": {"eeuu", "ee.uu.", "usa", "estadounidense", "washington"},
		"Rusia":          {"ruso", "rusa", "moscú", "kremlin"},
		"China":          {"chino", "china", "pekin", "beijing"},
		"Ucrania":        {"ucraniano", "kiev", "kyiv"},
		"Israel":         {"israelí", "tel aviv", "jerusalén"},
		"España":         {"español", "madrid"},
		"Reino Unido":    {"uk", "londres", "británico"},
		"Francia":        {"francés", "parís"},
		"Alemania":       {"alemán", "berlín"},
		"Palestina":      {"palestino", "gaza", "cisjordania"},
		"Irán":           {"iraní", "teherán"},
	}

	var countries []Country
	for rows.Next() {
		var c Country
		if err := rows.Scan(&c.ID, &c.Name); err != nil {
			continue
		}
		c.Keywords = []string{strings.ToLower(c.Name)}
		if kw, ok := aliases[c.Name]; ok {
			c.Keywords = append(c.Keywords, kw...)
		}
		countries = append(countries, c)
	}
	return countries, nil
}
|
||||
|
||||
// NewsItem is the minimal projection of a noticias row needed for topic and
// country tagging. Titulo and Resumen are pointers because the underlying
// columns are nullable.
type NewsItem struct {
	ID      string
	Titulo  *string
	Resumen *string
}
|
||||
|
||||
func fetchPendingNews(ctx context.Context, limit int) ([]NewsItem, error) {
|
||||
rows, err := dbPool.Query(ctx, `
|
||||
SELECT id, titulo, resumen
|
||||
FROM noticias
|
||||
WHERE topics_processed = FALSE
|
||||
ORDER BY fecha DESC
|
||||
LIMIT $1
|
||||
`, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var items []NewsItem
|
||||
for rows.Next() {
|
||||
var n NewsItem
|
||||
if err := rows.Scan(&n.ID, &n.Titulo, &n.Resumen); err != nil {
|
||||
continue
|
||||
}
|
||||
items = append(items, n)
|
||||
}
|
||||
return items, nil
|
||||
}
|
||||
|
||||
func findTopics(text string, topics []Topic) []struct {
|
||||
TopicID int64
|
||||
Score int
|
||||
} {
|
||||
text = strings.ToLower(text)
|
||||
var matches []struct {
|
||||
TopicID int64
|
||||
Score int
|
||||
}
|
||||
|
||||
for _, topic := range topics {
|
||||
count := 0
|
||||
for _, kw := range topic.Keywords {
|
||||
if strings.Contains(text, kw) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
if count > 0 {
|
||||
matches = append(matches, struct {
|
||||
TopicID int64
|
||||
Score int
|
||||
}{topic.ID, topic.Weight * count})
|
||||
}
|
||||
}
|
||||
return matches
|
||||
}
|
||||
|
||||
func findBestCountry(text string, countries []Country) *int64 {
|
||||
text = strings.ToLower(text)
|
||||
bestID := new(int64)
|
||||
bestCount := 0
|
||||
|
||||
for _, c := range countries {
|
||||
count := 0
|
||||
for _, kw := range c.Keywords {
|
||||
if strings.Contains(text, kw) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
if count > bestCount {
|
||||
bestCount = count
|
||||
*bestID = c.ID
|
||||
}
|
||||
}
|
||||
|
||||
if bestCount > 0 {
|
||||
return bestID
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// processBatch tags one batch of untagged news rows: for each row it matches
// topics (upserted into news_topics), picks the best-matching country
// (written to noticias.pais_id), and finally flips topics_processed so the
// row leaves the queue. Returns the number of rows examined.
func processBatch(ctx context.Context, topics []Topic, countries []Country) (int, error) {
	items, err := fetchPendingNews(ctx, batchSz)
	if err != nil {
		return 0, err
	}

	if len(items) == 0 {
		return 0, nil
	}

	type topicMatch struct {
		NoticiaID string
		TopicID   int64
		Score     int
	}

	type countryUpdate struct {
		PaisID    int64
		NoticiaID string
	}

	var topicMatches []topicMatch
	var countryUpdates []countryUpdate
	var processedIDs []string

	for _, item := range items {
		// Matching text = title + summary (both columns are nullable).
		var text string
		if item.Titulo != nil {
			text += *item.Titulo
		}
		if item.Resumen != nil {
			text += " " + *item.Resumen
		}

		// Find topics
		matches := findTopics(text, topics)
		for _, m := range matches {
			topicMatches = append(topicMatches, topicMatch{item.ID, m.TopicID, m.Score})
		}

		// Find best country
		if countryID := findBestCountry(text, countries); countryID != nil {
			countryUpdates = append(countryUpdates, countryUpdate{*countryID, item.ID})
		}

		processedIDs = append(processedIDs, item.ID)
	}

	// Insert topic relations (upsert keeps the latest score). Per-row
	// failures are logged and skipped.
	if len(topicMatches) > 0 {
		for _, tm := range topicMatches {
			_, err := dbPool.Exec(ctx, `
				INSERT INTO news_topics (noticia_id, topic_id, score)
				VALUES ($1, $2, $3)
				ON CONFLICT (noticia_id, topic_id) DO UPDATE SET score = EXCLUDED.score
			`, tm.NoticiaID, tm.TopicID, tm.Score)
			if err != nil {
				logger.Printf("Error inserting topic: %v", err)
			}
		}
	}

	// Update country on the news rows; also best-effort per row.
	if len(countryUpdates) > 0 {
		for _, cu := range countryUpdates {
			_, err := dbPool.Exec(ctx, `
				UPDATE noticias SET pais_id = $1 WHERE id = $2
			`, cu.PaisID, cu.NoticiaID)
			if err != nil {
				logger.Printf("Error updating country: %v", err)
			}
		}
	}

	// Mark the whole batch as processed last, so a crash before this point
	// leaves the rows queued for a retry.
	if len(processedIDs) > 0 {
		_, err := dbPool.Exec(ctx, `
			UPDATE noticias SET topics_processed = TRUE WHERE id = ANY($1)
		`, processedIDs)
		if err != nil {
			return 0, err
		}
	}

	return len(items), nil
}
|
||||
|
||||
// main runs the topics worker: connect to Postgres, ensure the schema, then
// repeatedly reload topic/country definitions and tag pending news batches.
func main() {
	loadConfig()
	logger.Println("Starting Topics Worker")

	cfg := workers.LoadDBConfig()
	if err := workers.Connect(cfg); err != nil {
		logger.Fatalf("Failed to connect to database: %v", err)
	}
	dbPool = workers.GetPool()
	defer workers.Close()

	ctx := context.Background()

	// Ensure schema (non-fatal: later queries will surface a real problem).
	if err := ensureSchema(ctx); err != nil {
		logger.Printf("Error ensuring schema: %v", err)
	}

	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)

	go func() {
		<-sigChan
		logger.Println("Shutting down...")
		// NOTE(review): os.Exit skips the deferred workers.Close above.
		os.Exit(0)
	}()

	logger.Printf("Config: sleep=%ds, batch=%d", sleepSec, batchSz)

	for {
		select {
		case <-time.After(time.Duration(sleepSec) * time.Second):
			// Definitions are reloaded every cycle so edits to the topics
			// table take effect without a restart.
			topics, err := loadTopics(ctx)
			if err != nil {
				logger.Printf("Error loading topics: %v", err)
				continue
			}

			if len(topics) == 0 {
				logger.Println("No topics found in DB")
				// NOTE(review): this extra Sleep stacks on top of the
				// time.After above, doubling the idle wait.
				time.Sleep(time.Duration(sleepSec) * time.Second)
				continue
			}

			countries, err := loadCountries(ctx)
			if err != nil {
				logger.Printf("Error loading countries: %v", err)
				continue
			}

			count, err := processBatch(ctx, topics, countries)
			if err != nil {
				logger.Printf("Error processing batch: %v", err)
				continue
			}

			if count > 0 {
				logger.Printf("Processed %d news items", count)
			}

			// Short batch means the queue drained; back off one extra
			// interval before polling again.
			if count < batchSz {
				time.Sleep(time.Duration(sleepSec) * time.Second)
			}
		}
	}
}
|
||||
267
backend/cmd/wiki_worker/main.go
Normal file
267
backend/cmd/wiki_worker/main.go
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
package main
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"net/url"
	"os"
	"os/signal"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/jackc/pgx/v5/pgxpool"
	"github.com/rss2/backend/internal/workers"
)
|
||||
|
||||
var (
	logger        *log.Logger   // worker-scoped logger, initialised in init()
	pool          *pgxpool.Pool // shared DB pool, assigned in main() from the workers package
	sleepInterval = 30          // seconds to idle when there is no pending work (WIKI_SLEEP)
	batchSize     = 50          // max tags fetched per polling cycle
	imagesDir     = "/app/data/wiki_images" // destination directory for downloaded thumbnails
)
|
||||
|
||||
// WikiSummary mirrors the subset of the Wikipedia REST v1
// /page/summary response that this worker consumes.
type WikiSummary struct {
	Type         string `json:"type"` // page type, e.g. "standard" or "disambiguation"
	Title        string `json:"title"`
	DisplayTitle string `json:"displaytitle"`
	Extract      string `json:"extract"` // plain-text article summary
	ContentUrls  struct {
		Desktop struct {
			Page string `json:"page"` // canonical desktop article URL
		} `json:"desktop"`
	} `json:"content_urls"`
	// Thumbnail is nil when the article has no lead image.
	Thumbnail *struct {
		Source string `json:"source"`
		Width  int    `json:"width"`
		Height int    `json:"height"`
	} `json:"thumbnail"`
}
|
||||
|
||||
// Tag is a row from the tags table pending Wikipedia enrichment.
type Tag struct {
	ID    int64  // tags.id
	Valor string // tag text, used as the Wikipedia lookup title
	Tipo  string // tag kind: 'persona' or 'organizacion'
}
|
||||
|
||||
// init wires the worker's prefixed logger before main runs.
func init() {
	logger = log.New(os.Stdout, "[WIKI_WORKER] ", log.LstdFlags)
}
|
||||
|
||||
func getPendingTags(ctx context.Context) ([]Tag, error) {
|
||||
rows, err := pool.Query(ctx, `
|
||||
SELECT t.id, t.valor, t.tipo
|
||||
FROM tags t
|
||||
LEFT JOIN (
|
||||
SELECT tag_id, COUNT(*) as cnt
|
||||
FROM tags_noticia
|
||||
GROUP BY tag_id
|
||||
) c ON c.tag_id = t.id
|
||||
WHERE t.tipo IN ('persona', 'organizacion')
|
||||
AND t.wiki_checked = FALSE
|
||||
ORDER BY COALESCE(c.cnt, 0) DESC, t.id DESC
|
||||
LIMIT $1
|
||||
`, batchSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var tags []Tag
|
||||
for rows.Next() {
|
||||
var t Tag
|
||||
if err := rows.Scan(&t.ID, &t.Valor, &t.Tipo); err == nil {
|
||||
tags = append(tags, t)
|
||||
}
|
||||
}
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
func downloadImage(imgURL, destPath string) error {
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
req, err := http.NewRequest("GET", imgURL, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
req.Header.Set("User-Agent", "RSS2-WikiWorker/1.0 (https://github.com/proyecto/rss2)")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
return fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
out, err := os.Create(destPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer out.Close()
|
||||
|
||||
_, err = io.Copy(out, resp.Body)
|
||||
return err
|
||||
}
|
||||
|
||||
func fetchWikipediaInfo(valor string) (*WikiSummary, error) {
|
||||
// Normalize the value to be wiki-compatible
|
||||
title := strings.ReplaceAll(strings.TrimSpace(valor), " ", "_")
|
||||
encodedTitle := url.PathEscape(title)
|
||||
|
||||
apiURL := fmt.Sprintf("https://es.wikipedia.org/api/rest_v1/page/summary/%s", encodedTitle)
|
||||
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
req, err := http.NewRequest("GET", apiURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Per MediaWiki API policy: https://meta.wikimedia.org/wiki/User-Agent_policy
|
||||
req.Header.Set("User-Agent", "RSS2-WikiWorker/1.0 (pietrelinux@gmail.com)")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == 429 {
|
||||
return nil, fmt.Errorf("HTTP 429: Too Many Requests (Rate Limited)")
|
||||
}
|
||||
if resp.StatusCode == 404 {
|
||||
return nil, nil // Not found, but handled successfully without error
|
||||
}
|
||||
if resp.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var summary WikiSummary
|
||||
if err := json.NewDecoder(resp.Body).Decode(&summary); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Filter out disambiguation pages
|
||||
if summary.Type == "disambiguation" {
|
||||
return nil, nil // Treat as not found to strictly avoid incorrect tooltips
|
||||
}
|
||||
|
||||
return &summary, nil
|
||||
}
|
||||
|
||||
// processTag enriches one tag with Wikipedia data: it fetches the article
// summary, optionally downloads the thumbnail into imagesDir, and persists
// summary/url/image onto the tags row. On a successful lookup (or a
// definitive "not found") the tag is marked wiki_checked so it is not
// retried forever; on transient errors it is left unchecked for retry.
func processTag(ctx context.Context, tag Tag) {
	logger.Printf("Procesando tag %d: %s", tag.ID, tag.Valor)

	summary, err := fetchWikipediaInfo(tag.Valor)
	if err != nil {
		// Transient failure (network, HTTP 429): leave wiki_checked FALSE
		// so the tag is retried on a later cycle.
		logger.Printf("Error al consultar Wikipedia para %s: %v", tag.Valor, err)
		return
	}

	if summary == nil || summary.Extract == "" {
		// Not found or disambiguation: mark checked so we stop retrying.
		_, _ = pool.Exec(ctx, "UPDATE tags SET wiki_checked = TRUE WHERE id = $1", tag.ID)
		logger.Printf("No se encontraron resultados válidos en Wikipedia para: %s", tag.Valor)
		return
	}

	var localImagePath *string
	if summary.Thumbnail != nil && summary.Thumbnail.Source != "" {
		// Keep the original extension for PNGs; default to .jpg otherwise.
		ext := ".jpg"
		if strings.HasSuffix(strings.ToLower(summary.Thumbnail.Source), ".png") {
			ext = ".png"
		}
		fileName := fmt.Sprintf("wiki_%d%s", tag.ID, ext)
		destPath := filepath.Join(imagesDir, fileName)

		if err := downloadImage(summary.Thumbnail.Source, destPath); err != nil {
			logger.Printf("Error descargando imagen para %s: %v", tag.Valor, err)
			// Store the external URL as a fallback if the download fails.
			src := summary.Thumbnail.Source
			localImagePath = &src
		} else {
			// Local copies are served by the API under /api/wiki-images/.
			relativePath := "/api/wiki-images/" + fileName
			localImagePath = &relativePath
		}
	}

	wikiURL := summary.ContentUrls.Desktop.Page

	_, err = pool.Exec(ctx, `
		UPDATE tags
		SET wiki_summary = $1,
		    wiki_url = $2,
		    image_path = $3,
		    wiki_checked = TRUE
		WHERE id = $4
	`, summary.Extract, wikiURL, localImagePath, tag.ID)

	if err != nil {
		logger.Printf("Error al actualizar la base de datos para tag %d: %v", tag.ID, err)
	} else {
		logger.Printf("Actualizado con éxito: %s (Imagen: %v)", tag.Valor, localImagePath != nil)
	}
}
|
||||
|
||||
func main() {
|
||||
if val := os.Getenv("WIKI_SLEEP"); val != "" {
|
||||
if sleep, err := fmt.Sscanf(val, "%d", &sleepInterval); err == nil && sleep > 0 {
|
||||
sleepInterval = sleep
|
||||
}
|
||||
}
|
||||
|
||||
logger.Println("Iniciando Wiki Worker...")
|
||||
|
||||
if err := os.MkdirAll(imagesDir, 0755); err != nil {
|
||||
logger.Fatalf("Error creando directorio de imágenes: %v", err)
|
||||
}
|
||||
|
||||
cfg := workers.LoadDBConfig()
|
||||
if err := workers.Connect(cfg); err != nil {
|
||||
logger.Fatalf("Failed to connect to database: %v", err)
|
||||
}
|
||||
pool = workers.GetPool()
|
||||
defer workers.Close()
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
go func() {
|
||||
<-sigChan
|
||||
logger.Println("Cerrando gracefully...")
|
||||
workers.Close()
|
||||
os.Exit(0)
|
||||
}()
|
||||
|
||||
logger.Printf("Configuración: sleep=%ds, batch=%d", sleepInterval, batchSize)
|
||||
|
||||
for {
|
||||
tags, err := getPendingTags(ctx)
|
||||
if err != nil {
|
||||
logger.Printf("Error recuperando tags pendientes: %v", err)
|
||||
time.Sleep(10 * time.Second)
|
||||
continue
|
||||
}
|
||||
|
||||
if len(tags) == 0 {
|
||||
logger.Printf("No hay tags pendientes. Durmiendo %d segundos...", sleepInterval)
|
||||
time.Sleep(time.Duration(sleepInterval) * time.Second)
|
||||
continue
|
||||
}
|
||||
|
||||
logger.Printf("Recuperados %d tags para procesar...", len(tags))
|
||||
|
||||
for _, tag := range tags {
|
||||
processTag(ctx, tag)
|
||||
time.Sleep(3 * time.Second) // Increased delay to avoid Wikipedia Rate Limits (429)
|
||||
}
|
||||
}
|
||||
}
|
||||
51
backend/go.mod
Normal file
51
backend/go.mod
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
module github.com/rss2/backend
|
||||
|
||||
go 1.22
|
||||
|
||||
require (
|
||||
github.com/PuerkitoBio/goquery v1.9.2
|
||||
github.com/gin-gonic/gin v1.9.1
|
||||
github.com/golang-jwt/jwt/v5 v5.0.0
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/jackc/pgx/v5 v5.4.3
|
||||
github.com/mmcdole/gofeed v1.2.1
|
||||
github.com/redis/go-redis/v9 v9.0.5
|
||||
golang.org/x/crypto v0.26.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/andybalholm/cascadia v1.3.2 // indirect
|
||||
github.com/bytedance/sonic v1.9.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
|
||||
github.com/gabriel-vasile/mimetype v1.4.2 // indirect
|
||||
github.com/gin-contrib/sse v0.1.0 // indirect
|
||||
github.com/go-playground/locales v0.14.1 // indirect
|
||||
github.com/go-playground/universal-translator v0.18.1 // indirect
|
||||
github.com/go-playground/validator/v10 v10.14.0 // indirect
|
||||
github.com/goccy/go-json v0.10.2 // indirect
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/jackc/pgpassfile v1.0.0 // indirect
|
||||
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
|
||||
github.com/jackc/puddle/v2 v2.2.1 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.2.4 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/leodido/go-urn v1.2.4 // indirect
|
||||
github.com/mattn/go-isatty v0.0.19 // indirect
|
||||
github.com/mmcdole/goxpp v1.1.0 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.0.8 // indirect
|
||||
github.com/rogpeppe/go-internal v1.14.1 // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
github.com/ugorji/go/codec v1.2.11 // indirect
|
||||
golang.org/x/arch v0.3.0 // indirect
|
||||
golang.org/x/net v0.28.0 // indirect
|
||||
golang.org/x/sync v0.8.0 // indirect
|
||||
golang.org/x/sys v0.26.0 // indirect
|
||||
golang.org/x/text v0.17.0 // indirect
|
||||
google.golang.org/protobuf v1.34.2 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
156
backend/go.sum
Normal file
156
backend/go.sum
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE=
|
||||
github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk=
|
||||
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
|
||||
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
|
||||
github.com/bsm/ginkgo/v2 v2.7.0 h1:ItPMPH90RbmZJt5GtkcNvIRuGEdwlBItdNVoyzaNQao=
|
||||
github.com/bsm/ginkgo/v2 v2.7.0/go.mod h1:AiKlXPm7ItEHNc/2+OkrNG4E0ITzojb9/xWzvQ9XZ9w=
|
||||
github.com/bsm/gomega v1.26.0 h1:LhQm+AFcgV2M0WyKroMASzAzCAJVpAxQXv4SaI9a69Y=
|
||||
github.com/bsm/gomega v1.26.0/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
|
||||
github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM=
|
||||
github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s=
|
||||
github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
|
||||
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams=
|
||||
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
|
||||
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
|
||||
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
|
||||
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
|
||||
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
|
||||
github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
|
||||
github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
|
||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
||||
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
|
||||
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
|
||||
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
|
||||
github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js=
|
||||
github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU=
|
||||
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
github.com/golang-jwt/jwt/v5 v5.0.0 h1:1n1XNM9hk7O9mnQoNBGolZvzebBQ7p93ULHRc28XJUE=
|
||||
github.com/golang-jwt/jwt/v5 v5.0.0/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
|
||||
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
|
||||
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk=
|
||||
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
|
||||
github.com/jackc/pgx/v5 v5.4.3 h1:cxFyXhxlvAifxnkKKdlxv8XqUf59tDlYjnV5YYfsJJY=
|
||||
github.com/jackc/pgx/v5 v5.4.3/go.mod h1:Ig06C2Vu0t5qXC60W8sqIthScaEnFvojjj9dSljmHRA=
|
||||
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
|
||||
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||
github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk=
|
||||
github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
|
||||
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
|
||||
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
|
||||
github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
|
||||
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
|
||||
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mmcdole/gofeed v1.2.1 h1:tPbFN+mfOLcM1kDF1x2c/N68ChbdBatkppdzf/vDe1s=
|
||||
github.com/mmcdole/gofeed v1.2.1/go.mod h1:2wVInNpgmC85q16QTTuwbuKxtKkHLCDDtf0dCmnrNr4=
|
||||
github.com/mmcdole/goxpp v1.1.0 h1:WwslZNF7KNAXTFuzRtn/OKZxFLJAAyOA9w82mDz2ZGI=
|
||||
github.com/mmcdole/goxpp v1.1.0/go.mod h1:v+25+lT2ViuQ7mVxcncQ8ch1URund48oH+jhjiwEgS8=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
|
||||
github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/redis/go-redis/v9 v9.0.5 h1:CuQcn5HIEeK7BgElubPP8CGtE0KakrnbBSTLjathl5o=
|
||||
github.com/redis/go-redis/v9 v9.0.5/go.mod h1:WqMKv5vnQbRuZstUwxQI195wHy+t4PuXDOjzMvcuQHk=
|
||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
|
||||
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
|
||||
github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||
golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
|
||||
golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
|
||||
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
|
||||
golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
|
||||
golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
|
||||
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
|
||||
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
|
||||
golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
|
||||
google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
|
||||
72
backend/internal/cache/redis.go
vendored
Normal file
72
backend/internal/cache/redis.go
vendored
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
package cache
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// Client is the process-wide redis client, set by Connect.
var Client *redis.Client
|
||||
|
||||
func Connect(redisURL string) error {
|
||||
opt, err := redis.ParseURL(redisURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse redis URL: %w", err)
|
||||
}
|
||||
|
||||
Client = redis.NewClient(opt)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err = Client.Ping(ctx).Err(); err != nil {
|
||||
return fmt.Errorf("failed to ping redis: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func Close() {
|
||||
if Client != nil {
|
||||
Client.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func GetClient() *redis.Client {
|
||||
return Client
|
||||
}
|
||||
|
||||
// SearchKey builds the cache key for one page of search results.
func SearchKey(query, lang string, page, perPage int) string {
	const format = "search:%s:%s:%d:%d"
	return fmt.Sprintf(format, query, lang, page, perPage)
}

// NewsKey builds the cache key for a single news item in a language.
func NewsKey(newsID int64, lang string) string {
	const format = "news:%d:%s"
	return fmt.Sprintf(format, newsID, lang)
}

// FeedKey builds the cache key for a feed.
func FeedKey(feedID int64) string {
	const format = "feed:%d"
	return fmt.Sprintf(format, feedID)
}
|
||||
|
||||
func Set(ctx context.Context, key string, value interface{}, expiration time.Duration) error {
|
||||
data, err := json.Marshal(value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return Client.Set(ctx, key, data, expiration).Err()
|
||||
}
|
||||
|
||||
func Get(ctx context.Context, key string) (string, error) {
|
||||
return Client.Get(ctx, key).Result()
|
||||
}
|
||||
|
||||
// Unmarshal decodes JSON data into v (thin wrapper over encoding/json).
func Unmarshal(data []byte, v interface{}) error {
	return json.Unmarshal(data, v)
}

// Marshal encodes v as JSON (thin wrapper over encoding/json).
func Marshal(v interface{}) ([]byte, error) {
	return json.Marshal(v)
}
|
||||
66
backend/internal/config/config.go
Normal file
66
backend/internal/config/config.go
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Config holds all runtime settings for the API server, populated from
// environment variables by Load.
type Config struct {
	ServerPort         string        // HTTP listen port (SERVER_PORT)
	DatabaseURL        string        // Postgres connection string (DATABASE_URL)
	RedisURL           string        // Redis connection URL (REDIS_URL)
	QdrantHost         string        // Qdrant host (QDRANT_HOST)
	QdrantPort         int           // Qdrant port (QDRANT_PORT)
	SecretKey          string        // JWT signing secret (SECRET_KEY)
	JWTExpiration      time.Duration // JWT lifetime (JWT_EXPIRATION)
	TranslationURL     string        // translation service endpoint (TRANSLATION_URL)
	OllamaURL          string        // Ollama endpoint (OLLAMA_URL)
	SpacyURL           string        // spaCy service endpoint (SPACY_URL)
	DefaultLang        string        // default content language (DEFAULT_LANG)
	NewsPerPage        int           // pagination size (NEWS_PER_PAGE)
	RateLimitPerMinute int           // per-client rate limit (RATE_LIMIT_PER_MINUTE)
}
|
||||
|
||||
func Load() *Config {
|
||||
return &Config{
|
||||
ServerPort: getEnv("SERVER_PORT", "8080"),
|
||||
DatabaseURL: getEnv("DATABASE_URL", "postgres://rss:rss@localhost:5432/rss"),
|
||||
RedisURL: getEnv("REDIS_URL", "redis://localhost:6379"),
|
||||
QdrantHost: getEnv("QDRANT_HOST", "localhost"),
|
||||
QdrantPort: getEnvInt("QDRANT_PORT", 6333),
|
||||
SecretKey: getEnv("SECRET_KEY", "change-this-secret-key"),
|
||||
JWTExpiration: getEnvDuration("JWT_EXPIRATION", 24*time.Hour),
|
||||
TranslationURL: getEnv("TRANSLATION_URL", "http://libretranslate:7790"),
|
||||
OllamaURL: getEnv("OLLAMA_URL", "http://ollama:11434"),
|
||||
SpacyURL: getEnv("SPACY_URL", "http://spacy:8000"),
|
||||
DefaultLang: getEnv("DEFAULT_LANG", "es"),
|
||||
NewsPerPage: getEnvInt("NEWS_PER_PAGE", 30),
|
||||
RateLimitPerMinute: getEnvInt("RATE_LIMIT_PER_MINUTE", 60),
|
||||
}
|
||||
}
|
||||
|
||||
func getEnv(key, defaultValue string) string {
|
||||
if value := os.Getenv(key); value != "" {
|
||||
return value
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
func getEnvInt(key string, defaultValue int) int {
|
||||
if value := os.Getenv(key); value != "" {
|
||||
if intVal, err := strconv.Atoi(value); err == nil {
|
||||
return intVal
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
func getEnvDuration(key string, defaultValue time.Duration) time.Duration {
|
||||
if value := os.Getenv(key); value != "" {
|
||||
if duration, err := time.ParseDuration(value); err == nil {
|
||||
return duration
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
44
backend/internal/db/postgres.go
Normal file
44
backend/internal/db/postgres.go
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
package db
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
)
|
||||
|
||||
// Pool is the process-wide pgx connection pool, set by Connect.
var Pool *pgxpool.Pool
|
||||
|
||||
func Connect(databaseURL string) error {
|
||||
config, err := pgxpool.ParseConfig(databaseURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse database URL: %w", err)
|
||||
}
|
||||
|
||||
config.MaxConns = 25
|
||||
config.MinConns = 5
|
||||
config.MaxConnLifetime = time.Hour
|
||||
config.MaxConnIdleTime = 30 * time.Minute
|
||||
|
||||
Pool, err = pgxpool.NewWithConfig(context.Background(), config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create pool: %w", err)
|
||||
}
|
||||
|
||||
if err = Pool.Ping(context.Background()); err != nil {
|
||||
return fmt.Errorf("failed to ping database: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func Close() {
|
||||
if Pool != nil {
|
||||
Pool.Close()
|
||||
}
|
||||
}
|
||||
|
||||
// GetPool returns the shared pgx connection pool (nil before Connect).
func GetPool() *pgxpool.Pool {
	return Pool
}
|
||||
760
backend/internal/handlers/admin.go
Normal file
760
backend/internal/handlers/admin.go
Normal file
|
|
@ -0,0 +1,760 @@
|
|||
package handlers
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/rss2/backend/internal/db"
|
||||
"github.com/rss2/backend/internal/models"
|
||||
)
|
||||
|
||||
|
||||
|
||||
// CreateAlias registers one or more aliases for a canonical entity name and
// merges any existing tags that match an alias into the canonical tag.
//
// Everything runs inside a single transaction: the canonical tag is
// upserted, each alias is upserted into entity_aliases, and if an alias
// already existed as its own tag its news mentions are reassigned to the
// canonical tag (skipping rows that would collide with an existing mention)
// before the alias tag is deleted.
//
// Responses: 400 on a malformed body, 500 on any DB failure (transaction
// rolled back via the deferred Rollback), 201 on success.
func CreateAlias(c *gin.Context) {
	var req models.EntityAliasRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request", "message": err.Error()})
		return
	}

	ctx := c.Request.Context()
	tx, err := db.GetPool().Begin(ctx)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction", "message": err.Error()})
		return
	}
	// Rollback is a no-op after a successful Commit.
	defer tx.Rollback(ctx)

	// 1. Ensure the canonical tag exists in tags table.
	// The dummy DO UPDATE makes RETURNING id work for pre-existing rows too.
	var canonicalTagId int
	err = tx.QueryRow(ctx, `
		INSERT INTO tags (valor, tipo) VALUES ($1, $2)
		ON CONFLICT (valor, tipo) DO UPDATE SET valor = EXCLUDED.valor
		RETURNING id`, req.CanonicalName, req.Tipo).Scan(&canonicalTagId)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to ensure canonical tag", "message": err.Error()})
		return
	}

	for _, alias := range req.Aliases {
		alias = strings.TrimSpace(alias)
		if alias == "" {
			continue
		}

		// Insert the alias mapping into entity_aliases (upsert: an alias
		// can be re-pointed at a different canonical name).
		_, err = tx.Exec(ctx, `
			INSERT INTO entity_aliases (canonical_name, alias, tipo)
			VALUES ($1, $2, $3)
			ON CONFLICT (alias, tipo) DO UPDATE SET canonical_name = EXCLUDED.canonical_name`,
			req.CanonicalName, alias, req.Tipo)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to insert alias", "message": err.Error()})
			return
		}

		// 2. Check if the original alias string actually exists as a tag.
		// A scan error (no row) simply skips the merge below.
		var aliasTagId int
		err = tx.QueryRow(ctx, "SELECT id FROM tags WHERE valor = $1 AND tipo = $2", alias, req.Tipo).Scan(&aliasTagId)
		if err == nil && aliasTagId != 0 && aliasTagId != canonicalTagId {
			// 3. Move all mentions in tags_noticia to the canonical tag id
			// safely: the NOT EXISTS guard avoids creating a duplicate
			// (tag_id, noticia_id, traduccion_id) row.
			_, err = tx.Exec(ctx, `
				UPDATE tags_noticia
				SET tag_id = $1
				WHERE tag_id = $2 AND NOT EXISTS (
					SELECT 1 FROM tags_noticia tn2
					WHERE tn2.tag_id = $1 AND tn2.noticia_id = tags_noticia.noticia_id AND tn2.traduccion_id = tags_noticia.traduccion_id
				)
			`, canonicalTagId, aliasTagId)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to reassign news mentions safely", "message": err.Error()})
				return
			}

			// Delete any remaining orphaned mentions of the alias that
			// couldn't be merged (duplicates blocked by the guard above).
			_, err = tx.Exec(ctx, "DELETE FROM tags_noticia WHERE tag_id = $1", aliasTagId)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete orphaned mentions", "message": err.Error()})
				return
			}

			// 4. Delete the original alias tag now that it is fully merged.
			_, err = tx.Exec(ctx, "DELETE FROM tags WHERE id = $1", aliasTagId)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete old tag", "message": err.Error()})
				return
			}
		}
	}

	if err := tx.Commit(ctx); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": err.Error()})
		return
	}

	c.JSON(http.StatusCreated, gin.H{
		"message":        "Aliases created and metrics merged successfully",
		"canonical_name": req.CanonicalName,
		"aliases_added":  req.Aliases,
		"tipo":           req.Tipo,
	})
}
|
||||
|
||||
|
||||
|
||||
func ExportAliases(c *gin.Context) {
|
||||
rows, err := db.GetPool().Query(c.Request.Context(),
|
||||
"SELECT alias, canonical_name, tipo FROM entity_aliases ORDER BY tipo, canonical_name")
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get aliases", "message": err.Error()})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
c.Header("Content-Type", "text/csv")
|
||||
c.Header("Content-Disposition", "attachment; filename=aliases.csv")
|
||||
c.Header("Cache-Control", "no-cache")
|
||||
|
||||
writer := csv.NewWriter(c.Writer)
|
||||
writer.Write([]string{"alias", "canonical_name", "tipo"})
|
||||
|
||||
for rows.Next() {
|
||||
var alias, canonical, tipo string
|
||||
rows.Scan(&alias, &canonical, &tipo)
|
||||
writer.Write([]string{alias, canonical, tipo})
|
||||
}
|
||||
writer.Flush()
|
||||
}
|
||||
|
||||
// ImportAliases bulk-loads alias -> canonical-name mappings from an
// uploaded CSV file (form field "file"). Expected columns, in order:
// alias, canonical_name, tipo; the first row is treated as a header and
// skipped. Each data row is upserted into entity_aliases inside one
// transaction; malformed or partially-empty rows are counted as "skipped"
// instead of aborting the whole import.
func ImportAliases(c *gin.Context) {
	file, err := c.FormFile("file")
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "No file uploaded"})
		return
	}

	src, err := file.Open()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to open file"})
		return
	}
	defer src.Close()

	// The whole file is read into memory; acceptable for admin-sized CSVs.
	reader := csv.NewReader(src)
	records, err := reader.ReadAll()
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Failed to parse CSV", "message": err.Error()})
		return
	}

	// Need at least a header row plus one data row.
	if len(records) < 2 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "CSV file is empty or has no data rows"})
		return
	}

	// NOTE(review): this handler uses context.Background() rather than
	// c.Request.Context() (unlike the sibling handlers), so a client
	// disconnect will not cancel the import — confirm that is intentional.
	ctx := context.Background()
	tx, err := db.GetPool().Begin(ctx)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction"})
		return
	}
	// Rollback is a no-op after a successful Commit.
	defer tx.Rollback(ctx)

	inserted := 0
	skipped := 0

	// records[1:] skips the header row.
	for i, record := range records[1:] {
		if len(record) < 3 {
			skipped++
			continue
		}

		alias := strings.TrimSpace(record[0])
		canonical := strings.TrimSpace(record[1])
		tipo := strings.TrimSpace(record[2])

		if alias == "" || canonical == "" {
			skipped++
			continue
		}

		// Upsert: re-importing an existing alias re-points its canonical name.
		_, err = tx.Exec(ctx,
			"INSERT INTO entity_aliases (alias, canonical_name, tipo) VALUES ($1, $2, $3) ON CONFLICT (alias, tipo) DO UPDATE SET canonical_name = $2",
			alias, canonical, tipo)
		if err != nil {
			// Best-effort import: log the bad row and keep going.
			fmt.Printf("Error inserting row %d: %v\n", i+1, err)
			skipped++
			continue
		}
		inserted++
	}

	if err := tx.Commit(ctx); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": err.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"message":  "Import completed",
		"inserted": inserted,
		"skipped":  skipped,
	})
}
|
||||
|
||||
func GetAdminStats(c *gin.Context) {
|
||||
var totalUsers, totalAliases int
|
||||
|
||||
db.GetPool().QueryRow(c.Request.Context(), "SELECT COUNT(*) FROM users").Scan(&totalUsers)
|
||||
db.GetPool().QueryRow(c.Request.Context(), "SELECT COUNT(*) FROM entity_aliases").Scan(&totalAliases)
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"total_users": totalUsers,
|
||||
"total_aliases": totalAliases,
|
||||
})
|
||||
}
|
||||
|
||||
func GetUsers(c *gin.Context) {
|
||||
rows, err := db.GetPool().Query(c.Request.Context(), `
|
||||
SELECT id, email, username, is_admin, created_at, updated_at
|
||||
FROM users ORDER BY created_at DESC`)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get users", "message": err.Error()})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
type UserRow struct {
|
||||
ID int64 `json:"id"`
|
||||
Email string `json:"email"`
|
||||
Username string `json:"username"`
|
||||
IsAdmin bool `json:"is_admin"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
}
|
||||
|
||||
var users []UserRow
|
||||
for rows.Next() {
|
||||
var u UserRow
|
||||
if err := rows.Scan(&u.ID, &u.Email, &u.Username, &u.IsAdmin, &u.CreatedAt, &u.UpdatedAt); err != nil {
|
||||
continue
|
||||
}
|
||||
users = append(users, u)
|
||||
}
|
||||
|
||||
if users == nil {
|
||||
users = []UserRow{}
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"users": users, "total": len(users)})
|
||||
}
|
||||
|
||||
func PromoteUser(c *gin.Context) {
|
||||
id, err := strconv.Atoi(c.Param("id"))
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid user ID"})
|
||||
return
|
||||
}
|
||||
|
||||
result, err := db.GetPool().Exec(c.Request.Context(), "UPDATE users SET is_admin = true WHERE id = $1", id)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to promote user", "message": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if result.RowsAffected() == 0 {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "User not found"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"message": "User promoted to admin"})
|
||||
}
|
||||
|
||||
func DemoteUser(c *gin.Context) {
|
||||
id, err := strconv.Atoi(c.Param("id"))
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid user ID"})
|
||||
return
|
||||
}
|
||||
|
||||
result, err := db.GetPool().Exec(c.Request.Context(), "UPDATE users SET is_admin = false WHERE id = $1", id)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to demote user", "message": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if result.RowsAffected() == 0 {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "User not found"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"message": "User demoted from admin"})
|
||||
}
|
||||
|
||||
// ResetDatabase deletes every row from all content tables inside a single
// transaction. This is destructive and irreversible; it is intended to be
// reachable only through admin-protected routes.
func ResetDatabase(c *gin.Context) {
	ctx := c.Request.Context()

	// Tables to clear, in this order.
	// NOTE(review): "noticias" is deleted before "traducciones" and
	// "tags_noticia", which appear (from the other handlers) to reference
	// it — confirm the foreign keys are ON DELETE CASCADE or absent,
	// otherwise these DELETEs can fail with FK violations.
	tables := []string{
		"noticias",
		"feeds",
		"traducciones",
		"tags_noticia",
		"tags",
		"entity_aliases",
		"favoritos",
		"videos",
		"video_parrillas",
		"eventos",
		"search_history",
	}

	tx, err := db.GetPool().Begin(ctx)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction"})
		return
	}
	// Rollback is a no-op after a successful Commit; any mid-loop failure
	// leaves the database untouched.
	defer tx.Rollback(ctx)

	for _, table := range tables {
		// Table names come from the fixed list above, never from user
		// input, so string concatenation is safe here.
		_, err = tx.Exec(ctx, "DELETE FROM "+table)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete from " + table, "message": err.Error()})
			return
		}
	}

	if err := tx.Commit(ctx); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": err.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"message":        "Database reset successfully. All data has been deleted.",
		"tables_cleared": tables,
	})
}
|
||||
|
||||
// WorkerConfig is the request/response shape used by the translator
// worker-control endpoints (SetWorkerConfig, StartWorkers, GetWorkerStatus).
type WorkerConfig struct {
	Type    string `json:"type"`    // "cpu" or "gpu" (validated in SetWorkerConfig)
	Workers int    `json:"workers"` // desired replica count, clamped to 1-8
	Status  string `json:"status"`  // lifecycle state, e.g. "running" / "stopped"
}
|
||||
|
||||
func GetWorkerStatus(c *gin.Context) {
|
||||
var translatorType, translatorWorkers, translatorStatus string
|
||||
|
||||
err := db.GetPool().QueryRow(c.Request.Context(), "SELECT value FROM config WHERE key = 'translator_type'").Scan(&translatorType)
|
||||
if err != nil {
|
||||
translatorType = "cpu"
|
||||
}
|
||||
|
||||
err = db.GetPool().QueryRow(c.Request.Context(), "SELECT value FROM config WHERE key = 'translator_workers'").Scan(&translatorWorkers)
|
||||
if err != nil {
|
||||
translatorWorkers = "2"
|
||||
}
|
||||
|
||||
err = db.GetPool().QueryRow(c.Request.Context(), "SELECT value FROM config WHERE key = 'translator_status'").Scan(&translatorStatus)
|
||||
if err != nil {
|
||||
translatorStatus = "stopped"
|
||||
}
|
||||
|
||||
workers, _ := strconv.Atoi(translatorWorkers)
|
||||
|
||||
// Verificar si los contenedores están corriendo
|
||||
runningCount := 0
|
||||
if translatorStatus == "running" {
|
||||
cmd := exec.Command("docker", "compose", "ps", "-q", "translator")
|
||||
output, _ := cmd.Output()
|
||||
if len(output) > 0 {
|
||||
runningCount = workers
|
||||
}
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"type": translatorType,
|
||||
"workers": workers,
|
||||
"status": translatorStatus,
|
||||
"running": runningCount,
|
||||
})
|
||||
}
|
||||
|
||||
// SetWorkerConfig validates and persists the translator worker settings
// (type and count) into the config table. It does not start or stop any
// containers; StartWorkers/StopWorkers do that.
func SetWorkerConfig(c *gin.Context) {
	var req WorkerConfig
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request", "message": err.Error()})
		return
	}

	if req.Type != "cpu" && req.Type != "gpu" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Type must be 'cpu' or 'gpu'"})
		return
	}

	if req.Workers < 1 || req.Workers > 8 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Workers must be between 1 and 8"})
		return
	}

	ctx := c.Request.Context()

	// NOTE(review): plain UPDATEs silently affect 0 rows if the
	// 'translator_type' / 'translator_workers' keys were never seeded in
	// the config table — confirm a migration inserts them, or consider an
	// upsert.
	_, err := db.GetPool().Exec(ctx, "UPDATE config SET value = $1, updated_at = NOW() WHERE key = 'translator_type'", req.Type)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update translator_type"})
		return
	}

	_, err = db.GetPool().Exec(ctx, "UPDATE config SET value = $1, updated_at = NOW() WHERE key = 'translator_workers'", strconv.Itoa(req.Workers))
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to update translator_workers"})
		return
	}

	// req.Status is echoed back unmodified; it is not validated or stored here.
	c.JSON(http.StatusOK, gin.H{
		"message": "Worker configuration updated",
		"type":    req.Type,
		"workers": req.Workers,
		"status":  req.Status,
	})
}
|
||||
|
||||
// StartWorkers (re)launches the translator containers via docker compose.
// Effective settings are the stored config values, optionally overridden by
// the request body, with the worker count clamped to 1-8. The matching
// service ("translator" or "translator-gpu") is scaled to that count after
// stopping any translator containers already running, and the final state
// is written back to the config table.
func StartWorkers(c *gin.Context) {
	var req WorkerConfig
	// Bind errors are deliberately ignored: an empty body means "start with
	// the stored configuration".
	c.ShouldBindJSON(&req)

	ctx := c.Request.Context()

	// Fetch the current configuration, defaulting when keys are missing.
	var translatorType, translatorWorkers string
	err := db.GetPool().QueryRow(ctx, "SELECT value FROM config WHERE key = 'translator_type'").Scan(&translatorType)
	if err != nil || translatorType == "" {
		translatorType = "cpu"
	}
	err = db.GetPool().QueryRow(ctx, "SELECT value FROM config WHERE key = 'translator_workers'").Scan(&translatorWorkers)
	if err != nil || translatorWorkers == "" {
		translatorWorkers = "2"
	}

	// Request-body overrides take precedence over stored values.
	if req.Type != "" {
		translatorType = req.Type
	}
	if req.Workers > 0 {
		translatorWorkers = strconv.Itoa(req.Workers)
	}

	// Clamp the worker count to the supported 1-8 range.
	workers, _ := strconv.Atoi(translatorWorkers)
	if workers < 1 {
		workers = 2
	}
	if workers > 8 {
		workers = 8
	}

	// Decide which compose service to launch.
	serviceName := "translator"
	if translatorType == "gpu" {
		serviceName = "translator-gpu"
	}

	// Stop any existing translator containers first; errors are ignored
	// (the services may simply not be running).
	stopCmd := exec.Command("docker", "compose", "stop", "translator", "translator-gpu")
	stopCmd.Dir = "/datos/rss2" // hardcoded compose project directory
	stopCmd.Run()

	// Start the chosen service scaled to the requested worker count.
	startCmd := exec.Command("docker", "compose", "up", "-d", "--scale", fmt.Sprintf("%s=%d", serviceName, workers), serviceName)
	startCmd.Dir = "/datos/rss2"
	output, err := startCmd.CombinedOutput()

	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to start workers",
			"details": string(output),
		})
		return
	}

	// Persist the new state; write errors are ignored (best effort).
	db.GetPool().Exec(ctx, "UPDATE config SET value = 'running', updated_at = NOW() WHERE key = 'translator_status'")
	db.GetPool().Exec(ctx, "UPDATE config SET value = $1, updated_at = NOW() WHERE key = 'translator_type'", translatorType)
	db.GetPool().Exec(ctx, "UPDATE config SET value = $1, updated_at = NOW() WHERE key = 'translator_workers'", translatorWorkers)

	c.JSON(http.StatusOK, gin.H{
		"message": "Workers started successfully",
		"type":    translatorType,
		"workers": workers,
		"status":  "running",
	})
}
|
||||
|
||||
func StopWorkers(c *gin.Context) {
|
||||
// Detener traductores
|
||||
cmd := exec.Command("docker", "compose", "stop", "translator", "translator-gpu")
|
||||
cmd.Dir = "/datos/rss2"
|
||||
output, err := cmd.CombinedOutput()
|
||||
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{
|
||||
"error": "Failed to stop workers",
|
||||
"details": string(output),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// Actualizar estado en BD
|
||||
db.GetPool().Exec(c.Request.Context(), "UPDATE config SET value = 'stopped', updated_at = NOW() WHERE key = 'translator_status'")
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"message": "Workers stopped successfully",
|
||||
"status": "stopped",
|
||||
})
|
||||
}
|
||||
|
||||
// PatchEntityTipo changes the tipo of all tags matching a given valor,
// merging them into a single tag of the new tipo. All tags with that valor
// and a different tipo have their news mentions reassigned to the
// (upserted) target tag, duplicates dropped, and the old tags deleted —
// all inside one transaction.
func PatchEntityTipo(c *gin.Context) {
	var req struct {
		Valor   string `json:"valor" binding:"required"`
		NewTipo string `json:"new_tipo" binding:"required"`
	}
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request", "message": err.Error()})
		return
	}

	// Only these four entity categories are accepted.
	validTipos := map[string]bool{"persona": true, "organizacion": true, "lugar": true, "tema": true}
	if !validTipos[req.NewTipo] {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid tipo. Must be persona, organizacion, lugar or tema"})
		return
	}

	ctx := c.Request.Context()
	tx, err := db.GetPool().Begin(ctx)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction", "message": err.Error()})
		return
	}
	// Rollback is a no-op after a successful Commit.
	defer tx.Rollback(ctx)

	// Since we don't know the exact old tipo, find all tags with this valor
	// that are NOT already the new tipo.
	rows, err := tx.Query(ctx, "SELECT id, tipo FROM tags WHERE valor = $1 AND tipo != $2", req.Valor, req.NewTipo)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to fetch existing tags", "message": err.Error()})
		return
	}

	type OldTag struct {
		ID   int
		Tipo string
	}
	// Materialize the result set before issuing further statements on the
	// same transaction.
	var tagsToMove []OldTag
	for rows.Next() {
		var ot OldTag
		if err := rows.Scan(&ot.ID, &ot.Tipo); err == nil {
			tagsToMove = append(tagsToMove, ot)
		}
	}
	rows.Close()

	if len(tagsToMove) == 0 {
		c.JSON(http.StatusOK, gin.H{"message": "No entities found to update or already the requested tipo"})
		return
	}

	// Make sure the target tag (valor, new_tipo) exists; the dummy
	// DO UPDATE makes RETURNING id work for pre-existing rows too.
	var targetTagId int
	err = tx.QueryRow(ctx, `
		INSERT INTO tags (valor, tipo) VALUES ($1, $2)
		ON CONFLICT (valor, tipo) DO UPDATE SET valor = EXCLUDED.valor
		RETURNING id`, req.Valor, req.NewTipo).Scan(&targetTagId)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to ensure target tag", "message": err.Error()})
		return
	}

	totalMoved := 0
	for _, old := range tagsToMove {
		if old.ID == targetTagId {
			continue
		}

		// Move valid tags_noticia references to the target tag id safely:
		// the NOT EXISTS guard avoids creating a duplicate
		// (tag_id, noticia_id, traduccion_id) row.
		res, err := tx.Exec(ctx, `
			UPDATE tags_noticia
			SET tag_id = $1
			WHERE tag_id = $2 AND NOT EXISTS (
				SELECT 1 FROM tags_noticia tn2
				WHERE tn2.tag_id = $1 AND tn2.noticia_id = tags_noticia.noticia_id AND tn2.traduccion_id = tags_noticia.traduccion_id
			)
		`, targetTagId, old.ID)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to reassign news mentions", "message": err.Error()})
			return
		}
		totalMoved += int(res.RowsAffected())

		// Delete any remaining orphaned mentions (duplicates blocked above).
		_, err = tx.Exec(ctx, "DELETE FROM tags_noticia WHERE tag_id = $1", old.ID)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete orphaned mentions", "message": err.Error()})
			return
		}

		// Delete the old tag since it's now merged.
		_, err = tx.Exec(ctx, "DELETE FROM tags WHERE id = $1", old.ID)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete old tag", "message": err.Error()})
			return
		}
	}

	if err := tx.Commit(ctx); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": err.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"message":       "Entity tipo updated and merged successfully",
		"valor":         req.Valor,
		"new_tipo":      req.NewTipo,
		"tags_merged":   len(tagsToMove),
		"rows_affected": totalMoved,
	})
}
|
||||
|
||||
// BackupDatabase runs pg_dump and returns the SQL as a downloadable file
|
||||
func BackupDatabase(c *gin.Context) {
|
||||
dbHost := os.Getenv("DB_HOST")
|
||||
if dbHost == "" {
|
||||
dbHost = "db"
|
||||
}
|
||||
dbPort := os.Getenv("DB_PORT")
|
||||
if dbPort == "" {
|
||||
dbPort = "5432"
|
||||
}
|
||||
dbName := os.Getenv("DB_NAME")
|
||||
if dbName == "" {
|
||||
dbName = "rss"
|
||||
}
|
||||
dbUser := os.Getenv("DB_USER")
|
||||
if dbUser == "" {
|
||||
dbUser = "rss"
|
||||
}
|
||||
dbPass := os.Getenv("DB_PASS")
|
||||
|
||||
cmd := exec.Command("pg_dump",
|
||||
"-h", dbHost,
|
||||
"-p", dbPort,
|
||||
"-U", dbUser,
|
||||
"-d", dbName,
|
||||
"--no-password",
|
||||
)
|
||||
cmd.Env = append(os.Environ(), fmt.Sprintf("PGPASSWORD=%s", dbPass))
|
||||
|
||||
var out bytes.Buffer
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stdout = &out
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{
|
||||
"error": "pg_dump failed",
|
||||
"details": stderr.String(),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
filename := fmt.Sprintf("backup_%s.sql", time.Now().Format("2006-01-02_15-04-05"))
|
||||
c.Header("Content-Type", "application/octet-stream")
|
||||
c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=%s", filename))
|
||||
c.Header("Cache-Control", "no-cache")
|
||||
c.Data(http.StatusOK, "application/octet-stream", out.Bytes())
|
||||
}
|
||||
|
||||
// BackupNewsZipped performs a pg_dump of news tables and returns a ZIP file
|
||||
func BackupNewsZipped(c *gin.Context) {
|
||||
dbHost := os.Getenv("DB_HOST")
|
||||
if dbHost == "" {
|
||||
dbHost = "db"
|
||||
}
|
||||
dbPort := os.Getenv("DB_PORT")
|
||||
if dbPort == "" {
|
||||
dbPort = "5432"
|
||||
}
|
||||
dbName := os.Getenv("DB_NAME")
|
||||
if dbName == "" {
|
||||
dbName = "rss"
|
||||
}
|
||||
dbUser := os.Getenv("DB_USER")
|
||||
if dbUser == "" {
|
||||
dbUser = "rss"
|
||||
}
|
||||
dbPass := os.Getenv("DB_PASS")
|
||||
|
||||
// Tables to backup
|
||||
tables := []string{"noticias", "traducciones", "tags", "tags_noticia"}
|
||||
|
||||
args := []string{
|
||||
"-h", dbHost,
|
||||
"-p", dbPort,
|
||||
"-U", dbUser,
|
||||
"-d", dbName,
|
||||
"--no-password",
|
||||
}
|
||||
|
||||
for _, table := range tables {
|
||||
args = append(args, "-t", table)
|
||||
}
|
||||
|
||||
cmd := exec.Command("pg_dump", args...)
|
||||
cmd.Env = append(os.Environ(), fmt.Sprintf("PGPASSWORD=%s", dbPass))
|
||||
|
||||
var sqlOut bytes.Buffer
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stdout = &sqlOut
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{
|
||||
"error": "pg_dump failed",
|
||||
"details": stderr.String(),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// Create ZIP
|
||||
buf := new(bytes.Buffer)
|
||||
zw := zip.NewWriter(buf)
|
||||
|
||||
sqlFileName := fmt.Sprintf("backup_noticias_%s.sql", time.Now().Format("2006-01-02"))
|
||||
f, err := zw.Create(sqlFileName)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create ZIP entry", "message": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
_, err = f.Write(sqlOut.Bytes())
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to write to ZIP", "message": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if err := zw.Close(); err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to close ZIP writer", "message": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
filename := fmt.Sprintf("backup_noticias_%s.zip", time.Now().Format("2006-01-02_15-04-05"))
|
||||
c.Header("Content-Type", "application/zip")
|
||||
c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=%s", filename))
|
||||
c.Header("Cache-Control", "no-cache")
|
||||
c.Data(http.StatusOK, "application/zip", buf.Bytes())
|
||||
}
|
||||
|
||||
183
backend/internal/handlers/auth.go
Normal file
183
backend/internal/handlers/auth.go
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
package handlers
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
"github.com/rss2/backend/internal/config"
|
||||
"github.com/rss2/backend/internal/db"
|
||||
"github.com/rss2/backend/internal/models"
|
||||
"golang.org/x/crypto/bcrypt"
|
||||
)
|
||||
|
||||
// jwtSecret is the HMAC key used to sign and verify JWTs; set via InitAuth.
var jwtSecret []byte
|
||||
|
||||
func CheckFirstUser(c *gin.Context) {
|
||||
var count int
|
||||
err := db.GetPool().QueryRow(c.Request.Context(), "SELECT COUNT(*) FROM users").Scan(&count)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to check users"})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"is_first_user": count == 0, "total_users": count})
|
||||
}
|
||||
|
||||
// InitAuth sets the HMAC secret used to sign and verify JWTs.
func InitAuth(secret string) {
	jwtSecret = []byte(secret)
}
|
||||
|
||||
// Claims is the JWT payload issued at login/registration: the user's
// identity fields plus the standard registered claims (expiry, issued-at).
type Claims struct {
	UserID   int64  `json:"user_id"`
	Email    string `json:"email"`
	Username string `json:"username"`
	IsAdmin  bool   `json:"is_admin"`
	jwt.RegisteredClaims
}
|
||||
|
||||
func Login(c *gin.Context) {
|
||||
var req models.LoginRequest
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid request", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
var user models.User
|
||||
err := db.GetPool().QueryRow(c.Request.Context(), `
|
||||
SELECT id, email, username, password_hash, is_admin, created_at, updated_at
|
||||
FROM users WHERE email = $1`, req.Email).Scan(
|
||||
&user.ID, &user.Email, &user.Username, &user.PasswordHash, &user.IsAdmin,
|
||||
&user.CreatedAt, &user.UpdatedAt,
|
||||
)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusUnauthorized, models.ErrorResponse{Error: "Invalid credentials"})
|
||||
return
|
||||
}
|
||||
|
||||
if err := bcrypt.CompareHashAndPassword([]byte(user.PasswordHash), []byte(req.Password)); err != nil {
|
||||
c.JSON(http.StatusUnauthorized, models.ErrorResponse{Error: "Invalid credentials"})
|
||||
return
|
||||
}
|
||||
|
||||
expirationTime := time.Now().Add(24 * time.Hour)
|
||||
claims := &Claims{
|
||||
UserID: user.ID,
|
||||
Email: user.Email,
|
||||
Username: user.Username,
|
||||
IsAdmin: user.IsAdmin,
|
||||
RegisteredClaims: jwt.RegisteredClaims{
|
||||
ExpiresAt: jwt.NewNumericDate(expirationTime),
|
||||
IssuedAt: jwt.NewNumericDate(time.Now()),
|
||||
},
|
||||
}
|
||||
|
||||
token := jwt.NewWithClaims(jwt.SigningMethodHS256, claims)
|
||||
tokenString, err := token.SignedString(jwtSecret)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to generate token"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, models.AuthResponse{
|
||||
Token: tokenString,
|
||||
User: user,
|
||||
})
|
||||
}
|
||||
|
||||
// Register creates a new account and immediately logs it in, returning a
// signed JWT. The very first account ever created is made an admin.
//
// NOTE(review): the "first user" check (COUNT then INSERT) is not atomic —
// two concurrent first registrations could both become admin. Confirm
// whether this needs a serialized check or a unique-admin constraint.
func Register(c *gin.Context) {
	var req models.RegisterRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid request", Message: err.Error()})
		return
	}

	// Hash the password with bcrypt's default cost before storing it.
	hashedPassword, err := bcrypt.GenerateFromPassword([]byte(req.Password), bcrypt.DefaultCost)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to hash password"})
		return
	}

	// First registered user becomes admin (count error is ignored, leaving
	// userCount at 0, which would also grant admin — see NOTE above).
	var userCount int
	db.GetPool().QueryRow(c.Request.Context(), "SELECT COUNT(*) FROM users").Scan(&userCount)
	isFirstUser := userCount == 0

	var userID int64
	err = db.GetPool().QueryRow(c.Request.Context(), `
		INSERT INTO users (email, username, password_hash, is_admin, created_at, updated_at)
		VALUES ($1, $2, $3, $4, NOW(), NOW())
		RETURNING id`,
		req.Email, req.Username, string(hashedPassword), isFirstUser,
	).Scan(&userID)

	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to create user", Message: err.Error()})
		return
	}

	// Re-read the row so the response carries DB-generated timestamps and
	// excludes the password hash.
	var user models.User
	err = db.GetPool().QueryRow(c.Request.Context(), `
		SELECT id, email, username, is_admin, created_at, updated_at
		FROM users WHERE id = $1`, userID).Scan(
		&user.ID, &user.Email, &user.Username, &user.IsAdmin,
		&user.CreatedAt, &user.UpdatedAt,
	)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch user"})
		return
	}

	// Issue an HS256 token valid for 24 hours (same shape as Login).
	expirationTime := time.Now().Add(24 * time.Hour)
	claims := &Claims{
		UserID:   user.ID,
		Email:    user.Email,
		Username: user.Username,
		IsAdmin:  user.IsAdmin,
		RegisteredClaims: jwt.RegisteredClaims{
			ExpiresAt: jwt.NewNumericDate(expirationTime),
			IssuedAt:  jwt.NewNumericDate(time.Now()),
		},
	}

	token := jwt.NewWithClaims(jwt.SigningMethodHS256, claims)
	tokenString, err := token.SignedString(jwtSecret)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to generate token"})
		return
	}

	c.JSON(http.StatusCreated, models.AuthResponse{
		Token:       tokenString,
		User:        user,
		IsFirstUser: isFirstUser,
	})
}
|
||||
|
||||
func GetCurrentUser(c *gin.Context) {
|
||||
userVal, exists := c.Get("user")
|
||||
if !exists {
|
||||
c.JSON(http.StatusUnauthorized, models.ErrorResponse{Error: "Not authenticated"})
|
||||
return
|
||||
}
|
||||
|
||||
claims := userVal.(*Claims)
|
||||
|
||||
var user models.User
|
||||
err := db.GetPool().QueryRow(c.Request.Context(), `
|
||||
SELECT id, email, username, is_admin, created_at, updated_at
|
||||
FROM users WHERE id = $1`, claims.UserID).Scan(
|
||||
&user.ID, &user.Email, &user.Username, &user.IsAdmin,
|
||||
&user.CreatedAt, &user.UpdatedAt,
|
||||
)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "User not found"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, user)
|
||||
}
|
||||
|
||||
// init seeds the JWT signing secret at package-load time by reading
// the application configuration and handing the key to InitAuth.
// NOTE(review): doing configuration I/O inside init makes the package
// harder to test in isolation — confirm this is intentional.
func init() {
	cfg := config.Load()
	InitAuth(cfg.SecretKey)
}
|
||||
540
backend/internal/handlers/feed.go
Normal file
540
backend/internal/handlers/feed.go
Normal file
|
|
@ -0,0 +1,540 @@
|
|||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/rss2/backend/internal/db"
|
||||
"github.com/rss2/backend/internal/models"
|
||||
)
|
||||
|
||||
// FeedResponse is the JSON representation of one RSS feed row.
// Pointer-typed fields map to nullable columns in the feeds table.
type FeedResponse struct {
	ID          int64   `json:"id"`
	Nombre      string  `json:"nombre"`      // display name of the feed
	Descripcion *string `json:"descripcion"` // optional free-text description
	URL         string  `json:"url"`         // feed URL (unique upsert key in ImportFeeds)
	CategoriaID *int64  `json:"categoria_id"`
	PaisID      *int64  `json:"pais_id"`
	Idioma      *string `json:"idioma"` // language code; import truncates to 2 chars
	Activo      bool    `json:"activo"` // whether the ingestor polls this feed
	Fallos      *int64  `json:"fallos"` // consecutive failure count; reset by ReactivateFeed
	LastError   *string `json:"last_error"`
	FuenteURLID *int64  `json:"fuente_url_id"`
}
|
||||
|
||||
func GetFeeds(c *gin.Context) {
|
||||
page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
|
||||
perPage, _ := strconv.Atoi(c.DefaultQuery("per_page", "50"))
|
||||
activo := c.Query("activo")
|
||||
categoriaID := c.Query("categoria_id")
|
||||
paisID := c.Query("pais_id")
|
||||
|
||||
if page < 1 {
|
||||
page = 1
|
||||
}
|
||||
if perPage < 1 || perPage > 100 {
|
||||
perPage = 50
|
||||
}
|
||||
|
||||
offset := (page - 1) * perPage
|
||||
|
||||
where := "1=1"
|
||||
args := []interface{}{}
|
||||
argNum := 1
|
||||
|
||||
if activo != "" {
|
||||
where += fmt.Sprintf(" AND activo = $%d", argNum)
|
||||
args = append(args, activo == "true")
|
||||
argNum++
|
||||
}
|
||||
if categoriaID != "" {
|
||||
where += fmt.Sprintf(" AND categoria_id = $%d", argNum)
|
||||
args = append(args, categoriaID)
|
||||
argNum++
|
||||
}
|
||||
if paisID != "" {
|
||||
where += fmt.Sprintf(" AND pais_id = $%d", argNum)
|
||||
args = append(args, paisID)
|
||||
argNum++
|
||||
}
|
||||
|
||||
var total int
|
||||
countQuery := fmt.Sprintf("SELECT COUNT(*) FROM feeds WHERE %s", where)
|
||||
err := db.GetPool().QueryRow(c.Request.Context(), countQuery, args...).Scan(&total)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to count feeds", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
sqlQuery := fmt.Sprintf(`
|
||||
SELECT f.id, f.nombre, f.descripcion, f.url,
|
||||
f.categoria_id, f.pais_id, f.idioma, f.activo, f.fallos, f.last_error,
|
||||
c.nombre AS categoria, p.nombre AS pais,
|
||||
(SELECT COUNT(*) FROM noticias n WHERE n.fuente_nombre = f.nombre) as noticias_count
|
||||
FROM feeds f
|
||||
LEFT JOIN categorias c ON c.id = f.categoria_id
|
||||
LEFT JOIN paises p ON p.id = f.pais_id
|
||||
WHERE %s
|
||||
ORDER BY p.nombre NULLS LAST, f.activo DESC, f.fallos ASC, c.nombre NULLS LAST, f.nombre
|
||||
LIMIT $%d OFFSET $%d
|
||||
`, where, argNum, argNum+1)
|
||||
|
||||
args = append(args, perPage, offset)
|
||||
|
||||
rows, err := db.GetPool().Query(c.Request.Context(), sqlQuery, args...)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch feeds", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
type FeedWithStats struct {
|
||||
FeedResponse
|
||||
Categoria *string `json:"categoria"`
|
||||
Pais *string `json:"pais"`
|
||||
NoticiasCount int64 `json:"noticias_count"`
|
||||
}
|
||||
|
||||
var feeds []FeedWithStats
|
||||
for rows.Next() {
|
||||
var f FeedWithStats
|
||||
err := rows.Scan(
|
||||
&f.ID, &f.Nombre, &f.Descripcion, &f.URL,
|
||||
&f.CategoriaID, &f.PaisID, &f.Idioma, &f.Activo, &f.Fallos, &f.LastError,
|
||||
&f.Categoria, &f.Pais, &f.NoticiasCount,
|
||||
)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
feeds = append(feeds, f)
|
||||
}
|
||||
|
||||
totalPages := (total + perPage - 1) / perPage
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"feeds": feeds,
|
||||
"total": total,
|
||||
"page": page,
|
||||
"per_page": perPage,
|
||||
"total_pages": totalPages,
|
||||
})
|
||||
}
|
||||
|
||||
func GetFeedByID(c *gin.Context) {
|
||||
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
|
||||
return
|
||||
}
|
||||
|
||||
var f FeedResponse
|
||||
err = db.GetPool().QueryRow(c.Request.Context(), `
|
||||
SELECT id, nombre, descripcion, url, categoria_id, pais_id, idioma, activo, fallos
|
||||
FROM feeds WHERE id = $1`, id).Scan(
|
||||
&f.ID, &f.Nombre, &f.Descripcion, &f.URL,
|
||||
&f.CategoriaID, &f.PaisID, &f.Idioma, &f.Activo, &f.Fallos,
|
||||
)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, f)
|
||||
}
|
||||
|
||||
// CreateFeedRequest is the JSON body accepted by CreateFeed.
// Nombre and URL are mandatory; URL must parse as a valid URL
// (gin "url" binding). All other fields are optional/nullable.
type CreateFeedRequest struct {
	Nombre      string  `json:"nombre" binding:"required"`
	URL         string  `json:"url" binding:"required,url"`
	Descripcion *string `json:"descripcion"`
	CategoriaID *int64  `json:"categoria_id"`
	PaisID      *int64  `json:"pais_id"`
	Idioma      *string `json:"idioma"`
}
|
||||
|
||||
func CreateFeed(c *gin.Context) {
|
||||
var req CreateFeedRequest
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid request", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
var feedID int64
|
||||
err := db.GetPool().QueryRow(c.Request.Context(), `
|
||||
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma)
|
||||
VALUES ($1, $2, $3, $4, $5, $6)
|
||||
RETURNING id`,
|
||||
req.Nombre, req.Descripcion, req.URL, req.CategoriaID, req.PaisID, req.Idioma,
|
||||
).Scan(&feedID)
|
||||
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to create feed", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusCreated, gin.H{"id": feedID, "message": "Feed created successfully"})
|
||||
}
|
||||
|
||||
// UpdateFeedRequest is the JSON body accepted by UpdateFeed.
// Same shape as CreateFeedRequest plus an optional Activo flag;
// when Activo is omitted, UpdateFeed defaults it to true.
type UpdateFeedRequest struct {
	Nombre      string  `json:"nombre" binding:"required"`
	URL         string  `json:"url" binding:"required,url"`
	Descripcion *string `json:"descripcion"`
	CategoriaID *int64  `json:"categoria_id"`
	PaisID      *int64  `json:"pais_id"`
	Idioma      *string `json:"idioma"`
	Activo      *bool   `json:"activo"`
}
|
||||
|
||||
func UpdateFeed(c *gin.Context) {
|
||||
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
|
||||
return
|
||||
}
|
||||
|
||||
var req UpdateFeedRequest
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid request", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
activeVal := true
|
||||
if req.Activo != nil {
|
||||
activeVal = *req.Activo
|
||||
}
|
||||
|
||||
result, err := db.GetPool().Exec(c.Request.Context(), `
|
||||
UPDATE feeds
|
||||
SET nombre = $1, descripcion = $2, url = $3,
|
||||
categoria_id = $4, pais_id = $5, idioma = $6, activo = $7
|
||||
WHERE id = $8`,
|
||||
req.Nombre, req.Descripcion, req.URL,
|
||||
req.CategoriaID, req.PaisID, req.Idioma, activeVal, id,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to update feed", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if result.RowsAffected() == 0 {
|
||||
c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, models.SuccessResponse{Message: "Feed updated successfully"})
|
||||
}
|
||||
|
||||
func DeleteFeed(c *gin.Context) {
|
||||
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
|
||||
return
|
||||
}
|
||||
|
||||
result, err := db.GetPool().Exec(c.Request.Context(), "DELETE FROM feeds WHERE id = $1", id)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to delete feed", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if result.RowsAffected() == 0 {
|
||||
c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, models.SuccessResponse{Message: "Feed deleted successfully"})
|
||||
}
|
||||
|
||||
func ToggleFeedActive(c *gin.Context) {
|
||||
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
|
||||
return
|
||||
}
|
||||
|
||||
result, err := db.GetPool().Exec(c.Request.Context(), `
|
||||
UPDATE feeds SET activo = NOT activo WHERE id = $1`, id)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to toggle feed", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if result.RowsAffected() == 0 {
|
||||
c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, models.SuccessResponse{Message: "Feed toggled successfully"})
|
||||
}
|
||||
|
||||
func ReactivateFeed(c *gin.Context) {
|
||||
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
|
||||
return
|
||||
}
|
||||
|
||||
result, err := db.GetPool().Exec(c.Request.Context(), `
|
||||
UPDATE feeds SET activo = TRUE, fallos = 0 WHERE id = $1`, id)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to reactivate feed", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if result.RowsAffected() == 0 {
|
||||
c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, models.SuccessResponse{Message: "Feed reactivated successfully"})
|
||||
}
|
||||
|
||||
// ExportFeeds streams all feeds matching the optional filters
// (activo, categoria_id, pais_id) as a CSV attachment named
// feeds_export.csv. Column layout mirrors what ImportFeeds expects.
// Errors after the headers are sent cannot change the HTTP status,
// so rows that fail to scan are silently dropped.
func ExportFeeds(c *gin.Context) {
	activo := c.Query("activo")
	categoriaID := c.Query("categoria_id")
	paisID := c.Query("pais_id")

	// Parameterized WHERE clause built incrementally; only the
	// placeholder numbers are interpolated, never user input.
	where := "1=1"
	args := []interface{}{}
	argNum := 1

	if activo != "" {
		where += fmt.Sprintf(" AND activo = $%d", argNum)
		args = append(args, activo == "true")
		argNum++
	}
	if categoriaID != "" {
		where += fmt.Sprintf(" AND categoria_id = $%d", argNum)
		args = append(args, categoriaID)
		argNum++
	}
	if paisID != "" {
		where += fmt.Sprintf(" AND pais_id = $%d", argNum)
		args = append(args, paisID)
		argNum++
	}

	query := fmt.Sprintf(`
		SELECT f.id, f.nombre, f.descripcion, f.url,
		       f.categoria_id, c.nombre AS categoria,
		       f.pais_id, p.nombre AS pais,
		       f.idioma, f.activo, f.fallos
		FROM feeds f
		LEFT JOIN categorias c ON c.id = f.categoria_id
		LEFT JOIN paises p ON p.id = f.pais_id
		WHERE %s
		ORDER BY f.id
	`, where)

	rows, err := db.GetPool().Query(c.Request.Context(), query, args...)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch feeds", Message: err.Error()})
		return
	}
	defer rows.Close()

	// From here on the response is committed as a CSV download.
	c.Header("Content-Type", "text/csv")
	c.Header("Content-Disposition", "attachment; filename=feeds_export.csv")

	writer := csv.NewWriter(c.Writer)
	defer writer.Flush()

	// Header row — order must stay in sync with ImportFeeds' indices.
	writer.Write([]string{"id", "nombre", "descripcion", "url", "categoria_id", "categoria", "pais_id", "pais", "idioma", "activo", "fallos"})

	for rows.Next() {
		var id int64
		var nombre, url string
		var descripcion, idioma *string
		var categoriaID, paisID, fallos *int64
		var activo bool
		var categoria, pais *string

		err := rows.Scan(&id, &nombre, &descripcion, &url, &categoriaID, &categoria, &paisID, &pais, &idioma, &activo, &fallos)
		if err != nil {
			// Headers already sent: skip the bad row and keep streaming.
			continue
		}

		// Nullable columns are rendered as empty strings in the CSV.
		writer.Write([]string{
			fmt.Sprintf("%d", id),
			nombre,
			stringOrEmpty(descripcion),
			url,
			int64ToString(categoriaID),
			stringOrEmpty(categoria),
			int64ToString(paisID),
			stringOrEmpty(pais),
			stringOrEmpty(idioma),
			fmt.Sprintf("%t", activo),
			int64ToString(fallos),
		})
	}
}
|
||||
|
||||
func ImportFeeds(c *gin.Context) {
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "No file provided"})
|
||||
return
|
||||
}
|
||||
|
||||
f, err := file.Open()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to open file", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
content, err := io.ReadAll(f)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to read file", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
reader := csv.NewReader(strings.NewReader(string(content)))
|
||||
_, err = reader.Read()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid CSV format"})
|
||||
return
|
||||
}
|
||||
|
||||
imported := 0
|
||||
skipped := 0
|
||||
failed := 0
|
||||
errors := []string{}
|
||||
|
||||
tx, err := db.GetPool().Begin(context.Background())
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to start transaction", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
defer tx.Rollback(context.Background())
|
||||
|
||||
for {
|
||||
record, err := reader.Read()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
|
||||
if len(record) < 4 {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
nombre := strings.TrimSpace(record[1])
|
||||
url := strings.TrimSpace(record[3])
|
||||
|
||||
if nombre == "" || url == "" {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
var descripcion *string
|
||||
if len(record) > 2 && strings.TrimSpace(record[2]) != "" {
|
||||
descripcionStr := strings.TrimSpace(record[2])
|
||||
descripcion = &descripcionStr
|
||||
}
|
||||
|
||||
var categoriaID *int64
|
||||
if len(record) > 4 && strings.TrimSpace(record[4]) != "" {
|
||||
catID, err := strconv.ParseInt(strings.TrimSpace(record[4]), 10, 64)
|
||||
if err == nil {
|
||||
categoriaID = &catID
|
||||
}
|
||||
}
|
||||
|
||||
var paisID *int64
|
||||
if len(record) > 6 && strings.TrimSpace(record[6]) != "" {
|
||||
pID, err := strconv.ParseInt(strings.TrimSpace(record[6]), 10, 64)
|
||||
if err == nil {
|
||||
paisID = &pID
|
||||
}
|
||||
}
|
||||
|
||||
var idioma *string
|
||||
if len(record) > 8 && strings.TrimSpace(record[8]) != "" {
|
||||
lang := strings.TrimSpace(record[8])
|
||||
if len(lang) > 2 {
|
||||
lang = lang[:2]
|
||||
}
|
||||
idioma = &lang
|
||||
}
|
||||
|
||||
activo := true
|
||||
if len(record) > 9 && strings.TrimSpace(record[9]) != "" {
|
||||
activo = strings.ToLower(strings.TrimSpace(record[9])) == "true"
|
||||
}
|
||||
|
||||
var fallos int64
|
||||
if len(record) > 10 && strings.TrimSpace(record[10]) != "" {
|
||||
f, err := strconv.ParseInt(strings.TrimSpace(record[10]), 10, 64)
|
||||
if err == nil {
|
||||
fallos = f
|
||||
}
|
||||
}
|
||||
|
||||
var existingID int64
|
||||
err = tx.QueryRow(context.Background(), "SELECT id FROM feeds WHERE url = $1", url).Scan(&existingID)
|
||||
if err == nil {
|
||||
_, err = tx.Exec(context.Background(), `
|
||||
UPDATE feeds SET nombre=$1, descripcion=$2, categoria_id=$3, pais_id=$4, idioma=$5, activo=$6, fallos=$7
|
||||
WHERE id=$8`,
|
||||
nombre, descripcion, categoriaID, paisID, idioma, activo, fallos, existingID,
|
||||
)
|
||||
if err != nil {
|
||||
failed++
|
||||
errors = append(errors, fmt.Sprintf("Error updating %s: %v", url, err))
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
_, err = tx.Exec(context.Background(), `
|
||||
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma, activo, fallos)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
|
||||
nombre, descripcion, url, categoriaID, paisID, idioma, activo, fallos,
|
||||
)
|
||||
if err != nil {
|
||||
failed++
|
||||
errors = append(errors, fmt.Sprintf("Error inserting %s: %v", url, err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
imported++
|
||||
}
|
||||
|
||||
if err := tx.Commit(context.Background()); err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to commit transaction", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"imported": imported,
|
||||
"skipped": skipped,
|
||||
"failed": failed,
|
||||
"errors": errors,
|
||||
"message": fmt.Sprintf("Import completed. Imported: %d, Skipped: %d, Failed: %d", imported, skipped, failed),
|
||||
})
|
||||
}
|
||||
|
||||
// stringOrEmpty dereferences s, substituting "" for a nil pointer.
// Used by the CSV exporter to render nullable text columns.
func stringOrEmpty(s *string) string {
	if s != nil {
		return *s
	}
	return ""
}
|
||||
|
||||
// int64ToString renders *i in base 10, or "" when i is nil.
// Used by the CSV exporter for nullable numeric columns.
// Idiom fix: strconv.FormatInt replaces fmt.Sprintf("%d", …) —
// same output, no reflection-based formatting.
func int64ToString(i *int64) string {
	if i == nil {
		return ""
	}
	return strconv.FormatInt(*i, 10)
}
|
||||
369
backend/internal/handlers/news.go
Normal file
369
backend/internal/handlers/news.go
Normal file
|
|
@ -0,0 +1,369 @@
|
|||
package handlers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/rss2/backend/internal/db"
|
||||
"github.com/rss2/backend/internal/models"
|
||||
)
|
||||
|
||||
// NewsResponse is the JSON shape for one news article, optionally
// carrying its Spanish translation (lang_to = 'es' join) and, in the
// detail endpoint only, the linked persona/organizacion entities.
type NewsResponse struct {
	ID                string     `json:"id"` // noticias.id is a string key, not numeric
	Titulo            string     `json:"titulo"`
	Resumen           string     `json:"resumen"` // COALESCEd to "" in queries
	URL               string     `json:"url"`
	Fecha             *time.Time `json:"fecha"`
	ImagenURL         *string    `json:"imagen_url"`
	CategoriaID       *int64     `json:"categoria_id"`
	PaisID            *int64     `json:"pais_id"`
	FuenteNombre      string     `json:"fuente_nombre"`
	TitleTranslated   *string    `json:"title_translated"`   // nil when no translation row
	SummaryTranslated *string    `json:"summary_translated"` // nil when no translation row
	LangTranslated    *string    `json:"lang_translated"`
	Entities          []Entity   `json:"entities,omitempty"` // populated only by GetNewsByID
}
|
||||
|
||||
func GetNews(c *gin.Context) {
|
||||
page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
|
||||
perPage, _ := strconv.Atoi(c.DefaultQuery("per_page", "30"))
|
||||
query := c.Query("q")
|
||||
categoryID := c.Query("category_id")
|
||||
countryID := c.Query("country_id")
|
||||
translatedOnly := c.Query("translated_only") == "true"
|
||||
|
||||
if page < 1 {
|
||||
page = 1
|
||||
}
|
||||
if perPage < 1 || perPage > 100 {
|
||||
perPage = 30
|
||||
}
|
||||
|
||||
offset := (page - 1) * perPage
|
||||
|
||||
where := "1=1"
|
||||
args := []interface{}{}
|
||||
argNum := 1
|
||||
|
||||
if query != "" {
|
||||
where += fmt.Sprintf(" AND (n.titulo ILIKE $%d OR n.resumen ILIKE $%d)", argNum, argNum)
|
||||
args = append(args, "%"+query+"%")
|
||||
argNum++
|
||||
}
|
||||
if categoryID != "" {
|
||||
where += fmt.Sprintf(" AND n.categoria_id = $%d", argNum)
|
||||
args = append(args, categoryID)
|
||||
argNum++
|
||||
}
|
||||
if countryID != "" {
|
||||
where += fmt.Sprintf(" AND n.pais_id = $%d", argNum)
|
||||
args = append(args, countryID)
|
||||
argNum++
|
||||
}
|
||||
if translatedOnly {
|
||||
where += " AND t.status = 'done' AND t.titulo_trad IS NOT NULL AND t.titulo_trad != n.titulo"
|
||||
}
|
||||
|
||||
var total int
|
||||
countQuery := fmt.Sprintf("SELECT COUNT(*) FROM noticias n LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es' WHERE %s", where)
|
||||
err := db.GetPool().QueryRow(c.Request.Context(), countQuery, args...).Scan(&total)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to count news", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if total == 0 {
|
||||
c.JSON(http.StatusOK, models.NewsListResponse{
|
||||
News: []models.NewsWithTranslations{},
|
||||
Total: 0,
|
||||
Page: page,
|
||||
PerPage: perPage,
|
||||
TotalPages: 0,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
sqlQuery := fmt.Sprintf(`
|
||||
SELECT n.id, n.titulo, COALESCE(n.resumen, ''), n.url, n.fecha, n.imagen_url,
|
||||
n.categoria_id, n.pais_id, n.fuente_nombre,
|
||||
t.titulo_trad,
|
||||
t.resumen_trad,
|
||||
t.lang_to as lang_trad
|
||||
FROM noticias n
|
||||
LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es'
|
||||
WHERE %s
|
||||
ORDER BY n.fecha DESC LIMIT $%d OFFSET $%d
|
||||
`, where, argNum, argNum+1)
|
||||
|
||||
args = append(args, perPage, offset)
|
||||
|
||||
rows, err := db.GetPool().Query(c.Request.Context(), sqlQuery, args...)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch news", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var newsList []NewsResponse
|
||||
for rows.Next() {
|
||||
var n NewsResponse
|
||||
var imagenURL, fuenteNombre *string
|
||||
var categoriaID, paisID *int32
|
||||
|
||||
err := rows.Scan(
|
||||
&n.ID, &n.Titulo, &n.Resumen, &n.URL, &n.Fecha, &imagenURL,
|
||||
&categoriaID, &paisID, &fuenteNombre,
|
||||
&n.TitleTranslated, &n.SummaryTranslated, &n.LangTranslated,
|
||||
)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if imagenURL != nil {
|
||||
n.ImagenURL = imagenURL
|
||||
}
|
||||
if fuenteNombre != nil {
|
||||
n.FuenteNombre = *fuenteNombre
|
||||
}
|
||||
if categoriaID != nil {
|
||||
catID := int64(*categoriaID)
|
||||
n.CategoriaID = &catID
|
||||
}
|
||||
if paisID != nil {
|
||||
pID := int64(*paisID)
|
||||
n.PaisID = &pID
|
||||
}
|
||||
newsList = append(newsList, n)
|
||||
}
|
||||
|
||||
totalPages := (total + perPage - 1) / perPage
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"news": newsList,
|
||||
"total": total,
|
||||
"page": page,
|
||||
"per_page": perPage,
|
||||
"total_pages": totalPages,
|
||||
})
|
||||
}
|
||||
|
||||
// GetNewsByID returns one article (with its Spanish translation when
// present) plus the persona/organizacion entities linked to it through
// its translations. Any error on the main query maps to 404; entity
// lookup failures are ignored and yield an empty entities list.
func GetNewsByID(c *gin.Context) {
	id := c.Param("id")

	sqlQuery := `
		SELECT n.id, n.titulo, COALESCE(n.resumen, ''), n.url, n.fecha, n.imagen_url,
		       n.categoria_id, n.pais_id, n.fuente_nombre,
		       t.titulo_trad,
		       t.resumen_trad,
		       t.lang_to as lang_trad
		FROM noticias n
		LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es'
		WHERE n.id = $1`

	var n NewsResponse
	var imagenURL, fuenteNombre *string
	// DB columns are int4; widened to int64 for the response below.
	var categoriaID, paisID *int32

	err := db.GetPool().QueryRow(c.Request.Context(), sqlQuery, id).Scan(
		&n.ID, &n.Titulo, &n.Resumen, &n.URL, &n.Fecha, &imagenURL,
		&categoriaID, &paisID, &fuenteNombre,
		&n.TitleTranslated, &n.SummaryTranslated, &n.LangTranslated,
	)
	if err != nil {
		// NOTE(review): scan/DB errors other than "no rows" also map
		// to 404 here — confirm that is acceptable.
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "News not found"})
		return
	}

	if imagenURL != nil {
		n.ImagenURL = imagenURL
	}
	if fuenteNombre != nil {
		n.FuenteNombre = *fuenteNombre
	}
	if categoriaID != nil {
		catID := int64(*categoriaID)
		n.CategoriaID = &catID
	}
	if paisID != nil {
		pID := int64(*paisID)
		n.PaisID = &pID
	}

	// Fetch entities for this news
	// (count is hardcoded to 1 per row; this endpoint does not
	// aggregate mention counts the way GetEntities does).
	entitiesQuery := `
		SELECT t.valor, t.tipo, 1 as cnt, t.wiki_summary, t.wiki_url, t.image_path
		FROM tags_noticia tn
		JOIN tags t ON tn.tag_id = t.id
		JOIN traducciones tr ON tn.traduccion_id = tr.id
		WHERE tr.noticia_id = $1 AND t.tipo IN ('persona', 'organizacion')
	`
	rows, err := db.GetPool().Query(c.Request.Context(), entitiesQuery, id)
	var entities []Entity
	if err == nil {
		defer rows.Close()
		for rows.Next() {
			var e Entity
			if err := rows.Scan(&e.Valor, &e.Tipo, &e.Count, &e.WikiSummary, &e.WikiURL, &e.ImagePath); err == nil {
				entities = append(entities, e)
			}
		}
	}
	// Normalize nil → empty slice so JSON emits [] rather than null.
	if entities == nil {
		entities = []Entity{}
	}
	n.Entities = entities

	c.JSON(http.StatusOK, n)
}
|
||||
|
||||
func DeleteNews(c *gin.Context) {
|
||||
id := c.Param("id")
|
||||
|
||||
result, err := db.GetPool().Exec(c.Request.Context(), "DELETE FROM noticias WHERE id = $1", id)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to delete news", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if result.RowsAffected() == 0 {
|
||||
c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "News not found"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, models.SuccessResponse{Message: "News deleted successfully"})
|
||||
}
|
||||
|
||||
// Entity is one named entity (person/organization tag) extracted from
// translated articles, optionally enriched with Wikipedia metadata.
type Entity struct {
	Valor       string  `json:"valor"` // canonical or raw tag value
	Tipo        string  `json:"tipo"`  // 'persona' or 'organizacion' in current queries
	Count       int     `json:"count"` // mention count (hardcoded 1 in the per-article query)
	WikiSummary *string `json:"wiki_summary"`
	WikiURL     *string `json:"wiki_url"`
	ImagePath   *string `json:"image_path"`
}
|
||||
|
||||
// EntityListResponse is the paginated envelope returned by GetEntities.
type EntityListResponse struct {
	Entities   []Entity `json:"entities"`
	Total      int      `json:"total"`       // distinct canonical entities matching the filter
	Page       int      `json:"page"`        // 1-based
	PerPage    int      `json:"per_page"`    // clamped to 1..100
	TotalPages int      `json:"total_pages"` // ceil(Total / PerPage)
}
|
||||
|
||||
// GetEntities returns a paginated, mention-count-ranked list of named
// entities of one tipo (default 'persona'), optionally filtered by
// country, category, and a name substring. Aliases are folded into
// their canonical name via the entity_aliases table before counting.
func GetEntities(c *gin.Context) {
	countryID := c.Query("country_id")
	categoryID := c.Query("category_id")
	entityType := c.DefaultQuery("tipo", "persona")

	q := c.Query("q")

	pageStr := c.DefaultQuery("page", "1")
	perPageStr := c.DefaultQuery("per_page", "50")

	page, _ := strconv.Atoi(pageStr)
	perPage, _ := strconv.Atoi(perPageStr)

	// Clamp pagination to sane bounds.
	if page < 1 {
		page = 1
	}
	if perPage < 1 || perPage > 100 {
		perPage = 50
	}

	offset := (page - 1) * perPage

	// Placeholder numbers are derived from len(args)+1, so args and
	// the clause text must be appended in lockstep.
	where := "t.tipo = $1"
	args := []interface{}{entityType}

	if countryID != "" {
		where += fmt.Sprintf(" AND n.pais_id = $%d", len(args)+1)
		args = append(args, countryID)
	}

	if categoryID != "" {
		where += fmt.Sprintf(" AND n.categoria_id = $%d", len(args)+1)
		args = append(args, categoryID)
	}

	if q != "" {
		// Matches the alias-resolved (canonical) name.
		where += fmt.Sprintf(" AND COALESCE(ea.canonical_name, t.valor) ILIKE $%d", len(args)+1)
		args = append(args, "%"+q+"%")
	}

	// 1. Get the total count of distinct canonical entities matching the filter
	countQuery := fmt.Sprintf(`
		SELECT COUNT(DISTINCT COALESCE(ea.canonical_name, t.valor))
		FROM tags_noticia tn
		JOIN tags t ON tn.tag_id = t.id
		JOIN traducciones tr ON tn.traduccion_id = tr.id
		JOIN noticias n ON tr.noticia_id = n.id
		LEFT JOIN entity_aliases ea ON LOWER(ea.alias) = LOWER(t.valor) AND ea.tipo = t.tipo
		WHERE %s
	`, where)

	var total int
	err := db.GetPool().QueryRow(c.Request.Context(), countQuery, args...).Scan(&total)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get entities count", Message: err.Error()})
		return
	}

	if total == 0 {
		c.JSON(http.StatusOK, EntityListResponse{
			Entities:   []Entity{},
			Total:      0,
			Page:       page,
			PerPage:    perPage,
			TotalPages: 0,
		})
		return
	}

	// 2. Fetch the paginated entities
	// After this append, perPage sits at index len(args)-2 and offset
	// at len(args)-1, hence the $len(args)-1 / $len(args) placeholders.
	args = append(args, perPage, offset)
	query := fmt.Sprintf(`
		SELECT COALESCE(ea.canonical_name, t.valor) as valor, t.tipo, COUNT(*)::int as cnt,
		       MAX(t.wiki_summary), MAX(t.wiki_url), MAX(t.image_path)
		FROM tags_noticia tn
		JOIN tags t ON tn.tag_id = t.id
		JOIN traducciones tr ON tn.traduccion_id = tr.id
		JOIN noticias n ON tr.noticia_id = n.id
		LEFT JOIN entity_aliases ea ON LOWER(ea.alias) = LOWER(t.valor) AND ea.tipo = t.tipo
		WHERE %s
		GROUP BY COALESCE(ea.canonical_name, t.valor), t.tipo
		ORDER BY cnt DESC
		LIMIT $%d OFFSET $%d
	`, where, len(args)-1, len(args))

	rows, err := db.GetPool().Query(c.Request.Context(), query, args...)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get entities", Message: err.Error()})
		return
	}
	defer rows.Close()

	var entities []Entity
	for rows.Next() {
		var e Entity
		if err := rows.Scan(&e.Valor, &e.Tipo, &e.Count, &e.WikiSummary, &e.WikiURL, &e.ImagePath); err != nil {
			// Best-effort listing: drop malformed rows.
			continue
		}
		entities = append(entities, e)
	}

	// Normalize nil → empty slice so JSON emits [] rather than null.
	if entities == nil {
		entities = []Entity{}
	}

	totalPages := (total + perPage - 1) / perPage // ceiling division

	c.JSON(http.StatusOK, EntityListResponse{
		Entities:   entities,
		Total:      total,
		Page:       page,
		PerPage:    perPage,
		TotalPages: totalPages,
	})
}
|
||||
265
backend/internal/handlers/search.go
Normal file
265
backend/internal/handlers/search.go
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
package handlers
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/rss2/backend/internal/db"
|
||||
"github.com/rss2/backend/internal/models"
|
||||
"github.com/rss2/backend/internal/services"
|
||||
)
|
||||
|
||||
func SearchNews(c *gin.Context) {
|
||||
query := c.Query("q")
|
||||
page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
|
||||
perPage, _ := strconv.Atoi(c.DefaultQuery("per_page", "30"))
|
||||
lang := c.DefaultQuery("lang", "")
|
||||
categoriaID := c.Query("categoria_id")
|
||||
paisID := c.Query("pais_id")
|
||||
useSemantic := c.Query("semantic") == "true"
|
||||
|
||||
if query == "" && categoriaID == "" && paisID == "" && lang == "" {
|
||||
c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "At least one filter is required (q, categoria_id, pais_id, or lang)"})
|
||||
return
|
||||
}
|
||||
|
||||
if page < 1 {
|
||||
page = 1
|
||||
}
|
||||
if perPage < 1 || perPage > 100 {
|
||||
perPage = 30
|
||||
}
|
||||
|
||||
// Default to Spanish if no lang specified
|
||||
if lang == "" {
|
||||
lang = "es"
|
||||
}
|
||||
|
||||
ctx := c.Request.Context()
|
||||
|
||||
if useSemantic {
|
||||
results, err := services.SemanticSearch(ctx, query, lang, page, perPage)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Semantic search failed", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, results)
|
||||
return
|
||||
}
|
||||
|
||||
offset := (page - 1) * perPage
|
||||
|
||||
// Build dynamic query
|
||||
args := []interface{}{}
|
||||
argNum := 1
|
||||
whereClause := "WHERE 1=1"
|
||||
|
||||
if query != "" {
|
||||
whereClause += " AND (n.titulo ILIKE $" + strconv.Itoa(argNum) + " OR n.resumen ILIKE $" + strconv.Itoa(argNum) + " OR n.contenido ILIKE $" + strconv.Itoa(argNum) + ")"
|
||||
args = append(args, "%"+query+"%")
|
||||
argNum++
|
||||
}
|
||||
|
||||
if lang != "" {
|
||||
whereClause += " AND t.lang_to = $" + strconv.Itoa(argNum)
|
||||
args = append(args, lang)
|
||||
argNum++
|
||||
}
|
||||
|
||||
if categoriaID != "" {
|
||||
whereClause += " AND n.categoria_id = $" + strconv.Itoa(argNum)
|
||||
catID, err := strconv.ParseInt(categoriaID, 10, 64)
|
||||
if err == nil {
|
||||
args = append(args, catID)
|
||||
argNum++
|
||||
}
|
||||
}
|
||||
|
||||
if paisID != "" {
|
||||
whereClause += " AND n.pais_id = $" + strconv.Itoa(argNum)
|
||||
pID, err := strconv.ParseInt(paisID, 10, 64)
|
||||
if err == nil {
|
||||
args = append(args, pID)
|
||||
argNum++
|
||||
}
|
||||
}
|
||||
|
||||
args = append(args, perPage, offset)
|
||||
|
||||
sqlQuery := `
|
||||
SELECT n.id, n.titulo, n.resumen, n.contenido, n.url, n.imagen,
|
||||
n.feed_id, n.lang, n.categoria_id, n.pais_id, n.created_at, n.updated_at,
|
||||
COALESCE(t.titulo_trad, '') as titulo_trad,
|
||||
COALESCE(t.resumen_trad, '') as resumen_trad,
|
||||
t.lang_to as lang_trad,
|
||||
f.nombre as fuente_nombre
|
||||
FROM noticias n
|
||||
LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = $` + strconv.Itoa(argNum) + `
|
||||
LEFT JOIN feeds f ON f.id = n.feed_id
|
||||
` + whereClause + `
|
||||
ORDER BY n.created_at DESC
|
||||
LIMIT $` + strconv.Itoa(argNum+1) + ` OFFSET $` + strconv.Itoa(argNum+2)
|
||||
|
||||
rows, err := db.GetPool().Query(ctx, sqlQuery, args...)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Search failed", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var newsList []models.NewsWithTranslations
|
||||
for rows.Next() {
|
||||
var n models.NewsWithTranslations
|
||||
var imagen *string
|
||||
|
||||
err := rows.Scan(
|
||||
&n.ID, &n.Title, &n.Summary, &n.Content, &n.URL, &imagen,
|
||||
&n.FeedID, &n.Lang, &n.CategoryID, &n.CountryID, &n.CreatedAt, &n.UpdatedAt,
|
||||
&n.TitleTranslated, &n.SummaryTranslated, &n.LangTranslated,
|
||||
)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if imagen != nil {
|
||||
n.ImageURL = imagen
|
||||
}
|
||||
newsList = append(newsList, n)
|
||||
}
|
||||
|
||||
// Get total count
|
||||
countArgs := args[:len(args)-2]
|
||||
|
||||
// Remove LIMIT/OFFSET from args for count
|
||||
var total int
|
||||
err = db.GetPool().QueryRow(ctx, `
|
||||
SELECT COUNT(*) FROM noticias n
|
||||
LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = $`+strconv.Itoa(argNum)+`
|
||||
`+whereClause, countArgs...).Scan(&total)
|
||||
if err != nil {
|
||||
total = len(newsList)
|
||||
}
|
||||
|
||||
totalPages := (total + perPage - 1) / perPage
|
||||
|
||||
response := models.NewsListResponse{
|
||||
News: newsList,
|
||||
Total: total,
|
||||
Page: page,
|
||||
PerPage: perPage,
|
||||
TotalPages: totalPages,
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, response)
|
||||
}
|
||||
|
||||
func GetStats(c *gin.Context) {
|
||||
var stats models.Stats
|
||||
|
||||
err := db.GetPool().QueryRow(c.Request.Context(), `
|
||||
SELECT
|
||||
(SELECT COUNT(*) FROM noticias) as total_news,
|
||||
(SELECT COUNT(*) FROM feeds WHERE activo = true) as total_feeds,
|
||||
(SELECT COUNT(*) FROM users) as total_users,
|
||||
(SELECT COUNT(*) FROM noticias WHERE fecha::date = CURRENT_DATE) as news_today,
|
||||
(SELECT COUNT(*) FROM noticias WHERE fecha >= DATE_TRUNC('week', CURRENT_DATE)) as news_this_week,
|
||||
(SELECT COUNT(*) FROM noticias WHERE fecha >= DATE_TRUNC('month', CURRENT_DATE)) as news_this_month,
|
||||
(SELECT COUNT(DISTINCT noticia_id) FROM traducciones WHERE status = 'done') as total_translated
|
||||
`).Scan(
|
||||
&stats.TotalNews, &stats.TotalFeeds, &stats.TotalUsers,
|
||||
&stats.NewsToday, &stats.NewsThisWeek, &stats.NewsThisMonth,
|
||||
&stats.TotalTranslated,
|
||||
)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get stats", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
rows, err := db.GetPool().Query(c.Request.Context(), `
|
||||
SELECT c.id, c.nombre, COUNT(n.id) as count
|
||||
FROM categorias c
|
||||
LEFT JOIN noticias n ON n.categoria_id = c.id
|
||||
GROUP BY c.id, c.nombre
|
||||
ORDER BY count DESC
|
||||
LIMIT 10
|
||||
`)
|
||||
if err == nil {
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var cs models.CategoryStat
|
||||
rows.Scan(&cs.CategoryID, &cs.CategoryName, &cs.Count)
|
||||
stats.TopCategories = append(stats.TopCategories, cs)
|
||||
}
|
||||
}
|
||||
|
||||
rows, err = db.GetPool().Query(c.Request.Context(), `
|
||||
SELECT p.id, p.nombre, p.flag_emoji, COUNT(n.id) as count
|
||||
FROM paises p
|
||||
LEFT JOIN noticias n ON n.pais_id = p.id
|
||||
GROUP BY p.id, p.nombre, p.flag_emoji
|
||||
ORDER BY count DESC
|
||||
LIMIT 10
|
||||
`)
|
||||
if err == nil {
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var cs models.CountryStat
|
||||
rows.Scan(&cs.CountryID, &cs.CountryName, &cs.FlagEmoji, &cs.Count)
|
||||
stats.TopCountries = append(stats.TopCountries, cs)
|
||||
}
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, stats)
|
||||
}
|
||||
|
||||
func GetCategories(c *gin.Context) {
|
||||
rows, err := db.GetPool().Query(c.Request.Context(), `
|
||||
SELECT id, nombre FROM categorias ORDER BY nombre`)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get categories", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
type Category struct {
|
||||
ID int64 `json:"id"`
|
||||
Nombre string `json:"nombre"`
|
||||
}
|
||||
|
||||
var categories []Category
|
||||
for rows.Next() {
|
||||
var cat Category
|
||||
rows.Scan(&cat.ID, &cat.Nombre)
|
||||
categories = append(categories, cat)
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, categories)
|
||||
}
|
||||
|
||||
func GetCountries(c *gin.Context) {
|
||||
rows, err := db.GetPool().Query(c.Request.Context(), `
|
||||
SELECT p.id, p.nombre, c.nombre as continente
|
||||
FROM paises p
|
||||
LEFT JOIN continentes c ON c.id = p.continente_id
|
||||
ORDER BY p.nombre`)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get countries", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
type Country struct {
|
||||
ID int64 `json:"id"`
|
||||
Nombre string `json:"nombre"`
|
||||
Continente string `json:"continente"`
|
||||
}
|
||||
|
||||
var countries []Country
|
||||
for rows.Next() {
|
||||
var country Country
|
||||
rows.Scan(&country.ID, &country.Nombre, &country.Continente)
|
||||
countries = append(countries, country)
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, countries)
|
||||
}
|
||||
108
backend/internal/middleware/auth.go
Normal file
108
backend/internal/middleware/auth.go
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
package middleware
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
)
|
||||
|
||||
var jwtSecret []byte
|
||||
|
||||
func SetJWTSecret(secret string) {
|
||||
jwtSecret = []byte(secret)
|
||||
}
|
||||
|
||||
type Claims struct {
|
||||
UserID int64 `json:"user_id"`
|
||||
Email string `json:"email"`
|
||||
Username string `json:"username"`
|
||||
IsAdmin bool `json:"is_admin"`
|
||||
jwt.RegisteredClaims
|
||||
}
|
||||
|
||||
func AuthRequired() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
authHeader := c.GetHeader("Authorization")
|
||||
if authHeader == "" {
|
||||
c.JSON(http.StatusUnauthorized, gin.H{"error": "Authorization header required"})
|
||||
c.Abort()
|
||||
return
|
||||
}
|
||||
|
||||
tokenString := strings.TrimPrefix(authHeader, "Bearer ")
|
||||
if tokenString == authHeader {
|
||||
c.JSON(http.StatusUnauthorized, gin.H{"error": "Bearer token required"})
|
||||
c.Abort()
|
||||
return
|
||||
}
|
||||
|
||||
claims := &Claims{}
|
||||
token, err := jwt.ParseWithClaims(tokenString, claims, func(token *jwt.Token) (interface{}, error) {
|
||||
return jwtSecret, nil
|
||||
})
|
||||
|
||||
if err != nil || !token.Valid {
|
||||
c.JSON(http.StatusUnauthorized, gin.H{"error": "Invalid token"})
|
||||
c.Abort()
|
||||
return
|
||||
}
|
||||
|
||||
c.Set("user", claims)
|
||||
c.Next()
|
||||
}
|
||||
}
|
||||
|
||||
func AdminRequired() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
userVal, exists := c.Get("user")
|
||||
if !exists {
|
||||
c.JSON(http.StatusUnauthorized, gin.H{"error": "Not authenticated"})
|
||||
c.Abort()
|
||||
return
|
||||
}
|
||||
|
||||
claims := userVal.(*Claims)
|
||||
if !claims.IsAdmin {
|
||||
c.JSON(http.StatusForbidden, gin.H{"error": "Admin access required"})
|
||||
c.Abort()
|
||||
return
|
||||
}
|
||||
|
||||
c.Next()
|
||||
}
|
||||
}
|
||||
|
||||
func CORSMiddleware() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
c.Writer.Header().Set("Access-Control-Allow-Origin", "*")
|
||||
c.Writer.Header().Set("Access-Control-Allow-Credentials", "true")
|
||||
c.Writer.Header().Set("Access-Control-Allow-Headers", "Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization, accept, origin, Cache-Control, X-Requested-With")
|
||||
c.Writer.Header().Set("Access-Control-Allow-Methods", "POST, OPTIONS, GET, PUT, DELETE, PATCH")
|
||||
|
||||
if c.Request.Method == "OPTIONS" {
|
||||
c.AbortWithStatus(204)
|
||||
return
|
||||
}
|
||||
|
||||
c.Next()
|
||||
}
|
||||
}
|
||||
|
||||
func LoggerMiddleware() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
c.Next()
|
||||
|
||||
status := c.Writer.Status()
|
||||
if status >= 400 {
|
||||
// Log error responses
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func RateLimitMiddleware(requestsPerMinute int) gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
c.Next()
|
||||
}
|
||||
}
|
||||
17
backend/internal/models/alias.go
Normal file
17
backend/internal/models/alias.go
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
package models
|
||||
|
||||
import "time"
|
||||
|
||||
type EntityAlias struct {
|
||||
ID int `json:"id"`
|
||||
CanonicalName string `json:"canonical_name"`
|
||||
Alias string `json:"alias"`
|
||||
Tipo string `json:"tipo"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
}
|
||||
|
||||
type EntityAliasRequest struct {
|
||||
CanonicalName string `json:"canonical_name" binding:"required"`
|
||||
Aliases []string `json:"aliases" binding:"required,min=1"`
|
||||
Tipo string `json:"tipo" binding:"required,oneof=persona organizacion lugar tema"`
|
||||
}
|
||||
171
backend/internal/models/models.go
Normal file
171
backend/internal/models/models.go
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
package models
|
||||
|
||||
import (
|
||||
"time"
|
||||
)
|
||||
|
||||
type News struct {
|
||||
ID int64 `json:"id"`
|
||||
Title string `json:"title"`
|
||||
Summary string `json:"summary"`
|
||||
Content string `json:"content"`
|
||||
URL string `json:"url"`
|
||||
ImageURL *string `json:"image_url"`
|
||||
PublishedAt *time.Time `json:"published_at"`
|
||||
Lang string `json:"lang"`
|
||||
FeedID int64 `json:"feed_id"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
|
||||
type NewsWithTranslations struct {
|
||||
ID int64 `json:"id"`
|
||||
Title string `json:"title"`
|
||||
Summary string `json:"summary"`
|
||||
Content string `json:"content"`
|
||||
URL string `json:"url"`
|
||||
ImageURL *string `json:"image_url"`
|
||||
PublishedAt *string `json:"published_at"`
|
||||
Lang string `json:"lang"`
|
||||
FeedID int64 `json:"feed_id"`
|
||||
CategoryID *int64 `json:"category_id"`
|
||||
CountryID *int64 `json:"country_id"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
TitleTranslated *string `json:"title_translated"`
|
||||
SummaryTranslated *string `json:"summary_translated"`
|
||||
ContentTranslated *string `json:"content_translated"`
|
||||
LangTranslated *string `json:"lang_translated"`
|
||||
}
|
||||
|
||||
type Feed struct {
|
||||
ID int64 `json:"id"`
|
||||
Title string `json:"title"`
|
||||
URL string `json:"url"`
|
||||
SiteURL *string `json:"site_url"`
|
||||
Description *string `json:"description"`
|
||||
ImageURL *string `json:"image_url"`
|
||||
Language *string `json:"language"`
|
||||
CategoryID *int64 `json:"category_id"`
|
||||
CountryID *int64 `json:"country_id"`
|
||||
Active bool `json:"active"`
|
||||
LastFetched *time.Time `json:"last_fetched"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
|
||||
type Category struct {
|
||||
ID int64 `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Color string `json:"color"`
|
||||
Icon string `json:"icon"`
|
||||
ParentID *int64 `json:"parent_id"`
|
||||
}
|
||||
|
||||
type Country struct {
|
||||
ID int64 `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Code string `json:"code"`
|
||||
Continent string `json:"continent"`
|
||||
FlagEmoji string `json:"flag_emoji"`
|
||||
}
|
||||
|
||||
type Translation struct {
|
||||
ID int64 `json:"id"`
|
||||
NewsID int64 `json:"news_id"`
|
||||
LangFrom string `json:"lang_from"`
|
||||
LangTo string `json:"lang_to"`
|
||||
Title string `json:"title"`
|
||||
Summary string `json:"summary"`
|
||||
Status string `json:"status"`
|
||||
Error *string `json:"error"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
|
||||
type User struct {
|
||||
ID int64 `json:"id"`
|
||||
Email string `json:"email"`
|
||||
Username string `json:"username"`
|
||||
PasswordHash string `json:"-"`
|
||||
IsAdmin bool `json:"is_admin"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
|
||||
type SearchHistory struct {
|
||||
ID int64 `json:"id"`
|
||||
UserID int64 `json:"user_id"`
|
||||
Query string `json:"query"`
|
||||
CategoryID *int64 `json:"category_id"`
|
||||
CountryID *int64 `json:"country_id"`
|
||||
ResultsCount int `json:"results_count"`
|
||||
SearchedAt time.Time `json:"searched_at"`
|
||||
}
|
||||
|
||||
type NewsListResponse struct {
|
||||
News []NewsWithTranslations `json:"news"`
|
||||
Total int `json:"total"`
|
||||
Page int `json:"page"`
|
||||
PerPage int `json:"per_page"`
|
||||
TotalPages int `json:"total_pages"`
|
||||
}
|
||||
|
||||
type FeedListResponse struct {
|
||||
Feeds []Feed `json:"feeds"`
|
||||
Total int `json:"total"`
|
||||
Page int `json:"page"`
|
||||
PerPage int `json:"per_page"`
|
||||
TotalPages int `json:"total_pages"`
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
TotalNews int64 `json:"total_news"`
|
||||
TotalFeeds int64 `json:"total_feeds"`
|
||||
TotalUsers int64 `json:"total_users"`
|
||||
TotalTranslated int64 `json:"total_translated"`
|
||||
NewsToday int64 `json:"news_today"`
|
||||
NewsThisWeek int64 `json:"news_this_week"`
|
||||
NewsThisMonth int64 `json:"news_this_month"`
|
||||
TopCategories []CategoryStat `json:"top_categories"`
|
||||
TopCountries []CountryStat `json:"top_countries"`
|
||||
}
|
||||
|
||||
type CategoryStat struct {
|
||||
CategoryID int64 `json:"category_id"`
|
||||
CategoryName string `json:"category_name"`
|
||||
Count int64 `json:"count"`
|
||||
}
|
||||
|
||||
type CountryStat struct {
|
||||
CountryID int64 `json:"country_id"`
|
||||
CountryName string `json:"country_name"`
|
||||
FlagEmoji string `json:"flag_emoji"`
|
||||
Count int64 `json:"count"`
|
||||
}
|
||||
|
||||
type LoginRequest struct {
|
||||
Email string `json:"email" binding:"required,email"`
|
||||
Password string `json:"password" binding:"required,min=6"`
|
||||
}
|
||||
|
||||
type RegisterRequest struct {
|
||||
Email string `json:"email" binding:"required,email"`
|
||||
Username string `json:"username" binding:"required,min=3,max=50"`
|
||||
Password string `json:"password" binding:"required,min=6"`
|
||||
}
|
||||
|
||||
type AuthResponse struct {
|
||||
Token string `json:"token"`
|
||||
User User `json:"user"`
|
||||
IsFirstUser bool `json:"is_first_user,omitempty"`
|
||||
}
|
||||
|
||||
type ErrorResponse struct {
|
||||
Error string `json:"error"`
|
||||
Message string `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
type SuccessResponse struct {
|
||||
Message string `json:"message"`
|
||||
}
|
||||
170
backend/internal/services/ml.go
Normal file
170
backend/internal/services/ml.go
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
package services
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/rss2/backend/internal/config"
|
||||
"github.com/rss2/backend/internal/models"
|
||||
)
|
||||
|
||||
var (
|
||||
cfg *config.Config
|
||||
)
|
||||
|
||||
func Init(c *config.Config) {
|
||||
cfg = c
|
||||
}
|
||||
|
||||
type TranslationRequest struct {
|
||||
SourceLang string `json:"source_lang"`
|
||||
TargetLang string `json:"target_lang"`
|
||||
Texts []string `json:"texts"`
|
||||
}
|
||||
|
||||
type TranslationResponse struct {
|
||||
Translations []string `json:"translations"`
|
||||
}
|
||||
|
||||
func Translate(ctx context.Context, sourceLang, targetLang string, texts []string) ([]string, error) {
|
||||
if len(texts) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
reqBody := TranslationRequest{
|
||||
SourceLang: sourceLang,
|
||||
TargetLang: targetLang,
|
||||
Texts: texts,
|
||||
}
|
||||
|
||||
body, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
httpClient := &http.Client{Timeout: 30 * time.Second}
|
||||
resp, err := httpClient.Post(cfg.TranslationURL+"/translate", "application/json", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("translation request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("translation service returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var result TranslationResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return result.Translations, nil
|
||||
}
|
||||
|
||||
type EmbeddingRequest struct {
|
||||
Model string `json:"model"`
|
||||
Input []string `json:"input"`
|
||||
}
|
||||
|
||||
type EmbeddingResponse struct {
|
||||
Embeddings [][]float64 `json:"embeddings"`
|
||||
}
|
||||
|
||||
func GetEmbeddings(ctx context.Context, texts []string) ([][]float64, error) {
|
||||
if len(texts) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
reqBody := EmbeddingRequest{
|
||||
Model: "mxbai-embed-large",
|
||||
Input: texts,
|
||||
}
|
||||
|
||||
body, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
httpClient := &http.Client{Timeout: 60 * time.Second}
|
||||
resp, err := httpClient.Post(cfg.OllamaURL+"/api/embeddings", "application/json", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("embeddings request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("embeddings service returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var result EmbeddingResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return result.Embeddings, nil
|
||||
}
|
||||
|
||||
type NERRequest struct {
|
||||
Text string `json:"text"`
|
||||
}
|
||||
|
||||
type NERResponse struct {
|
||||
Entities []Entity `json:"entities"`
|
||||
}
|
||||
|
||||
type Entity struct {
|
||||
Text string `json:"text"`
|
||||
Label string `json:"label"`
|
||||
Start int `json:"start"`
|
||||
End int `json:"end"`
|
||||
}
|
||||
|
||||
func ExtractEntities(ctx context.Context, text string) ([]Entity, error) {
|
||||
reqBody := NERRequest{Text: text}
|
||||
|
||||
body, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
httpClient := &http.Client{Timeout: 30 * time.Second}
|
||||
resp, err := httpClient.Post(cfg.SpacyURL+"/ner", "application/json", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("NER request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("NER service returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var result NERResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return result.Entities, nil
|
||||
}
|
||||
|
||||
func SemanticSearch(ctx context.Context, query, lang string, page, perPage int) (*models.NewsListResponse, error) {
|
||||
embeddings, err := GetEmbeddings(ctx, []string{query})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(embeddings) == 0 {
|
||||
return &models.NewsListResponse{}, nil
|
||||
}
|
||||
|
||||
return &models.NewsListResponse{
|
||||
News: []models.NewsWithTranslations{},
|
||||
Total: 0,
|
||||
Page: page,
|
||||
PerPage: perPage,
|
||||
TotalPages: 0,
|
||||
}, nil
|
||||
}
|
||||
83
backend/internal/workers/db.go
Normal file
83
backend/internal/workers/db.go
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
package workers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
)
|
||||
|
||||
var pool *pgxpool.Pool
|
||||
|
||||
type Config struct {
|
||||
Host string
|
||||
Port int
|
||||
DBName string
|
||||
User string
|
||||
Password string
|
||||
}
|
||||
|
||||
func LoadDBConfig() *Config {
|
||||
return &Config{
|
||||
Host: getEnv("DB_HOST", "localhost"),
|
||||
Port: getEnvInt("DB_PORT", 5432),
|
||||
DBName: getEnv("DB_NAME", "rss"),
|
||||
User: getEnv("DB_USER", "rss"),
|
||||
Password: getEnv("DB_PASS", "rss"),
|
||||
}
|
||||
}
|
||||
|
||||
func Connect(cfg *Config) error {
|
||||
dsn := fmt.Sprintf("postgres://%s:%s@%s:%d/%s?sslmode=disable",
|
||||
cfg.User, cfg.Password, cfg.Host, cfg.Port, cfg.DBName)
|
||||
|
||||
poolConfig, err := pgxpool.ParseConfig(dsn)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse config: %w", err)
|
||||
}
|
||||
|
||||
poolConfig.MaxConns = 25
|
||||
poolConfig.MinConns = 5
|
||||
poolConfig.MaxConnLifetime = time.Hour
|
||||
poolConfig.MaxConnIdleTime = 30 * time.Minute
|
||||
|
||||
pool, err = pgxpool.NewWithConfig(context.Background(), poolConfig)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create pool: %w", err)
|
||||
}
|
||||
|
||||
if err = pool.Ping(context.Background()); err != nil {
|
||||
return fmt.Errorf("failed to ping database: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetPool() *pgxpool.Pool {
|
||||
return pool
|
||||
}
|
||||
|
||||
func Close() {
|
||||
if pool != nil {
|
||||
pool.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func getEnv(key, defaultValue string) string {
|
||||
if value := os.Getenv(key); value != "" {
|
||||
return value
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
func getEnvInt(key string, defaultValue int) int {
|
||||
if value := os.Getenv(key); value != "" {
|
||||
if intVal, err := strconv.Atoi(value); err == nil {
|
||||
return intVal
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
BIN
backend/server
Executable file
BIN
backend/server
Executable file
Binary file not shown.
188
cache.py
188
cache.py
|
|
@ -1,188 +0,0 @@
|
|||
"""
|
||||
Redis cache module for high-traffic endpoints.
|
||||
Provides caching decorator and invalidation utilities.
|
||||
"""
|
||||
import redis
|
||||
import json
|
||||
import logging
|
||||
import hashlib
|
||||
import time
|
||||
from functools import wraps
|
||||
from config import REDIS_HOST, REDIS_PORT, REDIS_PASSWORD, REDIS_TTL_DEFAULT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_redis_client = None
|
||||
_redis_last_fail = 0
|
||||
|
||||
def get_redis():
|
||||
"""Get Redis client singleton with failure backoff."""
|
||||
global _redis_client, _redis_last_fail
|
||||
|
||||
if _redis_client is not None:
|
||||
return _redis_client
|
||||
|
||||
# Prevent retrying too often if it's failing (60s backoff)
|
||||
now = time.time()
|
||||
if now - _redis_last_fail < 60:
|
||||
return None
|
||||
|
||||
try:
|
||||
redis_config = {
|
||||
'host': REDIS_HOST,
|
||||
'port': REDIS_PORT,
|
||||
'decode_responses': True,
|
||||
'socket_connect_timeout': 1, # Faster timeout
|
||||
'socket_timeout': 1
|
||||
}
|
||||
|
||||
if REDIS_PASSWORD:
|
||||
redis_config['password'] = REDIS_PASSWORD
|
||||
|
||||
_redis_client = redis.Redis(**redis_config)
|
||||
_redis_client.ping()
|
||||
_redis_last_fail = 0
|
||||
return _redis_client
|
||||
except Exception as e:
|
||||
logger.warning(f"Redis connection failed: {e}. Caching disabled for 60s.")
|
||||
_redis_client = None
|
||||
_redis_last_fail = now
|
||||
return None
|
||||
|
||||
|
||||
def cached(ttl_seconds=None, prefix="cache"):
|
||||
"""
|
||||
Decorator for caching function results in Redis.
|
||||
Falls back to calling function directly if Redis is unavailable.
|
||||
|
||||
Args:
|
||||
ttl_seconds: Time to live in seconds (default from config)
|
||||
prefix: Key prefix for cache entries
|
||||
"""
|
||||
if ttl_seconds is None:
|
||||
ttl_seconds = REDIS_TTL_DEFAULT
|
||||
|
||||
def decorator(func):
|
||||
@wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
r = get_redis()
|
||||
if r is None:
|
||||
# Redis unavailable, call function directly
|
||||
return func(*args, **kwargs)
|
||||
|
||||
# Build cache key from function name and arguments
|
||||
# Use md5 for deterministic hash across processes
|
||||
key_data = f"{args}:{sorted(kwargs.items())}"
|
||||
|
||||
# Add flask request args if available to prevent collision on filtered routes
|
||||
try:
|
||||
from flask import request
|
||||
if request:
|
||||
key_data += f":args:{sorted(request.args.items())}"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
key_hash = hashlib.md5(key_data.encode('utf-8')).hexdigest()
|
||||
cache_key = f"cache:{prefix}:{func.__name__}:{key_hash}"
|
||||
|
||||
try:
|
||||
# Try to get from cache
|
||||
cached_value = r.get(cache_key)
|
||||
if cached_value is not None:
|
||||
# If it's a JSON response, we might need to return it correctly
|
||||
try:
|
||||
data = json.loads(cached_value)
|
||||
# Detect if we should return as JSON
|
||||
from flask import jsonify
|
||||
return jsonify(data)
|
||||
except (json.JSONDecodeError, ImportError):
|
||||
return cached_value
|
||||
|
||||
# Cache miss - call function and cache result
|
||||
result = func(*args, **kwargs)
|
||||
|
||||
# Handle Flask Response objects
|
||||
cache_data = result
|
||||
try:
|
||||
from flask import Response
|
||||
if isinstance(result, Response):
|
||||
if result.is_json:
|
||||
cache_data = result.get_json()
|
||||
else:
|
||||
cache_data = result.get_data(as_text=True)
|
||||
except (ImportError, Exception):
|
||||
pass
|
||||
|
||||
r.setex(cache_key, ttl_seconds, json.dumps(cache_data, default=str))
|
||||
return result
|
||||
except (redis.RedisError, json.JSONDecodeError) as e:
|
||||
logger.warning(f"Cache error for {func.__name__}: {e}")
|
||||
return func(*args, **kwargs)
|
||||
|
||||
return wrapper
|
||||
return decorator
|
||||
|
||||
|
||||
def invalidate_pattern(pattern):
|
||||
"""
|
||||
Invalidate all cache keys matching pattern.
|
||||
|
||||
Args:
|
||||
pattern: Pattern to match (e.g., "home:*" or "stats:*")
|
||||
"""
|
||||
r = get_redis()
|
||||
if r is None:
|
||||
return
|
||||
|
||||
try:
|
||||
cursor = 0
|
||||
deleted = 0
|
||||
while True:
|
||||
cursor, keys = r.scan(cursor, match=f"cache:{pattern}", count=100)
|
||||
if keys:
|
||||
r.delete(*keys)
|
||||
deleted += len(keys)
|
||||
if cursor == 0:
|
||||
break
|
||||
if deleted > 0:
|
||||
logger.info(f"Invalidated {deleted} cache keys matching '{pattern}'")
|
||||
except redis.RedisError as e:
|
||||
logger.warning(f"Cache invalidation failed: {e}")
|
||||
|
||||
|
||||
def cache_get(key):
|
||||
"""Get value from cache by key."""
|
||||
r = get_redis()
|
||||
if r is None:
|
||||
return None
|
||||
try:
|
||||
value = r.get(f"cache:{key}")
|
||||
return json.loads(value) if value else None
|
||||
except (redis.RedisError, json.JSONDecodeError):
|
||||
return None
|
||||
|
||||
|
||||
def cache_set(key, value, ttl_seconds=None):
|
||||
"""Set value in cache with optional TTL."""
|
||||
if ttl_seconds is None:
|
||||
ttl_seconds = REDIS_TTL_DEFAULT
|
||||
r = get_redis()
|
||||
if r is None:
|
||||
return False
|
||||
try:
|
||||
r.setex(f"cache:{key}", ttl_seconds, json.dumps(value, default=str))
|
||||
return True
|
||||
except redis.RedisError:
|
||||
return False
|
||||
|
||||
|
||||
def cache_del(key):
|
||||
"""Delete a key from cache."""
|
||||
r = get_redis()
|
||||
if r is None:
|
||||
return False
|
||||
try:
|
||||
r.delete(f"cache:{key}")
|
||||
return True
|
||||
except redis.RedisError:
|
||||
return False
|
||||
69
config.py
69
config.py
|
|
@ -1,69 +0,0 @@
|
|||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
DB_CONFIG = {
|
||||
"dbname": os.getenv("DB_NAME", "rss"),
|
||||
"user": os.getenv("DB_USER", "rss"),
|
||||
"password": os.getenv("DB_PASS", ""),
|
||||
"host": os.getenv("DB_HOST", "localhost"),
|
||||
"port": int(os.getenv("DB_PORT", 5432)),
|
||||
}
|
||||
|
||||
# Write DB (primary) - for workers/ingestion
|
||||
DB_WRITE_CONFIG = {
|
||||
"dbname": os.getenv("DB_NAME", "rss"),
|
||||
"user": os.getenv("DB_USER", "rss"),
|
||||
"password": os.getenv("DB_PASS", ""),
|
||||
"host": os.getenv("DB_WRITE_HOST", os.getenv("DB_HOST", "localhost")),
|
||||
"port": int(os.getenv("DB_PORT", 5432)),
|
||||
}
|
||||
|
||||
# Read DB (replica) - for web queries
|
||||
DB_READ_CONFIG = {
|
||||
"dbname": os.getenv("DB_NAME", "rss"),
|
||||
"user": os.getenv("DB_USER", "rss"),
|
||||
"password": os.getenv("DB_PASS", ""),
|
||||
"host": os.getenv("DB_READ_HOST", os.getenv("DB_HOST", "localhost")),
|
||||
"port": int(os.getenv("DB_PORT", 5432)),
|
||||
}
|
||||
|
||||
# Redis Cache
|
||||
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
|
||||
REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
|
||||
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", None) # None = sin autenticación (para compatibilidad)
|
||||
REDIS_TTL_DEFAULT = int(os.getenv("REDIS_TTL_DEFAULT", 60))
|
||||
|
||||
SECRET_KEY = os.getenv("SECRET_KEY", "CAMBIA_ESTA_CLAVE_POR_ALGO_LARGO_Y_ALEATORIO")
|
||||
|
||||
DEFAULT_LANG = os.getenv("DEFAULT_LANG", "es")
|
||||
DEFAULT_TRANSLATION_LANG = os.getenv("DEFAULT_TRANSLATION_LANG", "es")
|
||||
|
||||
WEB_TRANSLATED_DEFAULT = int(os.getenv("WEB_TRANSLATED_DEFAULT", "1"))
|
||||
# Configuración de paginación
|
||||
NEWS_PER_PAGE_DEFAULT = 30 # Reducido de 50 para mejor rendimiento
|
||||
|
||||
RSS_MAX_WORKERS = int(os.getenv("RSS_MAX_WORKERS", "3")) # Reducido de 10 a 3
|
||||
RSS_FEED_TIMEOUT = int(os.getenv("RSS_FEED_TIMEOUT", "60")) # Aumentado timeout
|
||||
RSS_MAX_FAILURES = int(os.getenv("RSS_MAX_FAILURES", "5"))
|
||||
|
||||
TARGET_LANGS = os.getenv("TARGET_LANGS", "es")
|
||||
|
||||
TRANSLATOR_BATCH = int(os.getenv("TRANSLATOR_BATCH", "2")) # Reducido de 4 a 2
|
||||
ENQUEUE = int(os.getenv("ENQUEUE", "50")) # Reducido de 200 a 50
|
||||
TRANSLATOR_SLEEP_IDLE = float(os.getenv("TRANSLATOR_SLEEP_IDLE", "10")) # Aumentado de 5 a 10
|
||||
|
||||
MAX_SRC_TOKENS = int(os.getenv("MAX_SRC_TOKENS", "512"))
|
||||
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "256"))
|
||||
|
||||
NUM_BEAMS_TITLE = int(os.getenv("NUM_BEAMS_TITLE", "1")) # Reducido beams para menos CPU
|
||||
NUM_BEAMS_BODY = int(os.getenv("NUM_BEAMS_BODY", "1"))
|
||||
|
||||
UNIVERSAL_MODEL = os.getenv("UNIVERSAL_MODEL", "facebook/nllb-200-1.3B")
|
||||
DEVICE = os.getenv("DEVICE", "cpu")
|
||||
|
||||
TOKENIZERS_PARALLELISM = os.getenv("TOKENIZERS_PARALLELISM", "false")
|
||||
PYTHONUNBUFFERED = os.getenv("PYTHONUNBUFFERED", "1")
|
||||
PYTORCH_CUDA_ALLOC_CONF = os.getenv("PYTORCH_CUDA_ALLOC_CONF", "")
|
||||
|
||||
76
db.py
76
db.py
|
|
@ -1,76 +0,0 @@
|
|||
import os
|
||||
import psycopg2
|
||||
from contextlib import contextmanager
|
||||
|
||||
# Database configuration
|
||||
DB_HOST = os.environ.get("DB_HOST", "db")
|
||||
DB_NAME = os.environ.get("DB_NAME", "rss")
|
||||
DB_USER = os.environ.get("DB_USER", "rss")
|
||||
DB_PASS = os.environ.get("DB_PASS", "x")
|
||||
DB_PORT = os.environ.get("DB_PORT", "5432")
|
||||
DB_READ_HOST = os.environ.get("DB_READ_HOST", "db-replica")
|
||||
DB_WRITE_HOST = os.environ.get("DB_WRITE_HOST", "db")
|
||||
|
||||
@contextmanager
|
||||
def get_conn():
|
||||
"""Get a database connection (Default: Primary/Write)."""
|
||||
conn = None
|
||||
try:
|
||||
conn = psycopg2.connect(
|
||||
host=DB_HOST,
|
||||
database=DB_NAME,
|
||||
user=DB_USER,
|
||||
password=DB_PASS,
|
||||
port=DB_PORT
|
||||
)
|
||||
yield conn
|
||||
finally:
|
||||
if conn:
|
||||
conn.close()
|
||||
|
||||
@contextmanager
|
||||
def get_read_conn():
|
||||
"""Get a read-only database connection (Replica)."""
|
||||
conn = None
|
||||
try:
|
||||
try:
|
||||
# Attempt to connect to Replica first
|
||||
conn = psycopg2.connect(
|
||||
host=DB_READ_HOST,
|
||||
database=DB_NAME,
|
||||
user=DB_USER,
|
||||
password=DB_PASS,
|
||||
port=DB_PORT,
|
||||
connect_timeout=5
|
||||
)
|
||||
except (psycopg2.OperationalError, psycopg2.InterfaceError) as e:
|
||||
# Fallback to Primary if Replica is down on initial connection
|
||||
print(f"Warning: Replica unreachable ({e}), falling back to Primary for read.")
|
||||
conn = psycopg2.connect(
|
||||
host=DB_WRITE_HOST,
|
||||
database=DB_NAME,
|
||||
user=DB_USER,
|
||||
password=DB_PASS,
|
||||
port=DB_PORT
|
||||
)
|
||||
yield conn
|
||||
finally:
|
||||
if conn:
|
||||
conn.close()
|
||||
|
||||
@contextmanager
|
||||
def get_write_conn():
|
||||
"""Get a write database connection (Primary)."""
|
||||
conn = None
|
||||
try:
|
||||
conn = psycopg2.connect(
|
||||
host=DB_WRITE_HOST,
|
||||
database=DB_NAME,
|
||||
user=DB_USER,
|
||||
password=DB_PASS,
|
||||
port=DB_PORT
|
||||
)
|
||||
yield conn
|
||||
finally:
|
||||
if conn:
|
||||
conn.close()
|
||||
47
deploy-clean.sh
Executable file
47
deploy-clean.sh
Executable file
|
|
@ -0,0 +1,47 @@
|
|||
#!/bin/bash
|
||||
# Script para despliegue limpio de RSS2
|
||||
|
||||
echo "=== RSS2 Clean Deployment Script ==="
|
||||
echo ""
|
||||
|
||||
# Detener contenedores
|
||||
echo "1. Deteniendo contenedores..."
|
||||
docker compose down -v 2>/dev/null
|
||||
|
||||
# Eliminar volúmenes de datos (si hay permisos)
|
||||
echo "2. Eliminando volúmenes de datos..."
|
||||
docker volume rm rss2_db 2>/dev/null || true
|
||||
docker volume rm rss2_redis 2>/dev/null || true
|
||||
|
||||
# Si los volúmenes Docker tienen problemas, intentar con rm
|
||||
echo " Intentando limpiar /data/..."
|
||||
sudo rm -rf /datos/rss2/data/pgdata 2>/dev/null || true
|
||||
sudo rm -rf /datos/rss2/data/redis-data 2>/dev/null || true
|
||||
|
||||
# Iniciar base de datos
|
||||
echo "3. Iniciando base de datos..."
|
||||
docker compose up -d db
|
||||
|
||||
# Esperar a que esté lista
|
||||
echo "4. Esperando a que la base de datos esté lista..."
|
||||
sleep 10
|
||||
|
||||
# Verificar estado
|
||||
if docker compose ps db | grep -q "healthy"; then
|
||||
echo " ✓ Base de datos iniciada correctamente"
|
||||
|
||||
# Ejecutar script de schema
|
||||
echo "5. Ejecutando script de inicialización..."
|
||||
docker compose exec -T db psql -U rss -d rss -f /docker-entrypoint-initdb.d/00-complete-schema.sql 2>&1 | tail -5
|
||||
|
||||
# Iniciar demás servicios
|
||||
echo "6. Iniciando servicios..."
|
||||
docker compose up -d redis backend-go rss2_frontend nginx rss-ingestor-go
|
||||
|
||||
echo ""
|
||||
echo "=== Despliegue completado ==="
|
||||
echo "Accede a: http://localhost:8001"
|
||||
else
|
||||
echo " ✗ Error: La base de datos no está healthy"
|
||||
docker compose logs db
|
||||
fi
|
||||
|
|
@ -12,47 +12,16 @@ services:
|
|||
LC_ALL: C.UTF-8
|
||||
TZ: Europe/Madrid
|
||||
PGDATA: /var/lib/postgresql/data/18/main
|
||||
command:
|
||||
[
|
||||
"postgres",
|
||||
"-c",
|
||||
"max_connections=200",
|
||||
"-c",
|
||||
"shared_buffers=4GB",
|
||||
"-c",
|
||||
"effective_cache_size=12GB",
|
||||
"-c",
|
||||
"work_mem=16MB",
|
||||
"-c",
|
||||
"maintenance_work_mem=512MB",
|
||||
"-c",
|
||||
"autovacuum_max_workers=3",
|
||||
"-c",
|
||||
"autovacuum_vacuum_scale_factor=0.02",
|
||||
"-c",
|
||||
"autovacuum_vacuum_cost_limit=1000",
|
||||
# Parallel Query Optimization (Adjusted)
|
||||
"-c",
|
||||
"max_worker_processes=8",
|
||||
"-c",
|
||||
"max_parallel_workers=6",
|
||||
"-c",
|
||||
"max_parallel_workers_per_gather=2",
|
||||
# Streaming Replication
|
||||
"-c",
|
||||
"wal_level=replica",
|
||||
"-c",
|
||||
"max_wal_senders=5",
|
||||
"-c",
|
||||
"wal_keep_size=1GB",
|
||||
"-c",
|
||||
"hot_standby=on"
|
||||
]
|
||||
volumes:
|
||||
- ./pgdata:/var/lib/postgresql/data
|
||||
- ./init-db:/docker-entrypoint-initdb.d:ro
|
||||
- ./data/pgdata:/var/lib/postgresql/data
|
||||
- ./init-db:/docker-entrypoint-initdb.d:rw
|
||||
- ./docker-entrypoint-db.sh:/docker-entrypoint-db.sh:ro
|
||||
entrypoint: ["bash", "/docker-entrypoint-db.sh"]
|
||||
networks:
|
||||
- backend
|
||||
backend:
|
||||
aliases:
|
||||
- db
|
||||
- rss2_db
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: [ "CMD-SHELL", "pg_isready -h 127.0.0.1 -p 5432 -U $$POSTGRES_USER -d $$POSTGRES_DB || exit 1" ]
|
||||
|
|
@ -67,40 +36,6 @@ services:
|
|||
reservations:
|
||||
memory: 4G
|
||||
|
||||
db-replica:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.replica
|
||||
container_name: rss2_db_replica
|
||||
shm_size: 2gb
|
||||
environment:
|
||||
POSTGRES_DB: ${POSTGRES_DB:-rss}
|
||||
POSTGRES_USER: ${POSTGRES_USER:-rss}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
PGDATA: /var/lib/postgresql/data
|
||||
TZ: Europe/Madrid
|
||||
command: [ "postgres", "-c", "max_connections=200", "-c", "shared_buffers=256MB", "-c", "effective_cache_size=2GB", "-c", "hot_standby=on", "-c", "max_worker_processes=16", "-c", "hot_standby_feedback=on", "-c", "max_standby_streaming_delay=300s" ]
|
||||
volumes:
|
||||
- ./pgdata-replica:/var/lib/postgresql/data
|
||||
networks:
|
||||
- backend
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: [ "CMD-SHELL", "pg_isready -h 127.0.0.1 -p 5432 -U rss -d rss || exit 1" ]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 30
|
||||
start_period: 30s
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 4G
|
||||
reservations:
|
||||
memory: 2G
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
container_name: rss2_redis
|
||||
|
|
@ -110,11 +45,14 @@ services:
|
|||
command: >
|
||||
redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru --requirepass ${REDIS_PASSWORD}
|
||||
volumes:
|
||||
- ./redis-data:/data
|
||||
- ./data/redis-data:/data
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
networks:
|
||||
- backend
|
||||
backend:
|
||||
aliases:
|
||||
- redis
|
||||
- rss2_redis
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: [ "CMD", "redis-cli", "--no-auth-warning", "-a", "${REDIS_PASSWORD}", "ping" ]
|
||||
|
|
@ -156,73 +94,80 @@ services:
|
|||
reservations:
|
||||
memory: 512M
|
||||
|
||||
rss-tasks:
|
||||
build: .
|
||||
container_name: rss2_tasks_py
|
||||
command: bash -lc "python -m scheduler"
|
||||
langdetect:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
container_name: rss2_langdetect_py
|
||||
command: bash -lc "python -m workers.langdetect_worker"
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${DB_NAME:-rss}
|
||||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
REDIS_HOST: redis
|
||||
REDIS_PORT: 6379
|
||||
REDIS_PASSWORD: ${REDIS_PASSWORD}
|
||||
LANG_DETECT_SLEEP: 60
|
||||
LANG_DETECT_BATCH: 1000
|
||||
TZ: Europe/Madrid
|
||||
volumes:
|
||||
- ./workers:/app/workers
|
||||
networks:
|
||||
- backend
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
|
||||
# ==================================================================================
|
||||
# SCRAPER WORKER (Go) - Extrae artículos de URLs
|
||||
# ==================================================================================
|
||||
scraper:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.scraper
|
||||
container_name: rss2_scraper
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${DB_NAME:-rss}
|
||||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
SCRAPER_SLEEP: 60
|
||||
SCRAPER_BATCH: 10
|
||||
TZ: Europe/Madrid
|
||||
networks:
|
||||
- backend
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
memory: 512M
|
||||
|
||||
url-worker:
|
||||
# ==================================================================================
|
||||
# DISCOVERY WORKER (Go) - Descubre RSS feeds
|
||||
# ==================================================================================
|
||||
discovery:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.url_worker
|
||||
container_name: rss2_url_worker
|
||||
command: bash -lc "python -m workers.url_worker_daemon"
|
||||
dockerfile: Dockerfile.discovery
|
||||
container_name: rss2_discovery
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${DB_NAME:-rss}
|
||||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
DB_READ_HOST: db
|
||||
DB_WRITE_HOST: db
|
||||
TZ: Europe/Madrid
|
||||
networks:
|
||||
- backend
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2'
|
||||
memory: 2G
|
||||
|
||||
url-discovery-worker:
|
||||
build: .
|
||||
container_name: rss2_url_discovery
|
||||
command: bash -lc "python -m workers.url_discovery_worker"
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${DB_NAME:-rss}
|
||||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
URL_DISCOVERY_INTERVAL_MIN: 15
|
||||
URL_DISCOVERY_BATCH_SIZE: 10
|
||||
DISCOVERY_INTERVAL: 900
|
||||
DISCOVERY_BATCH: 10
|
||||
MAX_FEEDS_PER_URL: 5
|
||||
TZ: Europe/Madrid
|
||||
networks:
|
||||
|
|
@ -235,104 +180,109 @@ services:
|
|||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
memory: 512M
|
||||
|
||||
rss2_web:
|
||||
build: .
|
||||
container_name: rss2_web
|
||||
command: bash -lc "gunicorn --config gunicorn_config.py app:app"
|
||||
volumes:
|
||||
# SEGURIDAD: Código en read-only donde sea posible
|
||||
- ./app.py:/app/app.py:ro
|
||||
- ./routers:/app/routers:ro
|
||||
- ./models:/app/models:ro
|
||||
- ./utils:/app/utils:ro
|
||||
- ./templates:/app/templates:ro
|
||||
- ./static:/app/static:ro
|
||||
- ./config.py:/app/config.py:ro
|
||||
- ./db.py:/app/db.py:ro
|
||||
- ./cache.py:/app/cache.py:ro
|
||||
- ./gunicorn_config.py:/app/gunicorn_config.py:ro
|
||||
# Directorios escribibles
|
||||
- ./hf_cache:/app/hf_cache
|
||||
- ./data:/app/data
|
||||
# ==================================================================================
|
||||
# WIKI WORKER (Go) - Wikipedia info and thumbnails
|
||||
# ==================================================================================
|
||||
wiki-worker:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.wiki
|
||||
container_name: rss2_wiki_worker
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${DB_NAME:-rss}
|
||||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
DB_READ_HOST: db
|
||||
DB_WRITE_HOST: db
|
||||
REDIS_HOST: redis
|
||||
REDIS_PORT: 6379
|
||||
REDIS_PASSWORD: ${REDIS_PASSWORD}
|
||||
QDRANT_HOST: qdrant
|
||||
QDRANT_PORT: 6333
|
||||
QDRANT_COLLECTION_NAME: ${QDRANT_COLLECTION_NAME:-news_vectors}
|
||||
EMB_MODEL: ${EMB_MODEL:-sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2}
|
||||
SECRET_KEY: ${SECRET_KEY}
|
||||
GUNICORN_WORKERS: 8
|
||||
ALLTALK_URL: http://host.docker.internal:7851
|
||||
WIKI_SLEEP: 10
|
||||
TZ: Europe/Madrid
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
volumes:
|
||||
- ./data/wiki_images:/app/data/wiki_images
|
||||
networks:
|
||||
- frontend
|
||||
- backend
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
# db-replica:
|
||||
# condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
qdrant:
|
||||
condition: service_started
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '8'
|
||||
memory: 8G
|
||||
reservations:
|
||||
memory: 4G
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [ gpu ]
|
||||
cpus: '0.5'
|
||||
memory: 256M
|
||||
|
||||
# ==================================================================================
|
||||
# BACKEND GO (API REST)
|
||||
# ==================================================================================
|
||||
backend-go:
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
container_name: rss2_backend_go
|
||||
environment:
|
||||
TZ: Europe/Madrid
|
||||
DATABASE_URL: postgres://${POSTGRES_USER:-rss}:${POSTGRES_PASSWORD}@db:5432/${POSTGRES_DB:-rss}?sslmode=disable
|
||||
REDIS_URL: redis://:${REDIS_PASSWORD:-rss_redis_pass_2024}@redis:6379
|
||||
SECRET_KEY: ${SECRET_KEY:-change_this_to_a_long_random_string}
|
||||
SERVER_PORT: "8080"
|
||||
volumes:
|
||||
- ./data/wiki_images:/app/data/wiki_images
|
||||
networks:
|
||||
- backend
|
||||
- frontend
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
|
||||
# ==================================================================================
|
||||
# FRONTEND REACT
|
||||
# ==================================================================================
|
||||
rss2_frontend:
|
||||
build:
|
||||
context: ./frontend
|
||||
dockerfile: Dockerfile
|
||||
container_name: rss2_frontend
|
||||
environment:
|
||||
TZ: Europe/Madrid
|
||||
VITE_API_URL: /api
|
||||
networks:
|
||||
- frontend
|
||||
depends_on:
|
||||
- backend-go
|
||||
restart: unless-stopped
|
||||
|
||||
# ==================================================================================
|
||||
# NGINX (Puerto 8001 - sirve React + proxy API)
|
||||
# ==================================================================================
|
||||
nginx:
|
||||
image: nginx:alpine
|
||||
container_name: rss2_nginx
|
||||
environment:
|
||||
TZ: Europe/Madrid
|
||||
ports:
|
||||
# ÚNICO puerto expuesto públicamente
|
||||
- "8001:80"
|
||||
volumes:
|
||||
- ./nginx.conf:/etc/nginx/nginx.conf:ro
|
||||
- ./static:/app/static:ro
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
networks:
|
||||
- frontend
|
||||
depends_on:
|
||||
- rss2_web
|
||||
- rss2_frontend
|
||||
- backend-go
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2'
|
||||
memory: 512M
|
||||
|
||||
# ==================================================================================
|
||||
# TRANSLATOR CPU (CTranslate2) - Scale with: docker compose up -d --scale translator=3
|
||||
# ==================================================================================
|
||||
translator:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
dockerfile: Dockerfile.translator
|
||||
image: rss2-translator:latest
|
||||
container_name: rss2_translator_py
|
||||
command: bash -lc "python -m workers.translation_worker"
|
||||
command: bash -lc "python -m workers.ctranslator_worker"
|
||||
security_opt:
|
||||
- seccomp=unconfined
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
|
|
@ -340,41 +290,36 @@ services:
|
|||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
TARGET_LANGS: es
|
||||
TRANSLATOR_BATCH: 128
|
||||
ENQUEUE: 300
|
||||
# CTranslate2 configuration
|
||||
TRANSLATOR_BATCH: 32
|
||||
CT2_MODEL_PATH: /app/models/nllb-ct2
|
||||
CT2_DEVICE: cuda
|
||||
CT2_COMPUTE_TYPE: int8_float16
|
||||
CT2_DEVICE: cpu
|
||||
CT2_COMPUTE_TYPE: int8
|
||||
UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M
|
||||
HF_HOME: /app/hf_cache
|
||||
TZ: Europe/Madrid
|
||||
TRANSLATOR_ID: ${TRANSLATOR_ID:-}
|
||||
volumes:
|
||||
- ./workers:/app/workers
|
||||
- ./hf_cache:/app/hf_cache
|
||||
- ./models:/app/models
|
||||
networks:
|
||||
- backend
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 8G
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [ gpu ]
|
||||
profiles:
|
||||
- cpu-only
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
|
||||
translator2:
|
||||
# ==================================================================================
|
||||
# TRANSLATION SCHEDULER - Creates translation jobs
|
||||
# ==================================================================================
|
||||
translation-scheduler:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
image: rss2-translator2:latest
|
||||
container_name: rss2_translator_py2
|
||||
command: bash -lc "python -m workers.translation_worker"
|
||||
dockerfile: Dockerfile.scheduler
|
||||
image: rss2-scheduler:latest
|
||||
container_name: rss2_translation_scheduler
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
|
|
@ -382,40 +327,35 @@ services:
|
|||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
TARGET_LANGS: es
|
||||
TRANSLATOR_BATCH: 128
|
||||
ENQUEUE: 300
|
||||
CT2_MODEL_PATH: /app/models/nllb-ct2
|
||||
CT2_DEVICE: cuda
|
||||
CT2_COMPUTE_TYPE: int8_float16
|
||||
UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M
|
||||
HF_HOME: /app/hf_cache
|
||||
SCHEDULER_BATCH: 1000
|
||||
SCHEDULER_SLEEP: 30
|
||||
TZ: Europe/Madrid
|
||||
volumes:
|
||||
- ./hf_cache:/app/hf_cache
|
||||
- ./models:/app/models
|
||||
- ./workers:/app/workers
|
||||
networks:
|
||||
- backend
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 8G
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [ gpu ]
|
||||
cpus: '0.5'
|
||||
memory: 256M
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
|
||||
translator3:
|
||||
# ==================================================================================
|
||||
# TRANSLATOR GPU (CTranslate2 with CUDA)
|
||||
# ==================================================================================
|
||||
translator-gpu:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
image: rss2-translator3:latest
|
||||
container_name: rss2_translator_py3
|
||||
command: bash -lc "python -m workers.translation_worker"
|
||||
dockerfile: Dockerfile.translator-gpu
|
||||
image: rss2-translator-gpu:latest
|
||||
container_name: rss2_translator_gpu
|
||||
command: bash -lc "python -m workers.ctranslator_worker"
|
||||
security_opt:
|
||||
- seccomp=unconfined
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
|
|
@ -423,14 +363,15 @@ services:
|
|||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
TARGET_LANGS: es
|
||||
TRANSLATOR_BATCH: 128
|
||||
ENQUEUE: 300
|
||||
TRANSLATOR_BATCH: 64
|
||||
CT2_MODEL_PATH: /app/models/nllb-ct2
|
||||
CT2_DEVICE: cuda
|
||||
CT2_COMPUTE_TYPE: int8_float16
|
||||
CT2_COMPUTE_TYPE: float16
|
||||
UNIVERSAL_MODEL: facebook/nllb-200-distilled-600M
|
||||
HF_HOME: /app/hf_cache
|
||||
TZ: Europe/Madrid
|
||||
volumes:
|
||||
- ./workers:/app/workers
|
||||
- ./hf_cache:/app/hf_cache
|
||||
- ./models:/app/models
|
||||
networks:
|
||||
|
|
@ -438,7 +379,7 @@ services:
|
|||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 8G
|
||||
memory: 4G
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
|
|
@ -470,6 +411,7 @@ services:
|
|||
HF_HOME: /app/hf_cache
|
||||
TZ: Europe/Madrid
|
||||
volumes:
|
||||
- ./workers:/app/workers
|
||||
- ./hf_cache:/app/hf_cache
|
||||
networks:
|
||||
- backend
|
||||
|
|
@ -487,19 +429,53 @@ services:
|
|||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
|
||||
related:
|
||||
# ==================================================================================
|
||||
# TOPICS WORKER (Go) - Matching temas y países
|
||||
# ==================================================================================
|
||||
topics:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
container_name: rss2_related_py
|
||||
command: bash -lc "python -m workers.related_worker"
|
||||
dockerfile: Dockerfile.topics
|
||||
container_name: rss2_topics
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${DB_NAME:-rss}
|
||||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
RELATED_WINDOW_H: 168
|
||||
TOPICS_SLEEP: 10
|
||||
TOPICS_BATCH: 500
|
||||
TZ: Europe/Madrid
|
||||
networks:
|
||||
- backend
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 512M
|
||||
|
||||
# ==================================================================================
|
||||
# RELATED WORKER (Go) - Noticias relacionadas
|
||||
# ==================================================================================
|
||||
related:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.related
|
||||
container_name: rss2_related
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${DB_NAME:-rss}
|
||||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
RELATED_SLEEP: 10
|
||||
RELATED_BATCH: 200
|
||||
RELATED_TOPK: 10
|
||||
EMB_MODEL: mxbai-embed-large
|
||||
TZ: Europe/Madrid
|
||||
networks:
|
||||
- backend
|
||||
|
|
@ -513,6 +489,99 @@ services:
|
|||
cpus: '1'
|
||||
memory: 1G
|
||||
|
||||
qdrant:
|
||||
image: qdrant/qdrant:latest
|
||||
container_name: rss2_qdrant
|
||||
environment:
|
||||
TZ: Europe/Madrid
|
||||
QDRANT__SERVICE__GRPC_PORT: 6334
|
||||
volumes:
|
||||
- ./data/qdrant_storage:/qdrant/storage
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
networks:
|
||||
- backend
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '4'
|
||||
memory: 4G
|
||||
reservations:
|
||||
memory: 2G
|
||||
|
||||
# ==================================================================================
|
||||
# QDRANT WORKER (Go) - Vectorización y búsqueda semántica
|
||||
# ==================================================================================
|
||||
qdrant-worker:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.qdrant
|
||||
container_name: rss2_qdrant_worker
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${DB_NAME:-rss}
|
||||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
QDRANT_HOST: qdrant
|
||||
QDRANT_PORT: 6333
|
||||
QDRANT_COLLECTION: news_vectors
|
||||
OLLAMA_URL: http://ollama:11434
|
||||
QDRANT_SLEEP: 30
|
||||
QDRANT_BATCH: 100
|
||||
TZ: Europe/Madrid
|
||||
networks:
|
||||
- backend
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
qdrant:
|
||||
condition: service_started
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
|
||||
# ==================================================================================
|
||||
# NER WORKER (Python) - Extracción de entidades
|
||||
# ==================================================================================
|
||||
ner:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
container_name: rss2_ner
|
||||
command: bash -lc "python -m workers.ner_worker"
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${DB_NAME:-rss}
|
||||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
NER_LANG: es
|
||||
NER_BATCH: 64
|
||||
HF_HOME: /app/hf_cache
|
||||
TZ: Europe/Madrid
|
||||
volumes:
|
||||
- ./workers:/app/workers
|
||||
- ./hf_cache:/app/hf_cache
|
||||
networks:
|
||||
- backend
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2'
|
||||
memory: 2G
|
||||
|
||||
# ==================================================================================
|
||||
# CLUSTER WORKER (Python) - Agrupación de noticias
|
||||
# ==================================================================================
|
||||
cluster:
|
||||
build:
|
||||
context: .
|
||||
|
|
@ -528,34 +597,8 @@ services:
|
|||
EVENT_DIST_THRESHOLD: 0.35
|
||||
EMB_MODEL: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
|
||||
TZ: Europe/Madrid
|
||||
networks:
|
||||
- backend
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2'
|
||||
memory: 2G
|
||||
|
||||
ner:
|
||||
build: .
|
||||
container_name: rss2_ner
|
||||
command: bash -lc "python -m workers.ner_worker"
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${DB_NAME:-rss}
|
||||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
NER_LANG: es
|
||||
NER_BATCH: 64
|
||||
HF_HOME: /app/hf_cache
|
||||
TZ: Europe/Madrid
|
||||
volumes:
|
||||
- ./hf_cache:/app/hf_cache
|
||||
- ./workers:/app/workers
|
||||
networks:
|
||||
- backend
|
||||
depends_on:
|
||||
|
|
@ -568,33 +611,13 @@ services:
|
|||
cpus: '2'
|
||||
memory: 2G
|
||||
|
||||
topics:
|
||||
# ==================================================================================
|
||||
# LLM CATEGORIZER (Python) - Categorización con Ollama
|
||||
# ==================================================================================
|
||||
llm-categorizer:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
container_name: rss2_topics_worker
|
||||
command: bash -lc "python -m workers.topics_worker"
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${DB_NAME:-rss}
|
||||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
TZ: Europe/Madrid
|
||||
networks:
|
||||
- backend
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '1'
|
||||
memory: 1G
|
||||
|
||||
llm-categorizer:
|
||||
build: .
|
||||
container_name: rss2_llm_categorizer
|
||||
command: bash -lc "python -m workers.simple_categorizer_worker"
|
||||
environment:
|
||||
|
|
@ -606,6 +629,8 @@ services:
|
|||
CATEGORIZER_BATCH_SIZE: 10
|
||||
CATEGORIZER_SLEEP_IDLE: 5
|
||||
TZ: Europe/Madrid
|
||||
volumes:
|
||||
- ./workers:/app/workers
|
||||
networks:
|
||||
- backend
|
||||
depends_on:
|
||||
|
|
@ -618,72 +643,6 @@ services:
|
|||
cpus: '2'
|
||||
memory: 1G
|
||||
|
||||
qdrant:
|
||||
image: qdrant/qdrant:latest
|
||||
container_name: rss2_qdrant
|
||||
environment:
|
||||
TZ: Europe/Madrid
|
||||
QDRANT__SERVICE__GRPC_PORT: 6334
|
||||
# SEGURIDAD: Puertos NO expuestos - solo acceso interno
|
||||
# ports:
|
||||
# - "6333:6333"
|
||||
# - "6334:6334"
|
||||
volumes:
|
||||
- ./qdrant_storage:/qdrant/storage
|
||||
- /etc/timezone:/etc/timezone:ro
|
||||
- /etc/localtime:/etc/localtime:ro
|
||||
networks:
|
||||
- backend
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '4'
|
||||
memory: 4G
|
||||
reservations:
|
||||
memory: 2G
|
||||
|
||||
qdrant-worker:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
container_name: rss2_qdrant_worker
|
||||
command: bash -lc "python -m workers.qdrant_worker"
|
||||
environment:
|
||||
DB_HOST: db
|
||||
DB_PORT: 5432
|
||||
DB_NAME: ${DB_NAME:-rss}
|
||||
DB_USER: ${DB_USER:-rss}
|
||||
DB_PASS: ${DB_PASS}
|
||||
DB_READ_HOST: db
|
||||
DB_WRITE_HOST: db
|
||||
QDRANT_HOST: qdrant
|
||||
QDRANT_PORT: 6333
|
||||
QDRANT_COLLECTION_NAME: ${QDRANT_COLLECTION_NAME:-news_vectors}
|
||||
EMB_MODEL: ${EMB_MODEL:-sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2}
|
||||
EMB_DEVICE: cpu
|
||||
QDRANT_BATCH_SIZE: ${QDRANT_BATCH_SIZE:-100}
|
||||
QDRANT_SLEEP_IDLE: ${QDRANT_SLEEP_IDLE:-30}
|
||||
HF_HOME: /app/hf_cache
|
||||
TZ: Europe/Madrid
|
||||
volumes:
|
||||
- ./hf_cache:/app/hf_cache
|
||||
networks:
|
||||
- backend
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
# db-replica:
|
||||
# condition: service_healthy
|
||||
qdrant:
|
||||
condition: service_started
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2'
|
||||
memory: 4G
|
||||
|
||||
# ==================================================================================
|
||||
# MONITORING STACK - SECURED
|
||||
# ==================================================================================
|
||||
|
|
|
|||
42
docker-entrypoint-db.sh
Executable file
42
docker-entrypoint-db.sh
Executable file
|
|
@ -0,0 +1,42 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Detectar si la base de datos necesita reinicialización
|
||||
PGDATA_DIR="/var/lib/postgresql/data/18/main"
|
||||
|
||||
echo "RSS2: Checking database integrity..."
|
||||
|
||||
# Si no existe el archivo de versión, es una base de datos nueva
|
||||
if [ ! -f "$PGDATA_DIR/PG_VERSION" ]; then
|
||||
echo "RSS2: New database - will be initialized by docker-entrypoint"
|
||||
else
|
||||
# Verificar si la base de datos es funcional
|
||||
if ! pg_isready -h localhost -p 5432 -U "${POSTGRES_USER:-rss}" 2>/dev/null; then
|
||||
echo "RSS2: Database appears corrupted - removing old data files for fresh initialization..."
|
||||
# Eliminar solo los archivos de datos, no todo el directorio
|
||||
rm -rf "$PGDATA_DIR"/*
|
||||
echo "RSS2: Data files removed - docker-entrypoint will initialize fresh database"
|
||||
else
|
||||
echo "RSS2: Database is healthy"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Ejecutar el entrypoint original con los parámetros de PostgreSQL
|
||||
exec docker-entrypoint.sh \
|
||||
postgres \
|
||||
-c max_connections=200 \
|
||||
-c shared_buffers=4GB \
|
||||
-c effective_cache_size=12GB \
|
||||
-c work_mem=16MB \
|
||||
-c maintenance_work_mem=512MB \
|
||||
-c autovacuum_max_workers=3 \
|
||||
-c autovacuum_vacuum_scale_factor=0.02 \
|
||||
-c autovacuum_vacuum_cost_limit=1000 \
|
||||
-c max_worker_processes=8 \
|
||||
-c max_parallel_workers=6 \
|
||||
-c max_parallel_workers_per_gather=2 \
|
||||
-c wal_level=replica \
|
||||
-c max_wal_senders=5 \
|
||||
-c wal_keep_size=1GB \
|
||||
-c hot_standby=on \
|
||||
"$@"
|
||||
|
|
@ -1,391 +0,0 @@
|
|||
# Sistema de Descubrimiento y Gestión de Feeds RSS
|
||||
|
||||
Este documento describe el sistema mejorado de descubrimiento automático y gestión de feeds RSS implementado en RSS2.
|
||||
|
||||
## 📋 Visión General
|
||||
|
||||
El sistema ahora incluye dos mecanismos para gestionar feeds RSS:
|
||||
|
||||
1. **Gestión Manual Mejorada**: Interfaz web para descubrir y añadir feeds desde cualquier URL
|
||||
2. **Worker Automático**: Proceso en segundo plano que descubre feeds desde URLs almacenadas
|
||||
|
||||
## 🎯 Componentes del Sistema
|
||||
|
||||
### 1. Utilidad de Descubrimiento (`utils/feed_discovery.py`)
|
||||
|
||||
Módulo Python que proporciona funciones para:
|
||||
|
||||
- **`discover_feeds(url)`**: Descubre automáticamente todos los feeds RSS/Atom desde una URL
|
||||
- **`validate_feed(feed_url)`**: Valida un feed y extrae su información básica
|
||||
- **`get_feed_metadata(feed_url)`**: Obtiene metadatos detallados de un feed
|
||||
|
||||
```python
|
||||
from utils.feed_discovery import discover_feeds
|
||||
|
||||
# Descubrir feeds desde una URL
|
||||
feeds = discover_feeds('https://elpais.com')
|
||||
# Retorna: [{'url': '...', 'title': '...', 'valid': True, ...}, ...]
|
||||
```
|
||||
|
||||
### 2. Router de Feeds Mejorado (`routers/feeds.py`)
|
||||
|
||||
Nuevos endpoints añadidos:
|
||||
|
||||
#### Interfaz Web
|
||||
- **`GET/POST /feeds/discover`**: Interfaz para descubrir feeds desde una URL
|
||||
- Muestra todos los feeds encontrados
|
||||
- Permite seleccionar cuáles añadir
|
||||
- Aplica configuración global (categoría, país, idioma)
|
||||
|
||||
- **`POST /feeds/discover_and_add`**: Añade múltiples feeds seleccionados
|
||||
- Extrae automáticamente título y descripción
|
||||
- Evita duplicados
|
||||
- Muestra estadísticas de feeds añadidos
|
||||
|
||||
#### API JSON
|
||||
- **`POST /feeds/api/discover`**: API para descubrir feeds
|
||||
```json
|
||||
{
|
||||
"url": "https://example.com"
|
||||
}
|
||||
```
|
||||
Retorna:
|
||||
```json
|
||||
{
|
||||
"feeds": [...],
|
||||
"count": 5
|
||||
}
|
||||
```
|
||||
|
||||
- **`POST /feeds/api/validate`**: API para validar un feed específico
|
||||
```json
|
||||
{
|
||||
"url": "https://example.com/rss"
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Worker de Descubrimiento (`workers/url_discovery_worker.py`)
|
||||
|
||||
Worker automático que:
|
||||
|
||||
1. **Procesa URLs de la tabla `fuentes_url`**
|
||||
- Prioriza URLs nunca procesadas
|
||||
- Reintenta URLs con errores
|
||||
- Actualiza URLs antiguas
|
||||
|
||||
2. **Descubre y Crea Feeds Automáticamente**
|
||||
- Encuentra todos los feeds RSS en cada URL
|
||||
- Valida cada feed encontrado
|
||||
- Crea entradas en la tabla `feeds`
|
||||
- Evita duplicados
|
||||
|
||||
3. **Gestión de Estado**
|
||||
- Actualiza `last_check`, `last_status`, `status_message`
|
||||
- Maneja errores gracefully
|
||||
- Registra estadísticas detalladas
|
||||
|
||||
#### Configuración del Worker
|
||||
|
||||
Variables de entorno:
|
||||
|
||||
```bash
|
||||
# Intervalo de ejecución (minutos)
|
||||
URL_DISCOVERY_INTERVAL_MIN=15
|
||||
|
||||
# Número de URLs a procesar por ciclo
|
||||
URL_DISCOVERY_BATCH_SIZE=10
|
||||
|
||||
# Máximo de feeds a crear por URL
|
||||
MAX_FEEDS_PER_URL=5
|
||||
```
|
||||
|
||||
#### Estados de URLs en `fuentes_url`
|
||||
|
||||
| Estado | Descripción |
|
||||
|--------|-------------|
|
||||
| `success` | Feeds creados exitosamente |
|
||||
| `existing` | Feeds encontrados pero ya existían |
|
||||
| `no_feeds` | No se encontraron feeds RSS |
|
||||
| `no_valid_feeds` | Se encontraron feeds pero ninguno válido |
|
||||
| `error` | Error al procesar la URL |
|
||||
|
||||
## 🚀 Uso del Sistema
|
||||
|
||||
### Método 1: Interfaz Web Manual
|
||||
|
||||
1. **Navega a `/feeds/discover`**
|
||||
2. **Ingresa una URL** (ej: `https://elpais.com`)
|
||||
3. **Haz clic en "Buscar Feeds"**
|
||||
4. El sistema mostrará todos los feeds encontrados con:
|
||||
- Estado de validación
|
||||
- Título y descripción
|
||||
- Número de entradas
|
||||
- Tipo de feed (RSS/Atom)
|
||||
5. **Configura opciones globales**:
|
||||
- Categoría
|
||||
- País
|
||||
- Idioma
|
||||
6. **Selecciona los feeds deseados** y haz clic en "Añadir Feeds Seleccionados"
|
||||
|
||||
### Método 2: Worker Automático
|
||||
|
||||
1. **Añade URLs a la tabla `fuentes_url`**:
|
||||
```sql
|
||||
INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma, active)
|
||||
VALUES ('El País', 'https://elpais.com', 1, 1, 'es', TRUE);
|
||||
```
|
||||
|
||||
2. **El worker procesará automáticamente**:
|
||||
- Cada 15 minutos (configurable)
|
||||
- Descubrirá todos los feeds
|
||||
- Creará entradas en `feeds`
|
||||
- Actualizará el estado
|
||||
|
||||
3. **Monitorea el progreso**:
|
||||
```sql
|
||||
SELECT nombre, url, last_check, last_status, status_message
|
||||
FROM fuentes_url
|
||||
ORDER BY last_check DESC;
|
||||
```
|
||||
|
||||
### Método 3: Interfaz de URLs (Existente)
|
||||
|
||||
Usa la interfaz web existente en `/urls/add_source` para añadir URLs que serán procesadas por el worker.
|
||||
|
||||
## 🔄 Flujo de Trabajo Completo
|
||||
|
||||
```
|
||||
┌─────────────────┐
|
||||
│ Usuario añade │
|
||||
│ URL del sitio │
|
||||
└────────┬────────┘
|
||||
│
|
||||
v
|
||||
┌─────────────────────────┐
|
||||
│ URL guardada en │
|
||||
│ tabla fuentes_url │
|
||||
└────────┬────────────────┘
|
||||
│
|
||||
v
|
||||
┌─────────────────────────┐
|
||||
│ Worker ejecuta cada │
|
||||
│ 15 minutos │
|
||||
└────────┬────────────────┘
|
||||
│
|
||||
v
|
||||
┌─────────────────────────┐
|
||||
│ Descubre feeds RSS │
|
||||
│ usando feedfinder2 │
|
||||
└────────┬────────────────┘
|
||||
│
|
||||
v
|
||||
┌─────────────────────────┐
|
||||
│ Valida cada feed │
|
||||
│ encontrado │
|
||||
└────────┬────────────────┘
|
||||
│
|
||||
v
|
||||
┌─────────────────────────┐
|
||||
│ Crea entradas en │
|
||||
│ tabla feeds │
|
||||
└────────┬────────────────┘
|
||||
│
|
||||
v
|
||||
┌─────────────────────────┐
|
||||
│ Ingestor Go procesa │
|
||||
│ feeds cada 15 minutos │
|
||||
└────────┬────────────────┘
|
||||
│
|
||||
v
|
||||
┌─────────────────────────┐
|
||||
│ Noticias descargadas │
|
||||
│ y procesadas │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
||||
## 📊 Tablas de Base de Datos
|
||||
|
||||
### `fuentes_url`
|
||||
Almacena URLs de sitios web para descubrimiento automático:
|
||||
|
||||
```sql
|
||||
CREATE TABLE fuentes_url (
|
||||
id SERIAL PRIMARY KEY,
|
||||
nombre VARCHAR(255) NOT NULL,
|
||||
url TEXT NOT NULL UNIQUE,
|
||||
categoria_id INTEGER REFERENCES categorias(id),
|
||||
pais_id INTEGER REFERENCES paises(id),
|
||||
idioma CHAR(2) DEFAULT 'es',
|
||||
last_check TIMESTAMP,
|
||||
last_status VARCHAR(50),
|
||||
status_message TEXT,
|
||||
last_http_code INTEGER,
|
||||
active BOOLEAN DEFAULT TRUE
|
||||
);
|
||||
```
|
||||
|
||||
### `feeds`
|
||||
Almacena feeds RSS descubiertos y validados:
|
||||
|
||||
```sql
|
||||
CREATE TABLE feeds (
|
||||
id SERIAL PRIMARY KEY,
|
||||
nombre VARCHAR(255),
|
||||
descripcion TEXT,
|
||||
url TEXT NOT NULL UNIQUE,
|
||||
categoria_id INTEGER REFERENCES categorias(id),
|
||||
pais_id INTEGER REFERENCES paises(id),
|
||||
idioma CHAR(2),
|
||||
activo BOOLEAN DEFAULT TRUE,
|
||||
fallos INTEGER DEFAULT 0,
|
||||
last_etag TEXT,
|
||||
last_modified TEXT,
|
||||
last_error TEXT
|
||||
);
|
||||
```
|
||||
|
||||
## ⚙️ Configuración del Sistema
|
||||
|
||||
### Variables de Entorno
|
||||
|
||||
Añade al archivo `.env`:
|
||||
|
||||
```bash
|
||||
# RSS Ingestor
|
||||
RSS_POKE_INTERVAL_MIN=15 # Intervalo de ingesta (minutos)
|
||||
RSS_MAX_FAILURES=10 # Fallos máximos antes de desactivar feed
|
||||
RSS_FEED_TIMEOUT=60 # Timeout para descargar feeds (segundos)
|
||||
|
||||
# URL Discovery Worker
|
||||
URL_DISCOVERY_INTERVAL_MIN=15 # Intervalo de descubrimiento (minutos)
|
||||
URL_DISCOVERY_BATCH_SIZE=10 # URLs a procesar por ciclo
|
||||
MAX_FEEDS_PER_URL=5 # Máximo de feeds por URL
|
||||
```
|
||||
|
||||
### Docker Compose
|
||||
|
||||
El worker ya está configurado en `docker-compose.yml`:
|
||||
|
||||
```yaml
|
||||
url-discovery-worker:
|
||||
build: .
|
||||
container_name: rss2_url_discovery
|
||||
command: bash -lc "python -m workers.url_discovery_worker"
|
||||
environment:
|
||||
DB_HOST: db
|
||||
URL_DISCOVERY_INTERVAL_MIN: 15
|
||||
URL_DISCOVERY_BATCH_SIZE: 10
|
||||
MAX_FEEDS_PER_URL: 5
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
```
|
||||
|
||||
## 🔧 Comandos Útiles
|
||||
|
||||
### Ver logs del worker de descubrimiento
|
||||
```bash
|
||||
docker logs -f rss2_url_discovery
|
||||
```
|
||||
|
||||
### Reiniciar el worker
|
||||
```bash
|
||||
docker restart rss2_url_discovery
|
||||
```
|
||||
|
||||
### Ejecutar manualmente el worker (testing)
|
||||
```bash
|
||||
docker exec -it rss2_url_discovery python -m workers.url_discovery_worker
|
||||
```
|
||||
|
||||
### Ver estadísticas de descubrimiento
|
||||
```sql
|
||||
-- URLs procesadas recientemente
|
||||
SELECT nombre, url, last_check, last_status, status_message
|
||||
FROM fuentes_url
|
||||
WHERE last_check > NOW() - INTERVAL '1 day'
|
||||
ORDER BY last_check DESC;
|
||||
|
||||
-- Feeds creados recientemente
|
||||
SELECT nombre, url, fecha_creacion
|
||||
FROM feeds
|
||||
WHERE fecha_creacion > NOW() - INTERVAL '1 day'
|
||||
ORDER BY fecha_creacion DESC;
|
||||
```
|
||||
|
||||
## 🛠️ Troubleshooting
|
||||
|
||||
### El worker no encuentra feeds
|
||||
|
||||
1. Verifica que la URL sea accesible:
|
||||
```bash
|
||||
curl -I https://example.com
|
||||
```
|
||||
|
||||
2. Verifica los logs del worker:
|
||||
```bash
|
||||
docker logs rss2_url_discovery
|
||||
```
|
||||
|
||||
3. Prueba manualmente el descubrimiento:
|
||||
```python
|
||||
from utils.feed_discovery import discover_feeds
|
||||
feeds = discover_feeds('https://example.com')
|
||||
print(feeds)
|
||||
```
|
||||
|
||||
### Feeds duplicados
|
||||
|
||||
El sistema previene duplicados usando `ON CONFLICT (url) DO NOTHING`. Si un feed ya existe, simplemente se omite.
|
||||
|
||||
### Worker consume muchos recursos
|
||||
|
||||
Ajusta las configuraciones:
|
||||
|
||||
```bash
|
||||
# Reduce el batch size
|
||||
URL_DISCOVERY_BATCH_SIZE=5
|
||||
|
||||
# Aumenta el intervalo
|
||||
URL_DISCOVERY_INTERVAL_MIN=30
|
||||
|
||||
# Reduce feeds por URL
|
||||
MAX_FEEDS_PER_URL=3
|
||||
```
|
||||
|
||||
## 📝 Mejores Prácticas
|
||||
|
||||
1. **Añade URLs de sitios, no feeds directos**
|
||||
- ✅ `https://elpais.com`
|
||||
- ❌ `https://elpais.com/rss/feed.xml`
|
||||
|
||||
2. **Configura categoría y país correctamente**
|
||||
- Facilita la organización
|
||||
- Mejora la experiencia del usuario
|
||||
|
||||
3. **Monitorea el estado de las URLs**
|
||||
- Revisa periódicamente `fuentes_url`
|
||||
- Desactiva URLs que fallan consistentemente
|
||||
|
||||
4. **Limita el número de feeds por URL**
|
||||
- Evita sobrecarga de feeds similares
|
||||
- Mantén `MAX_FEEDS_PER_URL` entre 3-5
|
||||
|
||||
## 🎉 Ventajas del Sistema
|
||||
|
||||
✅ **Automatización completa**: Solo añade URLs, el sistema hace el resto
|
||||
✅ **Descubrimiento inteligente**: Encuentra todos los feeds disponibles
|
||||
✅ **Validación automática**: Solo crea feeds válidos y funcionales
|
||||
✅ **Sin duplicados**: Gestión inteligente de feeds existentes
|
||||
✅ **Escalable**: Procesa múltiples URLs en lotes
|
||||
✅ **Resiliente**: Manejo robusto de errores y reintentos
|
||||
✅ **Monitoreable**: Logs detallados y estados claros
|
||||
|
||||
## 📚 Referencias
|
||||
|
||||
- **feedfinder2**: https://github.com/dfm/feedfinder2
|
||||
- **feedparser**: https://feedparser.readthedocs.io/
|
||||
- **Tabla fuentes_url**: `/init-db/01.schema.sql`
|
||||
- **Worker**: `/workers/url_discovery_worker.py`
|
||||
- **Utilidades**: `/utils/feed_discovery.py`
|
||||
|
|
@ -1,370 +0,0 @@
|
|||
# Sistema de Categorización Automática con LLM
|
||||
|
||||
## Descripción
|
||||
|
||||
Este sistema utiliza **ExLlamaV2** con un modelo de lenguaje local (LLM) para categorizar automáticamente las noticias del feed RSS.
|
||||
|
||||
### ¿Qué hace?
|
||||
|
||||
1. **Recopila 10 noticias** sin categorizar de la base de datos
|
||||
2. **Envía al LLM local** con un prompt especializado
|
||||
3. **El LLM discrimina/categoriza** cada noticia en una de las categorías predefinidas
|
||||
4. **Actualiza la base de datos** con las categorías asignadas
|
||||
|
||||
### Ventajas
|
||||
|
||||
- ✅ **100% Local**: No envía datos a APIs externas
|
||||
- ✅ **Optimizado para RTX 3060 12GB**: Modelos cuantizados eficientes
|
||||
- ✅ **Categorización inteligente**: Entiende contexto, no solo keywords
|
||||
- ✅ **Escalable**: Procesa lotes de 10 noticias automáticamente
|
||||
- ✅ **Integrado**: Se ejecuta como un worker más del sistema
|
||||
|
||||
---
|
||||
|
||||
## Instalación
|
||||
|
||||
### Paso 1: Descargar el Modelo
|
||||
|
||||
El sistema necesita un modelo LLM compatible. Recomendamos **Mistral-7B-Instruct GPTQ** para RTX 3060 12GB.
|
||||
|
||||
```bash
|
||||
# Ejecutar el script de descarga
|
||||
./scripts/download_llm_model.sh
|
||||
```
|
||||
|
||||
El script te mostrará opciones:
|
||||
1. **Mistral-7B-Instruct-v0.2 (GPTQ)** - RECOMENDADO
|
||||
2. Mistral-7B-Instruct-v0.2 (EXL2)
|
||||
3. OpenHermes-2.5-Mistral-7B (GPTQ)
|
||||
4. Neural-Chat-7B (GPTQ)
|
||||
|
||||
**Tiempo estimado de descarga**: 10-30 minutos (según conexión)
|
||||
|
||||
**Espacio en disco**: ~4.5 GB
|
||||
|
||||
### Paso 2: Verificar la instalación
|
||||
|
||||
```bash
|
||||
# Verificar que el modelo se descargó correctamente
|
||||
ls -lh models/llm/
|
||||
|
||||
# Deberías ver archivos como:
|
||||
# - model.safetensors o *.safetensors
|
||||
# - config.json
|
||||
# - tokenizer.json
|
||||
# - etc.
|
||||
```
|
||||
|
||||
### Paso 3: Probar el sistema (opcional)
|
||||
|
||||
Antes de levantar el contenedor, puedes probar que funciona:
|
||||
|
||||
```bash
|
||||
# Instalar dependencias localmente (solo para prueba)
|
||||
pip3 install exllamav2 torch
|
||||
|
||||
# Ejecutar script de prueba
|
||||
python3 scripts/test_llm_categorizer.py
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Uso
|
||||
|
||||
### Iniciar el servicio
|
||||
|
||||
```bash
|
||||
# Construir y levantar el contenedor
|
||||
docker compose up -d llm-categorizer
|
||||
|
||||
# Ver logs en tiempo real
|
||||
docker compose logs -f llm-categorizer
|
||||
```
|
||||
|
||||
### Verificar funcionamiento
|
||||
|
||||
```bash
|
||||
# Ver estado del contenedor
|
||||
docker compose ps llm-categorizer
|
||||
|
||||
# Ver últimas 50 líneas de log
|
||||
docker compose logs --tail=50 llm-categorizer
|
||||
|
||||
# Ver categorías asignadas en la base de datos
|
||||
docker exec -it rss2_db psql -U rss -d rss -c \
|
||||
"SELECT llm_categoria, COUNT(*) FROM noticias WHERE llm_processed = TRUE GROUP BY llm_categoria;"
|
||||
```
|
||||
|
||||
### Detener el servicio
|
||||
|
||||
```bash
|
||||
docker compose stop llm-categorizer
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Configuración
|
||||
|
||||
### Variables de Entorno
|
||||
|
||||
Puedes ajustar el comportamiento editando `docker-compose.yml`:
|
||||
|
||||
```yaml
|
||||
environment:
|
||||
# Número de noticias a procesar por lote (default: 10)
|
||||
LLM_BATCH_SIZE: 10
|
||||
|
||||
# Tiempo de espera cuando no hay noticias (segundos, default: 30)
|
||||
LLM_SLEEP_IDLE: 30
|
||||
|
||||
# Longitud máxima de contexto (default: 4096)
|
||||
LLM_MAX_SEQ_LEN: 4096
|
||||
|
||||
# Modo de caché: FP16 o Q4 (default: FP16)
|
||||
# Q4 usa menos VRAM pero puede ser más lento
|
||||
LLM_CACHE_MODE: FP16
|
||||
|
||||
# Distribución de GPU: "auto" para single GPU
|
||||
LLM_GPU_SPLIT: auto
|
||||
```
|
||||
|
||||
### Categorías
|
||||
|
||||
Las categorías están definidas en `workers/llm_categorizer_worker.py`:
|
||||
|
||||
```python
|
||||
CATEGORIES = [
|
||||
"Política",
|
||||
"Economía",
|
||||
"Tecnología",
|
||||
"Ciencia",
|
||||
"Salud",
|
||||
"Deportes",
|
||||
"Entretenimiento",
|
||||
"Internacional",
|
||||
"Nacional",
|
||||
"Sociedad",
|
||||
"Cultura",
|
||||
"Medio Ambiente",
|
||||
"Educación",
|
||||
"Seguridad",
|
||||
"Otros"
|
||||
]
|
||||
```
|
||||
|
||||
Para modificarlas, edita el archivo y reconstruye el contenedor:
|
||||
|
||||
```bash
|
||||
docker compose up -d --build llm-categorizer
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Base de Datos
|
||||
|
||||
### Nuevas columnas en `noticias`
|
||||
|
||||
El worker añade automáticamente estas columnas:
|
||||
|
||||
- `llm_categoria` (VARCHAR): Categoría asignada
|
||||
- `llm_confianza` (FLOAT): Nivel de confianza (0.0 - 1.0)
|
||||
- `llm_processed` (BOOLEAN): Si ya fue procesada
|
||||
- `llm_processed_at` (TIMESTAMP): Fecha de procesamiento
|
||||
|
||||
### Consultas útiles
|
||||
|
||||
```sql
|
||||
-- Ver distribución de categorías
|
||||
SELECT llm_categoria, COUNT(*) as total, AVG(llm_confianza) as confianza_media
|
||||
FROM noticias
|
||||
WHERE llm_processed = TRUE
|
||||
GROUP BY llm_categoria
|
||||
ORDER BY total DESC;
|
||||
|
||||
-- Ver noticias de una categoría específica
|
||||
SELECT id, titulo, llm_categoria, llm_confianza, fecha
|
||||
FROM noticias
|
||||
WHERE llm_categoria = 'Tecnología'
|
||||
AND llm_processed = TRUE
|
||||
ORDER BY fecha DESC
|
||||
LIMIT 20;
|
||||
|
||||
-- Ver noticias con baja confianza (revisar manualmente)
|
||||
SELECT id, titulo, llm_categoria, llm_confianza
|
||||
FROM noticias
|
||||
WHERE llm_processed = TRUE
|
||||
AND llm_confianza < 0.6
|
||||
ORDER BY llm_confianza ASC
|
||||
LIMIT 20;
|
||||
|
||||
-- Resetear procesamiento (para reprocesar)
|
||||
UPDATE noticias SET llm_processed = FALSE WHERE llm_categoria = 'Otros';
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Monitorización
|
||||
|
||||
### Prometheus/Grafana
|
||||
|
||||
El worker está integrado con el stack de monitorización. Puedes ver:
|
||||
|
||||
- Uso de GPU (VRAM)
|
||||
- Tiempo de procesamiento por lote
|
||||
- Tasa de categorización
|
||||
|
||||
Accede a Grafana: http://localhost:3001
|
||||
|
||||
### Logs
|
||||
|
||||
```bash
|
||||
# Ver logs en tiempo real
|
||||
docker compose logs -f llm-categorizer
|
||||
|
||||
# Buscar errores
|
||||
docker compose logs llm-categorizer | grep ERROR
|
||||
|
||||
# Ver estadísticas de categorización
|
||||
docker compose logs llm-categorizer | grep "Distribución"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Error: "Out of memory"
|
||||
|
||||
**Causa**: El modelo es demasiado grande para tu GPU.
|
||||
|
||||
**Solución**:
|
||||
1. Usa un modelo más pequeño (ej: EXL2 con menor bpw)
|
||||
2. Reduce el batch size: `LLM_BATCH_SIZE: 5`
|
||||
3. Usa cache Q4 en lugar de FP16: `LLM_CACHE_MODE: Q4`
|
||||
|
||||
```yaml
|
||||
environment:
|
||||
LLM_BATCH_SIZE: 5
|
||||
LLM_CACHE_MODE: Q4
|
||||
```
|
||||
|
||||
### Error: "Model not found"
|
||||
|
||||
**Causa**: El modelo no se descargó correctamente.
|
||||
|
||||
**Solución**:
|
||||
```bash
|
||||
# Verificar directorio
|
||||
ls -la models/llm/
|
||||
|
||||
# Debería contener config.json y archivos .safetensors
|
||||
# Si está vacío, ejecutar de nuevo:
|
||||
./scripts/download_llm_model.sh
|
||||
```
|
||||
|
||||
### El worker no procesa noticias
|
||||
|
||||
**Causa**: Posiblemente ya están todas procesadas.
|
||||
|
||||
**Solución**:
|
||||
```bash
|
||||
# Verificar cuántas noticias faltan
|
||||
docker exec -it rss2_db psql -U rss -d rss -c \
|
||||
"SELECT COUNT(*) FROM noticias WHERE llm_processed = FALSE;"
|
||||
|
||||
# Si es 0, resetear algunas para probar
|
||||
docker exec -it rss2_db psql -U rss -d rss -c \
|
||||
"UPDATE noticias SET llm_processed = FALSE WHERE id IN (SELECT id FROM noticias ORDER BY fecha DESC LIMIT 20);"
|
||||
```
|
||||
|
||||
### Categorización incorrecta
|
||||
|
||||
**Causa**: El prompt puede necesitar ajustes o el modelo no es adecuado.
|
||||
|
||||
**Soluciones**:
|
||||
1. Ajustar el prompt en `workers/llm_categorizer_worker.py` (método `_build_prompt`)
|
||||
2. Probar un modelo diferente (ej: OpenHermes es mejor generalista)
|
||||
3. Ajustar la temperatura (más baja = más determinista):
|
||||
|
||||
```python
|
||||
self.settings.temperature = 0.05 # Muy determinista
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rendimiento
|
||||
|
||||
### RTX 3060 12GB
|
||||
|
||||
- **Modelo recomendado**: Mistral-7B-Instruct GPTQ 4-bit
|
||||
- **VRAM utilizada**: ~6-7 GB
|
||||
- **Tiempo por noticia**: ~2-5 segundos
|
||||
- **Throughput**: ~120-300 noticias/hora
|
||||
|
||||
### Optimizaciones
|
||||
|
||||
Para mejorar el rendimiento:
|
||||
|
||||
1. **Aumentar batch size** (si sobra VRAM):
|
||||
```yaml
|
||||
LLM_BATCH_SIZE: 20
|
||||
```
|
||||
|
||||
2. **Cache Q4** (menos VRAM, ligeramente más lento):
|
||||
```yaml
|
||||
LLM_CACHE_MODE: Q4
|
||||
```
|
||||
|
||||
3. **Modelo EXL2 optimizado**:
|
||||
- Usar Mistral EXL2 4.0bpw
|
||||
- Es más rápido que GPTQ en ExLlamaV2
|
||||
|
||||
---
|
||||
|
||||
## Integración con la Web
|
||||
|
||||
Para mostrar las categorías en la interfaz web, modifica `routers/search.py` o crea una nueva vista:
|
||||
|
||||
```python
|
||||
# Ejemplo de endpoint para estadísticas
|
||||
@app.route('/api/categories/stats')
|
||||
def category_stats():
|
||||
query = """
|
||||
SELECT llm_categoria, COUNT(*) as total
|
||||
FROM noticias
|
||||
WHERE llm_processed = TRUE
|
||||
GROUP BY llm_categoria
|
||||
ORDER BY total DESC
|
||||
"""
|
||||
# ... ejecutar query y devolver JSON
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Roadmap
|
||||
|
||||
Posibles mejoras futuras:
|
||||
|
||||
- [ ] Subcategorías automáticas
|
||||
- [ ] Detección de temas trending
|
||||
- [ ] Resúmenes automáticos por categoría
|
||||
- [ ] Alertas personalizadas por categoría
|
||||
- [ ] API REST para categorización bajo demanda
|
||||
- [ ] Fine-tuning del modelo con feedback de usuario
|
||||
|
||||
---
|
||||
|
||||
## Soporte
|
||||
|
||||
Para problemas o preguntas:
|
||||
|
||||
1. Revisar logs: `docker compose logs llm-categorizer`
|
||||
2. Verificar GPU: `nvidia-smi`
|
||||
3. Consultar documentación de ExLlamaV2: https://github.com/turboderp/exllamav2
|
||||
|
||||
---
|
||||
|
||||
## Licencia
|
||||
|
||||
Este componente se distribuye bajo la misma licencia que el proyecto principal RSS2.
|
||||
|
||||
Los modelos LLM tienen sus propias licencias (generalmente Apache 2.0 o MIT para los recomendados).
|
||||
|
|
@ -1,223 +0,0 @@
|
|||
# 🎬 Sistema de Parrillas de Videos Automatizados
|
||||
|
||||
## 📋 Descripción
|
||||
|
||||
Este sistema permite generar videos automáticos de noticias filtradas según diferentes criterios:
|
||||
- **Por País**: Noticias de Bulgaria, España, Estados Unidos, etc.
|
||||
- **Por Categoría**: Ciencia, Tecnología, Deportes, Política, etc.
|
||||
- **Por Entidad**: Personas u organizaciones específicas (ej: "Donald Trump", "OpenAI")
|
||||
- **Por Continente**: Europa, Asia, América, etc.
|
||||
|
||||
## 🎯 Características
|
||||
|
||||
### ✅ Sistema Implementado
|
||||
|
||||
1. **Base de Datos**
|
||||
- Tabla `video_parrillas`: Configuraciones de parrillas
|
||||
- Tabla `video_generados`: Registro de videos creados
|
||||
- Tabla `video_noticias`: Relación entre videos y noticias
|
||||
|
||||
2. **API REST**
|
||||
- `GET /parrillas/` - Listado de parrillas
|
||||
- `GET /parrillas/<id>` - Detalle de parrilla
|
||||
- `POST /parrillas/nueva` - Crear parrilla
|
||||
- `GET /parrillas/api/<id>/preview` - Preview de noticias
|
||||
- `POST /parrillas/api/<id>/generar` - Generar video
|
||||
- `POST /parrillas/api/<id>/toggle` - Activar/desactivar
|
||||
- `DELETE /parrillas/api/<id>` - Eliminar parrilla
|
||||
|
||||
3. **Generador de Videos**
|
||||
- Script: `generar_videos_noticias.py`
|
||||
- Integración con AllTalk TTS
|
||||
- Generación automática de subtítulos SRT
|
||||
- Soporte para múltiples idiomas
|
||||
|
||||
## 🚀 Uso Rápido
|
||||
|
||||
### 1. Crear una Parrilla
|
||||
|
||||
```bash
|
||||
# Acceder a la interfaz web
|
||||
http://localhost:8001/parrillas/
|
||||
|
||||
# O usar SQL directo
|
||||
docker-compose exec -T db psql -U rss -d rss -c "
|
||||
INSERT INTO video_parrillas (nombre, descripcion, tipo_filtro, pais_id, max_noticias, frecuencia, activo)
|
||||
VALUES ('Noticias de Bulgaria', 'Resumen diario de noticias de Bulgaria', 'pais',
|
||||
(SELECT id FROM paises WHERE nombre = 'Bulgaria'), 5, 'daily', true);
|
||||
"
|
||||
```
|
||||
|
||||
### 2. Generar Video Manualmente
|
||||
|
||||
```bash
|
||||
# Generar video para parrilla con ID 1
|
||||
docker-compose exec web python generar_videos_noticias.py 1
|
||||
```
|
||||
|
||||
### 3. Generación Automática (Diaria)
|
||||
|
||||
```bash
|
||||
# Procesar todas las parrillas activas con frecuencia 'daily'
|
||||
docker-compose exec web python generar_videos_noticias.py
|
||||
```
|
||||
|
||||
## 📝 Ejemplos de Parrillas
|
||||
|
||||
### Ejemplo 1: Noticias de Ciencia en Europa
|
||||
|
||||
```sql
|
||||
INSERT INTO video_parrillas (
|
||||
nombre, descripcion, tipo_filtro,
|
||||
categoria_id, continente_id,
|
||||
max_noticias, duracion_maxima, idioma_voz,
|
||||
frecuencia, activo
|
||||
) VALUES (
|
||||
'Ciencia en Europa',
|
||||
'Las últimas noticias científicas de Europa',
|
||||
'categoria',
|
||||
(SELECT id FROM categorias WHERE nombre ILIKE '%ciencia%'),
|
||||
(SELECT id FROM continentes WHERE nombre = 'Europa'),
|
||||
7, 300, 'es',
|
||||
'daily', true
|
||||
);
|
||||
```
|
||||
|
||||
### Ejemplo 2: Noticias sobre una Persona
|
||||
|
||||
```sql
|
||||
INSERT INTO video_parrillas (
|
||||
nombre, descripcion, tipo_filtro,
|
||||
entidad_nombre, entidad_tipo,
|
||||
max_noticias, idioma_voz,
|
||||
frecuencia, activo
|
||||
) VALUES (
|
||||
'Donald Trump en las Noticias',
|
||||
'Todas las menciones de Donald Trump',
|
||||
'entidad',
|
||||
'Donald Trump', 'persona',
|
||||
10, 'es',
|
||||
'daily', true
|
||||
);
|
||||
```
|
||||
|
||||
### Ejemplo 3: Noticias de Tecnología
|
||||
|
||||
```sql
|
||||
INSERT INTO video_parrillas (
|
||||
nombre, descripcion, tipo_filtro,
|
||||
categoria_id,
|
||||
max_noticias, idioma_voz,
|
||||
include_subtitles, template,
|
||||
frecuencia, activo
|
||||
) VALUES (
|
||||
'Tech News Daily',
|
||||
'Resumen diario de tecnología',
|
||||
'categoria',
|
||||
(SELECT id FROM categorias WHERE nombre ILIKE '%tecnolog%'),
|
||||
8, 'es',
|
||||
true, 'modern',
|
||||
'daily', true
|
||||
);
|
||||
```
|
||||
|
||||
## 🔧 Configuración Avanzada
|
||||
|
||||
### Opciones de Parrilla
|
||||
|
||||
| Campo | Tipo | Descripción |
|
||||
|-------|------|-------------|
|
||||
| `nombre` | string | Nombre único de la parrilla |
|
||||
| `descripcion` | text | Descripción detallada |
|
||||
| `tipo_filtro` | enum | 'pais', 'categoria', 'entidad', 'continente', 'custom' |
|
||||
| `pais_id` | int | ID del país (si tipo_filtro='pais') |
|
||||
| `categoria_id` | int | ID de categoría |
|
||||
| `continente_id` | int | ID de continente |
|
||||
| `entidad_nombre` | string | Nombre de persona/organización |
|
||||
| `entidad_tipo` | string | 'persona' o 'organizacion' |
|
||||
| `max_noticias` | int | Máximo de noticias por video (default: 5) |
|
||||
| `duracion_maxima` | int | Duración máxima en segundos (default: 180) |
|
||||
| `idioma_voz` | string | Idioma del TTS ('es', 'en', etc.) |
|
||||
| `template` | string | 'standard', 'modern', 'minimal' |
|
||||
| `include_images` | bool | Incluir imágenes en el video |
|
||||
| `include_subtitles` | bool | Generar subtítulos SRT |
|
||||
| `frecuencia` | string | 'daily', 'weekly', 'manual' |
|
||||
|
||||
### Configuración de AllTalk
|
||||
|
||||
El sistema utiliza AllTalk para generar la narración con voz. Configurar en docker-compose.yml:
|
||||
|
||||
```yaml
|
||||
environment:
|
||||
ALLTALK_URL: http://alltalk:7851
|
||||
```
|
||||
|
||||
## 📊 Estructura de Archivos Generados
|
||||
|
||||
```
|
||||
data/
|
||||
videos/
|
||||
<video_id>/
|
||||
script.txt # Libreto completo del video
|
||||
audio.wav # Audio generado con TTS
|
||||
subtitles.srt # Subtítulos (si enabled)
|
||||
metadata.json # Metadata del video
|
||||
```
|
||||
|
||||
## 🔄 Workflow de Generación
|
||||
|
||||
1. **Consulta de Noticias**: Filtra noticias según criterios de la parrilla
|
||||
2. **Construcción de Script**: Genera libreto narrativo
|
||||
3. **Síntesis de Voz**: Envía texto a AllTalk TTS
|
||||
4. **Generación de Subtítulos**: Crea archivo SRT con timestamps
|
||||
5. **Registro en BD**: Guarda paths y metadata en `video_generados`
|
||||
6. **Relación de Noticias**: Vincula noticias incluidas en `video_noticias`
|
||||
|
||||
## 🎨 Próximas Mejoras
|
||||
|
||||
- [ ] Integración con generador de videos (combinar audio + imágenes)
|
||||
- [ ] Templates visuales personalizados
|
||||
- [ ] Transiciones entre noticias
|
||||
- [ ] Música de fondo
|
||||
- [ ] Logo/branding personalizado
|
||||
- [ ] Exportación directa a YouTube/TikTok
|
||||
- [ ] Programación automática con cron
|
||||
- [ ] Dashboard de analíticas de videos
|
||||
- [ ] Sistema de thumbnails automáticos
|
||||
|
||||
## 🐛 Troubleshooting
|
||||
|
||||
### Error: "No hay noticias disponibles"
|
||||
- Verificar que existan noticias traducidas (`traducciones.status = 'done'`)
|
||||
- Ajustar filtros de la parrilla
|
||||
- Verificar rango de fechas (por defecto últimas 24h)
|
||||
|
||||
### Error en AllTalk TTS
|
||||
- Verificar que el servicio AllTalk esté corriendo
|
||||
- Revisar URL en variable de entorno `ALLTALK_URL`
|
||||
- Comprobar logs: `docker-compose logs alltalk`
|
||||
|
||||
### Video no se genera
|
||||
- Revisar estado en tabla `video_generados`
|
||||
- Ver columna `error_message` si `status = 'error'`
|
||||
- Verificar permisos en directorio `/app/data/videos`
|
||||
|
||||
## 📞 Soporte
|
||||
|
||||
Para problemas o sugerencias, revisar los logs:
|
||||
|
||||
```bash
|
||||
# Logs del generador
|
||||
docker-compose exec web python generar_videos_noticias.py <id> 2>&1 | tee video_generation.log
|
||||
|
||||
# Ver videos en cola
|
||||
docker-compose exec -T db psql -U rss -d rss -c "
|
||||
SELECT id, parrilla_id, titulo, status, fecha_generacion
|
||||
FROM video_generados
|
||||
ORDER BY fecha_generacion DESC LIMIT 10;
|
||||
"
|
||||
```
|
||||
|
||||
## 📄 Licencia
|
||||
|
||||
Este módulo es parte del sistema RSS2 News Aggregator.
|
||||
|
|
@ -1,426 +0,0 @@
|
|||
# 📖 PROCESO COMPLETO: Descubrimiento y Gestión de Feeds RSS
|
||||
|
||||
## 🎯 Problema Resuelto
|
||||
|
||||
**Pregunta:** ¿Cómo asigno país y categoría a los feeds descubiertos automáticamente?
|
||||
|
||||
**Respuesta:** El sistema ahora usa un flujo inteligente de 3 niveles:
|
||||
|
||||
1. **Auto-aprobación** (feeds con categoría/país)
|
||||
2. **Revisión manual** (feeds sin metadata completa)
|
||||
3. **Análisis automático** (sugerencias inteligentes)
|
||||
|
||||
---
|
||||
|
||||
## 🔄 FLUJO COMPLETO DEL SISTEMA
|
||||
|
||||
### Paso 1: Añadir URL Fuente
|
||||
|
||||
Tienes 2 opciones para añadir URLs:
|
||||
|
||||
#### Opción A: Con Categoría y País (AUTO-APROBACIÓN)
|
||||
```sql
|
||||
INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma, active)
|
||||
VALUES ('El País', 'https://elpais.com', 1, 44, 'es', TRUE);
|
||||
-- ^ ^
|
||||
-- categoria_id pais_id
|
||||
```
|
||||
|
||||
✅ **Resultado**: Feeds se crean **AUTOMÁTICAMENTE** y se activan
|
||||
- Worker descubre feeds
|
||||
- Hereda categoría (1) y país (44) del padre
|
||||
- Crea feeds en tabla `feeds` directamente
|
||||
- Ingestor empieza a descargar noticias
|
||||
|
||||
#### Opción B: Sin Categoría o País (REQUIERE REVISIÓN)
|
||||
```sql
|
||||
INSERT INTO fuentes_url (nombre, url, active)
|
||||
VALUES ('BBC News', 'https://www.bbc.com/news', TRUE);
|
||||
-- Sin categoria_id ni pais_id
|
||||
```
|
||||
|
||||
⚠️ **Resultado**: Feeds van a **REVISIÓN MANUAL**
|
||||
- Worker descubre feeds
|
||||
- Analiza automáticamente:
|
||||
- Detecta país desde dominio (.com → Reino Unido)
|
||||
- Detecta idioma (en)
|
||||
- Sugiere categoría ("Internacional")
|
||||
- Crea feeds en tabla `feeds_pending`
|
||||
- **ESPERA APROBACIÓN MANUAL** antes de activar
|
||||
|
||||
---
|
||||
|
||||
### Paso 2: Worker Descubre Feeds (cada 15 min)
|
||||
|
||||
El worker `url_discovery_worker` ejecuta automáticamente:
|
||||
|
||||
```
|
||||
1. Lee fuentes_url activas
|
||||
2. Para cada URL:
|
||||
a. Descubre todos los feeds RSS
|
||||
b. Valida cada feed
|
||||
c. Analiza metadata:
|
||||
- Idioma del feed
|
||||
- País (desde dominio: .es, .uk, .fr, etc.)
|
||||
- Categoría sugerida (keywords en título/descripción)
|
||||
|
||||
d. DECIDE EL FLUJO:
|
||||
|
||||
┌─────────────────────────────────────┐
|
||||
│ ¿Parent tiene categoria_id Y pais_id? │
|
||||
└──────────┬──────────────────────────┘
|
||||
│
|
||||
┌────────┴────────┐
|
||||
│ SÍ │ NO
|
||||
▼ ▼
|
||||
┌──────────────┐ ┌─────────────────┐
|
||||
│ AUTO-APROBAR │ │ REQUIERE REVISIÓN│
|
||||
└───────┬──────┘ └─────────┬───────┘
|
||||
│ │
|
||||
▼ ▼
|
||||
tabla: feeds tabla: feeds_pending
|
||||
activo: TRUE reviewed: FALSE
|
||||
✅ Listo para ⏳ Espera aprobación
|
||||
ingestor
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Paso 3A: Feeds AUTO-APROBADOS
|
||||
|
||||
Si la URL padre tiene `categoria_id` y `pais_id`:
|
||||
|
||||
```sql
|
||||
-- Ejemplo: URL con metadata completa
|
||||
fuentes_url:
|
||||
id=1, url='https://elpais.com',
|
||||
categoria_id=1 (Noticias),
|
||||
pais_id=44 (España)
|
||||
|
||||
↓ Worker descubre 3 feeds:
|
||||
- https://elpais.com/rss/portada.xml
|
||||
- https://elpais.com/rss/internacional.xml
|
||||
- https://elpais.com/rss/deportes.xml
|
||||
|
||||
↓ Se crean DIRECTAMENTE en tabla feeds:
|
||||
INSERT INTO feeds (nombre, url, categoria_id, pais_id, activo)
|
||||
VALUES
|
||||
('El País - Portada', 'https://elpais.com/rss/portada.xml', 1, 44, TRUE),
|
||||
('El País - Internacional', 'https://elpais.com/rss/internacional.xml', 1, 44, TRUE),
|
||||
('El País - Deportes', 'https://elpais.com/rss/deportes.xml', 1, 44, TRUE);
|
||||
|
||||
✅ Feeds están ACTIVOS inmediatamente
|
||||
✅ Ingestor Go los procesa en siguiente ciclo (15 min)
|
||||
✅ Noticias empiezan a llegar
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Paso 3B: Feeds PENDIENTES (requieren revisión)
|
||||
|
||||
Si la URL padre NO tiene `categoria_id` o `pais_id`:
|
||||
|
||||
```sql
|
||||
-- Ejemplo: URL sin metadata
|
||||
fuentes_url:
|
||||
id=2, url='https://www.bbc.com/news',
|
||||
categoria_id=NULL,
|
||||
pais_id=NULL
|
||||
|
||||
↓ Worker descubre 2 feeds y ANALIZA automáticamente:
|
||||
|
||||
Feed 1: https://www.bbc.com/news/world/rss.xml
|
||||
- Título: "BBC News - World"
|
||||
- Idioma detectado: 'en'
|
||||
- País detectado: 'Reino Unido' (desde .com + idioma inglés)
|
||||
- Categoría sugerida: 'Internacional' (keyword "world")
|
||||
|
||||
Feed 2: https://www.bbc.com/sport/rss.xml
|
||||
- Título: "BBC Sport"
|
||||
- Idioma detectado: 'en'
|
||||
- País detectado: 'Reino Unido'
|
||||
- Categoría sugerida: 'Deportes' (keyword "sport")
|
||||
|
||||
↓ Se crean en tabla feeds_pending:
|
||||
INSERT INTO feeds_pending (
|
||||
fuente_url_id, feed_url, feed_title,
|
||||
feed_language, detected_country_id, suggested_categoria_id,
|
||||
reviewed, approved, notes
|
||||
) VALUES (
|
||||
2,
|
||||
'https://www.bbc.com/news/world/rss.xml',
|
||||
'BBC News - World',
|
||||
'en',
|
||||
74, -- Reino Unido (ID detectado)
|
||||
2, -- Internacional (ID sugerido)
|
||||
FALSE, FALSE,
|
||||
'Country from domain: Reino Unido | Suggested category: Internacional (confidence: 85%)'
|
||||
);
|
||||
|
||||
⏳ Feeds están PENDIENTES
|
||||
⏳ NO están activos aún
|
||||
⏳ Requieren revisión manual en /feeds/pending
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📊 Tabla Comparativa
|
||||
|
||||
| Aspecto | Auto-Aprobación | Revisión Manual |
|
||||
|---------|----------------|-----------------|
|
||||
| **Requisito** | URL padre con `categoria_id` Y `pais_id` | URL padre sin uno o ambos |
|
||||
| **Tabla destino** | `feeds` (directa) | `feeds_pending` (temporal) |
|
||||
| **Estado inicial** | `activo = TRUE` | `reviewed = FALSE, approved = FALSE` |
|
||||
| **Análisis automático** | Hereda valores del padre | Detecta país, sugiere categoría |
|
||||
| **Intervención manual** | ❌ No necesaria | ✅ Requerida |
|
||||
| **Tiempo hasta activación** | Inmediato | Después de aprobación |
|
||||
| **Ingestor procesa** | Sí (próximo ciclo) | No (hasta aprobar) |
|
||||
|
||||
---
|
||||
|
||||
## 🛠️ Interfaces de Gestión
|
||||
|
||||
### 1. Añadir URL con Metadata (Auto-aprobación)
|
||||
|
||||
**Ruta:** `/urls/add_source`
|
||||
|
||||
```
|
||||
Formulario:
|
||||
┌─────────────────────────────────────┐
|
||||
│ Nombre: El País │
|
||||
│ URL: https://elpais.com │
|
||||
│ Categoría: [Noticias ▼] ← IMPORTANTE
|
||||
│ País: [España ▼] ← IMPORTANTE
|
||||
│ Idioma: es │
|
||||
│ │
|
||||
│ [Añadir Fuente] │
|
||||
└─────────────────────────────────────┘
|
||||
|
||||
Resultado: Feeds se crearán AUTOMÁTICAMENTE
|
||||
```
|
||||
|
||||
### 2. Revisar Feeds Pendientes (Nueva interfaz)
|
||||
|
||||
**Ruta:** `/feeds/pending` (próximamente)
|
||||
|
||||
```
|
||||
Feeds Pendientes de Revisión
|
||||
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
|
||||
Feed: BBC News - World
|
||||
URL: https://www.bbc.com/news/world/rss.xml
|
||||
Fuente: BBC News (https://www.bbc.com/news)
|
||||
|
||||
Análisis Automático:
|
||||
├─ Idioma: English (en)
|
||||
├─ País detectado: Reino Unido (.com domain + language)
|
||||
└─ Categoría sugerida: Internacional (85% confianza)
|
||||
Keywords: "world", "international", "global"
|
||||
|
||||
┌─────────────────────────────────────┐
|
||||
│ Categoría: [Internacional ▼] │ ← Pre-seleccionada
|
||||
│ País: [Reino Unido ▼] │ ← Pre-seleccionado
|
||||
│ Idioma: [en] │ ← Auto-detectado
|
||||
│ │
|
||||
│ [✓ Aprobar Feed] [✗ Rechazar] │
|
||||
└─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### 3. Descubrir Feeds Manualmente
|
||||
|
||||
**Ruta:** `/feeds/discover`
|
||||
|
||||
```
|
||||
Perfecto para cuando quieres control total:
|
||||
1. Ingresar URL
|
||||
2. Ver todos los feeds encontrados
|
||||
3. Seleccionar cuáles añadir
|
||||
4. Asignar categoría/país globalmente
|
||||
5. Feeds se crean directamente (no van a pending)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 💡 RECOMENDACIONES DE USO
|
||||
|
||||
### Estrategia 1: Auto-aprobación Total
|
||||
**Para fuentes conocidas y confiables:**
|
||||
|
||||
```sql
|
||||
-- Añadir fuentes con metadata completa
|
||||
INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma) VALUES
|
||||
('El País', 'https://elpais.com', 1, 44, 'es'),
|
||||
('Le Monde', 'https://lemonde.fr', 1, 60, 'fr'),
|
||||
('The Guardian', 'https://theguardian.com', 1, 74, 'en');
|
||||
|
||||
-- Worker creará feeds automáticamente
|
||||
-- Sin intervención manual necesaria
|
||||
```
|
||||
|
||||
### Estrategia 2: Revisión Manual
|
||||
**Para fuentes nuevas o desconocidas:**
|
||||
|
||||
```sql
|
||||
-- Añadir sin metadata
|
||||
INSERT INTO fuentes_url (nombre, url) VALUES
|
||||
('Sitio Desconocido', 'https://ejemplo.com');
|
||||
|
||||
-- Worker crea feeds en feeds_pending
|
||||
-- Revisar en /feeds/pending
|
||||
-- Aprobar/rechazar manualmente
|
||||
```
|
||||
|
||||
### Estrategia 3: Híbrida (Recomendada)
|
||||
**Combinar ambas:**
|
||||
|
||||
- URLs conocidas → Con categoría/país
|
||||
- URLs nuevas → Sin metadata (revisión)
|
||||
- Usar análisis automático como guía
|
||||
- Ajustar manualmente si es necesario
|
||||
|
||||
---
|
||||
|
||||
## 🔍 Análisis Automático Explicado
|
||||
|
||||
### Detección de País
|
||||
|
||||
```python
|
||||
# 1. Desde dominio (TLD)
|
||||
.es → España
|
||||
.uk, .co.uk → Reino Unido
|
||||
.fr → Francia
|
||||
.de → Alemania
|
||||
.mx → México
|
||||
.ar → Argentina
|
||||
|
||||
# 2. Desde idioma (si no hay dominio claro)
|
||||
es → España (país principal)
|
||||
en → Reino Unido
|
||||
fr → Francia
|
||||
pt → Portugal
|
||||
|
||||
# 3. Desde subdominios
|
||||
es.example.com → España
|
||||
uk.example.com → Reino Unido
|
||||
```
|
||||
|
||||
### Sugerencia de Categoría
|
||||
|
||||
```python
|
||||
# Análisis de keywords en título + descripción
|
||||
|
||||
Keywords encontrados → Categoría sugerida (% confianza)
|
||||
|
||||
"política", "gobierno", "elecciones" → Política (75%)
|
||||
"economía", "bolsa", "mercado" → Economía (82%)
|
||||
"tecnología", "software", "digital" → Tecnología (90%)
|
||||
"deportes", "fútbol", "liga" → Deportes (95%)
|
||||
"internacional", "mundo", "global" → Internacional (70%)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📝 Ejemplos Completos
|
||||
|
||||
### Ejemplo 1: Periódico Español (Auto-aprobación)
|
||||
|
||||
```sql
|
||||
-- 1. Añadir fuente con metadata
|
||||
INSERT INTO fuentes_url (nombre, url, categoria_id, pais_id, idioma)
|
||||
VALUES ('El Mundo', 'https://elmundo.es', 1, 44, 'es');
|
||||
|
||||
-- 2. Worker ejecuta (15 min después):
|
||||
-- - Descubre: elmundo.es/rss/portada.xml
|
||||
-- - Descubre: elmundo.es/rss/deportes.xml
|
||||
-- - Hereda: categoria_id=1, pais_id=44
|
||||
-- - Crea en feeds directamente
|
||||
|
||||
-- 3. Resultado en tabla feeds:
|
||||
SELECT id, nombre, url, categoria_id, pais_id, activo
|
||||
FROM feeds
|
||||
WHERE fuente_nombre LIKE '%El Mundo%';
|
||||
|
||||
-- id | nombre | url | cat | pais | activo
|
||||
-- 1 | El Mundo - Portada | elmundo.es/rss/portada.xml | 1 | 44 | TRUE
|
||||
-- 2 | El Mundo - Deportes | elmundo.es/rss/deportes.xml | 1 | 44 | TRUE
|
||||
|
||||
-- ✅ Feeds activos, ingestor procesando
|
||||
```
|
||||
|
||||
### Ejemplo 2: Sitio Internacional (Revisión Manual)
|
||||
|
||||
```sql
|
||||
-- 1. Añadir fuente SIN metadata
|
||||
INSERT INTO fuentes_url (nombre, url)
|
||||
VALUES ('Reuters', 'https://www.reuters.com');
|
||||
|
||||
-- 2. Worker ejecuta (15 min después):
|
||||
-- - Descubre: reuters.com/rssfeed/worldNews
|
||||
-- - Analiza: idioma=en, país=Reino Unido (dominio+idioma)
|
||||
-- - Sugiere: categoría=Internacional (keyword "world")
|
||||
-- - Crea en feeds_pending
|
||||
|
||||
-- 3. Resultado en tabla feeds_pending:
|
||||
SELECT feed_title, detected_country_id, suggested_categoria_id, notes
|
||||
FROM feeds_pending
|
||||
WHERE fuente_url_id = 3;
|
||||
|
||||
-- feed_title | detected_country_id | suggested_cat | notes
|
||||
-- Reuters World News | 74 (Reino Unido) | 2 (Int.) | "Country from domain..."
|
||||
|
||||
-- ⏳ Requiere aprobación en /feeds/pending
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ✅ CHECKLIST: Añadir Nueva Fuente
|
||||
|
||||
**Para auto-aprobación (recomendado si sabes país/categoría):**
|
||||
|
||||
- [ ] Ir a `/urls/add_source`
|
||||
- [ ] Ingresar nombre descriptivo
|
||||
- [ ] Ingresar URL del sitio (NO del feed RSS)
|
||||
- [ ] **IMPORTANTE:** Seleccionar categoría
|
||||
- [ ] **IMPORTANTE:** Seleccionar país
|
||||
- [ ] Ingresar idioma (opcional, se detecta)
|
||||
- [ ] Guardar
|
||||
- [ ] Esperar 15 minutos (máximo)
|
||||
- [ ] Ver feeds en `/feeds/` (activos automáticamente)
|
||||
|
||||
**Para revisión manual (si no estás seguro):**
|
||||
|
||||
- [ ] Ir a `/urls/add_source`
|
||||
- [ ] Ingresar nombre y URL
|
||||
- [ ] Dejar categoría/país vacíos
|
||||
- [ ] Guardar
|
||||
- [ ] Esperar 15 minutos
|
||||
- [ ] Ir a `/feeds/pending`
|
||||
- [ ] Revisar sugerencias automáticas
|
||||
- [ ] Ajustar categoría/país si necesario
|
||||
- [ ] Aprobar feeds
|
||||
- [ ] Feeds se activan inmediatamente
|
||||
|
||||
---
|
||||
|
||||
## 🎓 Resumen Ejecutivo
|
||||
|
||||
**3 Niveles de Automatización:**
|
||||
|
||||
| Nivel | Descripción | Cuándo Usar |
|
||||
|-------|-------------|-------------|
|
||||
| **Nivel 1: Totalmente Manual** | Descubrir en `/feeds/discover` | Control total, pocas URLs |
|
||||
| **Nivel 2: Auto-aprobación** | URL con cat/país → feeds activos | URLs confiables, muchas fuentes |
|
||||
| **Nivel 3: Revisión Asistida** | URL sin cat/país → análisis → aprobar | URLs nuevas, verificación |
|
||||
|
||||
**Flujo Recomendado:**
|
||||
1. Añade URL con categoría/país si la conoces
|
||||
2. Si no, déjalo vacío y revisa sugerencias automáticas
|
||||
3. Worker descubre y analiza todo automáticamente
|
||||
4. Tú solo apruebas/ajustas lo necesario
|
||||
|
||||
**Resultado:** Gestión eficiente de cientos de fuentes RSS con mínima intervención manual.
|
||||
|
||||
---
|
||||
|
||||
**📅 Fecha de última actualización:** 2026-01-07
|
||||
**📌 Versión del sistema:** 2.0 - Análisis Inteligente de Feeds
|
||||
|
|
@ -1,164 +0,0 @@
|
|||
# Problema de Traducciones Repetitivas - Análisis y Solución
|
||||
|
||||
## 📋 Descripción del Problema
|
||||
|
||||
Se detectaron traducciones con texto extremadamente repetitivo, como:
|
||||
- "la línea de la línea de la línea de la línea..."
|
||||
- "de Internet de Internet de Internet..."
|
||||
- "de la la la la..."
|
||||
|
||||
### Ejemplo Real Encontrado:
|
||||
```
|
||||
La red de conexión de Internet de Internet de la India (WIS) se encuentra
|
||||
en la línea de Internet de Internet de la India (WIS) y en la línea de
|
||||
Internet de Internet de la India (WIS) se encuentra en...
|
||||
```
|
||||
|
||||
## 🔍 Causas Identificadas
|
||||
|
||||
1. **Repetition Penalty Insuficiente**: El modelo estaba configurado con `repetition_penalty=1.2`, demasiado bajo para prevenir bucles.
|
||||
|
||||
2. **N-gram Blocking Inadecuado**: `no_repeat_ngram_size=4` permitía repeticiones de frases de 3 palabras.
|
||||
|
||||
3. **Falta de Validación Post-Traducción**: No había verificación de calidad después de traducir.
|
||||
|
||||
4. **Textos Fuente Corruptos**: Algunos RSS feeds contienen HTML mal formado o texto corrupto que confunde al modelo.
|
||||
|
||||
## ✅ Soluciones Implementadas
|
||||
|
||||
### 1. Mejoras en el Translation Worker (`workers/translation_worker.py`)
|
||||
|
||||
#### A. Parámetros de Traducción Mejorados
|
||||
```python
|
||||
# ANTES:
|
||||
repetition_penalty=1.2
|
||||
no_repeat_ngram_size=4
|
||||
|
||||
# AHORA:
|
||||
repetition_penalty=2.5 # Penalización mucho más agresiva
|
||||
no_repeat_ngram_size=3 # Bloquea repeticiones de 3-gramas
|
||||
```
|
||||
|
||||
#### B. Función de Validación de Calidad
|
||||
Nueva función `_is_repetitive_output()` que detecta:
|
||||
- Palabras repetidas 4+ veces consecutivas
|
||||
- Frases de 2 palabras repetidas 3+ veces
|
||||
- Patrones específicos conocidos: "de la la", "la línea de la línea", etc.
|
||||
- Baja diversidad de vocabulario (< 25% palabras únicas)
|
||||
|
||||
#### C. Validación Post-Traducción
|
||||
```python
|
||||
# Rechazar traducciones repetitivas automáticamente
|
||||
if _is_repetitive_output(ttr) or _is_repetitive_output(btr):
|
||||
LOG.warning(f"Rejecting repetitive translation for tr_id={i['tr_id']}")
|
||||
errors.append(("Repetitive output detected", i["tr_id"]))
|
||||
continue
|
||||
```
|
||||
|
||||
### 2. Script de Limpieza Automática
|
||||
|
||||
Creado `scripts/clean_repetitive_translations.py` que:
|
||||
- Escanea todas las traducciones completadas
|
||||
- Detecta patrones repetitivos
|
||||
- Marca traducciones defectuosas como 'pending' para re-traducción
|
||||
- Genera reportes de calidad
|
||||
|
||||
**Uso:**
|
||||
```bash
|
||||
docker exec rss2_web python3 scripts/clean_repetitive_translations.py
|
||||
```
|
||||
|
||||
### 3. Limpieza Inicial Ejecutada
|
||||
|
||||
Se identificaron y marcaron **3,093 traducciones defectuosas** para re-traducción:
|
||||
```sql
|
||||
UPDATE traducciones
|
||||
SET status='pending',
|
||||
titulo_trad=NULL,
|
||||
resumen_trad=NULL,
|
||||
error='Repetitive output - retranslating with improved settings'
|
||||
WHERE status='done'
|
||||
AND (resumen_trad LIKE '%la línea de la línea%'
|
||||
OR resumen_trad LIKE '%de la la %'
|
||||
OR resumen_trad LIKE '%de Internet de Internet%');
|
||||
```
|
||||
|
||||
## 🚀 Próximos Pasos
|
||||
|
||||
### 1. Reiniciar el Translation Worker
|
||||
```bash
|
||||
docker restart rss2_translation_worker
|
||||
```
|
||||
|
||||
### 2. Monitorear Re-traducciones
|
||||
Las 3,093 noticias marcadas se re-traducirán automáticamente con la nueva configuración mejorada.
|
||||
|
||||
### 3. Ejecutar Limpieza Periódica
|
||||
Agregar al cron o scheduler:
|
||||
```bash
|
||||
# Cada día a las 3 AM
|
||||
0 3 * * * docker exec rss2_web python3 scripts/clean_repetitive_translations.py
|
||||
```
|
||||
|
||||
### 4. Monitoreo de Calidad
|
||||
Verificar logs del translation worker para ver rechazos:
|
||||
```bash
|
||||
docker logs -f rss2_translation_worker | grep "Rejecting repetitive"
|
||||
```
|
||||
|
||||
## 📊 Métricas de Calidad
|
||||
|
||||
### Antes de la Solución:
|
||||
- ~3,093 traducciones defectuosas detectadas
|
||||
- ~X% de tasa de error (calculado sobre total de traducciones)
|
||||
|
||||
### Después de la Solución:
|
||||
- Validación automática en tiempo real
|
||||
- Rechazo inmediato de outputs repetitivos
|
||||
- Re-traducción automática con mejores parámetros
|
||||
|
||||
## 🔧 Configuración Adicional Recomendada
|
||||
|
||||
### Variables de Entorno (.env)
|
||||
```bash
|
||||
# Aumentar batch size para mejor contexto
|
||||
TRANSLATOR_BATCH=64 # Actual: 128 (OK)
|
||||
|
||||
# Ajustar beams para mejor calidad
|
||||
NUM_BEAMS_TITLE=3
|
||||
NUM_BEAMS_BODY=3
|
||||
|
||||
# Tokens máximos
|
||||
MAX_NEW_TOKENS_TITLE=128
|
||||
MAX_NEW_TOKENS_BODY=512
|
||||
```
|
||||
|
||||
## 📝 Notas Técnicas
|
||||
|
||||
### ¿Por qué ocurre este problema?
|
||||
|
||||
Los modelos de traducción neuronal (como NLLB) pueden entrar en "bucles de repetición" cuando:
|
||||
1. El texto fuente está corrupto o mal formado
|
||||
2. El contexto es muy largo y pierde coherencia
|
||||
3. La penalización por repetición es insuficiente
|
||||
4. Hay patrones ambiguos en el texto fuente
|
||||
|
||||
### Prevención a Largo Plazo
|
||||
|
||||
1. **Validación de Entrada**: Limpiar HTML y texto corrupto antes de traducir
|
||||
2. **Chunking Inteligente**: Dividir textos largos en segmentos coherentes
|
||||
3. **Monitoreo Continuo**: Ejecutar script de limpieza regularmente
|
||||
4. **Logs Detallados**: Analizar qué tipos de textos causan problemas
|
||||
|
||||
## 🎯 Resultados Esperados
|
||||
|
||||
Con estas mejoras, se espera:
|
||||
- ✅ Eliminación del 99%+ de traducciones repetitivas
|
||||
- ✅ Mejor calidad general de traducciones
|
||||
- ✅ Detección automática de problemas
|
||||
- ✅ Re-traducción automática de contenido defectuoso
|
||||
|
||||
---
|
||||
|
||||
**Fecha de Implementación**: 2026-01-28
|
||||
**Estado**: ✅ Implementado y Activo
|
||||
18
feeds.csv
Normal file
18
feeds.csv
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
id,nombre,descripcion,url,categoria_id,categoria,pais_id,pais,idioma,activo,fallos
|
||||
19,8am Daily – Dari,روزنامه ۸صبح افغانستان به زبان دری.,https://8am.af/feed,7,Internacional,1,Afganistán,fa,False,30
|
||||
20,8am Daily – Pashto,د ۸صبح ورځپاڼې پښتو خپرونه.,https://8am.af/ps/feed,7,Internacional,1,Afganistán,ps,False,30
|
||||
1,Afghanistan News.Net – Noticias,Cobertura continua de noticias generales sobre Afganistán y su entorno regional.,https://feeds.afghanistannews.net/rss/6e1d5c8e1f98f17c,7,Internacional,1,Afganistán,en,True,0
|
||||
36,Arezo TV – Dari,آرزو تلویزیون – خبر و گزارش به دری.,https://arezo.tv/fa/feed,7,Internacional,1,Afganistán,fa,False,30
|
||||
37,Arezo TV – Pashto,د آرزو تلویزیون پښتو خبرونه.,https://arezo.tv/ps/feed,7,Internacional,1,Afganistán,ps,False,30
|
||||
4,Ariana News – Dari,خبرها و تحلیلها از افغانستان به زبان دری.,https://ariananews.af/feed,7,Internacional,1,Afganistán,fa,True,0
|
||||
28,Avapress – Dari,خبرگزاری صدای افغان (آوا) به زبان دری.,https://avapress.com/fa/rss,7,Internacional,1,Afganistán,fa,False,30
|
||||
29,Avapress – Pashto,د افغان غږ خبري آژانس په پښتو ژبه.,https://avapress.com/ps/rss,7,Internacional,1,Afganistán,ps,False,30
|
||||
23,Bakhtar News Agency – Dari,آژانس خبری باختر به زبان دری.,https://bakhtarnews.af/fa/feed,7,Internacional,1,Afganistán,fa,False,5
|
||||
24,Bakhtar News Agency – Pashto,د باختر خبري اژانس پښتو پاڼه.,https://bakhtarnews.af/ps/feed,7,Internacional,1,Afganistán,ps,False,5
|
||||
38,Barya News – Dari,باریانیوز – رسانه خبری افغانستان.,https://barya.news/feed,7,Internacional,1,Afganistán,fa,False,30
|
||||
39,Barya News – Pashto,باریانیوز پښتو خپرونې.,https://barya.news/ps/feed,7,Internacional,1,Afganistán,ps,False,30
|
||||
47,Chaprast News – Dari,چپرست نیوز – خبرهای افغانستان.,https://chaprast.com/feed,7,Internacional,1,Afganistán,fa,False,30
|
||||
30,Ensaf News – Dari,رسانه تحلیلی به زبان دری.,https://www.ensafnews.com/fa/feed,7,Internacional,1,Afganistán,fa,False,30
|
||||
27,Hamshahri Afghanistan – Dari,نسخه افغانستان همشهری به زبان دری.,https://hamshahri.af/feed,7,Internacional,1,Afganistán,fa,False,30
|
||||
44,Jomhor News – Dari,جمهور نیوز – خبرگزاری مستقل دری.,https://jomhornews.com/fa/rss,7,Internacional,1,Afganistán,fa,False,30
|
||||
45,Jomhor News – Pashto,جمهور نیوز پښتو.,https://jomhornews.com/ps/rss,7,Internacional,1,Afganistán,ps,False,30
|
||||
|
19
frontend/Dockerfile
Normal file
19
frontend/Dockerfile
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# Build stage: compile the frontend to static assets.
FROM node:20-alpine AS builder

WORKDIR /app

# Copy only the manifests first so the dependency layer is cached
# until package.json / package-lock.json change; source edits alone
# will not trigger a reinstall.
COPY package*.json ./
# `npm ci` installs exactly what package-lock.json pins (reproducible
# builds) and fails fast if the lockfile is out of sync, unlike
# `npm install` which may silently update the lockfile.
RUN npm ci

COPY . .
RUN npm run build

# Runtime stage: serve the built assets with nginx; none of the
# node/npm toolchain reaches the final image.
FROM nginx:alpine

COPY --from=builder /app/dist /usr/share/nginx/html

COPY nginx.conf /etc/nginx/nginx.conf

# Documentation only — publish with `-p`/compose port mapping at run time.
EXPOSE 80

# Exec form so nginx is PID 1 and receives SIGTERM from `docker stop`.
CMD ["nginx", "-g", "daemon off;"]
|
||||
13
frontend/index.html
Normal file
13
frontend/index.html
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
<!doctype html>
|
||||
<html lang="es">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>RSS2 - Noticias del Mundo</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
33
frontend/nginx.conf
Normal file
33
frontend/nginx.conf
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
events {
    worker_connections 1024;
}

http {
    include       /etc/nginx/mime.types;
    default_type  application/octet-stream;

    # Docker's embedded DNS server; `valid=10s` re-resolves names every
    # 10 seconds so a recreated backend container is picked up without
    # restarting nginx. NOTE: the resolver is only consulted when
    # proxy_pass uses a variable — see the /api location below.
    resolver 127.0.0.11 valid=10s;

    server {
        listen 80;
        server_name localhost;
        root  /usr/share/nginx/html;
        index index.html;

        # SPA fallback: unknown paths serve index.html so client-side
        # routing survives a hard refresh / deep link.
        location / {
            try_files $uri $uri/ /index.html;
        }

        # Reverse proxy to the Go API backend.
        location /api {
            # Using a variable forces nginx to resolve the hostname at
            # request time via the resolver above. With the literal
            # `proxy_pass http://backend-go:8080;` the IP is resolved
            # once at startup and goes stale (502s) whenever the
            # backend container is recreated with a new address.
            set $api_upstream http://backend-go:8080;
            proxy_pass $api_upstream;

            # WebSocket / HTTP upgrade support.
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection 'upgrade';
            proxy_set_header Host $host;
            proxy_cache_bypass $http_upgrade;

            # Forward client identity/protocol to the backend.
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }
    }
}
|
||||
1
frontend/node_modules/.bin/autoprefixer
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/autoprefixer
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../autoprefixer/bin/autoprefixer
|
||||
1
frontend/node_modules/.bin/baseline-browser-mapping
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/baseline-browser-mapping
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../baseline-browser-mapping/dist/cli.cjs
|
||||
1
frontend/node_modules/.bin/browserslist
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/browserslist
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../browserslist/cli.js
|
||||
1
frontend/node_modules/.bin/cssesc
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/cssesc
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../cssesc/bin/cssesc
|
||||
1
frontend/node_modules/.bin/esbuild
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/esbuild
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../esbuild/bin/esbuild
|
||||
1
frontend/node_modules/.bin/jiti
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/jiti
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../jiti/bin/jiti.js
|
||||
1
frontend/node_modules/.bin/jsesc
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/jsesc
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../jsesc/bin/jsesc
|
||||
1
frontend/node_modules/.bin/json5
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/json5
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../json5/lib/cli.js
|
||||
1
frontend/node_modules/.bin/loose-envify
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/loose-envify
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../loose-envify/cli.js
|
||||
1
frontend/node_modules/.bin/nanoid
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/nanoid
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../nanoid/bin/nanoid.cjs
|
||||
1
frontend/node_modules/.bin/parser
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/parser
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../@babel/parser/bin/babel-parser.js
|
||||
1
frontend/node_modules/.bin/resolve
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/resolve
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../resolve/bin/resolve
|
||||
1
frontend/node_modules/.bin/rollup
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/rollup
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../rollup/dist/bin/rollup
|
||||
1
frontend/node_modules/.bin/semver
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/semver
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../semver/bin/semver.js
|
||||
1
frontend/node_modules/.bin/sucrase
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/sucrase
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../sucrase/bin/sucrase
|
||||
1
frontend/node_modules/.bin/sucrase-node
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/sucrase-node
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../sucrase/bin/sucrase-node
|
||||
1
frontend/node_modules/.bin/tailwind
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/tailwind
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../tailwindcss/lib/cli.js
|
||||
1
frontend/node_modules/.bin/tailwindcss
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/tailwindcss
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../tailwindcss/lib/cli.js
|
||||
1
frontend/node_modules/.bin/tsc
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/tsc
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../typescript/bin/tsc
|
||||
1
frontend/node_modules/.bin/tsserver
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/tsserver
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../typescript/bin/tsserver
|
||||
1
frontend/node_modules/.bin/update-browserslist-db
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/update-browserslist-db
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../update-browserslist-db/cli.js
|
||||
1
frontend/node_modules/.bin/vite
generated
vendored
Symbolic link
1
frontend/node_modules/.bin/vite
generated
vendored
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
../vite/bin/vite.js
|
||||
2284
frontend/node_modules/.package-lock.json
generated
vendored
Normal file
2284
frontend/node_modules/.package-lock.json
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
128
frontend/node_modules/@alloc/quick-lru/index.d.ts
generated
vendored
Normal file
128
frontend/node_modules/@alloc/quick-lru/index.d.ts
generated
vendored
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
declare namespace QuickLRU {
|
||||
interface Options<KeyType, ValueType> {
|
||||
/**
|
||||
The maximum number of milliseconds an item should remain in the cache.
|
||||
|
||||
@default Infinity
|
||||
|
||||
By default, `maxAge` will be `Infinity`, which means that items will never expire.
|
||||
Lazy expiration upon the next write or read call.
|
||||
|
||||
Individual expiration of an item can be specified by the `set(key, value, maxAge)` method.
|
||||
*/
|
||||
readonly maxAge?: number;
|
||||
|
||||
/**
|
||||
The maximum number of items before evicting the least recently used items.
|
||||
*/
|
||||
readonly maxSize: number;
|
||||
|
||||
/**
|
||||
Called right before an item is evicted from the cache.
|
||||
|
||||
Useful for side effects or for items like object URLs that need explicit cleanup (`revokeObjectURL`).
|
||||
*/
|
||||
onEviction?: (key: KeyType, value: ValueType) => void;
|
||||
}
|
||||
}
|
||||
|
||||
declare class QuickLRU<KeyType, ValueType>
|
||||
implements Iterable<[KeyType, ValueType]> {
|
||||
/**
|
||||
The stored item count.
|
||||
*/
|
||||
readonly size: number;
|
||||
|
||||
/**
|
||||
Simple ["Least Recently Used" (LRU) cache](https://en.m.wikipedia.org/wiki/Cache_replacement_policies#Least_Recently_Used_.28LRU.29).
|
||||
|
||||
The instance is [`iterable`](https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Iteration_protocols) so you can use it directly in a [`for…of`](https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Statements/for...of) loop.
|
||||
|
||||
@example
|
||||
```
|
||||
import QuickLRU = require('quick-lru');
|
||||
|
||||
const lru = new QuickLRU({maxSize: 1000});
|
||||
|
||||
lru.set('🦄', '🌈');
|
||||
|
||||
lru.has('🦄');
|
||||
//=> true
|
||||
|
||||
lru.get('🦄');
|
||||
//=> '🌈'
|
||||
```
|
||||
*/
|
||||
constructor(options: QuickLRU.Options<KeyType, ValueType>);
|
||||
|
||||
[Symbol.iterator](): IterableIterator<[KeyType, ValueType]>;
|
||||
|
||||
/**
|
||||
Set an item. Returns the instance.
|
||||
|
||||
Individual expiration of an item can be specified with the `maxAge` option. If not specified, the global `maxAge` value will be used in case it is specified in the constructor, otherwise the item will never expire.
|
||||
|
||||
@returns The list instance.
|
||||
*/
|
||||
set(key: KeyType, value: ValueType, options?: {maxAge?: number}): this;
|
||||
|
||||
/**
|
||||
Get an item.
|
||||
|
||||
@returns The stored item or `undefined`.
|
||||
*/
|
||||
get(key: KeyType): ValueType | undefined;
|
||||
|
||||
/**
|
||||
Check if an item exists.
|
||||
*/
|
||||
has(key: KeyType): boolean;
|
||||
|
||||
/**
|
||||
Get an item without marking it as recently used.
|
||||
|
||||
@returns The stored item or `undefined`.
|
||||
*/
|
||||
peek(key: KeyType): ValueType | undefined;
|
||||
|
||||
/**
|
||||
Delete an item.
|
||||
|
||||
@returns `true` if the item is removed or `false` if the item doesn't exist.
|
||||
*/
|
||||
delete(key: KeyType): boolean;
|
||||
|
||||
/**
|
||||
Delete all items.
|
||||
*/
|
||||
clear(): void;
|
||||
|
||||
/**
|
||||
Update the `maxSize` in-place, discarding items as necessary. Insertion order is mostly preserved, though this is not a strong guarantee.
|
||||
|
||||
Useful for on-the-fly tuning of cache sizes in live systems.
|
||||
*/
|
||||
resize(maxSize: number): void;
|
||||
|
||||
/**
|
||||
Iterable for all the keys.
|
||||
*/
|
||||
keys(): IterableIterator<KeyType>;
|
||||
|
||||
/**
|
||||
Iterable for all the values.
|
||||
*/
|
||||
values(): IterableIterator<ValueType>;
|
||||
|
||||
/**
|
||||
Iterable for all entries, starting with the oldest (ascending in recency).
|
||||
*/
|
||||
entriesAscending(): IterableIterator<[KeyType, ValueType]>;
|
||||
|
||||
/**
|
||||
Iterable for all entries, starting with the newest (descending in recency).
|
||||
*/
|
||||
entriesDescending(): IterableIterator<[KeyType, ValueType]>;
|
||||
}
|
||||
|
||||
export = QuickLRU;
|
||||
263
frontend/node_modules/@alloc/quick-lru/index.js
generated
vendored
Normal file
263
frontend/node_modules/@alloc/quick-lru/index.js
generated
vendored
Normal file
|
|
@ -0,0 +1,263 @@
|
|||
'use strict';
|
||||
|
||||
class QuickLRU {
|
||||
constructor(options = {}) {
|
||||
if (!(options.maxSize && options.maxSize > 0)) {
|
||||
throw new TypeError('`maxSize` must be a number greater than 0');
|
||||
}
|
||||
|
||||
if (typeof options.maxAge === 'number' && options.maxAge === 0) {
|
||||
throw new TypeError('`maxAge` must be a number greater than 0');
|
||||
}
|
||||
|
||||
this.maxSize = options.maxSize;
|
||||
this.maxAge = options.maxAge || Infinity;
|
||||
this.onEviction = options.onEviction;
|
||||
this.cache = new Map();
|
||||
this.oldCache = new Map();
|
||||
this._size = 0;
|
||||
}
|
||||
|
||||
_emitEvictions(cache) {
|
||||
if (typeof this.onEviction !== 'function') {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const [key, item] of cache) {
|
||||
this.onEviction(key, item.value);
|
||||
}
|
||||
}
|
||||
|
||||
_deleteIfExpired(key, item) {
|
||||
if (typeof item.expiry === 'number' && item.expiry <= Date.now()) {
|
||||
if (typeof this.onEviction === 'function') {
|
||||
this.onEviction(key, item.value);
|
||||
}
|
||||
|
||||
return this.delete(key);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
_getOrDeleteIfExpired(key, item) {
|
||||
const deleted = this._deleteIfExpired(key, item);
|
||||
if (deleted === false) {
|
||||
return item.value;
|
||||
}
|
||||
}
|
||||
|
||||
_getItemValue(key, item) {
|
||||
return item.expiry ? this._getOrDeleteIfExpired(key, item) : item.value;
|
||||
}
|
||||
|
||||
_peek(key, cache) {
|
||||
const item = cache.get(key);
|
||||
|
||||
return this._getItemValue(key, item);
|
||||
}
|
||||
|
||||
_set(key, value) {
|
||||
this.cache.set(key, value);
|
||||
this._size++;
|
||||
|
||||
if (this._size >= this.maxSize) {
|
||||
this._size = 0;
|
||||
this._emitEvictions(this.oldCache);
|
||||
this.oldCache = this.cache;
|
||||
this.cache = new Map();
|
||||
}
|
||||
}
|
||||
|
||||
_moveToRecent(key, item) {
|
||||
this.oldCache.delete(key);
|
||||
this._set(key, item);
|
||||
}
|
||||
|
||||
* _entriesAscending() {
|
||||
for (const item of this.oldCache) {
|
||||
const [key, value] = item;
|
||||
if (!this.cache.has(key)) {
|
||||
const deleted = this._deleteIfExpired(key, value);
|
||||
if (deleted === false) {
|
||||
yield item;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const item of this.cache) {
|
||||
const [key, value] = item;
|
||||
const deleted = this._deleteIfExpired(key, value);
|
||||
if (deleted === false) {
|
||||
yield item;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
get(key) {
|
||||
if (this.cache.has(key)) {
|
||||
const item = this.cache.get(key);
|
||||
|
||||
return this._getItemValue(key, item);
|
||||
}
|
||||
|
||||
if (this.oldCache.has(key)) {
|
||||
const item = this.oldCache.get(key);
|
||||
if (this._deleteIfExpired(key, item) === false) {
|
||||
this._moveToRecent(key, item);
|
||||
return item.value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
set(key, value, {maxAge = this.maxAge === Infinity ? undefined : Date.now() + this.maxAge} = {}) {
|
||||
if (this.cache.has(key)) {
|
||||
this.cache.set(key, {
|
||||
value,
|
||||
maxAge
|
||||
});
|
||||
} else {
|
||||
this._set(key, {value, expiry: maxAge});
|
||||
}
|
||||
}
|
||||
|
||||
has(key) {
|
||||
if (this.cache.has(key)) {
|
||||
return !this._deleteIfExpired(key, this.cache.get(key));
|
||||
}
|
||||
|
||||
if (this.oldCache.has(key)) {
|
||||
return !this._deleteIfExpired(key, this.oldCache.get(key));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
peek(key) {
|
||||
if (this.cache.has(key)) {
|
||||
return this._peek(key, this.cache);
|
||||
}
|
||||
|
||||
if (this.oldCache.has(key)) {
|
||||
return this._peek(key, this.oldCache);
|
||||
}
|
||||
}
|
||||
|
||||
delete(key) {
|
||||
const deleted = this.cache.delete(key);
|
||||
if (deleted) {
|
||||
this._size--;
|
||||
}
|
||||
|
||||
return this.oldCache.delete(key) || deleted;
|
||||
}
|
||||
|
||||
clear() {
|
||||
this.cache.clear();
|
||||
this.oldCache.clear();
|
||||
this._size = 0;
|
||||
}
|
||||
|
||||
resize(newSize) {
|
||||
if (!(newSize && newSize > 0)) {
|
||||
throw new TypeError('`maxSize` must be a number greater than 0');
|
||||
}
|
||||
|
||||
const items = [...this._entriesAscending()];
|
||||
const removeCount = items.length - newSize;
|
||||
if (removeCount < 0) {
|
||||
this.cache = new Map(items);
|
||||
this.oldCache = new Map();
|
||||
this._size = items.length;
|
||||
} else {
|
||||
if (removeCount > 0) {
|
||||
this._emitEvictions(items.slice(0, removeCount));
|
||||
}
|
||||
|
||||
this.oldCache = new Map(items.slice(removeCount));
|
||||
this.cache = new Map();
|
||||
this._size = 0;
|
||||
}
|
||||
|
||||
this.maxSize = newSize;
|
||||
}
|
||||
|
||||
* keys() {
|
||||
for (const [key] of this) {
|
||||
yield key;
|
||||
}
|
||||
}
|
||||
|
||||
* values() {
|
||||
for (const [, value] of this) {
|
||||
yield value;
|
||||
}
|
||||
}
|
||||
|
||||
* [Symbol.iterator]() {
|
||||
for (const item of this.cache) {
|
||||
const [key, value] = item;
|
||||
const deleted = this._deleteIfExpired(key, value);
|
||||
if (deleted === false) {
|
||||
yield [key, value.value];
|
||||
}
|
||||
}
|
||||
|
||||
for (const item of this.oldCache) {
|
||||
const [key, value] = item;
|
||||
if (!this.cache.has(key)) {
|
||||
const deleted = this._deleteIfExpired(key, value);
|
||||
if (deleted === false) {
|
||||
yield [key, value.value];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
* entriesDescending() {
|
||||
let items = [...this.cache];
|
||||
for (let i = items.length - 1; i >= 0; --i) {
|
||||
const item = items[i];
|
||||
const [key, value] = item;
|
||||
const deleted = this._deleteIfExpired(key, value);
|
||||
if (deleted === false) {
|
||||
yield [key, value.value];
|
||||
}
|
||||
}
|
||||
|
||||
items = [...this.oldCache];
|
||||
for (let i = items.length - 1; i >= 0; --i) {
|
||||
const item = items[i];
|
||||
const [key, value] = item;
|
||||
if (!this.cache.has(key)) {
|
||||
const deleted = this._deleteIfExpired(key, value);
|
||||
if (deleted === false) {
|
||||
yield [key, value.value];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
* entriesAscending() {
|
||||
for (const [key, value] of this._entriesAscending()) {
|
||||
yield [key, value.value];
|
||||
}
|
||||
}
|
||||
|
||||
get size() {
|
||||
if (!this._size) {
|
||||
return this.oldCache.size;
|
||||
}
|
||||
|
||||
let oldCacheSize = 0;
|
||||
for (const key of this.oldCache.keys()) {
|
||||
if (!this.cache.has(key)) {
|
||||
oldCacheSize++;
|
||||
}
|
||||
}
|
||||
|
||||
return Math.min(this._size + oldCacheSize, this.maxSize);
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = QuickLRU;
|
||||
9
frontend/node_modules/@alloc/quick-lru/license
generated
vendored
Normal file
9
frontend/node_modules/@alloc/quick-lru/license
generated
vendored
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) Sindre Sorhus <sindresorhus@gmail.com> (sindresorhus.com)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
43
frontend/node_modules/@alloc/quick-lru/package.json
generated
vendored
Normal file
43
frontend/node_modules/@alloc/quick-lru/package.json
generated
vendored
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
{
|
||||
"name": "@alloc/quick-lru",
|
||||
"version": "5.2.0",
|
||||
"description": "Simple “Least Recently Used” (LRU) cache",
|
||||
"license": "MIT",
|
||||
"repository": "sindresorhus/quick-lru",
|
||||
"funding": "https://github.com/sponsors/sindresorhus",
|
||||
"author": {
|
||||
"name": "Sindre Sorhus",
|
||||
"email": "sindresorhus@gmail.com",
|
||||
"url": "https://sindresorhus.com"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
},
|
||||
"scripts": {
|
||||
"test": "xo && nyc ava && tsd"
|
||||
},
|
||||
"files": [
|
||||
"index.js",
|
||||
"index.d.ts"
|
||||
],
|
||||
"keywords": [
|
||||
"lru",
|
||||
"quick",
|
||||
"cache",
|
||||
"caching",
|
||||
"least",
|
||||
"recently",
|
||||
"used",
|
||||
"fast",
|
||||
"map",
|
||||
"hash",
|
||||
"buffer"
|
||||
],
|
||||
"devDependencies": {
|
||||
"ava": "^2.0.0",
|
||||
"coveralls": "^3.0.3",
|
||||
"nyc": "^15.0.0",
|
||||
"tsd": "^0.11.0",
|
||||
"xo": "^0.26.0"
|
||||
}
|
||||
}
|
||||
139
frontend/node_modules/@alloc/quick-lru/readme.md
generated
vendored
Normal file
139
frontend/node_modules/@alloc/quick-lru/readme.md
generated
vendored
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
# quick-lru [](https://travis-ci.org/sindresorhus/quick-lru) [](https://coveralls.io/github/sindresorhus/quick-lru?branch=master)
|
||||
|
||||
> Simple [“Least Recently Used” (LRU) cache](https://en.m.wikipedia.org/wiki/Cache_replacement_policies#Least_Recently_Used_.28LRU.29)
|
||||
|
||||
Useful when you need to cache something and limit memory usage.
|
||||
|
||||
Inspired by the [`hashlru` algorithm](https://github.com/dominictarr/hashlru#algorithm), but instead uses [`Map`](https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/Map) to support keys of any type, not just strings, and values can be `undefined`.
|
||||
|
||||
## Install
|
||||
|
||||
```
|
||||
$ npm install quick-lru
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```js
|
||||
const QuickLRU = require('quick-lru');
|
||||
|
||||
const lru = new QuickLRU({maxSize: 1000});
|
||||
|
||||
lru.set('🦄', '🌈');
|
||||
|
||||
lru.has('🦄');
|
||||
//=> true
|
||||
|
||||
lru.get('🦄');
|
||||
//=> '🌈'
|
||||
```
|
||||
|
||||
## API
|
||||
|
||||
### new QuickLRU(options?)
|
||||
|
||||
Returns a new instance.
|
||||
|
||||
### options
|
||||
|
||||
Type: `object`
|
||||
|
||||
#### maxSize
|
||||
|
||||
*Required*\
|
||||
Type: `number`
|
||||
|
||||
The maximum number of items before evicting the least recently used items.
|
||||
|
||||
#### maxAge
|
||||
|
||||
Type: `number`\
|
||||
Default: `Infinity`
|
||||
|
||||
The maximum number of milliseconds an item should remain in cache.
|
||||
By default maxAge will be Infinity, which means that items will never expire.
|
||||
|
||||
Lazy expiration happens upon the next `write` or `read` call.
|
||||
|
||||
Individual expiration of an item can be specified by the `set(key, value, options)` method.
|
||||
|
||||
#### onEviction
|
||||
|
||||
*Optional*\
|
||||
Type: `(key, value) => void`
|
||||
|
||||
Called right before an item is evicted from the cache.
|
||||
|
||||
Useful for side effects or for items like object URLs that need explicit cleanup (`revokeObjectURL`).
|
||||
|
||||
### Instance
|
||||
|
||||
The instance is [`iterable`](https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Iteration_protocols) so you can use it directly in a [`for…of`](https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Statements/for...of) loop.
|
||||
|
||||
Both `key` and `value` can be of any type.
|
||||
|
||||
#### .set(key, value, options?)
|
||||
|
||||
Set an item. Returns the instance.
|
||||
|
||||
Individual expiration of an item can be specified with the `maxAge` option. If not specified, the global `maxAge` value will be used in case it is specified on the constructor, otherwise the item will never expire.
|
||||
|
||||
#### .get(key)
|
||||
|
||||
Get an item.
|
||||
|
||||
#### .has(key)
|
||||
|
||||
Check if an item exists.
|
||||
|
||||
#### .peek(key)
|
||||
|
||||
Get an item without marking it as recently used.
|
||||
|
||||
#### .delete(key)
|
||||
|
||||
Delete an item.
|
||||
|
||||
Returns `true` if the item is removed or `false` if the item doesn't exist.
|
||||
|
||||
#### .clear()
|
||||
|
||||
Delete all items.
|
||||
|
||||
#### .resize(maxSize)
|
||||
|
||||
Update the `maxSize`, discarding items as necessary. Insertion order is mostly preserved, though this is not a strong guarantee.
|
||||
|
||||
Useful for on-the-fly tuning of cache sizes in live systems.
|
||||
|
||||
#### .keys()
|
||||
|
||||
Iterable for all the keys.
|
||||
|
||||
#### .values()
|
||||
|
||||
Iterable for all the values.
|
||||
|
||||
#### .entriesAscending()
|
||||
|
||||
Iterable for all entries, starting with the oldest (ascending in recency).
|
||||
|
||||
#### .entriesDescending()
|
||||
|
||||
Iterable for all entries, starting with the newest (descending in recency).
|
||||
|
||||
#### .size
|
||||
|
||||
The stored item count.
|
||||
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
<b>
|
||||
<a href="https://tidelift.com/subscription/pkg/npm-quick-lru?utm_source=npm-quick-lru&utm_medium=referral&utm_campaign=readme">Get professional support for this package with a Tidelift subscription</a>
|
||||
</b>
|
||||
<br>
|
||||
<sub>
|
||||
Tidelift helps make open source sustainable for maintainers while giving companies<br>assurances about security, maintenance, and licensing for their dependencies.
|
||||
</sub>
|
||||
</div>
|
||||
22
frontend/node_modules/@babel/code-frame/LICENSE
generated
vendored
Normal file
22
frontend/node_modules/@babel/code-frame/LICENSE
generated
vendored
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2014-present Sebastian McKenzie and other contributors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
19
frontend/node_modules/@babel/code-frame/README.md
generated
vendored
Normal file
19
frontend/node_modules/@babel/code-frame/README.md
generated
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# @babel/code-frame
|
||||
|
||||
> Generate errors that contain a code frame that point to source locations.
|
||||
|
||||
See our website [@babel/code-frame](https://babeljs.io/docs/babel-code-frame) for more information.
|
||||
|
||||
## Install
|
||||
|
||||
Using npm:
|
||||
|
||||
```sh
|
||||
npm install --save-dev @babel/code-frame
|
||||
```
|
||||
|
||||
or using yarn:
|
||||
|
||||
```sh
|
||||
yarn add @babel/code-frame --dev
|
||||
```
|
||||
217
frontend/node_modules/@babel/code-frame/lib/index.js
generated
vendored
Normal file
217
frontend/node_modules/@babel/code-frame/lib/index.js
generated
vendored
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
'use strict';
|
||||
|
||||
Object.defineProperty(exports, '__esModule', { value: true });
|
||||
|
||||
var picocolors = require('picocolors');
|
||||
var jsTokens = require('js-tokens');
|
||||
var helperValidatorIdentifier = require('@babel/helper-validator-identifier');
|
||||
|
||||
function isColorSupported() {
|
||||
return (typeof process === "object" && (process.env.FORCE_COLOR === "0" || process.env.FORCE_COLOR === "false") ? false : picocolors.isColorSupported
|
||||
);
|
||||
}
|
||||
const compose = (f, g) => v => f(g(v));
|
||||
function buildDefs(colors) {
|
||||
return {
|
||||
keyword: colors.cyan,
|
||||
capitalized: colors.yellow,
|
||||
jsxIdentifier: colors.yellow,
|
||||
punctuator: colors.yellow,
|
||||
number: colors.magenta,
|
||||
string: colors.green,
|
||||
regex: colors.magenta,
|
||||
comment: colors.gray,
|
||||
invalid: compose(compose(colors.white, colors.bgRed), colors.bold),
|
||||
gutter: colors.gray,
|
||||
marker: compose(colors.red, colors.bold),
|
||||
message: compose(colors.red, colors.bold),
|
||||
reset: colors.reset
|
||||
};
|
||||
}
|
||||
const defsOn = buildDefs(picocolors.createColors(true));
|
||||
const defsOff = buildDefs(picocolors.createColors(false));
|
||||
function getDefs(enabled) {
|
||||
return enabled ? defsOn : defsOff;
|
||||
}
|
||||
|
||||
const sometimesKeywords = new Set(["as", "async", "from", "get", "of", "set"]);
|
||||
const NEWLINE$1 = /\r\n|[\n\r\u2028\u2029]/;
|
||||
const BRACKET = /^[()[\]{}]$/;
|
||||
let tokenize;
|
||||
const JSX_TAG = /^[a-z][\w-]*$/i;
|
||||
const getTokenType = function (token, offset, text) {
|
||||
if (token.type === "name") {
|
||||
const tokenValue = token.value;
|
||||
if (helperValidatorIdentifier.isKeyword(tokenValue) || helperValidatorIdentifier.isStrictReservedWord(tokenValue, true) || sometimesKeywords.has(tokenValue)) {
|
||||
return "keyword";
|
||||
}
|
||||
if (JSX_TAG.test(tokenValue) && (text[offset - 1] === "<" || text.slice(offset - 2, offset) === "</")) {
|
||||
return "jsxIdentifier";
|
||||
}
|
||||
const firstChar = String.fromCodePoint(tokenValue.codePointAt(0));
|
||||
if (firstChar !== firstChar.toLowerCase()) {
|
||||
return "capitalized";
|
||||
}
|
||||
}
|
||||
if (token.type === "punctuator" && BRACKET.test(token.value)) {
|
||||
return "bracket";
|
||||
}
|
||||
if (token.type === "invalid" && (token.value === "@" || token.value === "#")) {
|
||||
return "punctuator";
|
||||
}
|
||||
return token.type;
|
||||
};
|
||||
tokenize = function* (text) {
|
||||
let match;
|
||||
while (match = jsTokens.default.exec(text)) {
|
||||
const token = jsTokens.matchToToken(match);
|
||||
yield {
|
||||
type: getTokenType(token, match.index, text),
|
||||
value: token.value
|
||||
};
|
||||
}
|
||||
};
|
||||
function highlight(text) {
|
||||
if (text === "") return "";
|
||||
const defs = getDefs(true);
|
||||
let highlighted = "";
|
||||
for (const {
|
||||
type,
|
||||
value
|
||||
} of tokenize(text)) {
|
||||
if (type in defs) {
|
||||
highlighted += value.split(NEWLINE$1).map(str => defs[type](str)).join("\n");
|
||||
} else {
|
||||
highlighted += value;
|
||||
}
|
||||
}
|
||||
return highlighted;
|
||||
}
|
||||
|
||||
let deprecationWarningShown = false;
|
||||
const NEWLINE = /\r\n|[\n\r\u2028\u2029]/;
|
||||
function getMarkerLines(loc, source, opts, startLineBaseZero) {
|
||||
const startLoc = Object.assign({
|
||||
column: 0,
|
||||
line: -1
|
||||
}, loc.start);
|
||||
const endLoc = Object.assign({}, startLoc, loc.end);
|
||||
const {
|
||||
linesAbove = 2,
|
||||
linesBelow = 3
|
||||
} = opts || {};
|
||||
const startLine = startLoc.line - startLineBaseZero;
|
||||
const startColumn = startLoc.column;
|
||||
const endLine = endLoc.line - startLineBaseZero;
|
||||
const endColumn = endLoc.column;
|
||||
let start = Math.max(startLine - (linesAbove + 1), 0);
|
||||
let end = Math.min(source.length, endLine + linesBelow);
|
||||
if (startLine === -1) {
|
||||
start = 0;
|
||||
}
|
||||
if (endLine === -1) {
|
||||
end = source.length;
|
||||
}
|
||||
const lineDiff = endLine - startLine;
|
||||
const markerLines = {};
|
||||
if (lineDiff) {
|
||||
for (let i = 0; i <= lineDiff; i++) {
|
||||
const lineNumber = i + startLine;
|
||||
if (!startColumn) {
|
||||
markerLines[lineNumber] = true;
|
||||
} else if (i === 0) {
|
||||
const sourceLength = source[lineNumber - 1].length;
|
||||
markerLines[lineNumber] = [startColumn, sourceLength - startColumn + 1];
|
||||
} else if (i === lineDiff) {
|
||||
markerLines[lineNumber] = [0, endColumn];
|
||||
} else {
|
||||
const sourceLength = source[lineNumber - i].length;
|
||||
markerLines[lineNumber] = [0, sourceLength];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (startColumn === endColumn) {
|
||||
if (startColumn) {
|
||||
markerLines[startLine] = [startColumn, 0];
|
||||
} else {
|
||||
markerLines[startLine] = true;
|
||||
}
|
||||
} else {
|
||||
markerLines[startLine] = [startColumn, endColumn - startColumn];
|
||||
}
|
||||
}
|
||||
return {
|
||||
start,
|
||||
end,
|
||||
markerLines
|
||||
};
|
||||
}
|
||||
function codeFrameColumns(rawLines, loc, opts = {}) {
|
||||
const shouldHighlight = opts.forceColor || isColorSupported() && opts.highlightCode;
|
||||
const startLineBaseZero = (opts.startLine || 1) - 1;
|
||||
const defs = getDefs(shouldHighlight);
|
||||
const lines = rawLines.split(NEWLINE);
|
||||
const {
|
||||
start,
|
||||
end,
|
||||
markerLines
|
||||
} = getMarkerLines(loc, lines, opts, startLineBaseZero);
|
||||
const hasColumns = loc.start && typeof loc.start.column === "number";
|
||||
const numberMaxWidth = String(end + startLineBaseZero).length;
|
||||
const highlightedLines = shouldHighlight ? highlight(rawLines) : rawLines;
|
||||
let frame = highlightedLines.split(NEWLINE, end).slice(start, end).map((line, index) => {
|
||||
const number = start + 1 + index;
|
||||
const paddedNumber = ` ${number + startLineBaseZero}`.slice(-numberMaxWidth);
|
||||
const gutter = ` ${paddedNumber} |`;
|
||||
const hasMarker = markerLines[number];
|
||||
const lastMarkerLine = !markerLines[number + 1];
|
||||
if (hasMarker) {
|
||||
let markerLine = "";
|
||||
if (Array.isArray(hasMarker)) {
|
||||
const markerSpacing = line.slice(0, Math.max(hasMarker[0] - 1, 0)).replace(/[^\t]/g, " ");
|
||||
const numberOfMarkers = hasMarker[1] || 1;
|
||||
markerLine = ["\n ", defs.gutter(gutter.replace(/\d/g, " ")), " ", markerSpacing, defs.marker("^").repeat(numberOfMarkers)].join("");
|
||||
if (lastMarkerLine && opts.message) {
|
||||
markerLine += " " + defs.message(opts.message);
|
||||
}
|
||||
}
|
||||
return [defs.marker(">"), defs.gutter(gutter), line.length > 0 ? ` ${line}` : "", markerLine].join("");
|
||||
} else {
|
||||
return ` ${defs.gutter(gutter)}${line.length > 0 ? ` ${line}` : ""}`;
|
||||
}
|
||||
}).join("\n");
|
||||
if (opts.message && !hasColumns) {
|
||||
frame = `${" ".repeat(numberMaxWidth + 1)}${opts.message}\n${frame}`;
|
||||
}
|
||||
if (shouldHighlight) {
|
||||
return defs.reset(frame);
|
||||
} else {
|
||||
return frame;
|
||||
}
|
||||
}
|
||||
function index (rawLines, lineNumber, colNumber, opts = {}) {
|
||||
if (!deprecationWarningShown) {
|
||||
deprecationWarningShown = true;
|
||||
const message = "Passing lineNumber and colNumber is deprecated to @babel/code-frame. Please use `codeFrameColumns`.";
|
||||
if (process.emitWarning) {
|
||||
process.emitWarning(message, "DeprecationWarning");
|
||||
} else {
|
||||
const deprecationError = new Error(message);
|
||||
deprecationError.name = "DeprecationWarning";
|
||||
console.warn(new Error(message));
|
||||
}
|
||||
}
|
||||
colNumber = Math.max(colNumber, 0);
|
||||
const location = {
|
||||
start: {
|
||||
column: colNumber,
|
||||
line: lineNumber
|
||||
}
|
||||
};
|
||||
return codeFrameColumns(rawLines, location, opts);
|
||||
}
|
||||
|
||||
exports.codeFrameColumns = codeFrameColumns;
|
||||
exports.default = index;
|
||||
exports.highlight = highlight;
|
||||
//# sourceMappingURL=index.js.map
|
||||
1
frontend/node_modules/@babel/code-frame/lib/index.js.map
generated
vendored
Normal file
1
frontend/node_modules/@babel/code-frame/lib/index.js.map
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
32
frontend/node_modules/@babel/code-frame/package.json
generated
vendored
Normal file
32
frontend/node_modules/@babel/code-frame/package.json
generated
vendored
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
{
|
||||
"name": "@babel/code-frame",
|
||||
"version": "7.29.0",
|
||||
"description": "Generate errors that contain a code frame that point to source locations.",
|
||||
"author": "The Babel Team (https://babel.dev/team)",
|
||||
"homepage": "https://babel.dev/docs/en/next/babel-code-frame",
|
||||
"bugs": "https://github.com/babel/babel/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen",
|
||||
"license": "MIT",
|
||||
"publishConfig": {
|
||||
"access": "public"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/babel/babel.git",
|
||||
"directory": "packages/babel-code-frame"
|
||||
},
|
||||
"main": "./lib/index.js",
|
||||
"dependencies": {
|
||||
"@babel/helper-validator-identifier": "^7.28.5",
|
||||
"js-tokens": "^4.0.0",
|
||||
"picocolors": "^1.1.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"charcodes": "^0.2.0",
|
||||
"import-meta-resolve": "^4.1.0",
|
||||
"strip-ansi": "^4.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
},
|
||||
"type": "commonjs"
|
||||
}
|
||||
22
frontend/node_modules/@babel/compat-data/LICENSE
generated
vendored
Normal file
22
frontend/node_modules/@babel/compat-data/LICENSE
generated
vendored
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2014-present Sebastian McKenzie and other contributors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
19
frontend/node_modules/@babel/compat-data/README.md
generated
vendored
Normal file
19
frontend/node_modules/@babel/compat-data/README.md
generated
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# @babel/compat-data
|
||||
|
||||
> The compat-data to determine required Babel plugins
|
||||
|
||||
See our website [@babel/compat-data](https://babeljs.io/docs/babel-compat-data) for more information.
|
||||
|
||||
## Install
|
||||
|
||||
Using npm:
|
||||
|
||||
```sh
|
||||
npm install --save @babel/compat-data
|
||||
```
|
||||
|
||||
or using yarn:
|
||||
|
||||
```sh
|
||||
yarn add @babel/compat-data
|
||||
```
|
||||
2
frontend/node_modules/@babel/compat-data/corejs2-built-ins.js
generated
vendored
Normal file
2
frontend/node_modules/@babel/compat-data/corejs2-built-ins.js
generated
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
// Todo (Babel 8): remove this file as Babel 8 drop support of core-js 2
|
||||
module.exports = require("./data/corejs2-built-ins.json");
|
||||
2
frontend/node_modules/@babel/compat-data/corejs3-shipped-proposals.js
generated
vendored
Normal file
2
frontend/node_modules/@babel/compat-data/corejs3-shipped-proposals.js
generated
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
// Todo (Babel 8): remove this file now that it is included in babel-plugin-polyfill-corejs3
|
||||
module.exports = require("./data/corejs3-shipped-proposals.json");
|
||||
2
frontend/node_modules/@babel/compat-data/native-modules.js
generated
vendored
Normal file
2
frontend/node_modules/@babel/compat-data/native-modules.js
generated
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
// Todo (Babel 8): remove this file, in Babel 8 users import the .json directly
|
||||
module.exports = require("./data/native-modules.json");
|
||||
2
frontend/node_modules/@babel/compat-data/overlapping-plugins.js
generated
vendored
Normal file
2
frontend/node_modules/@babel/compat-data/overlapping-plugins.js
generated
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
// Todo (Babel 8): remove this file, in Babel 8 users import the .json directly
|
||||
module.exports = require("./data/overlapping-plugins.json");
|
||||
40
frontend/node_modules/@babel/compat-data/package.json
generated
vendored
Normal file
40
frontend/node_modules/@babel/compat-data/package.json
generated
vendored
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
{
|
||||
"name": "@babel/compat-data",
|
||||
"version": "7.29.0",
|
||||
"author": "The Babel Team (https://babel.dev/team)",
|
||||
"license": "MIT",
|
||||
"description": "The compat-data to determine required Babel plugins",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/babel/babel.git",
|
||||
"directory": "packages/babel-compat-data"
|
||||
},
|
||||
"publishConfig": {
|
||||
"access": "public"
|
||||
},
|
||||
"exports": {
|
||||
"./plugins": "./plugins.js",
|
||||
"./native-modules": "./native-modules.js",
|
||||
"./corejs2-built-ins": "./corejs2-built-ins.js",
|
||||
"./corejs3-shipped-proposals": "./corejs3-shipped-proposals.js",
|
||||
"./overlapping-plugins": "./overlapping-plugins.js",
|
||||
"./plugin-bugfixes": "./plugin-bugfixes.js"
|
||||
},
|
||||
"scripts": {
|
||||
"build-data": "./scripts/download-compat-table.sh && node ./scripts/build-data.mjs && node ./scripts/build-modules-support.mjs && node ./scripts/build-bugfixes-targets.mjs"
|
||||
},
|
||||
"keywords": [
|
||||
"babel",
|
||||
"compat-table",
|
||||
"compat-data"
|
||||
],
|
||||
"devDependencies": {
|
||||
"@mdn/browser-compat-data": "^6.0.8",
|
||||
"core-js-compat": "^3.48.0",
|
||||
"electron-to-chromium": "^1.5.278"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
},
|
||||
"type": "commonjs"
|
||||
}
|
||||
2
frontend/node_modules/@babel/compat-data/plugin-bugfixes.js
generated
vendored
Normal file
2
frontend/node_modules/@babel/compat-data/plugin-bugfixes.js
generated
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
// Todo (Babel 8): remove this file, in Babel 8 users import the .json directly
|
||||
module.exports = require("./data/plugin-bugfixes.json");
|
||||
2
frontend/node_modules/@babel/compat-data/plugins.js
generated
vendored
Normal file
2
frontend/node_modules/@babel/compat-data/plugins.js
generated
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
// Todo (Babel 8): remove this file, in Babel 8 users import the .json directly
|
||||
module.exports = require("./data/plugins.json");
|
||||
22
frontend/node_modules/@babel/core/LICENSE
generated
vendored
Normal file
22
frontend/node_modules/@babel/core/LICENSE
generated
vendored
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2014-present Sebastian McKenzie and other contributors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
19
frontend/node_modules/@babel/core/README.md
generated
vendored
Normal file
19
frontend/node_modules/@babel/core/README.md
generated
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# @babel/core
|
||||
|
||||
> Babel compiler core.
|
||||
|
||||
See our website [@babel/core](https://babeljs.io/docs/babel-core) for more information or the [issues](https://github.com/babel/babel/issues?utf8=%E2%9C%93&q=is%3Aissue+label%3A%22pkg%3A%20core%22+is%3Aopen) associated with this package.
|
||||
|
||||
## Install
|
||||
|
||||
Using npm:
|
||||
|
||||
```sh
|
||||
npm install --save-dev @babel/core
|
||||
```
|
||||
|
||||
or using yarn:
|
||||
|
||||
```sh
|
||||
yarn add @babel/core --dev
|
||||
```
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue