go integration and wikipedia

This commit is contained in:
jlimolina 2026-03-28 18:30:07 +01:00
parent 47a252e339
commit ee90335b92
7828 changed files with 1307913 additions and 20807 deletions

24
backend/Dockerfile Normal file
View file

@ -0,0 +1,24 @@
# Build stage: compile the server binary with the official Go 1.23 image.
FROM golang:1.23 AS builder
WORKDIR /app
# Packages installed here exist only in the build stage; the runtime stage
# below starts from a fresh base image.
RUN apt-get update && apt-get install -y gcc musl-dev git
# Copy the module files before the sources so the dependency download layer
# can be reused while go.mod and go.sum are unchanged.
COPY go.mod go.sum ./
RUN go mod download
COPY . .
# Build ./cmd/server with cgo disabled and VCS stamping turned off,
# writing the binary to /server.
RUN CGO_ENABLED=0 GOOS=linux go build -buildvcs=false -o /server ./cmd/server
# Runtime stage: Alpine with CA certificates, time zone data and the
# PostgreSQL client tools.
FROM alpine:3.19
RUN apk add --no-cache ca-certificates tzdata postgresql-client
WORKDIR /app
# Only the compiled binary is copied out of the build stage.
COPY --from=builder /server .
EXPOSE 8080
CMD ["./server"]

View file

@ -0,0 +1,468 @@
package main
import (
"context"
"fmt"
"log"
"net/http"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"time"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/mmcdole/gofeed"
"github.com/rss2/backend/internal/workers"
)
// Package-level state shared by the discovery worker's functions.
var (
// logger writes the worker's log output; it is created in init.
logger *log.Logger
// pool is not referenced by any code visible in this file.
// NOTE(review): looks like a leftover declaration — confirm before removing.
pool *workers.Config
// dbPool is the PostgreSQL pool used by every query here; main assigns it
// from workers.GetPool.
dbPool *pgxpool.Pool
// sleepSec is the polling interval in seconds; loadConfig overrides it from
// DISCOVERY_INTERVAL.
sleepSec = 900 // 15 minutes
// batchSize is the LIMIT passed to getPendingURLs; loadConfig overrides it
// from DISCOVERY_BATCH.
batchSize = 10
)
// URLSource is one row of the fuentes_url table as read by getPendingURLs.
// The pointer fields map to the categoria_id, pais_id and idioma columns;
// processURLSource auto-approves discovered feeds only when both CategoriaID
// and PaisID are non-nil.
type URLSource struct {
ID int64
Nombre string
URL string
CategoriaID *int64
PaisID *int64
Idioma *string
}
// init creates the package logger: standard-flag output on stdout, with every
// line prefixed by "[DISCOVERY] ".
func init() {
	const prefix = "[DISCOVERY] "
	logger = log.New(os.Stdout, prefix, log.LstdFlags)
}
// loadConfig reads the worker settings from the environment:
// DISCOVERY_INTERVAL (seconds between cycles, default 900) and
// DISCOVERY_BATCH (sources per cycle, default 10).
func loadConfig() {
	const (
		defaultInterval = 900
		defaultBatch    = 10
	)
	sleepSec = getEnvInt("DISCOVERY_INTERVAL", defaultInterval)
	batchSize = getEnvInt("DISCOVERY_BATCH", defaultBatch)
}
// getEnvInt returns the environment variable key parsed as an int.
// defaultValue is returned when the variable is unset, empty, or not a valid
// integer.
func getEnvInt(key string, defaultValue int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	parsed, err := strconv.Atoi(raw)
	if err != nil {
		return defaultValue
	}
	return parsed
}
// getPendingURLs returns up to batchSize rows of fuentes_url to check next.
// Rows never checked come first, then those whose last status was 'error',
// then 'no_feeds', then everything else; within a group the oldest
// last_check wins.
//
// A scan or iteration failure is returned as an error instead of being
// skipped, so the caller never receives a silently truncated batch.
//
// NOTE(review): this query filters on "active" while other statements in this
// commit use "activo" for the same table — confirm the column name.
func getPendingURLs(ctx context.Context) ([]URLSource, error) {
	rows, err := dbPool.Query(ctx, `
SELECT id, nombre, url, categoria_id, pais_id, idioma
FROM fuentes_url
WHERE active = TRUE
ORDER BY
CASE
WHEN last_check IS NULL THEN 1
WHEN last_status = 'error' THEN 2
WHEN last_status = 'no_feeds' THEN 3
ELSE 4
END,
last_check ASC NULLS FIRST
LIMIT $1
`, batchSize)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var sources []URLSource
	for rows.Next() {
		var s URLSource
		if err := rows.Scan(&s.ID, &s.Nombre, &s.URL, &s.CategoriaID, &s.PaisID, &s.Idioma); err != nil {
			return nil, fmt.Errorf("scanning fuentes_url row: %w", err)
		}
		sources = append(sources, s)
	}
	// Next returns false both at end of data and on failure; Err tells them apart.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("reading fuentes_url rows: %w", err)
	}
	return sources, nil
}
// updateURLStatus records the outcome of a discovery attempt on the
// fuentes_url row urlID: it sets last_check to NOW() and stores the status,
// message and HTTP code. The Exec error, if any, is returned unchanged.
func updateURLStatus(ctx context.Context, urlID int64, status, message string, httpCode int) error {
	const stmt = `
UPDATE fuentes_url
SET last_check = NOW(),
last_status = $1,
status_message = $2,
last_http_code = $3
WHERE id = $4
`
	if _, err := dbPool.Exec(ctx, stmt, status, message, httpCode, urlID); err != nil {
		return err
	}
	return nil
}
// discoverFeeds fetches pageURL (15 second timeout) and reports which feed
// URLs it exposes. If the response body parses as a feed with at least one
// item, pageURL itself is the only result; otherwise the lookup is delegated
// to findFeedLinksInHTML. Request construction and transport errors are
// returned to the caller.
func discoverFeeds(pageURL string) ([]string, error) {
	req, err := http.NewRequest("GET", pageURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; RSS2Bot/1.0)")
	req.Header.Set("Accept", "application/rss+xml, application/atom+xml, application/xml, text/xml, text/html")

	httpClient := &http.Client{Timeout: 15 * time.Second}
	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// A body that parses and carries items means the page is itself a feed.
	feed, parseErr := gofeed.NewParser().Parse(resp.Body)
	isFeed := parseErr == nil && feed != nil && len(feed.Items) > 0
	if !isFeed {
		return findFeedLinksInHTML(pageURL)
	}
	return []string{pageURL}, nil
}
// findFeedLinksInHTML is a placeholder for HTML feed-link discovery: it
// ignores baseURL and always returns an empty, non-nil slice and a nil error.
// A real implementation would parse the page (e.g. with goquery) and collect
// its RSS/Atom link elements.
func findFeedLinksInHTML(baseURL string) ([]string, error) {
	none := make([]string, 0)
	return none, nil
}
// parseFeed downloads feedURL (30 second timeout) and parses the response
// body with gofeed. It returns whatever the parser returns; request and
// transport errors are returned as-is.
func parseFeed(feedURL string) (*gofeed.Feed, error) {
	req, err := http.NewRequest("GET", feedURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; RSS2Bot/1.0)")
	req.Header.Set("Accept", "application/rss+xml, application/atom+xml, application/xml, text/xml")

	httpClient := &http.Client{Timeout: 30 * time.Second}
	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	feed, err := gofeed.NewParser().Parse(resp.Body)
	return feed, err
}
// getFeedMetadata downloads and parses feedURL and returns its title,
// description, language and number of items.
//
// An empty title is replaced by "Feed sin título". The description is capped
// at 500 bytes; the cut is made UTF-8 safe so a multi-byte character that
// straddles the limit is dropped rather than left as an invalid sequence.
// On error all other results are zero values.
func getFeedMetadata(feedURL string) (title, description, language string, entryCount int, err error) {
	const maxDescriptionBytes = 500

	feed, err := parseFeed(feedURL)
	if err != nil {
		return "", "", "", 0, err
	}

	title = feed.Title
	if title == "" {
		title = "Feed sin título"
	}

	description = feed.Description
	if len(description) > maxDescriptionBytes {
		// A plain byte slice can end in the middle of a rune; strip the
		// resulting invalid bytes.
		description = strings.ToValidUTF8(description[:maxDescriptionBytes], "")
	}

	return title, description, feed.Language, len(feed.Items), nil
}
// analyzeFeed guesses the country a feed is about from keywords in its title
// and description (case-insensitive substring match). The url parameter is
// currently unused and category is always returned empty.
//
// Countries are tested in a fixed order, so a text matching several
// countries always yields the same answer (the first one in the table).
// Matching is by substring, so short keywords such as "uk" or "lima" can
// match inside longer words.
func analyzeFeed(title, url, description string) (country, category string) {
	// Simple heuristics - in production use ML or API
	combined := strings.ToLower(title) + " " + strings.ToLower(description)

	// A slice, not a map: map iteration order is random and would make the
	// result non-deterministic when more than one country matches.
	rules := []struct {
		name     string
		keywords []string
	}{
		{"España", []string{"españa", "español", "madrid", "barcelona"}},
		{"Argentina", []string{"argentino", "buenos aires"}},
		{"México", []string{"méxico", "mexicano", "cdmx", "ciudad de méxico"}},
		{"Colombia", []string{"colombiano", "bogotá"}},
		{"Chile", []string{"chileno", "santiago"}},
		{"Perú", []string{"peruano", "lima"}},
		{"EE.UU.", []string{"estados unidos", "washington", "trump", "biden"}},
		{"Reino Unido", []string{"reino unido", "londres", "uk"}},
		{"Francia", []string{"francia", "parís"}},
		{"Alemania", []string{"alemania", "berlín"}},
	}

	for _, rule := range rules {
		for _, kw := range rule.keywords {
			if strings.Contains(combined, kw) {
				return rule.name, ""
			}
		}
	}
	return "", ""
}
// getCountryIDByName looks up the id of the paises row whose nombre equals
// countryName, ignoring case. The query or scan error is returned with a nil
// pointer when the lookup fails.
func getCountryIDByName(ctx context.Context, countryName string) (*int64, error) {
	row := dbPool.QueryRow(ctx, "SELECT id FROM paises WHERE LOWER(nombre) = LOWER($1)", countryName)

	countryID := new(int64)
	if err := row.Scan(countryID); err != nil {
		return nil, err
	}
	return countryID, nil
}
// getCategoryIDByName looks up the id of the categorias row whose nombre
// equals categoryName, ignoring case. The query or scan error is returned
// with a nil pointer when the lookup fails.
func getCategoryIDByName(ctx context.Context, categoryName string) (*int64, error) {
	row := dbPool.QueryRow(ctx, "SELECT id FROM categorias WHERE LOWER(nombre) = LOWER($1)", categoryName)

	categoryID := new(int64)
	if err := row.Scan(categoryID); err != nil {
		return nil, err
	}
	return categoryID, nil
}
// createPendingFeed stores a discovered feed in feeds_pending for manual
// review, or refreshes feed_title and discovered_at if feed_url is already
// there.
//
// metadata may carry the keys "title", "description", "language",
// "detected_country" and "suggested_category" (strings) and "entry_count"
// (int). Missing keys or values of another type are treated as empty/zero;
// an empty title becomes "Feed sin título". Country and category names are
// resolved to ids on a best-effort basis: a failed lookup leaves the id NULL.
func createPendingFeed(ctx context.Context, fuenteURLID int64, feedURL string, metadata map[string]interface{}) error {
	// text reads a string entry without panicking on absent or mistyped values.
	text := func(key string) string {
		s, _ := metadata[key].(string)
		return s
	}

	feedTitle := text("title")
	if feedTitle == "" {
		feedTitle = "Feed sin título"
	}
	description := text("description")
	language := text("language")
	entryCount, _ := metadata["entry_count"].(int)

	var detectedCountryID *int64
	if name := text("detected_country"); name != "" {
		if cid, err := getCountryIDByName(ctx, name); err == nil {
			detectedCountryID = cid
		}
	}

	var suggestedCategoryID *int64
	if name := text("suggested_category"); name != "" {
		if caid, err := getCategoryIDByName(ctx, name); err == nil {
			suggestedCategoryID = caid
		}
	}

	_, err := dbPool.Exec(ctx, `
INSERT INTO feeds_pending (
fuente_url_id, feed_url, feed_title, feed_description,
feed_language, feed_type, entry_count,
detected_country_id, suggested_categoria_id,
discovered_at
)
VALUES ($1, $2, $3, $4, $5, 'rss', $6, $7, $8, NOW())
ON CONFLICT (feed_url) DO UPDATE
SET feed_title = EXCLUDED.feed_title,
discovered_at = NOW()
`, fuenteURLID, feedURL, feedTitle, description, language, entryCount, detectedCountryID, suggestedCategoryID)
	return err
}
// createFeedDirectly fetches the metadata of feedURL and inserts it into
// feeds as an active feed. When the feed declares no language, idioma (if
// non-nil) is used instead.
//
// It returns true when a new row was inserted and false, with a nil error,
// when a feed with the same url already existed. The insert uses
// ON CONFLICT DO NOTHING, which produces no row on conflict; the affected-row
// count is therefore used to tell "created" from "already there" instead of
// scanning a RETURNING value that may not exist.
func createFeedDirectly(ctx context.Context, feedURL string, fuenteURLID *int64, categoriaID, paisID *int64, idioma *string) (bool, error) {
	title, description, language, _, err := getFeedMetadata(feedURL)
	if err != nil {
		return false, err
	}
	if language == "" && idioma != nil {
		language = *idioma
	}

	tag, err := dbPool.Exec(ctx, `
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma, fuente_url_id, activo)
VALUES ($1, $2, $3, $4, $5, $6, $7, TRUE)
ON CONFLICT (url) DO NOTHING
`, title, description, feedURL, categoriaID, paisID, language, fuenteURLID)
	if err != nil {
		return false, err
	}
	return tag.RowsAffected() > 0, nil
}
// truncateMessage returns msg cut to at most max bytes. Shorter messages are
// returned unchanged, and a cut never leaves a partial UTF-8 character.
func truncateMessage(msg string, max int) string {
	if max < 0 {
		max = 0
	}
	if len(msg) <= max {
		return msg
	}
	return strings.ToValidUTF8(msg[:max], "")
}

// processURLSource runs one discovery pass over source: it looks for feeds at
// source.URL, keeps at most MAX_FEEDS_PER_URL of them (default 5) and, for
// each one, either inserts it directly (when the source has both a category
// and a country) or queues it in feeds_pending for review. The result is
// written back to the source row through updateURLStatus and logged.
//
// Failures are logged and recorded in the status; nothing is returned.
func processURLSource(ctx context.Context, source URLSource) {
	const maxStatusMessage = 200

	logger.Printf("Processing: %s (%s)", source.Nombre, source.URL)

	// Try to find feeds on this URL
	feeds, err := discoverFeeds(source.URL)
	if err != nil {
		logger.Printf("Error discovering feeds: %v", err)
		// Slicing the message directly would panic on messages under 200 bytes.
		updateURLStatus(ctx, source.ID, "error", truncateMessage(err.Error(), maxStatusMessage), 0)
		return
	}
	if len(feeds) == 0 {
		logger.Printf("No feeds found for: %s", source.URL)
		updateURLStatus(ctx, source.ID, "no_feeds", "No feeds found", 200)
		return
	}
	logger.Printf("Found %d feeds for %s", len(feeds), source.URL)

	// A negative limit would make the slice expression panic.
	if maxFeeds := getEnvInt("MAX_FEEDS_PER_URL", 5); maxFeeds >= 0 && len(feeds) > maxFeeds {
		feeds = feeds[:maxFeeds]
	}

	// Sources that already carry a category and a country need no review.
	autoApprove := source.CategoriaID != nil && source.PaisID != nil

	var created, pending, existing, failed int
	for _, feedURL := range feeds {
		// Get feed metadata
		title, description, language, entryCount, err := getFeedMetadata(feedURL)
		if err != nil {
			logger.Printf("Error parsing feed %s: %v", feedURL, err)
			failed++
			continue
		}

		// Analyze for country/category
		detectedCountry, suggestedCategory := analyzeFeed(title, feedURL, description)
		metadata := map[string]interface{}{
			"title":              title,
			"description":        description,
			"language":           language,
			"entry_count":        entryCount,
			"detected_country":   detectedCountry,
			"suggested_category": suggestedCategory,
		}

		if autoApprove {
			// Create feed directly
			createdFeed, err := createFeedDirectly(ctx, feedURL, &source.ID, source.CategoriaID, source.PaisID, source.Idioma)
			switch {
			case err != nil:
				logger.Printf("Error creating feed: %v", err)
				failed++
			case createdFeed:
				created++
			default:
				existing++
			}
		} else {
			// Create pending feed for review
			if err := createPendingFeed(ctx, source.ID, feedURL, metadata); err != nil {
				logger.Printf("Error creating pending feed: %v", err)
				failed++
			} else {
				pending++
			}
		}

		time.Sleep(1 * time.Second) // Rate limiting
	}

	// Update status
	var status, message string
	switch {
	case created > 0 || pending > 0:
		status = "success"
		var parts []string
		if created > 0 {
			parts = append(parts, fmt.Sprintf("%d created", created))
		}
		if pending > 0 {
			parts = append(parts, fmt.Sprintf("%d pending", pending))
		}
		message = strings.Join(parts, ", ")
	case existing > 0:
		status = "existing"
		message = fmt.Sprintf("%d already existed", existing)
	default:
		status = "error"
		message = fmt.Sprintf("%d errors", failed)
	}
	updateURLStatus(ctx, source.ID, status, message, 200)

	logger.Printf("Processed %s: created=%d, pending=%d, existing=%d, errors=%d",
		source.URL, created, pending, existing, failed)
}
// main runs the RSS discovery worker: it connects to PostgreSQL and then, on
// every tick of the configured interval, fetches a batch of pending sources
// and processes them one by one.
//
// SIGINT and SIGTERM cancel the worker's context. The loop then stops after
// the source currently being processed and returns, so the deferred
// workers.Close runs (calling os.Exit from a signal goroutine would skip it).
func main() {
	loadConfig()
	logger.Println("Starting RSS Discovery Worker")

	cfg := workers.LoadDBConfig()
	if err := workers.Connect(cfg); err != nil {
		logger.Fatalf("Failed to connect to database: %v", err)
	}
	dbPool = workers.GetPool()
	defer workers.Close()
	logger.Println("Connected to PostgreSQL")

	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	logger.Printf("Config: interval=%ds, batch=%d", sleepSec, batchSize)

	// time.NewTicker panics on a non-positive duration, which a bad
	// DISCOVERY_INTERVAL value could produce.
	interval := time.Duration(sleepSec) * time.Second
	if interval <= 0 {
		logger.Printf("Invalid interval %ds, using 900s", sleepSec)
		interval = 900 * time.Second
	}
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			logger.Println("Shutting down...")
			return
		case <-ticker.C:
			sources, err := getPendingURLs(ctx)
			if err != nil {
				logger.Printf("Error fetching URLs: %v", err)
				continue
			}
			if len(sources) == 0 {
				logger.Println("No pending URLs to process")
				continue
			}
			logger.Printf("Processing %d sources", len(sources))
			for _, source := range sources {
				if ctx.Err() != nil {
					break
				}
				processURLSource(ctx, source)
				// Pause between sources, but wake up early on shutdown.
				select {
				case <-ctx.Done():
				case <-time.After(2 * time.Second):
				}
			}
		}
	}
}

391
backend/cmd/qdrant/main.go Normal file
View file

@ -0,0 +1,391 @@
package main
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"os/signal"
"strconv"
"syscall"
"time"
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rss2/backend/internal/workers"
)
// Package-level state of the Qdrant vectorization worker.
var (
// logger writes the worker's log output; it is created in init.
logger *log.Logger
// dbPool is the PostgreSQL pool; main assigns it from workers.GetPool.
dbPool *pgxpool.Pool
// qdrantURL is the Qdrant base URL, built by loadConfig from QDRANT_HOST and
// QDRANT_PORT.
qdrantURL string
// ollamaURL is the base URL of the Ollama server used for embeddings
// (OLLAMA_URL).
ollamaURL string
// collection is the Qdrant collection name (QDRANT_COLLECTION).
collection = "news_vectors"
// sleepSec is the pause between cycles in seconds (QDRANT_SLEEP).
sleepSec = 30
// batchSize is the number of translations fetched per cycle (QDRANT_BATCH).
batchSize = 100
)
// init creates the package logger: standard-flag output on stdout, with every
// line prefixed by "[QDRANT] ".
func init() {
	const prefix = "[QDRANT] "
	logger = log.New(os.Stdout, prefix, log.LstdFlags)
}
// loadConfig reads the worker settings from the environment, falling back to
// defaults: QDRANT_SLEEP (30), QDRANT_BATCH (100), QDRANT_HOST ("localhost"),
// QDRANT_PORT (6333), OLLAMA_URL ("http://ollama:11434") and
// QDRANT_COLLECTION ("news_vectors"). The Qdrant base URL is assembled from
// host and port.
func loadConfig() {
	sleepSec = getEnvInt("QDRANT_SLEEP", 30)
	batchSize = getEnvInt("QDRANT_BATCH", 100)

	host := getEnv("QDRANT_HOST", "localhost")
	port := getEnvInt("QDRANT_PORT", 6333)
	qdrantURL = fmt.Sprintf("http://%s:%d", host, port)

	ollamaURL = getEnv("OLLAMA_URL", "http://ollama:11434")
	collection = getEnv("QDRANT_COLLECTION", "news_vectors")
}
// getEnv returns the value of the environment variable key, or defaultValue
// when the variable is unset or empty.
func getEnv(key, defaultValue string) string {
	value := os.Getenv(key)
	if value == "" {
		return defaultValue
	}
	return value
}
// getEnvInt returns the environment variable key parsed as an int, or
// defaultValue when the variable is unset, empty, or not a valid integer.
func getEnvInt(key string, defaultValue int) int {
	raw, ok := os.LookupEnv(key)
	if !ok || raw == "" {
		return defaultValue
	}
	if n, err := strconv.Atoi(raw); err == nil {
		return n
	}
	return defaultValue
}
// Translation is one finished translation joined with its news item, as
// selected by getPendingTranslations. ID and Lang come from traducciones
// (lang_to is trimmed), Titulo and Resumen are the translated title and
// summary, and the remaining fields come from the noticias row.
type Translation struct {
ID int64
NoticiaID int64
Lang string
Titulo string
Resumen string
URL string
Fecha *time.Time
FuenteNombre string
CategoriaID *int64
PaisID *int64
}
// getPendingTranslations returns up to batchSize translations that are done
// but not yet vectorized, oldest first, each joined with its news item.
//
// A scan or iteration failure is returned as an error instead of being
// skipped, so the caller never works on a silently truncated batch.
func getPendingTranslations(ctx context.Context) ([]Translation, error) {
	rows, err := dbPool.Query(ctx, `
SELECT
t.id as traduccion_id,
t.noticia_id,
TRIM(t.lang_to) as lang,
t.titulo_trad as titulo,
t.resumen_trad as resumen,
n.url,
n.fecha,
n.fuente_nombre,
n.categoria_id,
n.pais_id
FROM traducciones t
INNER JOIN noticias n ON t.noticia_id = n.id
WHERE t.vectorized = FALSE
AND t.status = 'done'
ORDER BY t.created_at ASC
LIMIT $1
`, batchSize)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var translations []Translation
	for rows.Next() {
		var t Translation
		if err := rows.Scan(
			&t.ID, &t.NoticiaID, &t.Lang, &t.Titulo, &t.Resumen,
			&t.URL, &t.Fecha, &t.FuenteNombre, &t.CategoriaID, &t.PaisID,
		); err != nil {
			return nil, fmt.Errorf("scanning pending translation: %w", err)
		}
		translations = append(translations, t)
	}
	// Next returns false both at end of data and on failure; Err tells them apart.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("reading pending translations: %w", err)
	}
	return translations, nil
}
// EmbeddingRequest is the JSON body sent to Ollama's /api/embeddings
// endpoint. That endpoint reads the text to embed from the "prompt" key
// ("input" belongs to the newer /api/embed endpoint), so Input is serialised
// as "prompt"; the Go field name is kept for existing callers.
type EmbeddingRequest struct {
	Model string `json:"model"`
	Input string `json:"prompt"`
}
// EmbeddingResponse is the part of the embedding service's JSON reply that
// generateEmbedding uses: the vector under the "embedding" key.
type EmbeddingResponse struct {
Embedding []float64 `json:"embedding"`
}
// generateEmbedding asks the Ollama server at ollamaURL for the
// "mxbai-embed-large" embedding of text (60 second timeout).
//
// It returns an error on transport failure, on any status other than 200, on
// an undecodable body, and when the reply carries no vector. An empty vector
// is rejected here so it cannot be used as a collection dimension or
// uploaded as a point.
func generateEmbedding(text string) ([]float64, error) {
	body, err := json.Marshal(EmbeddingRequest{
		Model: "mxbai-embed-large",
		Input: text,
	})
	if err != nil {
		return nil, err
	}

	client := &http.Client{Timeout: 60 * time.Second}
	resp, err := client.Post(ollamaURL+"/api/embeddings", "application/json", bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Ollama returned status %d", resp.StatusCode)
	}

	var result EmbeddingResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, err
	}
	if len(result.Embedding) == 0 {
		return nil, fmt.Errorf("Ollama returned an empty embedding")
	}
	return result.Embedding, nil
}
// QdrantPoint is one point of an upsert request: an id, its vector and an
// arbitrary JSON payload. uploadToQdrant fills ID with a UUID string.
type QdrantPoint struct {
ID interface{} `json:"id"`
Vector []float64 `json:"vector"`
Payload map[string]interface{} `json:"payload"`
}
// QdrantUpsertRequest is the JSON body uploadToQdrant sends: the list of
// points under the "points" key.
type QdrantUpsertRequest struct {
Points []QdrantPoint `json:"points"`
}
// ensureCollection makes sure the configured Qdrant collection exists.
//
// It first asks Qdrant for the collection: 200 means it exists, 404 means it
// must be created, anything else is an error. To create it, a probe
// embedding supplies the vector size, and the collection is created with
// cosine distance via PUT /collections/{name} (the name goes in the path,
// not the body). A non-200 creation reply is returned as an error together
// with the response body.
func ensureCollection() error {
	client := &http.Client{Timeout: 60 * time.Second}
	collectionURL := qdrantURL + "/collections/" + collection

	resp, err := client.Get(collectionURL)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusOK:
		logger.Printf("Collection %s already exists", collection)
		return nil
	case http.StatusNotFound:
		// Fall through to creation below.
	default:
		return fmt.Errorf("checking collection %s: Qdrant returned status %d", collection, resp.StatusCode)
	}

	// Get embedding dimension
	emb, err := generateEmbedding("test")
	if err != nil {
		return fmt.Errorf("failed to get embedding dimension: %w", err)
	}
	dimension := len(emb)
	if dimension == 0 {
		return fmt.Errorf("failed to get embedding dimension: empty embedding")
	}

	// Create collection
	body, err := json.Marshal(map[string]interface{}{
		"vectors": map[string]interface{}{
			"size":     dimension,
			"distance": "Cosine",
		},
	})
	if err != nil {
		return err
	}
	req, err := http.NewRequest(http.MethodPut, collectionURL, bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")

	createResp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer createResp.Body.Close()

	if createResp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		detail, _ := io.ReadAll(createResp.Body)
		return fmt.Errorf("creating collection %s: Qdrant returned status %d: %s",
			collection, createResp.StatusCode, string(detail))
	}

	logger.Printf("Created collection %s with dimension %d", collection, dimension)
	return nil
}
// pointIDFor returns the Qdrant point id of a translation: a name-based
// (SHA-1) UUID derived from the translation id. Because it is deterministic,
// the id stored in PostgreSQL always matches the uploaded point, and
// re-uploading a translation overwrites its point instead of adding a
// duplicate.
func pointIDFor(t Translation) string {
	name := fmt.Sprintf("traduccion:%d", t.ID)
	return uuid.NewSHA1(uuid.NameSpaceOID, []byte(name)).String()
}

// uploadToQdrant upserts one point per translation that has an embedding.
// embeddings[i] belongs to translations[i]; entries with a missing or empty
// embedding are skipped. Nothing is sent when no point remains.
//
// Points are upserted with PUT /collections/{name}/points. Any reply other
// than 200 or 202 is returned as an error with the response body.
func uploadToQdrant(translations []Translation, embeddings [][]float64) error {
	points := make([]QdrantPoint, 0, len(translations))
	for i, t := range translations {
		if i >= len(embeddings) || len(embeddings[i]) == 0 {
			continue
		}
		payload := map[string]interface{}{
			"news_id":       t.NoticiaID,
			"traduccion_id": t.ID,
			"titulo":        t.Titulo,
			"resumen":       t.Resumen,
			"url":           t.URL,
			"fuente_nombre": t.FuenteNombre,
			"lang":          t.Lang,
		}
		// Optional attributes are only present when the database has them.
		if t.Fecha != nil {
			payload["fecha"] = t.Fecha.Format(time.RFC3339)
		}
		if t.CategoriaID != nil {
			payload["categoria_id"] = *t.CategoriaID
		}
		if t.PaisID != nil {
			payload["pais_id"] = *t.PaisID
		}
		points = append(points, QdrantPoint{
			ID:      pointIDFor(t),
			Vector:  embeddings[i],
			Payload: payload,
		})
	}
	if len(points) == 0 {
		return nil
	}

	body, err := json.Marshal(QdrantUpsertRequest{Points: points})
	if err != nil {
		return err
	}

	url := fmt.Sprintf("%s/collections/%s/points", qdrantURL, collection)
	req, err := http.NewRequest(http.MethodPut, url, bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")

	client := &http.Client{Timeout: 60 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 && resp.StatusCode != 202 {
		// Best effort: the body is only used to enrich the error message.
		respBody, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("Qdrant returned status %d: %s", resp.StatusCode, string(respBody))
	}
	return nil
}

// updateTranslationStatus marks translations as vectorized and stores their
// Qdrant point id. pointIDs[i] belongs to translations[i]; entries with an
// empty or missing id are left untouched. Every row is attempted; individual
// failures are logged, and a summary error is returned if any update failed.
func updateTranslationStatus(ctx context.Context, translations []Translation, pointIDs []string) error {
	failed := 0
	for i, t := range translations {
		if i >= len(pointIDs) || pointIDs[i] == "" {
			continue
		}
		_, err := dbPool.Exec(ctx, `
UPDATE traducciones
SET
vectorized = TRUE,
vectorization_date = NOW(),
qdrant_point_id = $1
WHERE id = $2
`, pointIDs[i], t.ID)
		if err != nil {
			logger.Printf("Error updating translation %d: %v", t.ID, err)
			failed++
		}
	}
	if failed > 0 {
		return fmt.Errorf("%d translation status updates failed", failed)
	}
	return nil
}

// getStats counts, among translations with lang_to = 'es', the total number
// of rows, those already vectorized, and those done but not yet vectorized.
func getStats(ctx context.Context) (total, vectorized, pending int, err error) {
	err = dbPool.QueryRow(ctx, `
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE vectorized = TRUE) as vectorized,
COUNT(*) FILTER (WHERE vectorized = FALSE AND status = 'done') as pending
FROM traducciones
WHERE lang_to = 'es'
`).Scan(&total, &vectorized, &pending)
	return total, vectorized, pending, err
}

// vectorizeBatch runs one worker cycle: fetch pending translations, embed
// them, upload the points and mark the uploaded translations as vectorized.
// It returns how many translations were uploaded. Translations whose
// embedding failed are left pending so a later cycle retries them.
func vectorizeBatch(ctx context.Context) int {
	translations, err := getPendingTranslations(ctx)
	if err != nil {
		logger.Printf("Error fetching pending translations: %v", err)
		return 0
	}
	if len(translations) == 0 {
		logger.Println("No pending translations to process")
		return 0
	}
	logger.Printf("Processing %d translations...", len(translations))

	// Generate embeddings
	embeddings := make([][]float64, len(translations))
	for i, t := range translations {
		if ctx.Err() != nil {
			break // shutting down: upload what we have
		}
		emb, err := generateEmbedding(fmt.Sprintf("%s %s", t.Titulo, t.Resumen))
		if err != nil {
			logger.Printf("Error generating embedding for %d: %v", t.ID, err)
			continue
		}
		embeddings[i] = emb
	}

	// Upload to Qdrant
	if err := uploadToQdrant(translations, embeddings); err != nil {
		logger.Printf("Error uploading to Qdrant: %v", err)
		return 0
	}

	// Update DB status, only for translations that actually have a point.
	pointIDs := make([]string, len(translations))
	uploaded := 0
	for i, t := range translations {
		if len(embeddings[i]) == 0 {
			continue
		}
		pointIDs[i] = pointIDFor(t)
		uploaded++
	}
	if err := updateTranslationStatus(ctx, translations, pointIDs); err != nil {
		logger.Printf("Error updating status: %v", err)
	}
	return uploaded
}

// main runs the Qdrant vectorization worker: it connects to PostgreSQL,
// makes sure the collection exists and then runs one vectorization cycle
// every sleepSec seconds.
//
// SIGINT and SIGTERM cancel the worker's context; the loop then returns so
// the deferred workers.Close runs.
func main() {
	loadConfig()
	logger.Println("Starting Qdrant Vectorization Worker")

	cfg := workers.LoadDBConfig()
	if err := workers.Connect(cfg); err != nil {
		logger.Fatalf("Failed to connect to database: %v", err)
	}
	dbPool = workers.GetPool()
	defer workers.Close()
	logger.Println("Connected to PostgreSQL")

	if err := ensureCollection(); err != nil {
		logger.Printf("Warning: Could not ensure collection: %v", err)
	}

	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	logger.Printf("Config: qdrant=%s, ollama=%s, collection=%s, sleep=%ds, batch=%d",
		qdrantURL, ollamaURL, collection, sleepSec, batchSize)

	interval := time.Duration(sleepSec) * time.Second
	timer := time.NewTimer(interval)
	defer timer.Stop()

	totalProcessed := 0
	for {
		select {
		case <-ctx.Done():
			logger.Println("Shutting down...")
			return
		case <-timer.C:
		}

		if n := vectorizeBatch(ctx); n > 0 {
			totalProcessed += n
			logger.Printf("Processed %d translations (total: %d)", n, totalProcessed)
			if total, vectorized, pending, err := getStats(ctx); err == nil {
				logger.Printf("Stats: total=%d, vectorized=%d, pending=%d", total, vectorized, pending)
			}
		}
		// The timer channel was drained above, so Reset is safe.
		timer.Reset(interval)
	}
}

384
backend/cmd/related/main.go Normal file
View file

@ -0,0 +1,384 @@
package main
import (
"context"
"log"
"math"
"os"
"os/signal"
"strconv"
"syscall"
"time"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rss2/backend/internal/workers"
)
// Package-level state of the related-news worker.
var (
// logger writes the worker's log output; it is created in init.
logger *log.Logger
// dbPool is the PostgreSQL pool; main assigns it from workers.GetPool.
dbPool *pgxpool.Pool
// sleepSec is the pause between batches in seconds (RELATED_SLEEP).
sleepSec = 10
// topK is the maximum number of related items kept per translation
// (RELATED_TOPK).
topK = 10
// batchSz is the number of pending translations handled per batch
// (RELATED_BATCH).
batchSz = 200
// minScore is the similarity a candidate must exceed to be kept
// (RELATED_MIN_SCORE).
minScore = 0.0
)
// init creates the package logger: standard-flag output on stdout, with every
// line prefixed by "[RELATED] ".
func init() {
	const prefix = "[RELATED] "
	logger = log.New(os.Stdout, prefix, log.LstdFlags)
}
// loadConfig reads the worker settings from the environment, falling back to
// defaults: RELATED_SLEEP (10), RELATED_TOPK (10), RELATED_BATCH (200) and
// RELATED_MIN_SCORE (0.0).
func loadConfig() {
	const (
		defaultSleep = 10
		defaultTopK  = 10
		defaultBatch = 200
	)
	sleepSec = getEnvInt("RELATED_SLEEP", defaultSleep)
	topK = getEnvInt("RELATED_TOPK", defaultTopK)
	batchSz = getEnvInt("RELATED_BATCH", defaultBatch)
	minScore = getEnvFloat("RELATED_MIN_SCORE", 0.0)
}
// getEnvInt returns the environment variable key parsed as an int, or
// defaultValue when the variable is unset, empty, or not a valid integer.
func getEnvInt(key string, defaultValue int) int {
	result := defaultValue
	if raw := os.Getenv(key); raw != "" {
		if n, err := strconv.Atoi(raw); err == nil {
			result = n
		}
	}
	return result
}
// getEnvFloat returns the environment variable key parsed as a float64, or
// defaultValue when the variable is unset, empty, or not a valid number.
func getEnvFloat(key string, defaultValue float64) float64 {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	parsed, err := strconv.ParseFloat(raw, 64)
	if err != nil {
		return defaultValue
	}
	return parsed
}
// Translation is one translation together with its stored embedding, as
// loaded by fetchAllEmbeddings. ID is the traduccion_id; Titulo and Resumen
// are the translated title and summary (empty string when absent).
type Translation struct {
ID int64
Titulo string
Resumen string
Embedding []float64
}
// ensureSchema creates, if they do not exist yet, the related_noticias and
// traduccion_embeddings tables and the two indexes on traduccion_embeddings.
// The statements run in order and the first failure is returned.
func ensureSchema(ctx context.Context) error {
	statements := []string{
		`
CREATE TABLE IF NOT EXISTS related_noticias (
traduccion_id INTEGER REFERENCES traducciones(id) ON DELETE CASCADE,
related_traduccion_id INTEGER REFERENCES traducciones(id) ON DELETE CASCADE,
score FLOAT NOT NULL DEFAULT 0,
created_at TIMESTAMP DEFAULT NOW(),
PRIMARY KEY (traduccion_id, related_traduccion_id)
);
`,
		// Ensure traduccion_embeddings table exists
		`
CREATE TABLE IF NOT EXISTS traduccion_embeddings (
id SERIAL PRIMARY KEY,
traduccion_id INTEGER NOT NULL REFERENCES traducciones(id) ON DELETE CASCADE,
model TEXT NOT NULL,
dim INTEGER NOT NULL,
embedding DOUBLE PRECISION[] NOT NULL,
created_at TIMESTAMP DEFAULT NOW(),
UNIQUE (traduccion_id, model)
);
`,
		`
CREATE INDEX IF NOT EXISTS idx_tr_emb_model ON traduccion_embeddings(model);
`,
		`
CREATE INDEX IF NOT EXISTS idx_tr_emb_traduccion_id ON traduccion_embeddings(traduccion_id);
`,
	}

	for _, stmt := range statements {
		if _, err := dbPool.Exec(ctx, stmt); err != nil {
			return err
		}
	}
	return nil
}
// fetchAllEmbeddings loads the embedding of every finished Spanish
// translation (status 'done', lang_to 'es') stored for model, together with
// its translated title and summary.
//
// A scan or iteration failure is returned as an error instead of being
// skipped, so the candidate set is never silently incomplete.
func fetchAllEmbeddings(ctx context.Context, model string) ([]Translation, error) {
	rows, err := dbPool.Query(ctx, `
SELECT e.traduccion_id,
COALESCE(NULLIF(t.titulo_trad,''), ''),
COALESCE(NULLIF(t.resumen_trad,''), ''),
e.embedding
FROM traduccion_embeddings e
JOIN traducciones t ON t.id = e.traduccion_id
WHERE e.model = $1
AND t.status = 'done'
AND t.lang_to = 'es'
`, model)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var translations []Translation
	for rows.Next() {
		var t Translation
		if err := rows.Scan(&t.ID, &t.Titulo, &t.Resumen, &t.Embedding); err != nil {
			return nil, err
		}
		translations = append(translations, t)
	}
	// Next returns false both at end of data and on failure; Err tells them apart.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return translations, nil
}
// fetchPendingIDs returns up to limit ids of finished Spanish translations
// that have an embedding for model but no rows yet in related_noticias,
// newest id first.
//
// A scan or iteration failure is returned as an error instead of being
// skipped.
func fetchPendingIDs(ctx context.Context, model string, limit int) ([]int64, error) {
	rows, err := dbPool.Query(ctx, `
SELECT t.id
FROM traducciones t
JOIN traduccion_embeddings e ON e.traduccion_id = t.id AND e.model = $1
LEFT JOIN related_noticias r ON r.traduccion_id = t.id
WHERE t.lang_to = 'es'
AND t.status = 'done'
GROUP BY t.id
HAVING COUNT(r.related_traduccion_id) = 0
ORDER BY t.id DESC
LIMIT $2
`, model, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var ids []int64
	for rows.Next() {
		var id int64
		if err := rows.Scan(&id); err != nil {
			return nil, err
		}
		ids = append(ids, id)
	}
	// Next returns false both at end of data and on failure; Err tells them apart.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return ids, nil
}
// cosineSimilarity returns the cosine of the angle between a and b.
// It returns 0 when the vectors are empty, differ in length, or either one
// has zero magnitude.
func cosineSimilarity(a, b []float64) float64 {
	if len(a) != len(b) || len(a) == 0 {
		return 0
	}
	var dotProduct, normA, normB float64
	for i := range a {
		dotProduct += a[i] * b[i]
		normA += a[i] * a[i]
		normB += b[i] * b[i]
	}
	normA = sqrt(normA)
	normB = sqrt(normB)
	if normA == 0 || normB == 0 {
		return 0
	}
	return dotProduct / (normA * normB)
}

// sqrt returns the square root of x, or 0 for x <= 0.
// It delegates to math.Sqrt: a fixed number of Newton-Raphson steps started
// at x does not converge for large inputs.
func sqrt(x float64) float64 {
	if x <= 0 {
		return 0
	}
	return math.Sqrt(x)
}
// findTopK returns the k candidates most similar to query, best first.
// The candidate with the query's own id is skipped, and so is any candidate
// whose cosine similarity does not exceed minScore. Among equal scores the
// candidate that appears first in candidates ranks first. The result is
// never nil; it is empty when k <= 0 or nothing qualifies.
//
// Only the current best k entries are kept while scanning, so the cost is
// O(len(candidates) * k) rather than a full sort of every candidate.
func findTopK(query Embedding, candidates []Translation, k int, minScore float64) []struct {
	ID    int64
	Score float64
} {
	type scored = struct {
		ID    int64
		Score float64
	}

	top := []scored{}
	if k <= 0 {
		return top
	}

	for _, c := range candidates {
		if c.ID == query.ID {
			continue
		}
		score := cosineSimilarity(query.Embedding, c.Embedding)
		if score <= minScore {
			continue
		}

		// Insertion point: after every entry scoring at least as high.
		pos := len(top)
		for pos > 0 && top[pos-1].Score < score {
			pos--
		}
		if pos >= k {
			continue // not better than anything in a full list
		}
		if len(top) < k {
			top = append(top, scored{})
		}
		// Shift the tail down one slot; on a full list the last entry drops out.
		copy(top[pos+1:], top[pos:])
		top[pos] = scored{ID: c.ID, Score: score}
	}
	return top
}
// Embedding pairs a translation id with its embedding vector. processBatch
// builds these from Translation values and findTopK uses one as the query.
type Embedding struct {
ID int64
Embedding []float64
}
// findEmbeddingByID returns a pointer to the first element of embeddings
// whose ID equals id, or nil when there is none. The pointer refers to the
// slice element itself, not to a copy.
func findEmbeddingByID(embeddings []Embedding, id int64) *Embedding {
	for idx := 0; idx < len(embeddings); idx++ {
		if candidate := &embeddings[idx]; candidate.ID == id {
			return candidate
		}
	}
	return nil
}
// insertRelated stores the related entries of traduccionID in
// related_noticias, updating the score when a pair already exists. Entries
// with a score of zero or less are skipped. Insert failures are logged and
// do not stop the loop; the function always returns nil.
func insertRelated(ctx context.Context, traduccionID int64, related []struct {
	ID    int64
	Score float64
}) error {
	const upsert = `
INSERT INTO related_noticias (traduccion_id, related_traduccion_id, score)
VALUES ($1, $2, $3)
ON CONFLICT (traduccion_id, related_traduccion_id)
DO UPDATE SET score = EXCLUDED.score
`
	for idx := range related {
		entry := related[idx]
		if entry.Score > 0 {
			if _, err := dbPool.Exec(ctx, upsert, traduccionID, entry.ID, entry.Score); err != nil {
				logger.Printf("Error inserting related: %v", err)
			}
		}
	}
	return nil
}
func processBatch(ctx context.Context, model string) (int, error) {
// Fetch all embeddings once
allTranslations, err := fetchAllEmbeddings(ctx, model)
if err != nil {
return 0, err
}
if len(allTranslations) == 0 {
return 0, nil
}
// Convert to Embedding format for easier lookup
var allEmbeddings []Embedding
for _, t := range allTranslations {
if t.Embedding != nil {
allEmbeddings = append(allEmbeddings, Embedding{ID: t.ID, Embedding: t.Embedding})
}
}
// Get pending IDs
pendingIDs, err := fetchPendingIDs(ctx, model, batchSz)
if err != nil {
return 0, err
}
if len(pendingIDs) == 0 {
return 0, nil
}
processed := 0
for _, tradID := range pendingIDs {
emb := findEmbeddingByID(allEmbeddings, tradID)
if emb == nil {
continue
}
topRelated := findTopK(*emb, allTranslations, topK, minScore)
if err := insertRelated(ctx, tradID, topRelated); err != nil {
logger.Printf("Error inserting related for %d: %v", tradID, err)
continue
}
processed++
}
return processed, nil
}
// main runs the related-news worker: it connects to PostgreSQL, ensures the
// schema exists and then processes one batch every sleepSec seconds using
// the embedding model named by EMB_MODEL (default "mxbai-embed-large").
//
// SIGINT and SIGTERM cancel the worker's context; the loop then returns so
// the deferred workers.Close runs.
func main() {
	loadConfig()
	logger.Println("Starting Related News Worker")

	cfg := workers.LoadDBConfig()
	if err := workers.Connect(cfg); err != nil {
		logger.Fatalf("Failed to connect to database: %v", err)
	}
	dbPool = workers.GetPool()
	defer workers.Close()

	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	// Ensure schema
	if err := ensureSchema(ctx); err != nil {
		logger.Printf("Error ensuring schema: %v", err)
	}

	model := os.Getenv("EMB_MODEL")
	if model == "" {
		model = "mxbai-embed-large"
	}

	logger.Printf("Config: sleep=%ds, topK=%d, batch=%d, model=%s", sleepSec, topK, batchSz, model)

	interval := time.Duration(sleepSec) * time.Second
	timer := time.NewTimer(interval)
	defer timer.Stop()

	for {
		select {
		case <-ctx.Done():
			logger.Println("Shutting down...")
			return
		case <-timer.C:
		}

		count, err := processBatch(ctx, model)
		switch {
		case err != nil:
			logger.Printf("Error processing batch: %v", err)
		case count > 0:
			logger.Printf("Generated related news for %d translations", count)
		}
		// The timer channel was drained above, so Reset is safe.
		timer.Reset(interval)
	}
}

330
backend/cmd/scraper/main.go Normal file
View file

@ -0,0 +1,330 @@
package main
import (
"context"
"crypto/md5"
"fmt"
"log"
"net/http"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rss2/backend/internal/workers"
)
// Package-level state of the scraper worker.
var (
// logger writes the worker's log output; it is created in init.
logger *log.Logger
// dbPool is not referenced by any code visible in this file.
// NOTE(review): looks like a leftover declaration — confirm before removing.
dbPool *workers.Config
// pool is the PostgreSQL pool used by every query here; main assigns it from
// workers.GetPool.
pool *pgxpool.Pool
// sleepInterval is the pause between cycles in seconds (SCRAPER_SLEEP).
sleepInterval = 60
// batchSize is read from SCRAPER_BATCH and logged by main; no query visible
// in this file uses it.
batchSize = 10
)
// URLSource is one row of the fuentes_url table as read by getActiveURLs.
// The pointer fields map to the categoria_id, pais_id and idioma columns and
// Active to the activo column.
type URLSource struct {
ID int64
Nombre string
URL string
CategoriaID *int64
PaisID *int64
Idioma *string
Active bool
}
// Article is what extractArticle pulls out of a page: title, summary, main
// text, the page URL and the preview image URL. extractArticle does not set
// PubDate; saveArticle falls back to the current time when it is nil.
type Article struct {
Title string
Summary string
Content string
URL string
ImageURL string
PubDate *time.Time
}
// init creates the package logger: standard-flag output with every line
// prefixed by "[SCRAPER] ", then (again) points its output at stdout.
func init() {
	const prefix = "[SCRAPER] "
	out := os.Stdout
	logger = log.New(out, prefix, log.LstdFlags)
	logger.SetOutput(out)
}
// loadConfig reads the worker settings from the environment, falling back to
// defaults: SCRAPER_SLEEP (60 seconds) and SCRAPER_BATCH (10).
func loadConfig() {
	const (
		defaultSleep = 60
		defaultBatch = 10
	)
	sleepInterval = getEnvInt("SCRAPER_SLEEP", defaultSleep)
	batchSize = getEnvInt("SCRAPER_BATCH", defaultBatch)
}
// getEnvInt returns the environment variable key parsed as an int, or
// defaultValue when the variable is unset, empty, or not a valid integer.
func getEnvInt(key string, defaultValue int) int {
	raw := os.Getenv(key)
	switch n, err := strconv.Atoi(raw); {
	case raw == "":
		return defaultValue
	case err != nil:
		return defaultValue
	default:
		return n
	}
}
// getActiveURLs returns every row of fuentes_url whose activo flag is true.
//
// A scan or iteration failure is returned as an error instead of being
// skipped, so the caller never receives a silently truncated list.
func getActiveURLs(ctx context.Context) ([]URLSource, error) {
	rows, err := pool.Query(ctx, `
SELECT id, nombre, url, categoria_id, pais_id, idioma, activo
FROM fuentes_url
WHERE activo = true
`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var sources []URLSource
	for rows.Next() {
		var s URLSource
		if err := rows.Scan(&s.ID, &s.Nombre, &s.URL, &s.CategoriaID, &s.PaisID, &s.Idioma, &s.Active); err != nil {
			return nil, fmt.Errorf("scanning fuentes_url row: %w", err)
		}
		sources = append(sources, s)
	}
	// Next returns false both at end of data and on failure; Err tells them apart.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("reading fuentes_url rows: %w", err)
	}
	return sources, nil
}
// updateSourceStatus records the outcome of a scrape on the fuentes_url row
// sourceID: it sets last_check to NOW() and stores the status, message and
// HTTP code. The Exec error, if any, is returned unchanged.
func updateSourceStatus(ctx context.Context, sourceID int64, status, message string, httpCode int) error {
	const stmt = `
UPDATE fuentes_url
SET last_check = NOW(),
last_status = $1,
status_message = $2,
last_http_code = $3
WHERE id = $4
`
	if _, err := pool.Exec(ctx, stmt, status, message, httpCode, sourceID); err != nil {
		return err
	}
	return nil
}
// extractArticle downloads source.URL (30 second timeout, browser-like
// headers) and extracts an Article from the HTML.
//
// The title comes from og:title, then the "title" meta tag, the first h1,
// and finally the title element. The summary comes from og:description or
// the "description" meta tag, the image from og:image, and the content from
// the first matching common article container. All text is trimmed. The
// summary is capped at 500 bytes; the cut is UTF-8 safe, so a multi-byte
// character straddling the limit is dropped rather than left invalid.
//
// A status other than 200 is returned as the error "HTTP <code>".
func extractArticle(source URLSource) (*Article, error) {
	const maxSummaryBytes = 500

	client := &http.Client{
		Timeout: 30 * time.Second,
	}
	req, err := http.NewRequest("GET", source.URL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
	req.Header.Set("Accept-Language", "en-US,en;q=0.5")

	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}

	// metaContent returns the content attribute of the first match, or "".
	metaContent := func(selector string) string {
		return doc.Find(selector).First().AttrOr("content", "")
	}

	article := &Article{
		URL: source.URL,
	}

	// Extract title
	article.Title = metaContent("meta[property='og:title']")
	if article.Title == "" {
		article.Title = metaContent("meta[name='title']")
	}
	if article.Title == "" {
		article.Title = doc.Find("h1").First().Text()
	}
	if article.Title == "" {
		article.Title = doc.Find("title").First().Text()
	}

	// Extract description/summary
	article.Summary = metaContent("meta[property='og:description']")
	if article.Summary == "" {
		article.Summary = metaContent("meta[name='description']")
	}

	// Extract image
	article.ImageURL = metaContent("meta[property='og:image']")

	// Extract main content - try common selectors
	contentSelectors := []string{
		"article",
		"[role='main']",
		"main",
		".article-content",
		".post-content",
		".entry-content",
		".content",
		"#content",
	}
	for _, sel := range contentSelectors {
		content := doc.Find(sel).First()
		if content.Length() > 0 {
			article.Content = content.Text()
			break
		}
	}

	// Clean up
	article.Title = strings.TrimSpace(article.Title)
	article.Summary = strings.TrimSpace(article.Summary)
	article.Content = strings.TrimSpace(article.Content)

	// Truncate summary if too long
	if len(article.Summary) > maxSummaryBytes {
		// A plain byte slice can end in the middle of a rune; strip the
		// resulting invalid bytes.
		article.Summary = strings.ToValidUTF8(article.Summary[:maxSummaryBytes], "")
	}
	return article, nil
}
// saveArticle inserts article into noticias, using the MD5 hex digest of its
// URL as the primary key. It returns true when a new row was written and
// false, with a nil error, when the article was already stored.
//
// Missing values get fallbacks: source.URL for an empty article URL,
// "Sin título" for an empty title, the first 500 bytes of the content
// (cut UTF-8 safely) for an empty summary, and the current time for a
// missing publication date.
//
// Existence is decided by the insert itself (ON CONFLICT DO NOTHING plus the
// affected-row count) rather than by a separate SELECT, so two concurrent
// saves of the same URL cannot both report success.
func saveArticle(ctx context.Context, source URLSource, article *Article) (bool, error) {
	const maxSummaryBytes = 500

	finalURL := article.URL
	if finalURL == "" {
		finalURL = source.URL
	}

	// Generate ID from URL
	articleID := fmt.Sprintf("%x", md5.Sum([]byte(finalURL)))

	title := article.Title
	if title == "" {
		title = "Sin título"
	}

	summary := article.Summary
	if summary == "" && article.Content != "" {
		summary = article.Content
		if len(summary) > maxSummaryBytes {
			// A plain byte slice can end in the middle of a rune; strip the
			// resulting invalid bytes.
			summary = strings.ToValidUTF8(summary[:maxSummaryBytes], "")
		}
	}

	pubDate := time.Now()
	if article.PubDate != nil {
		pubDate = *article.PubDate
	}

	tag, err := pool.Exec(ctx, `
INSERT INTO noticias (
id, titulo, resumen, url, fecha, imagen_url,
fuente_nombre, categoria_id, pais_id
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
ON CONFLICT (id) DO NOTHING
`, articleID, title, summary, finalURL, pubDate, article.ImageURL,
		source.Nombre, source.CategoriaID, source.PaisID)
	if err != nil {
		return false, err
	}
	return tag.RowsAffected() > 0, nil
}
// processSource scrapes a single source, stores the extracted article and
// records the outcome for the source through updateSourceStatus.
//
// Recorded statuses:
//   - "ERROR" / "ERROR_HTTP": extraction failed ("ERROR_HTTP" when the error
//     text contains "HTTP").
//   - "ERROR_PARSE": the page yielded no title.
//   - "ERROR_DB": the article could not be saved.
//   - "OK": the article was saved or was already stored.
//
// Error messages passed to updateSourceStatus are capped at 200 bytes.
func processSource(ctx context.Context, source URLSource) {
	logger.Printf("Processing: %s (%s)", source.Nombre, source.URL)

	article, err := extractArticle(source)
	if err != nil {
		logger.Printf("Error extracting article from %s: %v", source.URL, err)
		status := "ERROR"
		if strings.Contains(err.Error(), "HTTP") {
			status = "ERROR_HTTP"
		}
		updateSourceStatus(ctx, source.ID, status, truncateStatusMessage(err.Error(), 200), 0)
		return
	}

	if article.Title == "" {
		logger.Printf("No title found for %s", source.URL)
		updateSourceStatus(ctx, source.ID, "ERROR_PARSE", "No title extracted", 200)
		return
	}

	saved, err := saveArticle(ctx, source, article)
	if err != nil {
		logger.Printf("Error saving article: %v", err)
		updateSourceStatus(ctx, source.ID, "ERROR_DB", truncateStatusMessage(err.Error(), 200), 0)
		return
	}

	if saved {
		logger.Printf("Saved: %s", article.Title)
		updateSourceStatus(ctx, source.ID, "OK", "News created successfully", 200)
		return
	}
	logger.Printf("Already exists: %s", article.Title)
	updateSourceStatus(ctx, source.ID, "OK", "News already exists", 200)
}

// truncateStatusMessage returns msg unchanged when it fits in limit bytes and
// otherwise its first limit bytes, minus any rune cut in half by the
// truncation. Unlike a bare msg[:limit] it never panics on short messages.
func truncateStatusMessage(msg string, limit int) string {
	if len(msg) <= limit {
		return msg
	}
	return strings.ToValidUTF8(msg[:limit], "")
}
// main runs the scraper worker: it loads the configuration, connects to
// PostgreSQL and then, on every tick of sleepInterval seconds, fetches the
// active URLs and processes them one by one with a two second pause between
// sources.
//
// The loop never returns; the process ends when SIGINT or SIGTERM arrives.
func main() {
	loadConfig()
	logger.Println("Starting Scraper Worker")

	cfg := workers.LoadDBConfig()
	if err := workers.Connect(cfg); err != nil {
		logger.Fatalf("Failed to connect to database: %v", err)
	}
	pool = workers.GetPool()
	defer workers.Close()
	logger.Println("Connected to PostgreSQL")

	ctx := context.Background()

	// Handle shutdown. os.Exit skips deferred calls, so the database
	// connection is closed explicitly before exiting.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		<-sigChan
		logger.Println("Shutting down...")
		workers.Close()
		os.Exit(0)
	}()

	logger.Printf("Config: sleep=%ds, batch=%d", sleepInterval, batchSize)

	ticker := time.NewTicker(time.Duration(sleepInterval) * time.Second)
	defer ticker.Stop()

	// The first batch runs after one full interval has elapsed.
	for range ticker.C {
		sources, err := getActiveURLs(ctx)
		if err != nil {
			logger.Printf("Error fetching URLs: %v", err)
			continue
		}
		if len(sources) == 0 {
			logger.Println("No active URLs to process")
			continue
		}

		logger.Printf("Processing %d sources", len(sources))
		for _, source := range sources {
			processSource(ctx, source)
			time.Sleep(2 * time.Second) // Rate limiting
		}
	}
}

190
backend/cmd/server/main.go Normal file
View file

@ -0,0 +1,190 @@
package main
import (
"context"
"fmt"
"log"
"os"
"os/signal"
"syscall"
"github.com/gin-gonic/gin"
"github.com/rss2/backend/internal/cache"
"github.com/rss2/backend/internal/config"
"github.com/rss2/backend/internal/db"
"github.com/rss2/backend/internal/handlers"
"github.com/rss2/backend/internal/middleware"
"github.com/rss2/backend/internal/services"
)
// initDB performs the idempotent schema setup the server needs at start-up:
// it creates the entity_aliases and config tables, adds the role column to
// users and seeds the default translator settings.
//
// Every step is best effort: a failure is logged as a warning and the
// remaining steps still run.
func initDB() {
	ctx := context.Background()
	pool := db.GetPool()

	// Create the entity_aliases table if it does not exist.
	_, err := pool.Exec(ctx, `
CREATE TABLE IF NOT EXISTS entity_aliases (
id SERIAL PRIMARY KEY,
canonical_name VARCHAR(255) NOT NULL,
alias VARCHAR(255) NOT NULL,
tipo VARCHAR(50) NOT NULL CHECK (tipo IN ('persona', 'organizacion', 'lugar', 'tema')),
created_at TIMESTAMP DEFAULT NOW(),
UNIQUE(alias, tipo)
)
`)
	if err != nil {
		log.Printf("Warning: Could not create entity_aliases table: %v", err)
	} else {
		log.Println("Table entity_aliases ready")
	}

	// Add the role column to users if it does not exist.
	_, err = pool.Exec(ctx, `
ALTER TABLE users ADD COLUMN IF NOT EXISTS role VARCHAR(20) DEFAULT 'user'
`)
	if err != nil {
		log.Printf("Warning: Could not add role column: %v", err)
	} else {
		log.Println("Column role ready")
	}

	// Create the configuration table if it does not exist.
	_, err = pool.Exec(ctx, `
CREATE TABLE IF NOT EXISTS config (
key VARCHAR(100) PRIMARY KEY,
value TEXT,
updated_at TIMESTAMP DEFAULT NOW()
)
`)
	if err != nil {
		log.Printf("Warning: Could not create config table: %v", err)
	} else {
		log.Println("Table config ready")
	}

	// Seed the default configuration; existing keys are left untouched by
	// ON CONFLICT DO NOTHING. Failures are reported instead of being dropped.
	defaults := []struct {
		key string
		sql string
	}{
		{"translator_type", `
INSERT INTO config (key, value) VALUES ('translator_type', 'cpu')
ON CONFLICT (key) DO NOTHING
`},
		{"translator_workers", `
INSERT INTO config (key, value) VALUES ('translator_workers', '2')
ON CONFLICT (key) DO NOTHING
`},
		{"translator_status", `
INSERT INTO config (key, value) VALUES ('translator_status', 'stopped')
ON CONFLICT (key) DO NOTHING
`},
	}
	for _, d := range defaults {
		if _, err := pool.Exec(ctx, d.sql); err != nil {
			log.Printf("Warning: Could not insert default config %s: %v", d.key, err)
		}
	}
}
// main starts the HTTP API server: it loads the configuration, connects to
// PostgreSQL (fatal on failure) and Redis (warning on failure), runs initDB,
// initialises the services package, registers every route under /api and
// finally blocks until SIGINT or SIGTERM is received.
func main() {
	cfg := config.Load()

	// The database is mandatory: the process exits if it cannot connect.
	if err := db.Connect(cfg.DatabaseURL); err != nil {
		log.Fatalf("Failed to connect to database: %v", err)
	}
	defer db.Close()
	log.Println("Connected to PostgreSQL")

	// Auto-setup DB tables
	initDB()

	// A Redis connection failure is only logged; start-up continues without it.
	if err := cache.Connect(cfg.RedisURL); err != nil {
		log.Printf("Warning: Failed to connect to Redis: %v", err)
	} else {
		defer cache.Close()
		log.Println("Connected to Redis")
	}

	services.Init(cfg)

	r := gin.Default()
	r.Use(middleware.CORSMiddleware())
	r.Use(middleware.LoggerMiddleware())

	// Health probe registered outside the /api group.
	r.GET("/health", func(c *gin.Context) {
		c.JSON(200, gin.H{"status": "ok"})
	})

	api := r.Group("/api")
	{
		// Serve static images downloaded by wiki_worker
		api.StaticFS("/wiki-images", gin.Dir("/app/data/wiki_images", false))

		// Authentication endpoints registered without AuthRequired.
		api.POST("/auth/login", handlers.Login)
		api.POST("/auth/register", handlers.Register)
		api.GET("/auth/check-first-user", handlers.CheckFirstUser)

		// News: reads carry no auth middleware, deletion uses AuthRequired.
		news := api.Group("/news")
		{
			news.GET("", handlers.GetNews)
			news.GET("/:id", handlers.GetNewsByID)
			news.DELETE("/:id", middleware.AuthRequired(), handlers.DeleteNews)
		}

		// Feeds: GET routes carry no auth middleware, every mutating route
		// uses AuthRequired.
		feeds := api.Group("/feeds")
		{
			feeds.GET("", handlers.GetFeeds)
			feeds.GET("/export", handlers.ExportFeeds)
			feeds.GET("/:id", handlers.GetFeedByID)
			feeds.POST("", middleware.AuthRequired(), handlers.CreateFeed)
			feeds.POST("/import", middleware.AuthRequired(), handlers.ImportFeeds)
			feeds.PUT("/:id", middleware.AuthRequired(), handlers.UpdateFeed)
			feeds.DELETE("/:id", middleware.AuthRequired(), handlers.DeleteFeed)
			feeds.POST("/:id/toggle", middleware.AuthRequired(), handlers.ToggleFeedActive)
			feeds.POST("/:id/reactivate", middleware.AuthRequired(), handlers.ReactivateFeed)
		}

		// Read-only endpoints registered without auth middleware.
		api.GET("/search", handlers.SearchNews)
		api.GET("/entities", handlers.GetEntities)
		api.GET("/stats", handlers.GetStats)
		api.GET("/categories", handlers.GetCategories)
		api.GET("/countries", handlers.GetCountries)

		// Admin: every route in this group passes through AuthRequired and
		// then AdminRequired.
		admin := api.Group("/admin")
		admin.Use(middleware.AuthRequired(), middleware.AdminRequired())
		{
			admin.POST("/aliases", handlers.CreateAlias)
			admin.GET("/aliases/export", handlers.ExportAliases)
			admin.POST("/aliases/import", handlers.ImportAliases)
			admin.POST("/entities/retype", handlers.PatchEntityTipo)
			admin.GET("/backup", handlers.BackupDatabase)
			admin.GET("/backup/news", handlers.BackupNewsZipped)
			admin.GET("/users", handlers.GetUsers)
			admin.POST("/users/:id/promote", handlers.PromoteUser)
			admin.POST("/users/:id/demote", handlers.DemoteUser)
			admin.POST("/reset-db", handlers.ResetDatabase)
			admin.GET("/workers/status", handlers.GetWorkerStatus)
			admin.POST("/workers/config", handlers.SetWorkerConfig)
			admin.POST("/workers/start", handlers.StartWorkers)
			admin.POST("/workers/stop", handlers.StopWorkers)
		}

		// Second /auth group holding the routes that use AuthRequired.
		auth := api.Group("/auth")
		auth.Use(middleware.AuthRequired())
		{
			auth.GET("/me", handlers.GetCurrentUser)
		}
	}

	// The JWT secret is installed after route registration but before the
	// server goroutine below starts.
	middleware.SetJWTSecret(cfg.SecretKey)

	port := cfg.ServerPort
	addr := fmt.Sprintf(":%s", port)

	// The server runs in its own goroutine so main can wait for a signal; an
	// error returned by r.Run terminates the process through log.Fatalf.
	go func() {
		log.Printf("Server starting on %s", addr)
		if err := r.Run(addr); err != nil {
			log.Fatalf("Failed to start server: %v", err)
		}
	}()

	// Block until SIGINT/SIGTERM. Returning from main then runs the deferred
	// db.Close/cache.Close calls.
	// NOTE(review): no explicit HTTP server shutdown is performed here, so
	// in-flight requests are presumably cut off when the process exits.
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	<-quit
	log.Println("Shutting down server...")
}

383
backend/cmd/topics/main.go Normal file
View file

@ -0,0 +1,383 @@
package main
import (
"context"
"log"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"time"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rss2/backend/internal/workers"
)
// Package-level state of the topics worker.
var (
	// logger is created in init with the "[TOPICS] " prefix.
	logger *log.Logger
	// dbPool is assigned in main from workers.GetPool().
	dbPool *pgxpool.Pool
	// sleepSec is the pause, in seconds, used by the main loop; loadConfig
	// overrides it from TOPICS_SLEEP.
	sleepSec = 10
	// batchSz is the maximum number of news rows fetched per batch; loadConfig
	// overrides it from TOPICS_BATCH.
	batchSz = 500
)

// Topic is a row of the topics table as loaded by loadTopics.
type Topic struct {
	// ID is the topics.id value.
	ID int64
	// Weight multiplies the number of matched keywords to produce the score.
	Weight int
	// Keywords holds the comma-separated topics.keywords value, split,
	// trimmed and lowercased.
	Keywords []string
}

// Country is a row of the paises table as loaded by loadCountries.
type Country struct {
	// ID is the paises.id value.
	ID int64
	// Name is the paises.nombre value.
	Name string
	// Keywords holds the lowercased name plus any hard-coded aliases.
	Keywords []string
}
// init builds the worker's logger, which writes to standard output with the
// "[TOPICS] " prefix and the standard date/time flags.
func init() {
	const prefix = "[TOPICS] "
	logger = log.New(os.Stdout, prefix, log.LstdFlags)
}
// loadConfig refreshes the tunables from the environment: TOPICS_SLEEP sets
// sleepSec (default 10) and TOPICS_BATCH sets batchSz (default 500).
func loadConfig() {
	const (
		defaultSleep = 10
		defaultBatch = 500
	)
	sleepSec = getEnvInt("TOPICS_SLEEP", defaultSleep)
	batchSz = getEnvInt("TOPICS_BATCH", defaultBatch)
}
// getEnvInt returns the environment variable key parsed as a decimal integer.
// defaultValue is returned when the variable is unset, empty or not a valid
// integer.
func getEnvInt(key string, defaultValue int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	parsed, err := strconv.Atoi(raw)
	if err != nil {
		return defaultValue
	}
	return parsed
}
// ensureSchema creates the topics and news_topics tables and adds the
// topics_processed column to noticias when they are missing. The statements
// run in order and the first failure is returned.
func ensureSchema(ctx context.Context) error {
	statements := []string{
		`
CREATE TABLE IF NOT EXISTS topics (
id SERIAL PRIMARY KEY,
slug VARCHAR(50) UNIQUE NOT NULL,
name VARCHAR(100) NOT NULL,
weight INTEGER DEFAULT 1,
keywords TEXT,
group_name VARCHAR(50)
);
`,
		`
CREATE TABLE IF NOT EXISTS news_topics (
noticia_id VARCHAR(32) REFERENCES noticias(id) ON DELETE CASCADE,
topic_id INTEGER REFERENCES topics(id) ON DELETE CASCADE,
score INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT NOW(),
PRIMARY KEY (noticia_id, topic_id)
);
`,
		`
ALTER TABLE noticias ADD COLUMN IF NOT EXISTS topics_processed BOOLEAN DEFAULT FALSE;
`,
	}

	for _, stmt := range statements {
		if _, err := dbPool.Exec(ctx, stmt); err != nil {
			return err
		}
	}
	return nil
}
// loadTopics reads every row of the topics table.
//
// The comma-separated keywords column is split, trimmed and lowercased.
// Empty entries (an empty column, a trailing comma, ",,") are dropped because
// an empty keyword would match every text. A NULL keywords column yields a
// topic without keywords.
//
// Rows that fail to scan are logged and skipped; an iteration error reported
// by the driver is returned.
func loadTopics(ctx context.Context) ([]Topic, error) {
	rows, err := dbPool.Query(ctx, "SELECT id, weight, keywords FROM topics")
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var topics []Topic
	for rows.Next() {
		var t Topic
		var kwStr *string
		if err := rows.Scan(&t.ID, &t.Weight, &kwStr); err != nil {
			logger.Printf("Error scanning topic row: %v", err)
			continue
		}
		if kwStr != nil {
			for _, kw := range strings.Split(*kwStr, ",") {
				kw = strings.ToLower(strings.TrimSpace(kw))
				if kw == "" {
					continue
				}
				t.Keywords = append(t.Keywords, kw)
			}
		}
		topics = append(topics, t)
	}
	// Next returning false may mean an error rather than the end of the
	// result set.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return topics, nil
}
// loadCountries reads every row of the paises table and attaches the matching
// keywords to each country: the lowercased country name followed by the
// hard-coded aliases registered for that exact name.
//
// An alias equal to the lowercased name is not added twice (it would count
// double during matching), and an empty name contributes no keyword because
// an empty keyword would match every text.
//
// Rows that fail to scan are logged and skipped; an iteration error reported
// by the driver is returned.
func loadCountries(ctx context.Context) ([]Country, error) {
	rows, err := dbPool.Query(ctx, "SELECT id, nombre FROM paises")
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Extra lowercase keywords keyed by the country name as stored in paises.
	aliases := map[string][]string{
		"Estados Unidos": {"eeuu", "ee.uu.", "usa", "estadounidense", "washington"},
		"Rusia":          {"ruso", "rusa", "moscú", "kremlin"},
		"China":          {"chino", "china", "pekin", "beijing"},
		"Ucrania":        {"ucraniano", "kiev", "kyiv"},
		"Israel":         {"israelí", "tel aviv", "jerusalén"},
		"España":         {"español", "madrid"},
		"Reino Unido":    {"uk", "londres", "británico"},
		"Francia":        {"francés", "parís"},
		"Alemania":       {"alemán", "berlín"},
		"Palestina":      {"palestino", "gaza", "cisjordania"},
		"Irán":           {"iraní", "teherán"},
	}

	var countries []Country
	for rows.Next() {
		var c Country
		if err := rows.Scan(&c.ID, &c.Name); err != nil {
			logger.Printf("Error scanning country row: %v", err)
			continue
		}

		name := strings.ToLower(c.Name)
		if name != "" {
			c.Keywords = append(c.Keywords, name)
		}
		for _, alias := range aliases[c.Name] {
			if alias == name {
				continue
			}
			c.Keywords = append(c.Keywords, alias)
		}
		countries = append(countries, c)
	}
	// Next returning false may mean an error rather than the end of the
	// result set.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return countries, nil
}
// NewsItem is the subset of a noticias row read by fetchPendingNews.
type NewsItem struct {
	// ID is the noticias.id value.
	ID string
	// Titulo is the noticias.titulo value; it is scanned into a pointer, so a
	// NULL column yields nil.
	Titulo *string
	// Resumen is the noticias.resumen value; it is scanned into a pointer, so
	// a NULL column yields nil.
	Resumen *string
}
// fetchPendingNews returns up to limit rows of noticias whose topics_processed
// flag is FALSE, newest first (ordered by fecha descending).
//
// Rows that fail to scan are logged and skipped; an iteration error reported
// by the driver is returned.
func fetchPendingNews(ctx context.Context, limit int) ([]NewsItem, error) {
	rows, err := dbPool.Query(ctx, `
SELECT id, titulo, resumen
FROM noticias
WHERE topics_processed = FALSE
ORDER BY fecha DESC
LIMIT $1
`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var items []NewsItem
	for rows.Next() {
		var n NewsItem
		if err := rows.Scan(&n.ID, &n.Titulo, &n.Resumen); err != nil {
			logger.Printf("Error scanning news row: %v", err)
			continue
		}
		items = append(items, n)
	}
	// Next returning false may mean an error rather than the end of the
	// result set.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return items, nil
}
// findTopics returns one entry for every topic that has at least one keyword
// occurring in text. Matching is a case-insensitive substring search: text is
// lowercased here and keywords are expected to be lowercase already.
//
// Score is the topic weight multiplied by the number of distinct keyword
// entries that matched. Empty keywords are ignored, because the empty string
// is a substring of every text and would make the topic match everything.
func findTopics(text string, topics []Topic) []struct {
	TopicID int64
	Score   int
} {
	text = strings.ToLower(text)

	var matches []struct {
		TopicID int64
		Score   int
	}
	for _, topic := range topics {
		count := 0
		for _, kw := range topic.Keywords {
			if kw == "" {
				continue
			}
			if strings.Contains(text, kw) {
				count++
			}
		}
		if count > 0 {
			matches = append(matches, struct {
				TopicID int64
				Score   int
			}{topic.ID, topic.Weight * count})
		}
	}
	return matches
}
// findBestCountry returns a pointer to the ID of the country whose keywords
// occur most often in text, or nil when no keyword of any country occurs.
// Matching is a substring search on the lowercased text; on a tie the country
// that appears first in countries wins.
func findBestCountry(text string, countries []Country) *int64 {
	lowered := strings.ToLower(text)

	var (
		winner   int64
		topScore int
	)
	for i := range countries {
		hits := 0
		for _, keyword := range countries[i].Keywords {
			if !strings.Contains(lowered, keyword) {
				continue
			}
			hits++
		}
		if hits > topScore {
			topScore, winner = hits, countries[i].ID
		}
	}

	if topScore == 0 {
		return nil
	}
	return &winner
}
// processBatch classifies one batch of pending news items.
//
// It fetches up to batchSz unprocessed rows, matches the concatenated title
// and summary of each against topics and countries, upserts the resulting
// news_topics rows, updates pais_id for items with a detected country and
// finally flags every fetched item as topics_processed.
//
// Failures of individual topic inserts or country updates are only logged.
// The function returns the number of fetched items, or 0 and an error when
// fetching or the final flag update fails.
func processBatch(ctx context.Context, topics []Topic, countries []Country) (int, error) {
	pending, err := fetchPendingNews(ctx, batchSz)
	if err != nil {
		return 0, err
	}
	if len(pending) == 0 {
		return 0, nil
	}

	type topicRow struct {
		newsID  string
		topicID int64
		score   int
	}
	type countryRow struct {
		countryID int64
		newsID    string
	}

	var (
		topicRows   []topicRow
		countryRows []countryRow
		doneIDs     []string
	)

	// First pass: pure in-memory matching, no database access.
	for i := range pending {
		news := &pending[i]

		text := ""
		if news.Titulo != nil {
			text = *news.Titulo
		}
		if news.Resumen != nil {
			text = text + " " + *news.Resumen
		}

		for _, hit := range findTopics(text, topics) {
			topicRows = append(topicRows, topicRow{news.ID, hit.TopicID, hit.Score})
		}
		if best := findBestCountry(text, countries); best != nil {
			countryRows = append(countryRows, countryRow{*best, news.ID})
		}
		doneIDs = append(doneIDs, news.ID)
	}

	// Second pass: write topic relations, one statement per match.
	for _, row := range topicRows {
		if _, execErr := dbPool.Exec(ctx, `
INSERT INTO news_topics (noticia_id, topic_id, score)
VALUES ($1, $2, $3)
ON CONFLICT (noticia_id, topic_id) DO UPDATE SET score = EXCLUDED.score
`, row.newsID, row.topicID, row.score); execErr != nil {
			logger.Printf("Error inserting topic: %v", execErr)
		}
	}

	// Third pass: write the detected countries.
	for _, row := range countryRows {
		if _, execErr := dbPool.Exec(ctx, `
UPDATE noticias SET pais_id = $1 WHERE id = $2
`, row.countryID, row.newsID); execErr != nil {
			logger.Printf("Error updating country: %v", execErr)
		}
	}

	// Finally flag the whole batch so it is not fetched again.
	if len(doneIDs) > 0 {
		if _, execErr := dbPool.Exec(ctx, `
UPDATE noticias SET topics_processed = TRUE WHERE id = ANY($1)
`, doneIDs); execErr != nil {
			return 0, execErr
		}
	}

	return len(pending), nil
}
// main runs the topics worker: it loads the configuration, connects to
// PostgreSQL, makes sure the schema exists and then classifies pending news
// in batches forever.
//
// After a pass that processed a full batch the next pass starts immediately,
// since more rows are probably waiting. After any other pass (error, no
// topics, partial or empty batch) the worker sleeps sleepSec seconds once.
//
// The loop never returns; the process ends when SIGINT or SIGTERM arrives.
func main() {
	loadConfig()
	logger.Println("Starting Topics Worker")

	cfg := workers.LoadDBConfig()
	if err := workers.Connect(cfg); err != nil {
		logger.Fatalf("Failed to connect to database: %v", err)
	}
	dbPool = workers.GetPool()
	defer workers.Close()

	ctx := context.Background()

	// Ensure schema. A failure is logged and the worker keeps going; the
	// passes below will report their own errors if tables are missing.
	if err := ensureSchema(ctx); err != nil {
		logger.Printf("Error ensuring schema: %v", err)
	}

	// os.Exit skips deferred calls, so the database connection is closed
	// explicitly before exiting.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		<-sigChan
		logger.Println("Shutting down...")
		workers.Close()
		os.Exit(0)
	}()

	logger.Printf("Config: sleep=%ds, batch=%d", sleepSec, batchSz)

	pause := time.Duration(sleepSec) * time.Second
	for {
		if runTopicsPass(ctx) {
			continue
		}
		time.Sleep(pause)
	}
}

// runTopicsPass reloads topics and countries and processes one batch. It
// reports whether a full batch was processed, i.e. whether the caller should
// run another pass without sleeping. Errors are logged and reported as false.
func runTopicsPass(ctx context.Context) bool {
	topics, err := loadTopics(ctx)
	if err != nil {
		logger.Printf("Error loading topics: %v", err)
		return false
	}
	if len(topics) == 0 {
		logger.Println("No topics found in DB")
		return false
	}

	countries, err := loadCountries(ctx)
	if err != nil {
		logger.Printf("Error loading countries: %v", err)
		return false
	}

	count, err := processBatch(ctx, topics, countries)
	if err != nil {
		logger.Printf("Error processing batch: %v", err)
		return false
	}
	if count > 0 {
		logger.Printf("Processed %d news items", count)
	}

	// count > 0 guards against a busy loop when batchSz is configured as 0.
	return count > 0 && count >= batchSz
}

View file

@ -0,0 +1,267 @@
package main
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"net/url"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"
"time"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rss2/backend/internal/workers"
)
// Package-level state of the wiki worker.
var (
	// logger is created in init with the "[WIKI_WORKER] " prefix.
	logger *log.Logger
	// pool is assigned in main from workers.GetPool().
	pool *pgxpool.Pool
	// sleepInterval is the number of seconds main sleeps when no tags are
	// pending; main reads an override from WIKI_SLEEP.
	sleepInterval = 30
	// batchSize is the LIMIT used by getPendingTags.
	batchSize = 50
	// imagesDir is the directory downloaded thumbnails are written to; main
	// creates it at start-up.
	imagesDir = "/app/data/wiki_images"
)

// WikiSummary holds the fields decoded from the JSON body returned by the
// Wikipedia REST page-summary endpoint queried in fetchWikipediaInfo.
type WikiSummary struct {
	// Type is compared against "disambiguation" to discard such pages.
	Type         string `json:"type"`
	Title        string `json:"title"`
	DisplayTitle string `json:"displaytitle"`
	// Extract is the text stored as tags.wiki_summary.
	Extract     string `json:"extract"`
	ContentUrls struct {
		Desktop struct {
			// Page is the URL stored as tags.wiki_url.
			Page string `json:"page"`
		} `json:"desktop"`
	} `json:"content_urls"`
	// Thumbnail is a pointer so a response without a thumbnail decodes to nil.
	Thumbnail *struct {
		Source string `json:"source"`
		Width  int    `json:"width"`
		Height int    `json:"height"`
	} `json:"thumbnail"`
}

// Tag is a row of the tags table as selected by getPendingTags
// (columns id, valor and tipo).
type Tag struct {
	ID    int64
	Valor string
	Tipo  string
}
// init builds the worker's logger, which writes to standard output with the
// "[WIKI_WORKER] " prefix and the standard date/time flags.
func init() {
	const prefix = "[WIKI_WORKER] "
	logger = log.New(os.Stdout, prefix, log.LstdFlags)
}
// getPendingTags returns up to batchSize tags of type 'persona' or
// 'organizacion' whose wiki_checked flag is FALSE, ordered by the number of
// tags_noticia rows that reference them (most referenced first) and then by
// descending id.
//
// Rows that fail to scan are logged and skipped; an iteration error reported
// by the driver is returned.
func getPendingTags(ctx context.Context) ([]Tag, error) {
	rows, err := pool.Query(ctx, `
SELECT t.id, t.valor, t.tipo
FROM tags t
LEFT JOIN (
SELECT tag_id, COUNT(*) as cnt
FROM tags_noticia
GROUP BY tag_id
) c ON c.tag_id = t.id
WHERE t.tipo IN ('persona', 'organizacion')
AND t.wiki_checked = FALSE
ORDER BY COALESCE(c.cnt, 0) DESC, t.id DESC
LIMIT $1
`, batchSize)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var tags []Tag
	for rows.Next() {
		var t Tag
		if err := rows.Scan(&t.ID, &t.Valor, &t.Tipo); err != nil {
			logger.Printf("Error leyendo fila de tags: %v", err)
			continue
		}
		tags = append(tags, t)
	}
	// Next returning false may mean an error rather than the end of the
	// result set.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return tags, nil
}
// downloadImage fetches imgURL with a 15 second timeout and writes the
// response body to destPath.
//
// It returns an error when the request cannot be built or sent, when the
// server answers with a status other than 200 (formatted as "HTTP <code>"),
// or when the file cannot be created, written or closed. If writing fails
// after the file was created, the incomplete file is removed so a truncated
// image is never left behind.
func downloadImage(imgURL, destPath string) error {
	client := &http.Client{Timeout: 15 * time.Second}

	req, err := http.NewRequest("GET", imgURL, nil)
	if err != nil {
		return err
	}
	req.Header.Set("User-Agent", "RSS2-WikiWorker/1.0 (https://github.com/proyecto/rss2)")

	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		return fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	out, err := os.Create(destPath)
	if err != nil {
		return err
	}

	_, copyErr := io.Copy(out, resp.Body)
	// Close explicitly (not deferred): a failed close can mean the data never
	// reached the disk, so its error must be reported.
	closeErr := out.Close()

	if copyErr != nil || closeErr != nil {
		// Best-effort cleanup; the copy/close error is the one worth returning.
		_ = os.Remove(destPath)
		if copyErr != nil {
			return copyErr
		}
		return closeErr
	}
	return nil
}
// fetchWikipediaInfo queries the Spanish Wikipedia REST summary endpoint for
// valor (trimmed, spaces replaced by underscores, path-escaped) with a
// 10 second timeout.
//
// Results:
//   - (summary, nil) when the page exists and is not a disambiguation page;
//   - (nil, nil) when the server answers 404 or the page is a disambiguation;
//   - (nil, error) for transport errors, HTTP 429, any other non-200 status
//     or an undecodable body.
func fetchWikipediaInfo(valor string) (*WikiSummary, error) {
	// Normalize the value to be wiki-compatible.
	trimmed := strings.TrimSpace(valor)
	pageTitle := url.PathEscape(strings.ReplaceAll(trimmed, " ", "_"))
	endpoint := fmt.Sprintf("https://es.wikipedia.org/api/rest_v1/page/summary/%s", pageTitle)

	req, err := http.NewRequest("GET", endpoint, nil)
	if err != nil {
		return nil, err
	}
	// Per MediaWiki API policy: https://meta.wikimedia.org/wiki/User-Agent_policy
	req.Header.Set("User-Agent", "RSS2-WikiWorker/1.0 (pietrelinux@gmail.com)")

	httpClient := &http.Client{Timeout: 10 * time.Second}
	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case 200:
		// Fall through to decoding below.
	case 429:
		return nil, fmt.Errorf("HTTP 429: Too Many Requests (Rate Limited)")
	case 404:
		// Not found, but handled successfully without error.
		return nil, nil
	default:
		return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	result := new(WikiSummary)
	if err := json.NewDecoder(resp.Body).Decode(result); err != nil {
		return nil, err
	}

	// Disambiguation pages are treated as not found to avoid attaching the
	// wrong summary to a tag.
	if result.Type == "disambiguation" {
		return nil, nil
	}
	return result, nil
}
// processTag enriches a single tag with Wikipedia data.
//
// When the lookup fails with an error the tag is left untouched, so it is
// picked up again on a later run. When Wikipedia has no usable page (not
// found, disambiguation or empty extract) the tag is only flagged as
// wiki_checked. Otherwise the extract, the page URL and an image path are
// stored and the tag is flagged as wiki_checked.
//
// The image path is "/api/wiki-images/<file>" when the thumbnail could be
// downloaded into imagesDir, the remote thumbnail URL when the download
// failed, and NULL when the page has no thumbnail.
func processTag(ctx context.Context, tag Tag) {
	logger.Printf("Procesando tag %d: %s", tag.ID, tag.Valor)

	summary, err := fetchWikipediaInfo(tag.Valor)
	if err != nil {
		logger.Printf("Error al consultar Wikipedia para %s: %v", tag.Valor, err)
		return
	}

	if summary == nil || summary.Extract == "" {
		// Not found or disambiguation: flag the tag so it is not retried.
		// A failure here is reported, because the tag would otherwise be
		// queried again and again without any trace in the logs.
		if _, err := pool.Exec(ctx, "UPDATE tags SET wiki_checked = TRUE WHERE id = $1", tag.ID); err != nil {
			logger.Printf("Error al marcar tag %d como revisado: %v", tag.ID, err)
		}
		logger.Printf("No se encontraron resultados válidos en Wikipedia para: %s", tag.Valor)
		return
	}

	var localImagePath *string
	if summary.Thumbnail != nil && summary.Thumbnail.Source != "" {
		// Only .png is detected from the URL; every other thumbnail is saved
		// with a .jpg extension.
		ext := ".jpg"
		if strings.HasSuffix(strings.ToLower(summary.Thumbnail.Source), ".png") {
			ext = ".png"
		}
		fileName := fmt.Sprintf("wiki_%d%s", tag.ID, ext)
		destPath := filepath.Join(imagesDir, fileName)

		if err := downloadImage(summary.Thumbnail.Source, destPath); err != nil {
			logger.Printf("Error descargando imagen para %s: %v", tag.Valor, err)
			// Keep the external URL as a fallback when the download fails.
			src := summary.Thumbnail.Source
			localImagePath = &src
		} else {
			relativePath := "/api/wiki-images/" + fileName
			localImagePath = &relativePath
		}
	}

	wikiURL := summary.ContentUrls.Desktop.Page

	_, err = pool.Exec(ctx, `
UPDATE tags
SET wiki_summary = $1,
wiki_url = $2,
image_path = $3,
wiki_checked = TRUE
WHERE id = $4
`, summary.Extract, wikiURL, localImagePath, tag.ID)
	if err != nil {
		logger.Printf("Error al actualizar la base de datos para tag %d: %v", tag.ID, err)
		return
	}
	logger.Printf("Actualizado con éxito: %s (Imagen: %v)", tag.Valor, localImagePath != nil)
}
// main runs the wiki worker: it reads WIKI_SLEEP, creates the image
// directory, connects to PostgreSQL and then loops forever, fetching batches
// of pending tags and enriching them one by one with a three second pause
// between tags.
//
// WIKI_SLEEP, when set to a positive integer, replaces the default number of
// seconds slept while no tags are pending; any other value is ignored.
//
// The loop never returns; the process ends when SIGINT or SIGTERM arrives.
func main() {
	if val := os.Getenv("WIKI_SLEEP"); val != "" {
		// Sscanf returns the number of parsed items, not the parsed value, so
		// the value is scanned into its own variable and only applied once it
		// has been validated.
		var parsed int
		if n, err := fmt.Sscanf(val, "%d", &parsed); err == nil && n == 1 && parsed > 0 {
			sleepInterval = parsed
		}
	}

	logger.Println("Iniciando Wiki Worker...")

	if err := os.MkdirAll(imagesDir, 0755); err != nil {
		logger.Fatalf("Error creando directorio de imágenes: %v", err)
	}

	cfg := workers.LoadDBConfig()
	if err := workers.Connect(cfg); err != nil {
		logger.Fatalf("Failed to connect to database: %v", err)
	}
	pool = workers.GetPool()
	defer workers.Close()

	ctx := context.Background()

	// os.Exit skips deferred calls, so the database connection is closed
	// explicitly before exiting.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		<-sigChan
		logger.Println("Cerrando gracefully...")
		workers.Close()
		os.Exit(0)
	}()

	logger.Printf("Configuración: sleep=%ds, batch=%d", sleepInterval, batchSize)

	for {
		tags, err := getPendingTags(ctx)
		if err != nil {
			logger.Printf("Error recuperando tags pendientes: %v", err)
			time.Sleep(10 * time.Second)
			continue
		}

		if len(tags) == 0 {
			logger.Printf("No hay tags pendientes. Durmiendo %d segundos...", sleepInterval)
			time.Sleep(time.Duration(sleepInterval) * time.Second)
			continue
		}

		logger.Printf("Recuperados %d tags para procesar...", len(tags))
		for _, tag := range tags {
			processTag(ctx, tag)
			time.Sleep(3 * time.Second) // Increased delay to avoid Wikipedia Rate Limits (429)
		}
	}
}

51
backend/go.mod Normal file
View file

@ -0,0 +1,51 @@
module github.com/rss2/backend
go 1.22
require (
github.com/PuerkitoBio/goquery v1.9.2
github.com/gin-gonic/gin v1.9.1
github.com/golang-jwt/jwt/v5 v5.0.0
github.com/google/uuid v1.6.0
github.com/jackc/pgx/v5 v5.4.3
github.com/mmcdole/gofeed v1.2.1
github.com/redis/go-redis/v9 v9.0.5
golang.org/x/crypto v0.26.0
)
require (
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/bytedance/sonic v1.9.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/gabriel-vasile/mimetype v1.4.2 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.14.0 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
github.com/jackc/puddle/v2 v2.2.1 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/cpuid/v2 v2.2.4 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/leodido/go-urn v1.2.4 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mmcdole/goxpp v1.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pelletier/go-toml/v2 v2.0.8 // indirect
github.com/rogpeppe/go-internal v1.14.1 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.11 // indirect
golang.org/x/arch v0.3.0 // indirect
golang.org/x/net v0.28.0 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/sys v0.26.0 // indirect
golang.org/x/text v0.17.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

156
backend/go.sum Normal file
View file

@ -0,0 +1,156 @@
github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE=
github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/bsm/ginkgo/v2 v2.7.0 h1:ItPMPH90RbmZJt5GtkcNvIRuGEdwlBItdNVoyzaNQao=
github.com/bsm/ginkgo/v2 v2.7.0/go.mod h1:AiKlXPm7ItEHNc/2+OkrNG4E0ITzojb9/xWzvQ9XZ9w=
github.com/bsm/gomega v1.26.0 h1:LhQm+AFcgV2M0WyKroMASzAzCAJVpAxQXv4SaI9a69Y=
github.com/bsm/gomega v1.26.0/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM=
github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s=
github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js=
github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/golang-jwt/jwt/v5 v5.0.0 h1:1n1XNM9hk7O9mnQoNBGolZvzebBQ7p93ULHRc28XJUE=
github.com/golang-jwt/jwt/v5 v5.0.0/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgx/v5 v5.4.3 h1:cxFyXhxlvAifxnkKKdlxv8XqUf59tDlYjnV5YYfsJJY=
github.com/jackc/pgx/v5 v5.4.3/go.mod h1:Ig06C2Vu0t5qXC60W8sqIthScaEnFvojjj9dSljmHRA=
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk=
github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mmcdole/gofeed v1.2.1 h1:tPbFN+mfOLcM1kDF1x2c/N68ChbdBatkppdzf/vDe1s=
github.com/mmcdole/gofeed v1.2.1/go.mod h1:2wVInNpgmC85q16QTTuwbuKxtKkHLCDDtf0dCmnrNr4=
github.com/mmcdole/goxpp v1.1.0 h1:WwslZNF7KNAXTFuzRtn/OKZxFLJAAyOA9w82mDz2ZGI=
github.com/mmcdole/goxpp v1.1.0/go.mod h1:v+25+lT2ViuQ7mVxcncQ8ch1URund48oH+jhjiwEgS8=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/redis/go-redis/v9 v9.0.5 h1:CuQcn5HIEeK7BgElubPP8CGtE0KakrnbBSTLjathl5o=
github.com/redis/go-redis/v9 v9.0.5/go.mod h1:WqMKv5vnQbRuZstUwxQI195wHy+t4PuXDOjzMvcuQHk=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=

72
backend/internal/cache/redis.go vendored Normal file
View file

@ -0,0 +1,72 @@
package cache
import (
"context"
"encoding/json"
"fmt"
"time"
"github.com/redis/go-redis/v9"
)
// Client is the package-wide Redis client. It is nil until Connect assigns it.
var Client *redis.Client
// Connect parses redisURL, stores a new client in the package-level Client and
// checks that the server answers a PING within five seconds.
//
// Client is assigned before the ping, so it stays set even when the ping fails.
func Connect(redisURL string) error {
	options, parseErr := redis.ParseURL(redisURL)
	if parseErr != nil {
		return fmt.Errorf("failed to parse redis URL: %w", parseErr)
	}
	Client = redis.NewClient(options)

	pingCtx, stop := context.WithTimeout(context.Background(), 5*time.Second)
	defer stop()

	pingErr := Client.Ping(pingCtx).Err()
	if pingErr == nil {
		return nil
	}
	return fmt.Errorf("failed to ping redis: %w", pingErr)
}
// Close shuts down the shared Redis client when one has been created.
// The error returned by the client's Close is discarded.
func Close() {
	if Client == nil {
		return
	}
	Client.Close()
}
// GetClient returns the package-level Redis client, which is nil before
// Connect has assigned it.
func GetClient() *redis.Client {
	shared := Client
	return shared
}
// SearchKey builds the cache key for one page of search results in the form
// "search:<query>:<lang>:<page>:<perPage>".
func SearchKey(query, lang string, page, perPage int) string {
	pagePart := fmt.Sprint(page)
	sizePart := fmt.Sprint(perPage)
	return "search:" + query + ":" + lang + ":" + pagePart + ":" + sizePart
}
// NewsKey builds the cache key for a news item in a given language in the
// form "news:<newsID>:<lang>".
func NewsKey(newsID int64, lang string) string {
	return "news:" + fmt.Sprint(newsID) + ":" + lang
}
// FeedKey builds the cache key for a feed in the form "feed:<feedID>".
func FeedKey(feedID int64) string {
	return "feed:" + fmt.Sprint(feedID)
}
// Set JSON-encodes value and stores it under key with the given expiration
// using the package-level Client. It returns the encoding error, if any,
// otherwise the error reported by the Redis SET command.
func Set(ctx context.Context, key string, value interface{}, expiration time.Duration) error {
	payload, err := json.Marshal(value)
	if err == nil {
		err = Client.Set(ctx, key, payload, expiration).Err()
	}
	return err
}
// Get reads the raw string stored under key through the package-level Client
// and returns it together with the command's error.
func Get(ctx context.Context, key string) (string, error) {
	lookup := Client.Get(ctx, key)
	return lookup.Result()
}
// Unmarshal decodes the JSON in data into v; it is a thin wrapper around
// json.Unmarshal.
func Unmarshal(data []byte, v interface{}) error {
	err := json.Unmarshal(data, v)
	return err
}
// Marshal encodes v as JSON; it is a thin wrapper around json.Marshal.
func Marshal(v interface{}) ([]byte, error) {
	encoded, err := json.Marshal(v)
	return encoded, err
}

View file

@ -0,0 +1,66 @@
package config
import (
"os"
"strconv"
"time"
)
// Config holds the settings that Load reads from environment variables.
type Config struct {
	// ServerPort comes from SERVER_PORT.
	ServerPort string
	// DatabaseURL comes from DATABASE_URL.
	DatabaseURL string
	// RedisURL comes from REDIS_URL.
	RedisURL string
	// QdrantHost comes from QDRANT_HOST.
	QdrantHost string
	// QdrantPort comes from QDRANT_PORT.
	QdrantPort int
	// SecretKey comes from SECRET_KEY.
	SecretKey string
	// JWTExpiration comes from JWT_EXPIRATION.
	JWTExpiration time.Duration
	// TranslationURL comes from TRANSLATION_URL.
	TranslationURL string
	// OllamaURL comes from OLLAMA_URL.
	OllamaURL string
	// SpacyURL comes from SPACY_URL.
	SpacyURL string
	// DefaultLang comes from DEFAULT_LANG.
	DefaultLang string
	// NewsPerPage comes from NEWS_PER_PAGE.
	NewsPerPage int
	// RateLimitPerMinute comes from RATE_LIMIT_PER_MINUTE.
	RateLimitPerMinute int
}
// Load builds a Config from environment variables. Every field falls back to
// the default given here when its variable is unset, empty or (for numeric and
// duration values) unparsable.
func Load() *Config {
	cfg := new(Config)

	cfg.ServerPort = getEnv("SERVER_PORT", "8080")
	cfg.DatabaseURL = getEnv("DATABASE_URL", "postgres://rss:rss@localhost:5432/rss")
	cfg.RedisURL = getEnv("REDIS_URL", "redis://localhost:6379")
	cfg.QdrantHost = getEnv("QDRANT_HOST", "localhost")
	cfg.QdrantPort = getEnvInt("QDRANT_PORT", 6333)
	// NOTE(review): the fallback secret is a well-known placeholder; deployments
	// are presumably expected to set SECRET_KEY explicitly — confirm.
	cfg.SecretKey = getEnv("SECRET_KEY", "change-this-secret-key")
	cfg.JWTExpiration = getEnvDuration("JWT_EXPIRATION", 24*time.Hour)
	cfg.TranslationURL = getEnv("TRANSLATION_URL", "http://libretranslate:7790")
	cfg.OllamaURL = getEnv("OLLAMA_URL", "http://ollama:11434")
	cfg.SpacyURL = getEnv("SPACY_URL", "http://spacy:8000")
	cfg.DefaultLang = getEnv("DEFAULT_LANG", "es")
	cfg.NewsPerPage = getEnvInt("NEWS_PER_PAGE", 30)
	cfg.RateLimitPerMinute = getEnvInt("RATE_LIMIT_PER_MINUTE", 60)

	return cfg
}
// getEnv returns the value of the environment variable key, or defaultValue
// when the variable is unset or empty.
func getEnv(key, defaultValue string) string {
	value, found := os.LookupEnv(key)
	if !found || value == "" {
		return defaultValue
	}
	return value
}
// getEnvInt returns the environment variable key parsed as a decimal int, or
// defaultValue when the variable is unset, empty or not a valid integer.
func getEnvInt(key string, defaultValue int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	parsed, err := strconv.Atoi(raw)
	if err != nil {
		return defaultValue
	}
	return parsed
}
// getEnvDuration returns the environment variable key parsed with
// time.ParseDuration, or defaultValue when the variable is unset, empty or not
// a valid duration string.
func getEnvDuration(key string, defaultValue time.Duration) time.Duration {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	parsed, err := time.ParseDuration(raw)
	if err != nil {
		return defaultValue
	}
	return parsed
}

View file

@ -0,0 +1,44 @@
package db
import (
"context"
"fmt"
"time"
"github.com/jackc/pgx/v5/pgxpool"
)
// Pool is the package-wide PostgreSQL connection pool assigned by Connect.
var Pool *pgxpool.Pool
// Connect parses databaseURL, creates a pgx connection pool (25 max / 5 min
// connections, one hour max lifetime, 30 minutes max idle time) and verifies
// it with a ping before publishing it as the package-level Pool.
//
// If the ping fails the freshly created pool is closed, so repeated Connect
// attempts (for example while waiting for the database to start) do not leak
// pools. Pool is only assigned after a successful ping.
func Connect(databaseURL string) error {
	config, err := pgxpool.ParseConfig(databaseURL)
	if err != nil {
		return fmt.Errorf("failed to parse database URL: %w", err)
	}

	config.MaxConns = 25
	config.MinConns = 5
	config.MaxConnLifetime = time.Hour
	config.MaxConnIdleTime = 30 * time.Minute

	// The pool outlives this call, so it is created with a background context
	// rather than the short-lived ping context below.
	pool, err := pgxpool.NewWithConfig(context.Background(), config)
	if err != nil {
		return fmt.Errorf("failed to create pool: %w", err)
	}

	// Bound the connectivity check so an unreachable server cannot block
	// start-up indefinitely.
	pingCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	if err := pool.Ping(pingCtx); err != nil {
		pool.Close()
		return fmt.Errorf("failed to ping database: %w", err)
	}

	Pool = pool
	return nil
}
// Close releases the shared connection pool when one has been created.
func Close() {
	if Pool == nil {
		return
	}
	Pool.Close()
}
// GetPool returns the package-level connection pool.
func GetPool() *pgxpool.Pool {
	shared := Pool
	return shared
}

View file

@ -0,0 +1,760 @@
package handlers
import (
"archive/zip"
"bytes"
"context"
"encoding/csv"
"fmt"
"net/http"
"os"
"os/exec"
"strconv"
"strings"
"time"
"github.com/gin-gonic/gin"
"github.com/rss2/backend/internal/db"
"github.com/rss2/backend/internal/models"
)
// CreateAlias binds a models.EntityAliasRequest from the JSON body and, inside
// a single transaction, upserts the canonical tag, upserts every non-empty
// alias into entity_aliases, and merges any existing tag whose valor equals an
// alias into the canonical tag (re-pointing its tags_noticia rows, then
// deleting the alias tag).
//
// It responds 201 on success, 400 when the body cannot be bound and 500 on any
// database error.
func CreateAlias(c *gin.Context) {
var req models.EntityAliasRequest
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request", "message": err.Error()})
return
}
ctx := c.Request.Context()
tx, err := db.GetPool().Begin(ctx)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction", "message": err.Error()})
return
}
// Deferred so that every early return below undoes the partial work.
// NOTE(review): presumably harmless after a successful Commit — confirm against pgx.
defer tx.Rollback(ctx)
// 1. Ensure the canonical tag exists in tags table
// The no-op DO UPDATE makes RETURNING yield the id for existing rows too.
var canonicalTagId int
err = tx.QueryRow(ctx, `
INSERT INTO tags (valor, tipo) VALUES ($1, $2)
ON CONFLICT (valor, tipo) DO UPDATE SET valor = EXCLUDED.valor
RETURNING id`, req.CanonicalName, req.Tipo).Scan(&canonicalTagId)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to ensure canonical tag", "message": err.Error()})
return
}
for _, alias := range req.Aliases {
// Blank aliases (after trimming) are skipped silently.
alias = strings.TrimSpace(alias)
if alias == "" {
continue
}
// Insert the alias mapping into entity_aliases
_, err = tx.Exec(ctx, `
INSERT INTO entity_aliases (canonical_name, alias, tipo)
VALUES ($1, $2, $3)
ON CONFLICT (alias, tipo) DO UPDATE SET canonical_name = EXCLUDED.canonical_name`,
req.CanonicalName, alias, req.Tipo)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to insert alias", "message": err.Error()})
return
}
// 2. Check if the original alias string actually exists as a tag
// Any error from this lookup (including "no rows") just skips the merge for
// this alias; the error itself is not reported.
var aliasTagId int
err = tx.QueryRow(ctx, "SELECT id FROM tags WHERE valor = $1 AND tipo = $2", alias, req.Tipo).Scan(&aliasTagId)
if err == nil && aliasTagId != 0 && aliasTagId != canonicalTagId {
// 3. Move all mentions in tags_noticia to the canonical tag id safely
// Rows that would duplicate an existing (canonical tag, noticia, traduccion)
// combination are left untouched by the NOT EXISTS guard.
_, err = tx.Exec(ctx, `
UPDATE tags_noticia
SET tag_id = $1
WHERE tag_id = $2 AND NOT EXISTS (
SELECT 1 FROM tags_noticia tn2
WHERE tn2.tag_id = $1 AND tn2.noticia_id = tags_noticia.noticia_id AND tn2.traduccion_id = tags_noticia.traduccion_id
)
`, canonicalTagId, aliasTagId)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to reassign news mentions safely", "message": err.Error()})
return
}
// Delete any remaining orphaned mentions of the alias that couldn't be merged (duplicates)
_, err = tx.Exec(ctx, "DELETE FROM tags_noticia WHERE tag_id = $1", aliasTagId)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete orphaned mentions", "message": err.Error()})
return
}
// 4. Delete the original alias tag
_, err = tx.Exec(ctx, "DELETE FROM tags WHERE id = $1", aliasTagId)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete old tag", "message": err.Error()})
return
}
}
}
if err := tx.Commit(ctx); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": err.Error()})
return
}
// The response echoes the aliases exactly as received, including any that
// were skipped above for being blank.
c.JSON(http.StatusCreated, gin.H{
"message": "Aliases created and metrics merged successfully",
"canonical_name": req.CanonicalName,
"aliases_added": req.Aliases,
"tipo": req.Tipo,
})
}
// ExportAliases streams every row of entity_aliases as a CSV attachment
// (aliases.csv) with the header "alias,canonical_name,tipo", ordered by tipo
// and canonical name.
//
// A query failure is reported as a 500 JSON response. Once streaming has
// started the status code can no longer change, so later failures (a row that
// cannot be scanned, an iteration error, a write error) are attached to the
// gin context with c.Error instead of being silently dropped. Rows that fail
// to scan are skipped rather than written with partial values.
func ExportAliases(c *gin.Context) {
	rows, err := db.GetPool().Query(c.Request.Context(),
		"SELECT alias, canonical_name, tipo FROM entity_aliases ORDER BY tipo, canonical_name")
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get aliases", "message": err.Error()})
		return
	}
	defer rows.Close()

	c.Header("Content-Type", "text/csv")
	c.Header("Content-Disposition", "attachment; filename=aliases.csv")
	c.Header("Cache-Control", "no-cache")

	writer := csv.NewWriter(c.Writer)
	if err := writer.Write([]string{"alias", "canonical_name", "tipo"}); err != nil {
		_ = c.Error(err)
		return
	}

	for rows.Next() {
		var alias, canonical, tipo string
		if err := rows.Scan(&alias, &canonical, &tipo); err != nil {
			_ = c.Error(err)
			continue
		}
		if err := writer.Write([]string{alias, canonical, tipo}); err != nil {
			_ = c.Error(err)
			return
		}
	}
	// rows.Next returning false may mean an error rather than end of data.
	if err := rows.Err(); err != nil {
		_ = c.Error(err)
	}

	writer.Flush()
	if err := writer.Error(); err != nil {
		_ = c.Error(err)
	}
}
// ImportAliases reads the uploaded multipart file "file" as CSV (first record
// is a header; columns are alias, canonical_name, tipo) and upserts each data
// row into entity_aliases inside one transaction.
//
// Rows with fewer than three fields, or with an empty alias or canonical name,
// are counted as skipped. Rows whose INSERT fails are also skipped: each row
// runs inside its own nested transaction (a savepoint), so a failing row is
// rolled back on its own instead of aborting the outer transaction and making
// every later statement and the final commit fail.
//
// Responds 200 with the inserted/skipped counts, 400 for a missing, unparsable
// or empty file, and 500 on transaction errors.
func ImportAliases(c *gin.Context) {
	file, err := c.FormFile("file")
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "No file uploaded"})
		return
	}

	src, err := file.Open()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to open file"})
		return
	}
	defer src.Close()

	reader := csv.NewReader(src)
	records, err := reader.ReadAll()
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Failed to parse CSV", "message": err.Error()})
		return
	}
	if len(records) < 2 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "CSV file is empty or has no data rows"})
		return
	}

	// The import uses a background context, so it is not tied to the lifetime
	// of the HTTP request.
	ctx := context.Background()
	tx, err := db.GetPool().Begin(ctx)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction"})
		return
	}
	defer tx.Rollback(ctx)

	inserted := 0
	skipped := 0
	for i, record := range records[1:] {
		if len(record) < 3 {
			skipped++
			continue
		}
		alias := strings.TrimSpace(record[0])
		canonical := strings.TrimSpace(record[1])
		tipo := strings.TrimSpace(record[2])
		if alias == "" || canonical == "" {
			skipped++
			continue
		}

		// Nested Begin creates a savepoint; rolling it back keeps the outer
		// transaction usable after a failed statement.
		rowTx, err := tx.Begin(ctx)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction"})
			return
		}

		_, err = rowTx.Exec(ctx,
			"INSERT INTO entity_aliases (alias, canonical_name, tipo) VALUES ($1, $2, $3) ON CONFLICT (alias, tipo) DO UPDATE SET canonical_name = $2",
			alias, canonical, tipo)
		if err != nil {
			// i is zero-based over the data rows, so i+1 is the data row number.
			fmt.Printf("Error inserting row %d: %v\n", i+1, err)
			if rbErr := rowTx.Rollback(ctx); rbErr != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": rbErr.Error()})
				return
			}
			skipped++
			continue
		}
		if err := rowTx.Commit(ctx); err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": err.Error()})
			return
		}
		inserted++
	}

	if err := tx.Commit(ctx); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": err.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"message":  "Import completed",
		"inserted": inserted,
		"skipped":  skipped,
	})
}
// GetAdminStats responds with the number of rows in users and entity_aliases.
// A failing count query yields a 500 response instead of being reported as a
// count of zero.
func GetAdminStats(c *gin.Context) {
	ctx := c.Request.Context()

	var totalUsers, totalAliases int
	if err := db.GetPool().QueryRow(ctx, "SELECT COUNT(*) FROM users").Scan(&totalUsers); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to count users", "message": err.Error()})
		return
	}
	if err := db.GetPool().QueryRow(ctx, "SELECT COUNT(*) FROM entity_aliases").Scan(&totalAliases); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to count aliases", "message": err.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"total_users":   totalUsers,
		"total_aliases": totalAliases,
	})
}
// GetUsers responds with every user (newest first) and the total count.
//
// A row that cannot be scanned, or an error while iterating, now produces a
// 500 response; previously such rows were silently left out of the list.
func GetUsers(c *gin.Context) {
	rows, err := db.GetPool().Query(c.Request.Context(), `
SELECT id, email, username, is_admin, created_at, updated_at
FROM users ORDER BY created_at DESC`)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get users", "message": err.Error()})
		return
	}
	defer rows.Close()

	// NOTE(review): created_at/updated_at are scanned into strings; if the
	// columns are timestamp types the driver may reject that — confirm
	// against the schema.
	type UserRow struct {
		ID        int64  `json:"id"`
		Email     string `json:"email"`
		Username  string `json:"username"`
		IsAdmin   bool   `json:"is_admin"`
		CreatedAt string `json:"created_at"`
		UpdatedAt string `json:"updated_at"`
	}

	// Start from an empty, non-nil slice so the JSON is [] rather than null.
	users := []UserRow{}
	for rows.Next() {
		var u UserRow
		if err := rows.Scan(&u.ID, &u.Email, &u.Username, &u.IsAdmin, &u.CreatedAt, &u.UpdatedAt); err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get users", "message": err.Error()})
			return
		}
		users = append(users, u)
	}
	if err := rows.Err(); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to get users", "message": err.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{"users": users, "total": len(users)})
}
// PromoteUser sets is_admin = true for the user whose id is given in the "id"
// path parameter. Responds 400 for a non-numeric id, 404 when no row was
// updated, 500 on a database error and 200 otherwise.
func PromoteUser(c *gin.Context) {
	userID, convErr := strconv.Atoi(c.Param("id"))
	if convErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid user ID"})
		return
	}

	outcome, execErr := db.GetPool().Exec(c.Request.Context(), "UPDATE users SET is_admin = true WHERE id = $1", userID)
	switch {
	case execErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to promote user", "message": execErr.Error()})
	case outcome.RowsAffected() == 0:
		c.JSON(http.StatusNotFound, gin.H{"error": "User not found"})
	default:
		c.JSON(http.StatusOK, gin.H{"message": "User promoted to admin"})
	}
}
// DemoteUser sets is_admin = false for the user whose id is given in the "id"
// path parameter. Responds 400 for a non-numeric id, 404 when no row was
// updated, 500 on a database error and 200 otherwise.
func DemoteUser(c *gin.Context) {
	userID, convErr := strconv.Atoi(c.Param("id"))
	if convErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid user ID"})
		return
	}

	outcome, execErr := db.GetPool().Exec(c.Request.Context(), "UPDATE users SET is_admin = false WHERE id = $1", userID)
	switch {
	case execErr != nil:
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to demote user", "message": execErr.Error()})
	case outcome.RowsAffected() == 0:
		c.JSON(http.StatusNotFound, gin.H{"error": "User not found"})
	default:
		c.JSON(http.StatusOK, gin.H{"message": "User demoted from admin"})
	}
}
// ResetDatabase deletes every row from a fixed list of content tables inside a
// single transaction and reports the cleared tables. Any failure responds 500
// and the deferred rollback discards the deletions made so far.
func ResetDatabase(c *gin.Context) {
	ctx := c.Request.Context()

	// Deleted in this order; the names are constants, never user input.
	tables := []string{
		"noticias",
		"feeds",
		"traducciones",
		"tags_noticia",
		"tags",
		"entity_aliases",
		"favoritos",
		"videos",
		"video_parrillas",
		"eventos",
		"search_history",
	}

	tx, beginErr := db.GetPool().Begin(ctx)
	if beginErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction"})
		return
	}
	defer tx.Rollback(ctx)

	for idx := range tables {
		name := tables[idx]
		if _, execErr := tx.Exec(ctx, "DELETE FROM "+name); execErr != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete from " + name, "message": execErr.Error()})
			return
		}
	}

	if commitErr := tx.Commit(ctx); commitErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": commitErr.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"message":        "Database reset successfully. All data has been deleted.",
		"tables_cleared": tables,
	})
}
// WorkerConfig is the JSON payload used by the translator worker endpoints.
type WorkerConfig struct {
	// Type selects the translator flavour; the handlers use "cpu" and "gpu".
	Type string `json:"type"`
	// Workers is the number of translator workers.
	Workers int `json:"workers"`
	// Status is the worker status string carried in the payload.
	Status string `json:"status"`
}
// GetWorkerStatus reports the translator configuration stored in the config
// table (type, worker count, status) plus a "running" figure.
//
// Missing or unreadable config rows fall back to "cpu", "2" and "stopped".
// When the stored status is "running", docker compose is asked whether the
// service matching the configured type has any container; if so "running" is
// the configured worker count, otherwise 0.
//
// The probe runs in the same compose project directory that StartWorkers and
// StopWorkers use, checks "translator-gpu" when the type is "gpu", and is
// cancelled together with the request.
func GetWorkerStatus(c *gin.Context) {
	ctx := c.Request.Context()

	var translatorType, translatorWorkers, translatorStatus string
	err := db.GetPool().QueryRow(ctx, "SELECT value FROM config WHERE key = 'translator_type'").Scan(&translatorType)
	if err != nil {
		translatorType = "cpu"
	}
	err = db.GetPool().QueryRow(ctx, "SELECT value FROM config WHERE key = 'translator_workers'").Scan(&translatorWorkers)
	if err != nil {
		translatorWorkers = "2"
	}
	err = db.GetPool().QueryRow(ctx, "SELECT value FROM config WHERE key = 'translator_status'").Scan(&translatorStatus)
	if err != nil {
		translatorStatus = "stopped"
	}

	// A non-numeric stored value yields 0 workers.
	workers, _ := strconv.Atoi(translatorWorkers)

	// Check whether the containers are actually running.
	runningCount := 0
	if translatorStatus == "running" {
		serviceName := "translator"
		if translatorType == "gpu" {
			serviceName = "translator-gpu"
		}
		cmd := exec.CommandContext(ctx, "docker", "compose", "ps", "-q", serviceName)
		cmd.Dir = "/datos/rss2"
		// Best effort: a failing probe is reported as nothing running.
		output, _ := cmd.Output()
		if len(output) > 0 {
			runningCount = workers
		}
	}

	c.JSON(http.StatusOK, gin.H{
		"type":    translatorType,
		"workers": workers,
		"status":  translatorStatus,
		"running": runningCount,
	})
}
// SetWorkerConfig validates the posted WorkerConfig (type "cpu" or "gpu",
// 1 to 8 workers) and stores type and worker count in the config table.
// The status field is only echoed back in the response, not stored.
func SetWorkerConfig(c *gin.Context) {
	var req WorkerConfig
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request", "message": bindErr.Error()})
		return
	}

	switch req.Type {
	case "cpu", "gpu":
		// accepted
	default:
		c.JSON(http.StatusBadRequest, gin.H{"error": "Type must be 'cpu' or 'gpu'"})
		return
	}

	if !(req.Workers >= 1 && req.Workers <= 8) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Workers must be between 1 and 8"})
		return
	}

	ctx := c.Request.Context()

	// Applied in order; the first failure stops the sequence.
	updates := []struct {
		query   string
		value   string
		failure string
	}{
		{"UPDATE config SET value = $1, updated_at = NOW() WHERE key = 'translator_type'", req.Type, "Failed to update translator_type"},
		{"UPDATE config SET value = $1, updated_at = NOW() WHERE key = 'translator_workers'", strconv.Itoa(req.Workers), "Failed to update translator_workers"},
	}
	for _, u := range updates {
		if _, execErr := db.GetPool().Exec(ctx, u.query, u.value); execErr != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": u.failure})
			return
		}
	}

	c.JSON(http.StatusOK, gin.H{
		"message": "Worker configuration updated",
		"type":    req.Type,
		"workers": req.Workers,
		"status":  req.Status,
	})
}
// StartWorkers (re)starts the translator containers through docker compose.
//
// The type and worker count come from the config table (defaults "cpu" and 2)
// and may be overridden by an optional JSON WorkerConfig body. The worker
// count is limited to 1..8. Any running translator service is stopped first,
// then the selected service ("translator", or "translator-gpu" for type
// "gpu") is scaled up and the resulting state is written back to config.
//
// An override type other than "cpu" or "gpu" is rejected with 400, matching
// SetWorkerConfig. The worker count written to config is the limited value
// that was actually launched, not the raw request value.
func StartWorkers(c *gin.Context) {
	var req WorkerConfig
	// The body is optional, so a bind failure (for example an empty body) is
	// deliberately ignored and the stored configuration is used.
	_ = c.ShouldBindJSON(&req)

	if req.Type != "" && req.Type != "cpu" && req.Type != "gpu" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "Type must be 'cpu' or 'gpu'"})
		return
	}

	ctx := c.Request.Context()

	// Read the current configuration.
	var translatorType, translatorWorkers string
	err := db.GetPool().QueryRow(ctx, "SELECT value FROM config WHERE key = 'translator_type'").Scan(&translatorType)
	if err != nil || translatorType == "" {
		translatorType = "cpu"
	}
	err = db.GetPool().QueryRow(ctx, "SELECT value FROM config WHERE key = 'translator_workers'").Scan(&translatorWorkers)
	if err != nil || translatorWorkers == "" {
		translatorWorkers = "2"
	}

	if req.Type != "" {
		translatorType = req.Type
	}
	if req.Workers > 0 {
		translatorWorkers = strconv.Itoa(req.Workers)
	}

	// A non-numeric stored value parses as 0 and falls into the default below.
	workers, _ := strconv.Atoi(translatorWorkers)
	if workers < 1 {
		workers = 2
	}
	if workers > 8 {
		workers = 8
	}
	// Keep the stored value in step with what is really launched.
	translatorWorkers = strconv.Itoa(workers)

	// Decide which service to start.
	serviceName := "translator"
	if translatorType == "gpu" {
		serviceName = "translator-gpu"
	}

	// Stop any existing translator; best effort, its result is not checked.
	stopCmd := exec.Command("docker", "compose", "stop", "translator", "translator-gpu")
	stopCmd.Dir = "/datos/rss2"
	stopCmd.Run()

	// Start with the requested number of workers.
	startCmd := exec.Command("docker", "compose", "up", "-d", "--scale", fmt.Sprintf("%s=%d", serviceName, workers), serviceName)
	startCmd.Dir = "/datos/rss2"
	output, err := startCmd.CombinedOutput()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to start workers",
			"details": string(output),
		})
		return
	}

	// Update the state in the database. The containers are already running at
	// this point, so failures are recorded on the context rather than turned
	// into an error response.
	if _, err := db.GetPool().Exec(ctx, "UPDATE config SET value = 'running', updated_at = NOW() WHERE key = 'translator_status'"); err != nil {
		_ = c.Error(err)
	}
	if _, err := db.GetPool().Exec(ctx, "UPDATE config SET value = $1, updated_at = NOW() WHERE key = 'translator_type'", translatorType); err != nil {
		_ = c.Error(err)
	}
	if _, err := db.GetPool().Exec(ctx, "UPDATE config SET value = $1, updated_at = NOW() WHERE key = 'translator_workers'", translatorWorkers); err != nil {
		_ = c.Error(err)
	}

	c.JSON(http.StatusOK, gin.H{
		"message": "Workers started successfully",
		"type":    translatorType,
		"workers": workers,
		"status":  "running",
	})
}
// StopWorkers stops both translator services through docker compose and marks
// translator_status as "stopped" in the config table. A failing compose
// command responds 500 with its combined output; the result of the config
// update is not checked.
func StopWorkers(c *gin.Context) {
	// Stop the translators.
	stop := exec.Command("docker", "compose", "stop", "translator", "translator-gpu")
	stop.Dir = "/datos/rss2"

	combined, runErr := stop.CombinedOutput()
	if runErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "Failed to stop workers",
			"details": string(combined),
		})
		return
	}

	// Update the state in the database.
	db.GetPool().Exec(c.Request.Context(), "UPDATE config SET value = 'stopped', updated_at = NOW() WHERE key = 'translator_status'")

	c.JSON(http.StatusOK, gin.H{
		"message": "Workers stopped successfully",
		"status":  "stopped",
	})
}
// PatchEntityTipo changes the tipo of all tags matching a given valor.
//
// The JSON body must carry "valor" and "new_tipo"; new_tipo has to be one of
// persona, organizacion, lugar or tema. Inside one transaction every tag with
// that valor and a different tipo is merged into the (valor, new_tipo) tag:
// its tags_noticia rows are re-pointed, leftovers are deleted, and the old tag
// is removed.
//
// Responds 200 (also when nothing needed changing), 400 for an invalid body or
// tipo, and 500 on any database error.
func PatchEntityTipo(c *gin.Context) {
var req struct {
Valor string `json:"valor" binding:"required"`
NewTipo string `json:"new_tipo" binding:"required"`
}
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request", "message": err.Error()})
return
}
validTipos := map[string]bool{"persona": true, "organizacion": true, "lugar": true, "tema": true}
if !validTipos[req.NewTipo] {
c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid tipo. Must be persona, organizacion, lugar or tema"})
return
}
ctx := c.Request.Context()
tx, err := db.GetPool().Begin(ctx)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to start transaction", "message": err.Error()})
return
}
// Deferred so that every early return below undoes the partial work.
defer tx.Rollback(ctx)
// Since we don't know the exact old Tipo, we find all tags with this valor that ARE NOT already the new tipo
rows, err := tx.Query(ctx, "SELECT id, tipo FROM tags WHERE valor = $1 AND tipo != $2", req.Valor, req.NewTipo)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to fetch existing tags", "message": err.Error()})
return
}
type OldTag struct {
ID int
Tipo string
}
// Rows that fail to scan are left out of the merge without being reported.
var tagsToMove []OldTag
for rows.Next() {
var ot OldTag
if err := rows.Scan(&ot.ID, &ot.Tipo); err == nil {
tagsToMove = append(tagsToMove, ot)
}
}
// Closed explicitly (not deferred) before further statements run on tx.
rows.Close()
if len(tagsToMove) == 0 {
c.JSON(http.StatusOK, gin.H{"message": "No entities found to update or already the requested tipo"})
return
}
// Make sure the target tag (valor, new_tipo) exists
// The no-op DO UPDATE makes RETURNING yield the id for existing rows too.
var targetTagId int
err = tx.QueryRow(ctx, `
INSERT INTO tags (valor, tipo) VALUES ($1, $2)
ON CONFLICT (valor, tipo) DO UPDATE SET valor = EXCLUDED.valor
RETURNING id`, req.Valor, req.NewTipo).Scan(&targetTagId)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to ensure target tag", "message": err.Error()})
return
}
totalMoved := 0
for _, old := range tagsToMove {
if old.ID == targetTagId {
continue
}
// Move valid tags_noticia references to the target tag id safely
// Rows that would duplicate an existing (target tag, noticia, traduccion)
// combination are left untouched by the NOT EXISTS guard.
res, err := tx.Exec(ctx, `
UPDATE tags_noticia
SET tag_id = $1
WHERE tag_id = $2 AND NOT EXISTS (
SELECT 1 FROM tags_noticia tn2
WHERE tn2.tag_id = $1 AND tn2.noticia_id = tags_noticia.noticia_id AND tn2.traduccion_id = tags_noticia.traduccion_id
)
`, targetTagId, old.ID)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to reassign news mentions", "message": err.Error()})
return
}
totalMoved += int(res.RowsAffected())
// Delete any remaining orphaned mentions (duplicates)
_, err = tx.Exec(ctx, "DELETE FROM tags_noticia WHERE tag_id = $1", old.ID)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete orphaned mentions", "message": err.Error()})
return
}
// Delete the old tag since it's now merged
_, err = tx.Exec(ctx, "DELETE FROM tags WHERE id = $1", old.ID)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to delete old tag", "message": err.Error()})
return
}
}
if err := tx.Commit(ctx); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to commit transaction", "message": err.Error()})
return
}
// tags_merged counts the candidate tags found; rows_affected counts only the
// tags_noticia rows re-pointed by the UPDATE statements.
c.JSON(http.StatusOK, gin.H{
"message": "Entity tipo updated and merged successfully",
"valor": req.Valor,
"new_tipo": req.NewTipo,
"tags_merged": len(tagsToMove),
"rows_affected": totalMoved,
})
}
// BackupDatabase runs pg_dump against the configured database and returns the
// SQL as a downloadable attachment named backup_<timestamp>.sql.
//
// Connection settings come from DB_HOST, DB_PORT, DB_NAME and DB_USER (with
// defaults "db", "5432", "rss", "rss"); DB_PASS is handed to pg_dump through
// PGPASSWORD. The whole dump is buffered in memory before it is sent. A
// failing pg_dump responds 500 with its stderr output.
func BackupDatabase(c *gin.Context) {
	// envOr returns the variable's value, or fallback when it is empty.
	envOr := func(key, fallback string) string {
		if v := os.Getenv(key); v != "" {
			return v
		}
		return fallback
	}

	host := envOr("DB_HOST", "db")
	port := envOr("DB_PORT", "5432")
	name := envOr("DB_NAME", "rss")
	user := envOr("DB_USER", "rss")
	password := os.Getenv("DB_PASS")

	dump := exec.Command("pg_dump",
		"-h", host,
		"-p", port,
		"-U", user,
		"-d", name,
		"--no-password",
	)
	dump.Env = append(os.Environ(), "PGPASSWORD="+password)

	var stdout, stderr bytes.Buffer
	dump.Stdout = &stdout
	dump.Stderr = &stderr

	if runErr := dump.Run(); runErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "pg_dump failed",
			"details": stderr.String(),
		})
		return
	}

	filename := fmt.Sprintf("backup_%s.sql", time.Now().Format("2006-01-02_15-04-05"))
	c.Header("Content-Type", "application/octet-stream")
	c.Header("Content-Disposition", "attachment; filename="+filename)
	c.Header("Cache-Control", "no-cache")
	c.Data(http.StatusOK, "application/octet-stream", stdout.Bytes())
}
// BackupNewsZipped runs pg_dump restricted to the news tables (noticias,
// traducciones, tags, tags_noticia) and returns the SQL wrapped in a ZIP
// attachment named backup_noticias_<timestamp>.zip.
//
// Connection settings are read from DB_HOST, DB_PORT, DB_NAME, DB_USER and
// DB_PASS exactly as in BackupDatabase. Dump and archive are both built in
// memory. A failing pg_dump or ZIP step responds 500.
func BackupNewsZipped(c *gin.Context) {
	// envOr returns the variable's value, or fallback when it is empty.
	envOr := func(key, fallback string) string {
		if v := os.Getenv(key); v != "" {
			return v
		}
		return fallback
	}

	host := envOr("DB_HOST", "db")
	port := envOr("DB_PORT", "5432")
	name := envOr("DB_NAME", "rss")
	user := envOr("DB_USER", "rss")
	password := os.Getenv("DB_PASS")

	// Tables to back up; each one becomes a "-t <table>" argument.
	tables := []string{"noticias", "traducciones", "tags", "tags_noticia"}

	dumpArgs := make([]string, 0, 9+2*len(tables))
	dumpArgs = append(dumpArgs,
		"-h", host,
		"-p", port,
		"-U", user,
		"-d", name,
		"--no-password",
	)
	for idx := range tables {
		dumpArgs = append(dumpArgs, "-t", tables[idx])
	}

	dump := exec.Command("pg_dump", dumpArgs...)
	dump.Env = append(os.Environ(), "PGPASSWORD="+password)

	var sqlDump, stderr bytes.Buffer
	dump.Stdout = &sqlDump
	dump.Stderr = &stderr

	if runErr := dump.Run(); runErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":   "pg_dump failed",
			"details": stderr.String(),
		})
		return
	}

	// Build the ZIP archive with a single SQL entry.
	archive := new(bytes.Buffer)
	zipper := zip.NewWriter(archive)

	entryName := fmt.Sprintf("backup_noticias_%s.sql", time.Now().Format("2006-01-02"))
	entry, createErr := zipper.Create(entryName)
	if createErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create ZIP entry", "message": createErr.Error()})
		return
	}

	if _, writeErr := entry.Write(sqlDump.Bytes()); writeErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to write to ZIP", "message": writeErr.Error()})
		return
	}

	// Close finishes the archive; without it the buffer is not a valid ZIP.
	if closeErr := zipper.Close(); closeErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to close ZIP writer", "message": closeErr.Error()})
		return
	}

	filename := fmt.Sprintf("backup_noticias_%s.zip", time.Now().Format("2006-01-02_15-04-05"))
	c.Header("Content-Type", "application/zip")
	c.Header("Content-Disposition", "attachment; filename="+filename)
	c.Header("Cache-Control", "no-cache")
	c.Data(http.StatusOK, "application/zip", archive.Bytes())
}

View file

@ -0,0 +1,183 @@
package handlers
import (
"net/http"
"time"
"github.com/gin-gonic/gin"
"github.com/golang-jwt/jwt/v5"
"github.com/rss2/backend/internal/config"
"github.com/rss2/backend/internal/db"
"github.com/rss2/backend/internal/models"
"golang.org/x/crypto/bcrypt"
)
// jwtSecret is the key used to sign issued tokens; it is set by InitAuth.
var jwtSecret []byte
// CheckFirstUser reports how many users exist and whether the users table is
// still empty. Responds 500 when the count query fails.
func CheckFirstUser(c *gin.Context) {
	var total int
	row := db.GetPool().QueryRow(c.Request.Context(), "SELECT COUNT(*) FROM users")
	if scanErr := row.Scan(&total); scanErr != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to check users"})
		return
	}

	noUsersYet := total == 0
	c.JSON(http.StatusOK, gin.H{"is_first_user": noUsersYet, "total_users": total})
}
// InitAuth stores secret as the signing key used for issued tokens.
func InitAuth(secret string) {
	key := []byte(secret)
	jwtSecret = key
}
// Claims is the JWT payload issued by Login and Register: a snapshot of the
// user's identity fields plus the registered claims.
type Claims struct {
	// UserID is the id of the authenticated user.
	UserID int64 `json:"user_id"`
	// Email is the user's email at the time the token was issued.
	Email string `json:"email"`
	// Username is the user's name at the time the token was issued.
	Username string `json:"username"`
	// IsAdmin records whether the user was an admin when the token was issued.
	IsAdmin bool `json:"is_admin"`
	jwt.RegisteredClaims
}
// Login authenticates a user by email and password and responds with a signed
// HS256 token valid for 24 hours together with the user record.
//
// Responds 400 when the body cannot be bound, 401 when the email is unknown or
// the password does not match (same message for both), and 500 when the token
// cannot be signed.
func Login(c *gin.Context) {
	var req models.LoginRequest
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid request", Message: bindErr.Error()})
		return
	}

	var user models.User
	row := db.GetPool().QueryRow(c.Request.Context(), `
SELECT id, email, username, password_hash, is_admin, created_at, updated_at
FROM users WHERE email = $1`, req.Email)
	lookupErr := row.Scan(
		&user.ID, &user.Email, &user.Username, &user.PasswordHash, &user.IsAdmin,
		&user.CreatedAt, &user.UpdatedAt,
	)
	if lookupErr != nil {
		c.JSON(http.StatusUnauthorized, models.ErrorResponse{Error: "Invalid credentials"})
		return
	}

	if bcrypt.CompareHashAndPassword([]byte(user.PasswordHash), []byte(req.Password)) != nil {
		c.JSON(http.StatusUnauthorized, models.ErrorResponse{Error: "Invalid credentials"})
		return
	}

	expiresAt := time.Now().Add(24 * time.Hour)
	claims := Claims{
		UserID:   user.ID,
		Email:    user.Email,
		Username: user.Username,
		IsAdmin:  user.IsAdmin,
		RegisteredClaims: jwt.RegisteredClaims{
			ExpiresAt: jwt.NewNumericDate(expiresAt),
			IssuedAt:  jwt.NewNumericDate(time.Now()),
		},
	}

	signed, signErr := jwt.NewWithClaims(jwt.SigningMethodHS256, &claims).SignedString(jwtSecret)
	if signErr != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to generate token"})
		return
	}

	// NOTE(review): user still carries PasswordHash here; presumably
	// models.User hides it from JSON — confirm.
	c.JSON(http.StatusOK, models.AuthResponse{
		Token: signed,
		User:  user,
	})
}
// Register creates a user from the posted email, username and password and
// responds 201 with a signed HS256 token (valid for 24 hours), the stored user
// and whether this was the first account.
//
// The very first account is created as admin. The user count that decides this
// is now checked for errors: previously a failed count left the counter at 0
// and the new account was silently made an administrator.
//
// Responds 400 when the body cannot be bound and 500 on hashing, database or
// signing failures.
func Register(c *gin.Context) {
	var req models.RegisterRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid request", Message: err.Error()})
		return
	}

	hashedPassword, err := bcrypt.GenerateFromPassword([]byte(req.Password), bcrypt.DefaultCost)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to hash password"})
		return
	}

	ctx := c.Request.Context()

	// Never grant admin rights on the strength of a count that failed.
	var userCount int
	if err := db.GetPool().QueryRow(ctx, "SELECT COUNT(*) FROM users").Scan(&userCount); err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to check users"})
		return
	}
	isFirstUser := userCount == 0

	var userID int64
	err = db.GetPool().QueryRow(ctx, `
INSERT INTO users (email, username, password_hash, is_admin, created_at, updated_at)
VALUES ($1, $2, $3, $4, NOW(), NOW())
RETURNING id`,
		req.Email, req.Username, string(hashedPassword), isFirstUser,
	).Scan(&userID)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to create user", Message: err.Error()})
		return
	}

	// Re-read the row so the response carries the stored timestamps.
	var user models.User
	err = db.GetPool().QueryRow(ctx, `
SELECT id, email, username, is_admin, created_at, updated_at
FROM users WHERE id = $1`, userID).Scan(
		&user.ID, &user.Email, &user.Username, &user.IsAdmin,
		&user.CreatedAt, &user.UpdatedAt,
	)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch user"})
		return
	}

	expirationTime := time.Now().Add(24 * time.Hour)
	claims := &Claims{
		UserID:   user.ID,
		Email:    user.Email,
		Username: user.Username,
		IsAdmin:  user.IsAdmin,
		RegisteredClaims: jwt.RegisteredClaims{
			ExpiresAt: jwt.NewNumericDate(expirationTime),
			IssuedAt:  jwt.NewNumericDate(time.Now()),
		},
	}

	token := jwt.NewWithClaims(jwt.SigningMethodHS256, claims)
	tokenString, err := token.SignedString(jwtSecret)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to generate token"})
		return
	}

	c.JSON(http.StatusCreated, models.AuthResponse{
		Token:       tokenString,
		User:        user,
		IsFirstUser: isFirstUser,
	})
}
// GetCurrentUser returns the database record of the authenticated user.
//
// It reads the claims stored under the "user" context key, loads the row
// with that user ID and replies with it. It answers 401 when no usable
// claims are present and 404 when the user row cannot be loaded.
func GetCurrentUser(c *gin.Context) {
	userVal, exists := c.Get("user")
	if !exists {
		c.JSON(http.StatusUnauthorized, models.ErrorResponse{Error: "Not authenticated"})
		return
	}

	// A checked assertion keeps an unexpected context value from panicking.
	// NOTE(review): the stored value must be this package's *Claims —
	// confirm the auth middleware in use stores that exact type.
	claims, ok := userVal.(*Claims)
	if !ok || claims == nil {
		c.JSON(http.StatusUnauthorized, models.ErrorResponse{Error: "Not authenticated"})
		return
	}

	var user models.User
	err := db.GetPool().QueryRow(c.Request.Context(), `
SELECT id, email, username, is_admin, created_at, updated_at
FROM users WHERE id = $1`, claims.UserID).Scan(
		&user.ID, &user.Email, &user.Username, &user.IsAdmin,
		&user.CreatedAt, &user.UpdatedAt,
	)
	if err != nil {
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "User not found"})
		return
	}

	c.JSON(http.StatusOK, user)
}
func init() {
cfg := config.Load()
InitAuth(cfg.SecretKey)
}

View file

@ -0,0 +1,540 @@
package handlers
import (
"context"
"encoding/csv"
"fmt"
"io"
"net/http"
"strconv"
"strings"
"github.com/gin-gonic/gin"
"github.com/rss2/backend/internal/db"
"github.com/rss2/backend/internal/models"
)
// FeedResponse is the JSON representation of a feed row used by the feed
// handlers. Pointer fields hold nullable values and serialize as null when
// unset.
type FeedResponse struct {
// ID is the feed's primary key.
ID int64 `json:"id"`
// Nombre is the feed's display name.
Nombre string `json:"nombre"`
// Descripcion is an optional free-text description.
Descripcion *string `json:"descripcion"`
// URL is the feed address.
URL string `json:"url"`
// CategoriaID optionally references a row in categorias.
CategoriaID *int64 `json:"categoria_id"`
// PaisID optionally references a row in paises.
PaisID *int64 `json:"pais_id"`
// Idioma is the optional language code of the feed.
Idioma *string `json:"idioma"`
// Activo reports whether the feed is enabled.
Activo bool `json:"activo"`
// Fallos is the stored failure counter (presumably consecutive fetch failures — confirm with the worker that updates it).
Fallos *int64 `json:"fallos"`
// LastError is the last stored error text; only the list query in GetFeeds selects it.
LastError *string `json:"last_error"`
// FuenteURLID is not populated by any query in the handlers shown alongside this type.
FuenteURLID *int64 `json:"fuente_url_id"`
}
// GetFeeds lists feeds with optional filters and pagination.
//
// Query parameters:
//   - page: defaults to 1; values below 1 are treated as 1.
//   - per_page: defaults to 50; values outside 1..100 fall back to 50.
//   - activo: when present, filters on activo = (value == "true").
//   - categoria_id, pais_id: when present, applied as equality filters.
//
// The response object carries "feeds", "total", "page", "per_page" and
// "total_pages". "feeds" is always a JSON array, never null. A row that
// cannot be scanned, or an iteration error, yields a 500 rather than a
// silently shortened page.
func GetFeeds(c *gin.Context) {
	page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
	perPage, _ := strconv.Atoi(c.DefaultQuery("per_page", "50"))
	activo := c.Query("activo")
	categoriaID := c.Query("categoria_id")
	paisID := c.Query("pais_id")

	if page < 1 {
		page = 1
	}
	if perPage < 1 || perPage > 100 {
		perPage = 50
	}
	offset := (page - 1) * perPage

	// Build the WHERE clause with numbered placeholders; values never enter the SQL text.
	where := "1=1"
	args := []interface{}{}
	argNum := 1
	if activo != "" {
		where += fmt.Sprintf(" AND activo = $%d", argNum)
		args = append(args, activo == "true")
		argNum++
	}
	if categoriaID != "" {
		where += fmt.Sprintf(" AND categoria_id = $%d", argNum)
		args = append(args, categoriaID)
		argNum++
	}
	if paisID != "" {
		where += fmt.Sprintf(" AND pais_id = $%d", argNum)
		args = append(args, paisID)
		argNum++
	}

	ctx := c.Request.Context()

	var total int
	countQuery := fmt.Sprintf("SELECT COUNT(*) FROM feeds WHERE %s", where)
	if err := db.GetPool().QueryRow(ctx, countQuery, args...).Scan(&total); err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to count feeds", Message: err.Error()})
		return
	}

	sqlQuery := fmt.Sprintf(`
SELECT f.id, f.nombre, f.descripcion, f.url,
f.categoria_id, f.pais_id, f.idioma, f.activo, f.fallos, f.last_error,
c.nombre AS categoria, p.nombre AS pais,
(SELECT COUNT(*) FROM noticias n WHERE n.fuente_nombre = f.nombre) as noticias_count
FROM feeds f
LEFT JOIN categorias c ON c.id = f.categoria_id
LEFT JOIN paises p ON p.id = f.pais_id
WHERE %s
ORDER BY p.nombre NULLS LAST, f.activo DESC, f.fallos ASC, c.nombre NULLS LAST, f.nombre
LIMIT $%d OFFSET $%d
`, where, argNum, argNum+1)
	args = append(args, perPage, offset)

	rows, err := db.GetPool().Query(ctx, sqlQuery, args...)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch feeds", Message: err.Error()})
		return
	}
	defer rows.Close()

	// FeedWithStats adds the joined names and the per-feed news count.
	type FeedWithStats struct {
		FeedResponse
		Categoria     *string `json:"categoria"`
		Pais          *string `json:"pais"`
		NoticiasCount int64   `json:"noticias_count"`
	}

	// Start from an empty, non-nil slice so an empty page encodes as [].
	feeds := make([]FeedWithStats, 0, perPage)
	for rows.Next() {
		var f FeedWithStats
		if err := rows.Scan(
			&f.ID, &f.Nombre, &f.Descripcion, &f.URL,
			&f.CategoriaID, &f.PaisID, &f.Idioma, &f.Activo, &f.Fallos, &f.LastError,
			&f.Categoria, &f.Pais, &f.NoticiasCount,
		); err != nil {
			c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch feeds", Message: err.Error()})
			return
		}
		feeds = append(feeds, f)
	}
	if err := rows.Err(); err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch feeds", Message: err.Error()})
		return
	}

	totalPages := (total + perPage - 1) / perPage
	c.JSON(http.StatusOK, gin.H{
		"feeds":       feeds,
		"total":       total,
		"page":        page,
		"per_page":    perPage,
		"total_pages": totalPages,
	})
}
// GetFeedByID replies with the feed whose ID is given in the "id" path
// parameter. It answers 400 when the ID is not a base-10 integer and 404
// when the row cannot be loaded.
func GetFeedByID(c *gin.Context) {
	feedID, parseErr := strconv.ParseInt(c.Param("id"), 10, 64)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
		return
	}

	var feed FeedResponse
	row := db.GetPool().QueryRow(c.Request.Context(), `
SELECT id, nombre, descripcion, url, categoria_id, pais_id, idioma, activo, fallos
FROM feeds WHERE id = $1`, feedID)
	if scanErr := row.Scan(
		&feed.ID, &feed.Nombre, &feed.Descripcion, &feed.URL,
		&feed.CategoriaID, &feed.PaisID, &feed.Idioma, &feed.Activo, &feed.Fallos,
	); scanErr != nil {
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
		return
	}

	c.JSON(http.StatusOK, feed)
}
// CreateFeedRequest is the JSON body accepted by CreateFeed.
type CreateFeedRequest struct {
// Nombre is the feed's display name; the binding tag makes it mandatory.
Nombre string `json:"nombre" binding:"required"`
// URL is the feed address; the binding tag makes it mandatory and requires a valid URL.
URL string `json:"url" binding:"required,url"`
// Descripcion is optional and stored as NULL when omitted.
Descripcion *string `json:"descripcion"`
// CategoriaID is optional and stored as NULL when omitted.
CategoriaID *int64 `json:"categoria_id"`
// PaisID is optional and stored as NULL when omitted.
PaisID *int64 `json:"pais_id"`
// Idioma is optional and stored as NULL when omitted.
Idioma *string `json:"idioma"`
}
// CreateFeed inserts a new feed from a CreateFeedRequest JSON body and
// replies 201 with the generated ID. It answers 400 when the body fails
// binding/validation and 500 when the insert fails.
func CreateFeed(c *gin.Context) {
	var body CreateFeedRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid request", Message: bindErr.Error()})
		return
	}

	var newID int64
	row := db.GetPool().QueryRow(c.Request.Context(), `
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma)
VALUES ($1, $2, $3, $4, $5, $6)
RETURNING id`,
		body.Nombre, body.Descripcion, body.URL, body.CategoriaID, body.PaisID, body.Idioma,
	)
	if insertErr := row.Scan(&newID); insertErr != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to create feed", Message: insertErr.Error()})
		return
	}

	c.JSON(http.StatusCreated, gin.H{"id": newID, "message": "Feed created successfully"})
}
// UpdateFeedRequest is the JSON body accepted by UpdateFeed. The update
// overwrites every listed column, so optional fields left out are written
// as NULL.
type UpdateFeedRequest struct {
// Nombre is the feed's display name; the binding tag makes it mandatory.
Nombre string `json:"nombre" binding:"required"`
// URL is the feed address; the binding tag makes it mandatory and requires a valid URL.
URL string `json:"url" binding:"required,url"`
// Descripcion is optional.
Descripcion *string `json:"descripcion"`
// CategoriaID is optional.
CategoriaID *int64 `json:"categoria_id"`
// PaisID is optional.
PaisID *int64 `json:"pais_id"`
// Idioma is optional.
Idioma *string `json:"idioma"`
// Activo is a pointer so that an omitted value can be told apart from false; UpdateFeed treats nil as true.
Activo *bool `json:"activo"`
}
// UpdateFeed overwrites the editable columns of the feed named by the "id"
// path parameter with the values from an UpdateFeedRequest JSON body.
//
// It answers 400 for a non-numeric ID or an invalid body, 500 when the
// update fails, 404 when no row matched and 200 on success.
func UpdateFeed(c *gin.Context) {
	feedID, parseErr := strconv.ParseInt(c.Param("id"), 10, 64)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
		return
	}

	var body UpdateFeedRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid request", Message: bindErr.Error()})
		return
	}

	// An omitted "activo" is written as true.
	// NOTE(review): this re-enables a disabled feed whenever a client edits
	// it without sending "activo" — confirm that is intended.
	activo := body.Activo == nil || *body.Activo

	tag, execErr := db.GetPool().Exec(c.Request.Context(), `
UPDATE feeds
SET nombre = $1, descripcion = $2, url = $3,
categoria_id = $4, pais_id = $5, idioma = $6, activo = $7
WHERE id = $8`,
		body.Nombre, body.Descripcion, body.URL,
		body.CategoriaID, body.PaisID, body.Idioma, activo, feedID,
	)
	switch {
	case execErr != nil:
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to update feed", Message: execErr.Error()})
	case tag.RowsAffected() == 0:
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
	default:
		c.JSON(http.StatusOK, models.SuccessResponse{Message: "Feed updated successfully"})
	}
}
// DeleteFeed removes the feed named by the "id" path parameter. It answers
// 400 for a non-numeric ID, 500 when the delete fails, 404 when no row
// matched and 200 on success.
func DeleteFeed(c *gin.Context) {
	feedID, parseErr := strconv.ParseInt(c.Param("id"), 10, 64)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
		return
	}

	tag, execErr := db.GetPool().Exec(c.Request.Context(), "DELETE FROM feeds WHERE id = $1", feedID)
	switch {
	case execErr != nil:
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to delete feed", Message: execErr.Error()})
	case tag.RowsAffected() == 0:
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
	default:
		c.JSON(http.StatusOK, models.SuccessResponse{Message: "Feed deleted successfully"})
	}
}
// ToggleFeedActive flips the activo flag of the feed named by the "id" path
// parameter. It answers 400 for a non-numeric ID, 500 when the update
// fails, 404 when no row matched and 200 on success.
func ToggleFeedActive(c *gin.Context) {
	feedID, parseErr := strconv.ParseInt(c.Param("id"), 10, 64)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
		return
	}

	tag, execErr := db.GetPool().Exec(c.Request.Context(), `
UPDATE feeds SET activo = NOT activo WHERE id = $1`, feedID)
	switch {
	case execErr != nil:
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to toggle feed", Message: execErr.Error()})
	case tag.RowsAffected() == 0:
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
	default:
		c.JSON(http.StatusOK, models.SuccessResponse{Message: "Feed toggled successfully"})
	}
}
// ReactivateFeed sets activo to TRUE and resets fallos to 0 for the feed
// named by the "id" path parameter. It answers 400 for a non-numeric ID,
// 500 when the update fails, 404 when no row matched and 200 on success.
func ReactivateFeed(c *gin.Context) {
	feedID, parseErr := strconv.ParseInt(c.Param("id"), 10, 64)
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid feed ID"})
		return
	}

	tag, execErr := db.GetPool().Exec(c.Request.Context(), `
UPDATE feeds SET activo = TRUE, fallos = 0 WHERE id = $1`, feedID)
	switch {
	case execErr != nil:
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to reactivate feed", Message: execErr.Error()})
	case tag.RowsAffected() == 0:
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "Feed not found"})
	default:
		c.JSON(http.StatusOK, models.SuccessResponse{Message: "Feed reactivated successfully"})
	}
}
// ExportFeeds streams the feeds matching the optional activo, categoria_id
// and pais_id query filters as a CSV attachment named feeds_export.csv.
//
// The first record is a header row; each following record is one feed,
// ordered by ID, with NULL columns written as empty strings. A query
// failure is reported as a JSON 500 before any CSV output is produced.
// Once streaming has started the status can no longer change, so the
// export simply stops at the first write error instead of reading the
// remaining rows for a response nobody receives.
func ExportFeeds(c *gin.Context) {
	activo := c.Query("activo")
	categoriaID := c.Query("categoria_id")
	paisID := c.Query("pais_id")

	// Build the WHERE clause with numbered placeholders; values never enter the SQL text.
	where := "1=1"
	args := []interface{}{}
	argNum := 1
	if activo != "" {
		where += fmt.Sprintf(" AND activo = $%d", argNum)
		args = append(args, activo == "true")
		argNum++
	}
	if categoriaID != "" {
		where += fmt.Sprintf(" AND categoria_id = $%d", argNum)
		args = append(args, categoriaID)
		argNum++
	}
	if paisID != "" {
		where += fmt.Sprintf(" AND pais_id = $%d", argNum)
		args = append(args, paisID)
	}

	query := fmt.Sprintf(`
SELECT f.id, f.nombre, f.descripcion, f.url,
f.categoria_id, c.nombre AS categoria,
f.pais_id, p.nombre AS pais,
f.idioma, f.activo, f.fallos
FROM feeds f
LEFT JOIN categorias c ON c.id = f.categoria_id
LEFT JOIN paises p ON p.id = f.pais_id
WHERE %s
ORDER BY f.id
`, where)

	rows, err := db.GetPool().Query(c.Request.Context(), query, args...)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch feeds", Message: err.Error()})
		return
	}
	defer rows.Close()

	c.Header("Content-Type", "text/csv")
	c.Header("Content-Disposition", "attachment; filename=feeds_export.csv")

	writer := csv.NewWriter(c.Writer)
	defer writer.Flush()

	header := []string{"id", "nombre", "descripcion", "url", "categoria_id", "categoria", "pais_id", "pais", "idioma", "activo", "fallos"}
	if err := writer.Write(header); err != nil {
		return
	}

	for rows.Next() {
		var (
			id                   int64
			nombre, url          string
			descripcion, idioma  *string
			catID, pID, fallos   *int64
			activoVal            bool
			categoria, pais      *string
		)
		if err := rows.Scan(&id, &nombre, &descripcion, &url, &catID, &categoria, &pID, &pais, &idioma, &activoVal, &fallos); err != nil {
			// Skip rows that cannot be decoded; the header is already sent.
			continue
		}
		record := []string{
			strconv.FormatInt(id, 10),
			nombre,
			stringOrEmpty(descripcion),
			url,
			int64ToString(catID),
			stringOrEmpty(categoria),
			int64ToString(pID),
			stringOrEmpty(pais),
			stringOrEmpty(idioma),
			strconv.FormatBool(activoVal),
			int64ToString(fallos),
		}
		if err := writer.Write(record); err != nil {
			return
		}
	}
}
// ImportFeeds upserts feeds from an uploaded CSV file (multipart form field
// "file") using the column layout written by ExportFeeds:
//
//	0 id, 1 nombre, 2 descripcion, 3 url, 4 categoria_id, 5 categoria,
//	6 pais_id, 7 pais, 8 idioma, 9 activo, 10 fallos
//
// The first record is treated as a header and discarded. Records with fewer
// than four columns, or with an empty nombre or url, are counted as
// skipped. A feed whose url already exists is updated, otherwise inserted.
//
// All rows are written in one transaction. Each row runs inside its own
// nested transaction (savepoint): in PostgreSQL a failed statement aborts
// the enclosing transaction, so without the savepoint one bad row would
// make every later row and the final commit fail. With it, a failing row is
// rolled back on its own, counted as failed, and the import continues.
//
// The response reports "imported", "skipped", "failed", the per-row
// "errors" messages and a summary "message".
func ImportFeeds(c *gin.Context) {
	file, err := c.FormFile("file")
	if err != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "No file provided"})
		return
	}

	src, err := file.Open()
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to open file", Message: err.Error()})
		return
	}
	defer src.Close()

	content, err := io.ReadAll(src)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to read file", Message: err.Error()})
		return
	}

	reader := csv.NewReader(strings.NewReader(string(content)))
	// Discard the header record.
	if _, err = reader.Read(); err != nil {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid CSV format"})
		return
	}

	imported := 0
	skipped := 0
	failed := 0
	errMsgs := []string{}

	// Database work follows the request context so a cancelled request stops the import.
	ctx := c.Request.Context()
	tx, err := db.GetPool().Begin(ctx)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to start transaction", Message: err.Error()})
		return
	}
	// The rollback uses a fresh context so cleanup still runs after the
	// request context has been cancelled.
	defer tx.Rollback(context.Background())

	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			failed++
			continue
		}
		if len(record) < 4 {
			skipped++
			continue
		}

		nombre := strings.TrimSpace(record[1])
		url := strings.TrimSpace(record[3])
		if nombre == "" || url == "" {
			skipped++
			continue
		}

		var descripcion *string
		if v := strings.TrimSpace(record[2]); v != "" {
			descripcion = &v
		}

		var categoriaID *int64
		if len(record) > 4 && strings.TrimSpace(record[4]) != "" {
			if catID, err := strconv.ParseInt(strings.TrimSpace(record[4]), 10, 64); err == nil {
				categoriaID = &catID
			}
		}

		var paisID *int64
		if len(record) > 6 && strings.TrimSpace(record[6]) != "" {
			if pID, err := strconv.ParseInt(strings.TrimSpace(record[6]), 10, 64); err == nil {
				paisID = &pID
			}
		}

		var idioma *string
		if len(record) > 8 && strings.TrimSpace(record[8]) != "" {
			lang := strings.TrimSpace(record[8])
			// Only the first two bytes of the language value are kept.
			if len(lang) > 2 {
				lang = lang[:2]
			}
			idioma = &lang
		}

		activo := true
		if len(record) > 9 && strings.TrimSpace(record[9]) != "" {
			activo = strings.ToLower(strings.TrimSpace(record[9])) == "true"
		}

		var fallos int64
		if len(record) > 10 && strings.TrimSpace(record[10]) != "" {
			if n, err := strconv.ParseInt(strings.TrimSpace(record[10]), 10, 64); err == nil {
				fallos = n
			}
		}

		// Isolate this row in a nested transaction (savepoint).
		sp, err := tx.Begin(ctx)
		if err != nil {
			c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to start transaction", Message: err.Error()})
			return
		}

		var existingID int64
		var action string
		lookupErr := sp.QueryRow(ctx, "SELECT id FROM feeds WHERE url = $1", url).Scan(&existingID)
		if lookupErr == nil {
			action = "updating"
			_, err = sp.Exec(ctx, `
UPDATE feeds SET nombre=$1, descripcion=$2, categoria_id=$3, pais_id=$4, idioma=$5, activo=$6, fallos=$7
WHERE id=$8`,
				nombre, descripcion, categoriaID, paisID, idioma, activo, fallos, existingID,
			)
		} else {
			action = "inserting"
			_, err = sp.Exec(ctx, `
INSERT INTO feeds (nombre, descripcion, url, categoria_id, pais_id, idioma, activo, fallos)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
				nombre, descripcion, url, categoriaID, paisID, idioma, activo, fallos,
			)
		}
		if err == nil {
			err = sp.Commit(ctx)
		}
		if err != nil {
			// Undo only this row; if even that fails the outer transaction
			// is unusable and the whole import is abandoned.
			if rbErr := sp.Rollback(ctx); rbErr != nil {
				c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to commit transaction", Message: rbErr.Error()})
				return
			}
			failed++
			errMsgs = append(errMsgs, fmt.Sprintf("Error %s %s: %v", action, url, err))
			continue
		}
		imported++
	}

	if err := tx.Commit(ctx); err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to commit transaction", Message: err.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"imported": imported,
		"skipped":  skipped,
		"failed":   failed,
		"errors":   errMsgs,
		"message":  fmt.Sprintf("Import completed. Imported: %d, Skipped: %d, Failed: %d", imported, skipped, failed),
	})
}
// stringOrEmpty dereferences s, yielding "" for a nil pointer.
func stringOrEmpty(s *string) string {
	if s != nil {
		return *s
	}
	return ""
}
// int64ToString renders the pointed-to value in base 10, yielding "" for a
// nil pointer.
func int64ToString(i *int64) string {
	if i != nil {
		return fmt.Sprint(*i)
	}
	return ""
}

View file

@ -0,0 +1,369 @@
package handlers
import (
"fmt"
"net/http"
"strconv"
"time"
"github.com/gin-gonic/gin"
"github.com/rss2/backend/internal/db"
"github.com/rss2/backend/internal/models"
)
// NewsResponse is the JSON representation of a news item returned by
// GetNews and GetNewsByID. Pointer fields hold nullable values and
// serialize as null when unset.
type NewsResponse struct {
// ID is the news identifier (a string, unlike the numeric feed IDs).
ID string `json:"id"`
// Titulo is the original title.
Titulo string `json:"titulo"`
// Resumen is the original summary; the queries substitute '' for NULL.
Resumen string `json:"resumen"`
// URL is the address of the article.
URL string `json:"url"`
// Fecha is the item's date, when known.
Fecha *time.Time `json:"fecha"`
// ImagenURL is the optional image address.
ImagenURL *string `json:"imagen_url"`
// CategoriaID optionally references a category.
CategoriaID *int64 `json:"categoria_id"`
// PaisID optionally references a country.
PaisID *int64 `json:"pais_id"`
// FuenteNombre is the source name; it stays empty when the column is NULL.
FuenteNombre string `json:"fuente_nombre"`
// TitleTranslated is the translated title from the joined 'es' translation, if any.
TitleTranslated *string `json:"title_translated"`
// SummaryTranslated is the translated summary from the joined 'es' translation, if any.
SummaryTranslated *string `json:"summary_translated"`
// LangTranslated is the target language of the joined translation, if any.
LangTranslated *string `json:"lang_translated"`
// Entities is filled only by GetNewsByID and omitted from JSON when empty.
Entities []Entity `json:"entities,omitempty"`
}
// GetNews lists news, newest first, with optional filters and pagination.
//
// Query parameters:
//   - page: defaults to 1; values below 1 are treated as 1.
//   - per_page: defaults to 30; values outside 1..100 fall back to 30.
//   - q: case-insensitive substring match on title or summary.
//   - category_id, country_id: equality filters.
//   - translated_only=true: only items whose 'es' translation is done and
//     whose translated title differs from the original.
//
// The response carries "news", "total", "page", "per_page" and
// "total_pages". "news" is always a JSON array, also for a page past the
// last one. A row that cannot be scanned, or an iteration error, yields a
// 500 rather than a silently shortened page.
func GetNews(c *gin.Context) {
	page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
	perPage, _ := strconv.Atoi(c.DefaultQuery("per_page", "30"))
	query := c.Query("q")
	categoryID := c.Query("category_id")
	countryID := c.Query("country_id")
	translatedOnly := c.Query("translated_only") == "true"

	if page < 1 {
		page = 1
	}
	if perPage < 1 || perPage > 100 {
		perPage = 30
	}
	offset := (page - 1) * perPage

	// Build the WHERE clause with numbered placeholders; values never enter the SQL text.
	where := "1=1"
	args := []interface{}{}
	argNum := 1
	if query != "" {
		where += fmt.Sprintf(" AND (n.titulo ILIKE $%d OR n.resumen ILIKE $%d)", argNum, argNum)
		args = append(args, "%"+query+"%")
		argNum++
	}
	if categoryID != "" {
		where += fmt.Sprintf(" AND n.categoria_id = $%d", argNum)
		args = append(args, categoryID)
		argNum++
	}
	if countryID != "" {
		where += fmt.Sprintf(" AND n.pais_id = $%d", argNum)
		args = append(args, countryID)
		argNum++
	}
	if translatedOnly {
		where += " AND t.status = 'done' AND t.titulo_trad IS NOT NULL AND t.titulo_trad != n.titulo"
	}

	ctx := c.Request.Context()

	var total int
	countQuery := fmt.Sprintf("SELECT COUNT(*) FROM noticias n LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es' WHERE %s", where)
	if err := db.GetPool().QueryRow(ctx, countQuery, args...).Scan(&total); err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to count news", Message: err.Error()})
		return
	}

	if total == 0 {
		c.JSON(http.StatusOK, models.NewsListResponse{
			News:       []models.NewsWithTranslations{},
			Total:      0,
			Page:       page,
			PerPage:    perPage,
			TotalPages: 0,
		})
		return
	}

	sqlQuery := fmt.Sprintf(`
SELECT n.id, n.titulo, COALESCE(n.resumen, ''), n.url, n.fecha, n.imagen_url,
n.categoria_id, n.pais_id, n.fuente_nombre,
t.titulo_trad,
t.resumen_trad,
t.lang_to as lang_trad
FROM noticias n
LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es'
WHERE %s
ORDER BY n.fecha DESC LIMIT $%d OFFSET $%d
`, where, argNum, argNum+1)
	args = append(args, perPage, offset)

	rows, err := db.GetPool().Query(ctx, sqlQuery, args...)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch news", Message: err.Error()})
		return
	}
	defer rows.Close()

	// Start from an empty, non-nil slice so an empty page encodes as [].
	newsList := make([]NewsResponse, 0, perPage)
	for rows.Next() {
		var n NewsResponse
		var fuenteNombre *string
		// The ID columns are read as 32-bit values and widened for the response.
		var categoriaID, paisID *int32

		if err := rows.Scan(
			&n.ID, &n.Titulo, &n.Resumen, &n.URL, &n.Fecha, &n.ImagenURL,
			&categoriaID, &paisID, &fuenteNombre,
			&n.TitleTranslated, &n.SummaryTranslated, &n.LangTranslated,
		); err != nil {
			c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch news", Message: err.Error()})
			return
		}

		if fuenteNombre != nil {
			n.FuenteNombre = *fuenteNombre
		}
		if categoriaID != nil {
			catID := int64(*categoriaID)
			n.CategoriaID = &catID
		}
		if paisID != nil {
			pID := int64(*paisID)
			n.PaisID = &pID
		}
		newsList = append(newsList, n)
	}
	if err := rows.Err(); err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to fetch news", Message: err.Error()})
		return
	}

	totalPages := (total + perPage - 1) / perPage
	c.JSON(http.StatusOK, gin.H{
		"news":        newsList,
		"total":       total,
		"page":        page,
		"per_page":    perPage,
		"total_pages": totalPages,
	})
}
// GetNewsByID replies with the news item named by the "id" path parameter,
// including its 'es' translation (when one exists) and the person and
// organization entities tagged on it.
//
// It answers 404 when the item cannot be loaded. Loading the entities is
// best effort: if that query fails, or a row cannot be scanned, the item is
// still returned with whatever entities were read.
func GetNewsByID(c *gin.Context) {
	newsID := c.Param("id")
	ctx := c.Request.Context()

	var item NewsResponse
	var source *string
	// The ID columns are read as 32-bit values and widened for the response.
	var catRaw, countryRaw *int32

	lookupErr := db.GetPool().QueryRow(ctx, `
SELECT n.id, n.titulo, COALESCE(n.resumen, ''), n.url, n.fecha, n.imagen_url,
n.categoria_id, n.pais_id, n.fuente_nombre,
t.titulo_trad,
t.resumen_trad,
t.lang_to as lang_trad
FROM noticias n
LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = 'es'
WHERE n.id = $1`, newsID).Scan(
		&item.ID, &item.Titulo, &item.Resumen, &item.URL, &item.Fecha, &item.ImagenURL,
		&catRaw, &countryRaw, &source,
		&item.TitleTranslated, &item.SummaryTranslated, &item.LangTranslated,
	)
	if lookupErr != nil {
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "News not found"})
		return
	}

	if source != nil {
		item.FuenteNombre = *source
	}
	if catRaw != nil {
		widened := int64(*catRaw)
		item.CategoriaID = &widened
	}
	if countryRaw != nil {
		widened := int64(*countryRaw)
		item.PaisID = &widened
	}

	// Collect the tagged entities; each occurrence is reported with a count of 1.
	found := []Entity{}
	entityRows, queryErr := db.GetPool().Query(ctx, `
SELECT t.valor, t.tipo, 1 as cnt, t.wiki_summary, t.wiki_url, t.image_path
FROM tags_noticia tn
JOIN tags t ON tn.tag_id = t.id
JOIN traducciones tr ON tn.traduccion_id = tr.id
WHERE tr.noticia_id = $1 AND t.tipo IN ('persona', 'organizacion')
`, newsID)
	if queryErr == nil {
		defer entityRows.Close()
		for entityRows.Next() {
			var ent Entity
			if entityRows.Scan(&ent.Valor, &ent.Tipo, &ent.Count, &ent.WikiSummary, &ent.WikiURL, &ent.ImagePath) != nil {
				continue
			}
			found = append(found, ent)
		}
	}
	item.Entities = found

	c.JSON(http.StatusOK, item)
}
// DeleteNews removes the news item named by the "id" path parameter. It
// answers 500 when the delete fails, 404 when no row matched and 200 on
// success.
func DeleteNews(c *gin.Context) {
	newsID := c.Param("id")

	tag, execErr := db.GetPool().Exec(c.Request.Context(), "DELETE FROM noticias WHERE id = $1", newsID)
	switch {
	case execErr != nil:
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to delete news", Message: execErr.Error()})
	case tag.RowsAffected() == 0:
		c.JSON(http.StatusNotFound, models.ErrorResponse{Error: "News not found"})
	default:
		c.JSON(http.StatusOK, models.SuccessResponse{Message: "News deleted successfully"})
	}
}
// Entity is a tagged entity (for example a person or organization) as
// returned by GetNewsByID and GetEntities.
type Entity struct {
// Valor is the entity's name; GetEntities reports the canonical alias name when one exists.
Valor string `json:"valor"`
// Tipo is the entity type, e.g. "persona" or "organizacion".
Tipo string `json:"tipo"`
// Count is the number of tag occurrences (always 1 in GetNewsByID).
Count int `json:"count"`
// WikiSummary is the optional stored wiki summary.
WikiSummary *string `json:"wiki_summary"`
// WikiURL is the optional stored wiki link.
WikiURL *string `json:"wiki_url"`
// ImagePath is the optional stored image path.
ImagePath *string `json:"image_path"`
}
// EntityListResponse is the paginated JSON envelope returned by GetEntities.
type EntityListResponse struct {
// Entities holds the entities of the requested page.
Entities []Entity `json:"entities"`
// Total is the number of distinct entities matching the filters.
Total int `json:"total"`
// Page is the 1-based page number that was served.
Page int `json:"page"`
// PerPage is the page size that was applied.
PerPage int `json:"per_page"`
// TotalPages is Total divided by PerPage, rounded up.
TotalPages int `json:"total_pages"`
}
// GetEntities lists tagged entities of one type, most frequent first, with
// optional filters and pagination. Aliases listed in entity_aliases are
// folded into their canonical name.
//
// Query parameters:
//   - tipo: entity type, defaults to "persona".
//   - country_id, category_id: restrict to news of that country/category.
//   - q: case-insensitive substring match on the (canonical) name.
//   - page: defaults to 1; values below 1 are treated as 1.
//   - per_page: defaults to 50; values outside 1..100 fall back to 50.
//
// Entities with equal counts are ordered by name, so consecutive pages do
// not repeat or drop entries. A row that cannot be scanned, or an
// iteration error, yields a 500 rather than a silently shortened page.
func GetEntities(c *gin.Context) {
	countryID := c.Query("country_id")
	categoryID := c.Query("category_id")
	entityType := c.DefaultQuery("tipo", "persona")
	q := c.Query("q")

	page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
	perPage, _ := strconv.Atoi(c.DefaultQuery("per_page", "50"))
	if page < 1 {
		page = 1
	}
	if perPage < 1 || perPage > 100 {
		perPage = 50
	}
	offset := (page - 1) * perPage

	// Build the WHERE clause; each placeholder number is the argument's position.
	where := "t.tipo = $1"
	args := []interface{}{entityType}
	if countryID != "" {
		where += fmt.Sprintf(" AND n.pais_id = $%d", len(args)+1)
		args = append(args, countryID)
	}
	if categoryID != "" {
		where += fmt.Sprintf(" AND n.categoria_id = $%d", len(args)+1)
		args = append(args, categoryID)
	}
	if q != "" {
		where += fmt.Sprintf(" AND COALESCE(ea.canonical_name, t.valor) ILIKE $%d", len(args)+1)
		args = append(args, "%"+q+"%")
	}

	ctx := c.Request.Context()

	// 1. Count the distinct canonical entities matching the filter.
	countQuery := fmt.Sprintf(`
SELECT COUNT(DISTINCT COALESCE(ea.canonical_name, t.valor))
FROM tags_noticia tn
JOIN tags t ON tn.tag_id = t.id
JOIN traducciones tr ON tn.traduccion_id = tr.id
JOIN noticias n ON tr.noticia_id = n.id
LEFT JOIN entity_aliases ea ON LOWER(ea.alias) = LOWER(t.valor) AND ea.tipo = t.tipo
WHERE %s
`, where)

	var total int
	if err := db.GetPool().QueryRow(ctx, countQuery, args...).Scan(&total); err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get entities count", Message: err.Error()})
		return
	}

	if total == 0 {
		c.JSON(http.StatusOK, EntityListResponse{
			Entities:   []Entity{},
			Total:      0,
			Page:       page,
			PerPage:    perPage,
			TotalPages: 0,
		})
		return
	}

	// 2. Fetch the requested page. The name is a secondary sort key so that
	// ties on the count have a stable order across pages.
	args = append(args, perPage, offset)
	query := fmt.Sprintf(`
SELECT COALESCE(ea.canonical_name, t.valor) as valor, t.tipo, COUNT(*)::int as cnt,
MAX(t.wiki_summary), MAX(t.wiki_url), MAX(t.image_path)
FROM tags_noticia tn
JOIN tags t ON tn.tag_id = t.id
JOIN traducciones tr ON tn.traduccion_id = tr.id
JOIN noticias n ON tr.noticia_id = n.id
LEFT JOIN entity_aliases ea ON LOWER(ea.alias) = LOWER(t.valor) AND ea.tipo = t.tipo
WHERE %s
GROUP BY COALESCE(ea.canonical_name, t.valor), t.tipo
ORDER BY cnt DESC, valor ASC
LIMIT $%d OFFSET $%d
`, where, len(args)-1, len(args))

	rows, err := db.GetPool().Query(ctx, query, args...)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get entities", Message: err.Error()})
		return
	}
	defer rows.Close()

	// Start from an empty, non-nil slice so an empty page encodes as [].
	entities := make([]Entity, 0, perPage)
	for rows.Next() {
		var e Entity
		if err := rows.Scan(&e.Valor, &e.Tipo, &e.Count, &e.WikiSummary, &e.WikiURL, &e.ImagePath); err != nil {
			c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get entities", Message: err.Error()})
			return
		}
		entities = append(entities, e)
	}
	if err := rows.Err(); err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get entities", Message: err.Error()})
		return
	}

	totalPages := (total + perPage - 1) / perPage
	c.JSON(http.StatusOK, EntityListResponse{
		Entities:   entities,
		Total:      total,
		Page:       page,
		PerPage:    perPage,
		TotalPages: totalPages,
	})
}

View file

@ -0,0 +1,265 @@
package handlers
import (
"net/http"
"strconv"
"github.com/gin-gonic/gin"
"github.com/rss2/backend/internal/db"
"github.com/rss2/backend/internal/models"
"github.com/rss2/backend/internal/services"
)
// SearchNews searches news by text and/or filters and replies with a
// paginated models.NewsListResponse.
//
// Query parameters:
//   - q: case-insensitive substring match on title, summary or content.
//   - lang: translation language; defaults to "es" when omitted.
//   - categoria_id, pais_id: numeric equality filters (400 when not numeric).
//   - semantic=true: delegate to services.SemanticSearch instead of SQL.
//   - page: defaults to 1; per_page: defaults to 30, limited to 1..100.
//
// At least one of q, categoria_id, pais_id or lang must be supplied.
//
// Placeholder layout: $1 is always the language (used by the translation
// join and the language filter), the optional filters follow, and LIMIT and
// OFFSET come last. Every placeholder number is derived from the argument's
// position in args, so the SQL text and the bound values cannot drift apart.
//
// NOTE(review): the language filter in WHERE excludes news that have no
// translation into lang, which makes the LEFT JOIN behave like an inner
// join — confirm that this is the intended search scope.
// NOTE(review): the column names used here (imagen, feed_id, created_at, …)
// differ from those used by GetNews-style queries elsewhere; verify them
// against the noticias schema.
func SearchNews(c *gin.Context) {
	query := c.Query("q")
	page, _ := strconv.Atoi(c.DefaultQuery("page", "1"))
	perPage, _ := strconv.Atoi(c.DefaultQuery("per_page", "30"))
	lang := c.DefaultQuery("lang", "")
	categoriaID := c.Query("categoria_id")
	paisID := c.Query("pais_id")
	useSemantic := c.Query("semantic") == "true"

	if query == "" && categoriaID == "" && paisID == "" && lang == "" {
		c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "At least one filter is required (q, categoria_id, pais_id, or lang)"})
		return
	}

	if page < 1 {
		page = 1
	}
	if perPage < 1 || perPage > 100 {
		perPage = 30
	}

	// Default to Spanish if no lang specified
	if lang == "" {
		lang = "es"
	}

	ctx := c.Request.Context()

	if useSemantic {
		results, err := services.SemanticSearch(ctx, query, lang, page, perPage)
		if err != nil {
			c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Semantic search failed", Message: err.Error()})
			return
		}
		c.JSON(http.StatusOK, results)
		return
	}

	offset := (page - 1) * perPage

	// Build the filter; a value is appended first and its placeholder is
	// then numbered from the slice length.
	args := []interface{}{lang}
	whereClause := "WHERE 1=1 AND t.lang_to = $1"

	if query != "" {
		args = append(args, "%"+query+"%")
		p := "$" + strconv.Itoa(len(args))
		whereClause += " AND (n.titulo ILIKE " + p + " OR n.resumen ILIKE " + p + " OR n.contenido ILIKE " + p + ")"
	}

	if categoriaID != "" {
		catID, err := strconv.ParseInt(categoriaID, 10, 64)
		if err != nil {
			c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid categoria_id"})
			return
		}
		args = append(args, catID)
		whereClause += " AND n.categoria_id = $" + strconv.Itoa(len(args))
	}

	if paisID != "" {
		pID, err := strconv.ParseInt(paisID, 10, 64)
		if err != nil {
			c.JSON(http.StatusBadRequest, models.ErrorResponse{Error: "Invalid pais_id"})
			return
		}
		args = append(args, pID)
		whereClause += " AND n.pais_id = $" + strconv.Itoa(len(args))
	}

	// Shared by the list and the count query.
	fromClause := `
FROM noticias n
LEFT JOIN traducciones t ON t.noticia_id = n.id AND t.lang_to = $1
` + whereClause

	// The selected columns match the Scan targets below one to one.
	sqlQuery := `
SELECT n.id, n.titulo, n.resumen, n.contenido, n.url, n.imagen,
n.feed_id, n.lang, n.categoria_id, n.pais_id, n.created_at, n.updated_at,
COALESCE(t.titulo_trad, '') as titulo_trad,
COALESCE(t.resumen_trad, '') as resumen_trad,
t.lang_to as lang_trad` + fromClause + `
ORDER BY n.created_at DESC
LIMIT $` + strconv.Itoa(len(args)+1) + ` OFFSET $` + strconv.Itoa(len(args)+2)

	// args keeps only the filter values; the list query gets LIMIT/OFFSET on top.
	listArgs := append(args[:len(args):len(args)], perPage, offset)

	rows, err := db.GetPool().Query(ctx, sqlQuery, listArgs...)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Search failed", Message: err.Error()})
		return
	}
	defer rows.Close()

	// Start from an empty, non-nil slice so an empty result encodes as [].
	newsList := make([]models.NewsWithTranslations, 0, perPage)
	for rows.Next() {
		var n models.NewsWithTranslations
		var imagen *string

		if err := rows.Scan(
			&n.ID, &n.Title, &n.Summary, &n.Content, &n.URL, &imagen,
			&n.FeedID, &n.Lang, &n.CategoryID, &n.CountryID, &n.CreatedAt, &n.UpdatedAt,
			&n.TitleTranslated, &n.SummaryTranslated, &n.LangTranslated,
		); err != nil {
			c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Search failed", Message: err.Error()})
			return
		}

		if imagen != nil {
			n.ImageURL = imagen
		}
		newsList = append(newsList, n)
	}
	if err := rows.Err(); err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Search failed", Message: err.Error()})
		return
	}

	// Total count over the same filter; if counting fails, fall back to the
	// size of the page that was read.
	var total int
	if err := db.GetPool().QueryRow(ctx, `
SELECT COUNT(*)`+fromClause, args...).Scan(&total); err != nil {
		total = len(newsList)
	}

	totalPages := (total + perPage - 1) / perPage

	response := models.NewsListResponse{
		News:       newsList,
		Total:      total,
		Page:       page,
		PerPage:    perPage,
		TotalPages: totalPages,
	}

	c.JSON(http.StatusOK, response)
}
// GetStats replies with aggregate counters (news, active feeds, users, news
// today / this week / this month, translated news) plus the ten categories
// and ten countries with the most news.
//
// A failure of the counters query yields a 500. The two top-ten lists are
// best effort: if a list query fails the list is left empty, and a row that
// cannot be scanned is skipped instead of being appended half-filled.
func GetStats(c *gin.Context) {
	ctx := c.Request.Context()

	var stats models.Stats
	err := db.GetPool().QueryRow(ctx, `
SELECT
(SELECT COUNT(*) FROM noticias) as total_news,
(SELECT COUNT(*) FROM feeds WHERE activo = true) as total_feeds,
(SELECT COUNT(*) FROM users) as total_users,
(SELECT COUNT(*) FROM noticias WHERE fecha::date = CURRENT_DATE) as news_today,
(SELECT COUNT(*) FROM noticias WHERE fecha >= DATE_TRUNC('week', CURRENT_DATE)) as news_this_week,
(SELECT COUNT(*) FROM noticias WHERE fecha >= DATE_TRUNC('month', CURRENT_DATE)) as news_this_month,
(SELECT COUNT(DISTINCT noticia_id) FROM traducciones WHERE status = 'done') as total_translated
`).Scan(
		&stats.TotalNews, &stats.TotalFeeds, &stats.TotalUsers,
		&stats.NewsToday, &stats.NewsThisWeek, &stats.NewsThisMonth,
		&stats.TotalTranslated,
	)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get stats", Message: err.Error()})
		return
	}

	// Top categories by news count.
	catRows, err := db.GetPool().Query(ctx, `
SELECT c.id, c.nombre, COUNT(n.id) as count
FROM categorias c
LEFT JOIN noticias n ON n.categoria_id = c.id
GROUP BY c.id, c.nombre
ORDER BY count DESC
LIMIT 10
`)
	if err == nil {
		for catRows.Next() {
			var cs models.CategoryStat
			if err := catRows.Scan(&cs.CategoryID, &cs.CategoryName, &cs.Count); err != nil {
				continue
			}
			stats.TopCategories = append(stats.TopCategories, cs)
		}
		// Release the result set before issuing the next query.
		catRows.Close()
	}

	// Top countries by news count.
	countryRows, err := db.GetPool().Query(ctx, `
SELECT p.id, p.nombre, p.flag_emoji, COUNT(n.id) as count
FROM paises p
LEFT JOIN noticias n ON n.pais_id = p.id
GROUP BY p.id, p.nombre, p.flag_emoji
ORDER BY count DESC
LIMIT 10
`)
	if err == nil {
		for countryRows.Next() {
			var cs models.CountryStat
			if err := countryRows.Scan(&cs.CountryID, &cs.CountryName, &cs.FlagEmoji, &cs.Count); err != nil {
				continue
			}
			stats.TopCountries = append(stats.TopCountries, cs)
		}
		countryRows.Close()
	}

	c.JSON(http.StatusOK, stats)
}
// GetCategories replies with every category as a JSON array of
// {id, nombre} objects ordered by name. The array is empty, not null, when
// there are no categories. A query, scan or iteration failure yields a 500
// instead of a list containing zero-valued entries.
func GetCategories(c *gin.Context) {
	rows, err := db.GetPool().Query(c.Request.Context(), `
SELECT id, nombre FROM categorias ORDER BY nombre`)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get categories", Message: err.Error()})
		return
	}
	defer rows.Close()

	type Category struct {
		ID     int64  `json:"id"`
		Nombre string `json:"nombre"`
	}

	// Start from an empty, non-nil slice so an empty table encodes as [].
	categories := []Category{}
	for rows.Next() {
		var cat Category
		if err := rows.Scan(&cat.ID, &cat.Nombre); err != nil {
			c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get categories", Message: err.Error()})
			return
		}
		categories = append(categories, cat)
	}
	if err := rows.Err(); err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get categories", Message: err.Error()})
		return
	}

	c.JSON(http.StatusOK, categories)
}
// GetCountries replies with every country as a JSON array of
// {id, nombre, continente} objects ordered by name. The array is empty, not
// null, when there are no countries.
//
// The continent comes from a LEFT JOIN, so it is NULL for a country without
// a continent; COALESCE turns that into an empty string so the value can be
// read into the plain string field. A query, scan or iteration failure
// yields a 500 instead of a list containing zero-valued entries.
func GetCountries(c *gin.Context) {
	rows, err := db.GetPool().Query(c.Request.Context(), `
SELECT p.id, p.nombre, COALESCE(c.nombre, '') as continente
FROM paises p
LEFT JOIN continentes c ON c.id = p.continente_id
ORDER BY p.nombre`)
	if err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get countries", Message: err.Error()})
		return
	}
	defer rows.Close()

	type Country struct {
		ID         int64  `json:"id"`
		Nombre     string `json:"nombre"`
		Continente string `json:"continente"`
	}

	// Start from an empty, non-nil slice so an empty table encodes as [].
	countries := []Country{}
	for rows.Next() {
		var country Country
		if err := rows.Scan(&country.ID, &country.Nombre, &country.Continente); err != nil {
			c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get countries", Message: err.Error()})
			return
		}
		countries = append(countries, country)
	}
	if err := rows.Err(); err != nil {
		c.JSON(http.StatusInternalServerError, models.ErrorResponse{Error: "Failed to get countries", Message: err.Error()})
		return
	}

	c.JSON(http.StatusOK, countries)
}

View file

@ -0,0 +1,108 @@
package middleware
import (
"net/http"
"strings"
"github.com/gin-gonic/gin"
"github.com/golang-jwt/jwt/v5"
)
// jwtSecret is the key AuthRequired hands to the JWT parser to verify
// tokens. It is nil until SetJWTSecret is called.
var jwtSecret []byte
// SetJWTSecret stores secret as the package-wide key used to verify JWTs.
func SetJWTSecret(secret string) {
jwtSecret = []byte(secret)
}
// Claims is the JWT payload parsed by AuthRequired and stored in the Gin
// context under the "user" key. It carries the application-specific fields
// below in addition to the embedded registered claims.
type Claims struct {
	UserID   int64  `json:"user_id"`
	Email    string `json:"email"`
	Username string `json:"username"`
	IsAdmin  bool   `json:"is_admin"`
	jwt.RegisteredClaims
}
// AuthRequired returns a Gin middleware that authenticates requests carrying
// an "Authorization: Bearer <token>" header.
//
// The token is parsed into a Claims value using the secret installed with
// SetJWTSecret. On success the claims are stored in the context under the
// "user" key and the next handler runs. A missing header, a header without
// the "Bearer " prefix, or an invalid token aborts the request with 401.
//
// Verification fails closed: tokens whose signing method is not HMAC are
// rejected, and so is every token while no secret has been configured
// (otherwise a token signed with an empty key would validate).
func AuthRequired() gin.HandlerFunc {
	return func(c *gin.Context) {
		authHeader := c.GetHeader("Authorization")
		if authHeader == "" {
			c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "Authorization header required"})
			return
		}

		// TrimPrefix returns its input unchanged when the prefix is absent.
		tokenString := strings.TrimPrefix(authHeader, "Bearer ")
		if tokenString == authHeader {
			c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "Bearer token required"})
			return
		}

		claims := &Claims{}
		token, err := jwt.ParseWithClaims(tokenString, claims, func(t *jwt.Token) (interface{}, error) {
			// The shared secret is only meaningful for the HMAC family.
			if _, ok := t.Method.(*jwt.SigningMethodHMAC); !ok {
				return nil, jwt.ErrTokenUnverifiable
			}
			if len(jwtSecret) == 0 {
				return nil, jwt.ErrInvalidKey
			}
			return jwtSecret, nil
		})
		if err != nil || !token.Valid {
			c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "Invalid token"})
			return
		}

		c.Set("user", claims)
		c.Next()
	}
}
// AdminRequired returns a Gin middleware that only lets administrators
// through. It reads the value stored under the "user" context key (set by
// AuthRequired) and expects a *Claims.
//
// Responses:
//   - 401 when no "user" value is present, or when it is not a usable *Claims
//     (checked explicitly so a foreign or nil value cannot panic the handler);
//   - 403 when the claims do not have IsAdmin set.
func AdminRequired() gin.HandlerFunc {
	return func(c *gin.Context) {
		userVal, exists := c.Get("user")
		if !exists {
			c.JSON(http.StatusUnauthorized, gin.H{"error": "Not authenticated"})
			c.Abort()
			return
		}

		claims, ok := userVal.(*Claims)
		if !ok || claims == nil {
			c.JSON(http.StatusUnauthorized, gin.H{"error": "Not authenticated"})
			c.Abort()
			return
		}

		if !claims.IsAdmin {
			c.JSON(http.StatusForbidden, gin.H{"error": "Admin access required"})
			c.Abort()
			return
		}

		c.Next()
	}
}
// CORSMiddleware returns a Gin middleware that sets permissive CORS response
// headers on every request and answers OPTIONS requests with 204 without
// invoking further handlers.
//
// NOTE(review): the wildcard Allow-Origin is sent together with
// Allow-Credentials "true"; browsers do not accept that combination for
// credentialed requests. Confirm whether an explicit origin allow-list is
// needed before relying on cookies or other credentials.
func CORSMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		headers := c.Writer.Header()
		headers.Set("Access-Control-Allow-Origin", "*")
		headers.Set("Access-Control-Allow-Credentials", "true")
		headers.Set("Access-Control-Allow-Headers", "Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization, accept, origin, Cache-Control, X-Requested-With")
		headers.Set("Access-Control-Allow-Methods", "POST, OPTIONS, GET, PUT, DELETE, PATCH")

		if c.Request.Method != http.MethodOptions {
			c.Next()
			return
		}
		// Preflight: reply immediately with the headers set above.
		c.AbortWithStatus(http.StatusNoContent)
	}
}
// LoggerMiddleware returns a Gin middleware that runs the remaining handlers
// and then inspects the response status. It is currently a placeholder: the
// branch for statuses >= 400 is empty, so nothing is logged.
func LoggerMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		c.Next()

		if failed := c.Writer.Status() >= 400; failed {
			// Log error responses
		}
	}
}
// RateLimitMiddleware returns a Gin middleware that simply forwards to the
// next handler. It is a placeholder: requestsPerMinute is accepted but not
// used, so no limiting takes place.
func RateLimitMiddleware(requestsPerMinute int) gin.HandlerFunc {
	passThrough := func(c *gin.Context) {
		c.Next()
	}
	return passThrough
}

View file

@ -0,0 +1,17 @@
package models
import "time"
// EntityAlias links one alias string to a canonical entity name. Tipo holds
// the entity kind, and CreatedAt is serialized as "created_at".
type EntityAlias struct {
	ID            int       `json:"id"`
	CanonicalName string    `json:"canonical_name"`
	Alias         string    `json:"alias"`
	Tipo          string    `json:"tipo"`
	CreatedAt     time.Time `json:"created_at"`
}
// EntityAliasRequest is the request body for registering aliases of a
// canonical name. The binding tags require all three fields, at least one
// alias, and a Tipo of persona, organizacion, lugar or tema.
type EntityAliasRequest struct {
	CanonicalName string   `json:"canonical_name" binding:"required"`
	Aliases       []string `json:"aliases" binding:"required,min=1"`
	Tipo          string   `json:"tipo" binding:"required,oneof=persona organizacion lugar tema"`
}

View file

@ -0,0 +1,171 @@
package models
import (
"time"
)
// News is the JSON shape of a single news item. ImageURL and PublishedAt are
// pointers and therefore serialize as null when unset.
type News struct {
	ID          int64      `json:"id"`
	Title       string     `json:"title"`
	Summary     string     `json:"summary"`
	Content     string     `json:"content"`
	URL         string     `json:"url"`
	ImageURL    *string    `json:"image_url"`
	PublishedAt *time.Time `json:"published_at"`
	Lang        string     `json:"lang"`
	FeedID      int64      `json:"feed_id"`
	CreatedAt   time.Time  `json:"created_at"`
	UpdatedAt   time.Time  `json:"updated_at"`
}
// NewsWithTranslations is a news item plus its optional category, country and
// translated texts. Every pointer field serializes as null when unset. Note
// that PublishedAt is carried as an optional string here, not a time value.
type NewsWithTranslations struct {
	ID                int64     `json:"id"`
	Title             string    `json:"title"`
	Summary           string    `json:"summary"`
	Content           string    `json:"content"`
	URL               string    `json:"url"`
	ImageURL          *string   `json:"image_url"`
	PublishedAt       *string   `json:"published_at"`
	Lang              string    `json:"lang"`
	FeedID            int64     `json:"feed_id"`
	CategoryID        *int64    `json:"category_id"`
	CountryID         *int64    `json:"country_id"`
	CreatedAt         time.Time `json:"created_at"`
	UpdatedAt         time.Time `json:"updated_at"`
	TitleTranslated   *string   `json:"title_translated"`
	SummaryTranslated *string   `json:"summary_translated"`
	ContentTranslated *string   `json:"content_translated"`
	LangTranslated    *string   `json:"lang_translated"`
}
// Feed is the JSON shape of a feed. Only ID, Title, URL, Active and the two
// timestamps are mandatory; the pointer fields serialize as null when unset.
type Feed struct {
	ID          int64      `json:"id"`
	Title       string     `json:"title"`
	URL         string     `json:"url"`
	SiteURL     *string    `json:"site_url"`
	Description *string    `json:"description"`
	ImageURL    *string    `json:"image_url"`
	Language    *string    `json:"language"`
	CategoryID  *int64     `json:"category_id"`
	CountryID   *int64     `json:"country_id"`
	Active      bool       `json:"active"`
	LastFetched *time.Time `json:"last_fetched"`
	CreatedAt   time.Time  `json:"created_at"`
	UpdatedAt   time.Time  `json:"updated_at"`
}
// Category is the JSON shape of a category. ParentID is optional and
// serializes as null when the category has no parent set.
type Category struct {
	ID       int64  `json:"id"`
	Name     string `json:"name"`
	Color    string `json:"color"`
	Icon     string `json:"icon"`
	ParentID *int64 `json:"parent_id"`
}
// Country is the JSON shape of a country: identifier, name, code, continent
// name and flag emoji.
type Country struct {
	ID        int64  `json:"id"`
	Name      string `json:"name"`
	Code      string `json:"code"`
	Continent string `json:"continent"`
	FlagEmoji string `json:"flag_emoji"`
}
// Translation is the JSON shape of one translation of a news item, identified
// by NewsID and the source/target language pair. Error is optional and
// serializes as null when unset.
type Translation struct {
	ID        int64     `json:"id"`
	NewsID    int64     `json:"news_id"`
	LangFrom  string    `json:"lang_from"`
	LangTo    string    `json:"lang_to"`
	Title     string    `json:"title"`
	Summary   string    `json:"summary"`
	Status    string    `json:"status"`
	Error     *string   `json:"error"`
	CreatedAt time.Time `json:"created_at"`
	UpdatedAt time.Time `json:"updated_at"`
}
// User is the JSON shape of an account. PasswordHash is tagged json:"-" and
// is therefore never included in serialized output.
type User struct {
	ID           int64     `json:"id"`
	Email        string    `json:"email"`
	Username     string    `json:"username"`
	PasswordHash string    `json:"-"`
	IsAdmin      bool      `json:"is_admin"`
	CreatedAt    time.Time `json:"created_at"`
	UpdatedAt    time.Time `json:"updated_at"`
}
// SearchHistory is the JSON shape of one recorded search: the user, the
// query text, optional category/country filters (null when unset), the
// number of results and the time of the search.
type SearchHistory struct {
	ID           int64     `json:"id"`
	UserID       int64     `json:"user_id"`
	Query        string    `json:"query"`
	CategoryID   *int64    `json:"category_id"`
	CountryID    *int64    `json:"country_id"`
	ResultsCount int       `json:"results_count"`
	SearchedAt   time.Time `json:"searched_at"`
}
// NewsListResponse is one page of news items together with the pagination
// counters (total items, current page, page size and total pages).
type NewsListResponse struct {
	News       []NewsWithTranslations `json:"news"`
	Total      int                    `json:"total"`
	Page       int                    `json:"page"`
	PerPage    int                    `json:"per_page"`
	TotalPages int                    `json:"total_pages"`
}
// FeedListResponse is one page of feeds together with the pagination
// counters (total items, current page, page size and total pages).
type FeedListResponse struct {
	Feeds      []Feed `json:"feeds"`
	Total      int    `json:"total"`
	Page       int    `json:"page"`
	PerPage    int    `json:"per_page"`
	TotalPages int    `json:"total_pages"`
}
// Stats is the JSON shape of the statistics payload: global totals, news
// counts for the current day/week/month, and the top categories and
// countries by count.
type Stats struct {
	TotalNews       int64          `json:"total_news"`
	TotalFeeds      int64          `json:"total_feeds"`
	TotalUsers      int64          `json:"total_users"`
	TotalTranslated int64          `json:"total_translated"`
	NewsToday       int64          `json:"news_today"`
	NewsThisWeek    int64          `json:"news_this_week"`
	NewsThisMonth   int64          `json:"news_this_month"`
	TopCategories   []CategoryStat `json:"top_categories"`
	TopCountries    []CountryStat  `json:"top_countries"`
}
// CategoryStat pairs a category (id and name) with a count.
type CategoryStat struct {
	CategoryID   int64  `json:"category_id"`
	CategoryName string `json:"category_name"`
	Count        int64  `json:"count"`
}
// CountryStat pairs a country (id, name and flag emoji) with a count.
type CountryStat struct {
	CountryID   int64  `json:"country_id"`
	CountryName string `json:"country_name"`
	FlagEmoji   string `json:"flag_emoji"`
	Count       int64  `json:"count"`
}
// LoginRequest is the login request body. The binding tags require a valid
// e-mail address and a password of at least 6 characters.
type LoginRequest struct {
	Email    string `json:"email" binding:"required,email"`
	Password string `json:"password" binding:"required,min=6"`
}
// RegisterRequest is the registration request body. The binding tags require
// a valid e-mail address, a username of 3 to 50 characters and a password of
// at least 6 characters.
type RegisterRequest struct {
	Email    string `json:"email" binding:"required,email"`
	Username string `json:"username" binding:"required,min=3,max=50"`
	Password string `json:"password" binding:"required,min=6"`
}
// AuthResponse is the payload returned after authentication: the issued
// token and the user it belongs to. IsFirstUser is omitted from the JSON
// output when false.
type AuthResponse struct {
	Token       string `json:"token"`
	User        User   `json:"user"`
	IsFirstUser bool   `json:"is_first_user,omitempty"`
}
// ErrorResponse is the JSON body used for failed requests. Error is always
// present; Message is omitted from the output when empty.
type ErrorResponse struct {
	Error   string `json:"error"`
	Message string `json:"message,omitempty"`
}
// SuccessResponse is the JSON body used to acknowledge a successful request
// with a single human-readable message.
type SuccessResponse struct {
	Message string `json:"message"`
}

View file

@ -0,0 +1,170 @@
package services
import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"time"
"github.com/rss2/backend/internal/config"
"github.com/rss2/backend/internal/models"
)
// cfg is the configuration the service helpers in this package read their
// endpoint URLs from. It is nil until Init is called.
var cfg *config.Config

// Init installs c as the configuration used by this package.
func Init(c *config.Config) {
	cfg = c
}
// TranslationRequest is the JSON body posted to the translation service:
// the source and target languages and the texts to translate.
type TranslationRequest struct {
	SourceLang string   `json:"source_lang"`
	TargetLang string   `json:"target_lang"`
	Texts      []string `json:"texts"`
}
// TranslationResponse is the JSON body decoded from the translation
// service's reply; Translations carries the translated texts.
type TranslationResponse struct {
	Translations []string `json:"translations"`
}
func Translate(ctx context.Context, sourceLang, targetLang string, texts []string) ([]string, error) {
if len(texts) == 0 {
return nil, nil
}
reqBody := TranslationRequest{
SourceLang: sourceLang,
TargetLang: targetLang,
Texts: texts,
}
body, err := json.Marshal(reqBody)
if err != nil {
return nil, err
}
httpClient := &http.Client{Timeout: 30 * time.Second}
resp, err := httpClient.Post(cfg.TranslationURL+"/translate", "application/json", bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("translation request failed: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("translation service returned status %d", resp.StatusCode)
}
var result TranslationResponse
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
return nil, err
}
return result.Translations, nil
}
// EmbeddingRequest is the JSON body posted to the embeddings endpoint: the
// model name and the list of input texts.
type EmbeddingRequest struct {
	Model string   `json:"model"`
	Input []string `json:"input"`
}
// EmbeddingResponse is the JSON body decoded from the embeddings endpoint;
// Embeddings holds one float vector per entry of the "embeddings" array.
type EmbeddingResponse struct {
	Embeddings [][]float64 `json:"embeddings"`
}
func GetEmbeddings(ctx context.Context, texts []string) ([][]float64, error) {
if len(texts) == 0 {
return nil, nil
}
reqBody := EmbeddingRequest{
Model: "mxbai-embed-large",
Input: texts,
}
body, err := json.Marshal(reqBody)
if err != nil {
return nil, err
}
httpClient := &http.Client{Timeout: 60 * time.Second}
resp, err := httpClient.Post(cfg.OllamaURL+"/api/embeddings", "application/json", bytes.NewReader(body))
if err != nil {
return nil, fmt.Errorf("embeddings request failed: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("embeddings service returned status %d", resp.StatusCode)
}
var result EmbeddingResponse
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
return nil, err
}
return result.Embeddings, nil
}
// NERRequest is the JSON body posted to the NER service: the text to
// analyse.
type NERRequest struct {
	Text string `json:"text"`
}
// NERResponse is the JSON body decoded from the NER service's reply;
// Entities carries the contents of its "entities" array.
type NERResponse struct {
	Entities []Entity `json:"entities"`
}
// Entity is one element of the NER service's "entities" array: the matched
// text, its label, and the start/end values reported by the service.
type Entity struct {
	Text  string `json:"text"`
	Label string `json:"label"`
	Start int    `json:"start"`
	End   int    `json:"end"`
}
// ExtractEntities posts text to cfg.SpacyURL + "/ner" and returns the
// "entities" array from the JSON reply.
//
// The request is bound to ctx, so cancelling ctx aborts it; independently of
// ctx the call is capped at 30 seconds. A nil ctx is treated as
// context.Background() to stay compatible with callers that passed nil while
// ctx was still ignored.
//
// Errors are returned when Init has not been called, when the request cannot
// be built or sent, when the service answers with a status other than 200,
// or when the reply is not valid JSON.
func ExtractEntities(ctx context.Context, text string) ([]Entity, error) {
	if cfg == nil {
		return nil, fmt.Errorf("NER request failed: services.Init has not been called")
	}
	if ctx == nil {
		ctx = context.Background()
	}

	body, err := json.Marshal(NERRequest{Text: text})
	if err != nil {
		return nil, err
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, cfg.SpacyURL+"/ner", bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("NER request failed: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	httpClient := &http.Client{Timeout: 30 * time.Second}
	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("NER request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("NER service returned status %d", resp.StatusCode)
	}

	var result NERResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, err
	}
	return result.Entities, nil
}
// SemanticSearch computes an embedding for query via GetEmbeddings and
// returns a page of results. It is currently a placeholder: the embedding is
// not used for any lookup and lang is ignored, so the result never contains
// news items.
//
// Returns the GetEmbeddings error unchanged on failure, a zero-valued
// response when no embedding came back, and otherwise an empty (non-nil)
// news list that echoes page and perPage.
func SemanticSearch(ctx context.Context, query, lang string, page, perPage int) (*models.NewsListResponse, error) {
	vectors, err := GetEmbeddings(ctx, []string{query})
	switch {
	case err != nil:
		return nil, err
	case len(vectors) == 0:
		return &models.NewsListResponse{}, nil
	}

	// Total and TotalPages keep their zero values.
	result := &models.NewsListResponse{
		News:    []models.NewsWithTranslations{},
		Page:    page,
		PerPage: perPage,
	}
	return result, nil
}

View file

@ -0,0 +1,83 @@
package workers
import (
	"context"
	"fmt"
	"net/url"
	"os"
	"strconv"
	"time"

	"github.com/jackc/pgx/v5/pgxpool"
)
// pool is the package-wide pgx connection pool. It is assigned by Connect,
// handed out by GetPool and shut down by Close; it is nil before Connect runs.
var pool *pgxpool.Pool
// Config holds the PostgreSQL connection settings consumed by Connect:
// server host and port, database name, and the credentials to log in with.
type Config struct {
	Host     string
	Port     int
	DBName   string
	User     string
	Password string
}
// LoadDBConfig builds a Config from the DB_HOST, DB_PORT, DB_NAME, DB_USER
// and DB_PASS environment variables. Unset or empty variables fall back to
// localhost, 5432, rss, rss and rss respectively.
func LoadDBConfig() *Config {
	loaded := new(Config)
	loaded.Host = getEnv("DB_HOST", "localhost")
	loaded.Port = getEnvInt("DB_PORT", 5432)
	loaded.DBName = getEnv("DB_NAME", "rss")
	loaded.User = getEnv("DB_USER", "rss")
	loaded.Password = getEnv("DB_PASS", "rss")
	return loaded
}
// Connect opens the package-wide connection pool described by cfg and
// verifies it with a ping. sslmode is fixed to "disable".
//
// Pool limits: at most 25 and at least 5 connections, a maximum connection
// lifetime of one hour and a maximum idle time of 30 minutes.
//
// The package-level pool is only replaced once the ping has succeeded. If
// the ping fails, the freshly created pool is closed instead of being leaked
// and left installed in a broken state. Returns a wrapped error when the
// connection string cannot be parsed, the pool cannot be created, or the
// ping fails.
func Connect(cfg *Config) error {
	// Assemble the DSN with net/url so that credentials and database names
	// containing reserved characters (@, :, /, #, ?) are percent-encoded
	// instead of corrupting the connection string.
	dsn := (&url.URL{
		Scheme:   "postgres",
		User:     url.UserPassword(cfg.User, cfg.Password),
		Host:     fmt.Sprintf("%s:%d", cfg.Host, cfg.Port),
		Path:     "/" + cfg.DBName,
		RawQuery: "sslmode=disable",
	}).String()

	poolConfig, err := pgxpool.ParseConfig(dsn)
	if err != nil {
		return fmt.Errorf("failed to parse config: %w", err)
	}
	poolConfig.MaxConns = 25
	poolConfig.MinConns = 5
	poolConfig.MaxConnLifetime = time.Hour
	poolConfig.MaxConnIdleTime = 30 * time.Minute

	newPool, err := pgxpool.NewWithConfig(context.Background(), poolConfig)
	if err != nil {
		return fmt.Errorf("failed to create pool: %w", err)
	}
	if err := newPool.Ping(context.Background()); err != nil {
		newPool.Close()
		return fmt.Errorf("failed to ping database: %w", err)
	}

	pool = newPool
	return nil
}
// GetPool returns the package-wide connection pool, which is nil until
// Connect has assigned it.
func GetPool() *pgxpool.Pool {
	current := pool
	return current
}
// Close shuts down the package-wide connection pool. It does nothing when no
// pool has been assigned.
func Close() {
	if pool == nil {
		return
	}
	pool.Close()
}
// getEnv returns the value of the environment variable key, or defaultValue
// when the variable is unset or set to the empty string.
func getEnv(key, defaultValue string) string {
	value, found := os.LookupEnv(key)
	if !found || value == "" {
		return defaultValue
	}
	return value
}
// getEnvInt returns the environment variable key parsed as a decimal
// integer. defaultValue is returned when the variable is unset, empty, or
// not a valid integer.
func getEnvInt(key string, defaultValue int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	parsed, err := strconv.Atoi(raw)
	if err != nil {
		return defaultValue
	}
	return parsed
}

BIN
backend/server Executable file

Binary file not shown.