- sub-nav.css: barra de navegación compartida con botones estilo home (fondos por sección, texto negro, borde negro, hover neón, fix WebGL z-index) - Móvil: panel de detalle ocupa 50% inferior, grafo permanece visible arriba - Imágenes de nodo duplicadas en tamaño (160×104), detail-img a 28vh - output_*.js: función showEgoGraph — filtra el grafo al ego-network del nodo seleccionado; botón "Ver solo conexiones" (solo si hay relaciones); botón flotante "← Volver al grafo completo" - int-sec.js: eliminado makeTextSprite, igualado al resto (nulos para no-imagen) - Eliminado footer de los 5 sub-HTML - image_analyzer.py: cuantización int4 (NF4) para Qwen3-VL-8B → 6.4 GB VRAM Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
341 lines
15 KiB
Python
341 lines
15 KiB
Python
"""
|
||
image_analyzer.py
|
||
-----------------
|
||
Analiza imágenes con Qwen3-VL-8B-Instruct (HuggingFace transformers).
|
||
Extrae tema, subtema, keywords, descripción y entidades.
|
||
|
||
Mejoras:
|
||
- Opción 3: Resume — salta imágenes ya analizadas en MongoDB
|
||
- Opción 4: Prioriza imágenes cuyos artículos ya están en MongoDB
|
||
- Opción 5: Batch inference — procesa N imágenes a la vez (ahorra RAM en activaciones)
|
||
|
||
Uso:
|
||
analyzer = ImageAnalyzer()
|
||
result = analyzer.analyze("foto.jpg")
|
||
results = analyzer.analyze_folder("./mis_imagenes/", batch_size=4)
|
||
results = analyzer.analyze_folder("./mis_imagenes/", resume=True)
|
||
"""
|
||
|
||
import json
|
||
import os
|
||
import re
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
|
||
import torch
|
||
from PIL import Image
|
||
from transformers import Qwen3VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
|
||
|
||
# ── Configuración ──────────────────────────────────────────────────────────────
|
||
|
||
MODEL_ID = os.getenv("VISION_MODEL", "Qwen/Qwen3-VL-8B-Instruct")
|
||
CACHE_DIR = os.getenv("HF_HOME", "/var/www/theflows.net/flujos/FLUJOS_DATOS/IMAGENES/model_cache")
|
||
|
||
SUPPORTED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}
|
||
|
||
# int4 via bitsandbytes: modelo ocupa ~4-5GB VRAM en lugar de ~16GB bfloat16
|
||
# RTX 3060 12GB → sobra VRAM para activaciones
|
||
DEFAULT_BATCH_SIZE = 1 # batch 1 para seguridad con 12GB
|
||
|
||
KEYWORD_PROMPT = """Analiza esta imagen en detalle.
|
||
Devuelve ÚNICAMENTE un objeto JSON válido con esta estructura exacta, sin texto adicional:
|
||
|
||
{
|
||
"tema": "tema principal de la imagen (1-3 palabras en español)",
|
||
"subtema": "subtema específico (1-4 palabras en español)",
|
||
"keywords": ["palabra1", "palabra2", "palabra3"],
|
||
"descripcion": "descripción breve y objetiva de lo que muestra la imagen (1-2 frases)",
|
||
"entidades": ["nombre_propio1", "organizacion1", "lugar1"],
|
||
"idioma_detectado": "es/en/fr/..."
|
||
}
|
||
|
||
Requisitos:
|
||
- keywords: entre 8 y 15 palabras clave relevantes, en minúsculas
|
||
- entidades: solo si son claramente visibles/identificables, puede estar vacío []
|
||
- todo el contenido en español salvo entidades propias
|
||
- SOLO el JSON, sin markdown ni explicaciones"""
|
||
|
||
|
||
# ── Clase principal ────────────────────────────────────────────────────────────
|
||
|
||
class ImageAnalyzer:
|
||
|
||
def __init__(self, model_id: str = MODEL_ID):
|
||
self.model_id = model_id
|
||
self._model = None
|
||
self._processor = None
|
||
|
||
def _load_model(self):
|
||
if self._model is not None:
|
||
return
|
||
|
||
print(f"[ImageAnalyzer] Cargando modelo {self.model_id}...")
|
||
print(f"[ImageAnalyzer] Cache: {CACHE_DIR}")
|
||
|
||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||
print(f"[ImageAnalyzer] Dispositivo: {device}")
|
||
|
||
if device == "cuda":
|
||
bnb_config = BitsAndBytesConfig(
|
||
load_in_4bit=True,
|
||
bnb_4bit_compute_dtype=torch.bfloat16,
|
||
bnb_4bit_use_double_quant=True,
|
||
bnb_4bit_quant_type="nf4",
|
||
)
|
||
self._model = Qwen3VLForConditionalGeneration.from_pretrained(
|
||
self.model_id,
|
||
quantization_config=bnb_config,
|
||
device_map="auto",
|
||
cache_dir=CACHE_DIR,
|
||
)
|
||
else:
|
||
self._model = Qwen3VLForConditionalGeneration.from_pretrained(
|
||
self.model_id,
|
||
torch_dtype=torch.bfloat16,
|
||
device_map="cpu",
|
||
cache_dir=CACHE_DIR,
|
||
)
|
||
|
||
self._processor = AutoProcessor.from_pretrained(
|
||
self.model_id,
|
||
cache_dir=CACHE_DIR,
|
||
)
|
||
print("[ImageAnalyzer] Modelo cargado (int4 cuantizado).")
|
||
|
||
# ── Opción 3: Resume — obtener archivos ya analizados en MongoDB ───────────
|
||
|
||
@staticmethod
|
||
def get_already_analyzed(mongo_url: str = None, db_name: str = None) -> set[str]:
|
||
"""Devuelve el conjunto de nombres de archivo ya en MongoDB colección 'imagenes'."""
|
||
try:
|
||
from pymongo import MongoClient
|
||
url = mongo_url or os.getenv("MONGO_URL", "mongodb://localhost:27017")
|
||
dbname = db_name or os.getenv("DB_NAME", "FLUJOS_DATOS")
|
||
client = MongoClient(url, serverSelectionTimeoutMS=3000)
|
||
client.admin.command("ping")
|
||
db = client[dbname]
|
||
done = set(doc["archivo"] for doc in db["imagenes"].find({}, {"archivo": 1, "_id": 0}))
|
||
client.close()
|
||
print(f"[ImageAnalyzer] Resume: {len(done)} imágenes ya analizadas en MongoDB")
|
||
return done
|
||
except Exception as e:
|
||
print(f"[ImageAnalyzer] Resume: MongoDB no disponible ({e}) — se analizarán todas")
|
||
return set()
|
||
|
||
# ── Opción 4: Priorizar imágenes cuyos artículos existen en MongoDB ────────
|
||
|
||
@staticmethod
|
||
def get_known_article_titles(mongo_url: str = None, db_name: str = None) -> set[str]:
|
||
"""Devuelve títulos de artículos Wikipedia que ya tenemos en MongoDB."""
|
||
try:
|
||
from pymongo import MongoClient
|
||
url = mongo_url or os.getenv("MONGO_URL", "mongodb://localhost:27017")
|
||
dbname = db_name or os.getenv("DB_NAME", "FLUJOS_DATOS")
|
||
client = MongoClient(url, serverSelectionTimeoutMS=3000)
|
||
db = client[dbname]
|
||
titles = set()
|
||
for doc in db["wikipedia"].find({}, {"titulo": 1, "subtema": 1, "_id": 0}):
|
||
if doc.get("titulo"):
|
||
titles.add(doc["titulo"].lower())
|
||
if doc.get("subtema"):
|
||
titles.add(doc["subtema"].lower())
|
||
client.close()
|
||
print(f"[ImageAnalyzer] Priorización: {len(titles)} títulos conocidos en MongoDB")
|
||
return titles
|
||
except Exception:
|
||
return set()
|
||
|
||
@staticmethod
|
||
def _priority_score(img_path: Path, known_titles: set[str]) -> int:
|
||
"""Imagen con subtema en MongoDB Wikipedia → prioridad alta (0), resto (1)."""
|
||
stem = img_path.parent.name.lower().replace("_", " ")
|
||
return 0 if any(stem in t or t in stem for t in known_titles) else 1
|
||
|
||
# ── Helpers ────────────────────────────────────────────────────────────────
|
||
|
||
def _parse_json_response(self, raw: str) -> dict:
|
||
raw = raw.strip()
|
||
match = re.search(r'\{[\s\S]*\}', raw)
|
||
if match:
|
||
return json.loads(match.group())
|
||
raise ValueError(f"No se encontró JSON válido:\n{raw[:300]}")
|
||
|
||
def _build_result(self, img_path: Path, parsed: dict) -> dict:
|
||
return {
|
||
"archivo": img_path.name,
|
||
"image_path": str(img_path.resolve()),
|
||
"tema": parsed.get("tema", "sin_clasificar").lower(),
|
||
"subtema": parsed.get("subtema", "").lower(),
|
||
"texto": parsed.get("descripcion", ""),
|
||
"keywords": [k.lower().strip() for k in parsed.get("keywords", [])],
|
||
"entidades": parsed.get("entidades", []),
|
||
"idioma": parsed.get("idioma_detectado", "es"),
|
||
"source_type": "imagen",
|
||
"fecha": datetime.now().strftime("%Y-%m-%d"),
|
||
"modelo_usado": self.model_id,
|
||
}
|
||
|
||
# ── Análisis de una imagen (individual) ───────────────────────────────────
|
||
|
||
def analyze(self, image_path: str, extra_context: str = "") -> dict:
|
||
if not os.path.exists(image_path):
|
||
raise FileNotFoundError(f"Imagen no encontrada: {image_path}")
|
||
|
||
self._load_model()
|
||
prompt = (f"Contexto adicional: {extra_context}\n\n" + KEYWORD_PROMPT) if extra_context else KEYWORD_PROMPT
|
||
image = Image.open(image_path).convert("RGB")
|
||
|
||
messages = [{"role": "user", "content": [
|
||
{"type": "image", "image": image},
|
||
{"type": "text", "text": prompt},
|
||
]}]
|
||
|
||
inputs = self._processor.apply_chat_template(
|
||
messages, tokenize=True, add_generation_prompt=True,
|
||
return_dict=True, return_tensors="pt",
|
||
)
|
||
inputs = {k: v.to(self._model.device) for k, v in inputs.items()}
|
||
|
||
print(f" → Analizando: {Path(image_path).name}")
|
||
with torch.no_grad():
|
||
generated_ids = self._model.generate(**inputs, max_new_tokens=512, do_sample=False)
|
||
|
||
trimmed = [out[len(inp):] for inp, out in zip(inputs["input_ids"], generated_ids)]
|
||
raw = self._processor.batch_decode(trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
||
return self._build_result(Path(image_path), self._parse_json_response(raw))
|
||
|
||
# ── Opción 5: Batch inference ──────────────────────────────────────────────
|
||
|
||
def analyze_batch(self, image_paths: list[str], extra_context: str = "") -> list[dict]:
|
||
"""
|
||
Analiza un lote de imágenes en una sola llamada al modelo.
|
||
Más eficiente que N llamadas individuales.
|
||
RAM estimada: ~16GB modelo + ~500MB × batch_size activaciones.
|
||
"""
|
||
self._load_model()
|
||
prompt = (f"Contexto adicional: {extra_context}\n\n" + KEYWORD_PROMPT) if extra_context else KEYWORD_PROMPT
|
||
|
||
batch_messages = []
|
||
valid_paths = []
|
||
for path in image_paths:
|
||
try:
|
||
img = Image.open(path).convert("RGB")
|
||
batch_messages.append([{"role": "user", "content": [
|
||
{"type": "image", "image": img},
|
||
{"type": "text", "text": prompt},
|
||
]}])
|
||
valid_paths.append(Path(path))
|
||
except Exception as e:
|
||
print(f" ✗ Error abriendo {path}: {e}")
|
||
|
||
if not batch_messages:
|
||
return []
|
||
|
||
all_inputs = [
|
||
self._processor.apply_chat_template(
|
||
msgs, tokenize=True, add_generation_prompt=True,
|
||
return_dict=True, return_tensors="pt",
|
||
)
|
||
for msgs in batch_messages
|
||
]
|
||
|
||
# Pad manualmente para batch
|
||
input_ids_list = [x["input_ids"][0] for x in all_inputs]
|
||
attention_mask_list = [x["attention_mask"][0] for x in all_inputs]
|
||
|
||
max_len = max(t.shape[0] for t in input_ids_list)
|
||
pad_id = self._processor.tokenizer.pad_token_id or 0
|
||
padded_ids = torch.stack([
|
||
torch.nn.functional.pad(t, (max_len - t.shape[0], 0), value=pad_id)
|
||
for t in input_ids_list
|
||
])
|
||
padded_masks = torch.stack([
|
||
torch.nn.functional.pad(t, (max_len - t.shape[0], 0), value=0)
|
||
for t in attention_mask_list
|
||
])
|
||
|
||
with torch.no_grad():
|
||
generated = self._model.generate(
|
||
input_ids=padded_ids.to(self._model.device),
|
||
attention_mask=padded_masks.to(self._model.device),
|
||
max_new_tokens=512,
|
||
do_sample=False,
|
||
)
|
||
|
||
results = []
|
||
for i, (out_ids, in_ids) in enumerate(zip(generated, padded_ids)):
|
||
raw = self._processor.decode(out_ids[in_ids.shape[0]:], skip_special_tokens=True)
|
||
try:
|
||
parsed = self._parse_json_response(raw)
|
||
results.append(self._build_result(valid_paths[i], parsed))
|
||
print(f" ✓ {valid_paths[i].name} → tema={parsed.get('tema','?')}")
|
||
except Exception as e:
|
||
print(f" ✗ {valid_paths[i].name}: {e}")
|
||
results.append({
|
||
"archivo": valid_paths[i].name, "error": str(e),
|
||
"source_type": "imagen", "fecha": datetime.now().strftime("%Y-%m-%d"),
|
||
})
|
||
return results
|
||
|
||
# ── Análisis de carpeta con todas las mejoras ──────────────────────────────
|
||
|
||
def analyze_folder(
|
||
self,
|
||
folder_path: str,
|
||
extra_context: str = "",
|
||
resume: bool = True,
|
||
batch_size: int = DEFAULT_BATCH_SIZE,
|
||
prioritize: bool = True,
|
||
) -> list[dict]:
|
||
"""
|
||
Args:
|
||
resume: Si True, salta imágenes ya analizadas en MongoDB (opción 3)
|
||
prioritize: Si True, procesa primero imágenes cuyos artículos están en MongoDB (opción 4)
|
||
batch_size: Imágenes por lote para el modelo (opción 5). Default: 4
|
||
"""
|
||
folder = Path(folder_path)
|
||
if not folder.exists():
|
||
raise FileNotFoundError(f"Carpeta no encontrada: {folder_path}")
|
||
|
||
images = sorted([
|
||
p for p in folder.rglob("*")
|
||
if p.is_file() and p.suffix.lower() in SUPPORTED_EXTENSIONS
|
||
])
|
||
print(f"\n[ImageAnalyzer] {len(images)} imágenes encontradas en {folder_path}")
|
||
|
||
# Opción 3: Resume — filtrar ya analizadas
|
||
if resume:
|
||
done = self.get_already_analyzed()
|
||
before = len(images)
|
||
images = [p for p in images if p.name not in done]
|
||
print(f"[ImageAnalyzer] Resume: {before - len(images)} saltadas, {len(images)} pendientes")
|
||
|
||
if not images:
|
||
print("[ImageAnalyzer] Nada que analizar.")
|
||
return []
|
||
|
||
# Opción 4: Priorizar por artículos conocidos en MongoDB
|
||
if prioritize:
|
||
known = self.get_known_article_titles()
|
||
images = sorted(images, key=lambda p: self._priority_score(p, known))
|
||
print(f"[ImageAnalyzer] Priorización activada")
|
||
|
||
# Opción 5: Batch inference
|
||
results = []
|
||
total = len(images)
|
||
for start in range(0, total, batch_size):
|
||
batch = images[start:start + batch_size]
|
||
end = min(start + batch_size, total)
|
||
print(f"\n [Batch {start//batch_size + 1}] imágenes {start+1}-{end}/{total}")
|
||
batch_results = self.analyze_batch([str(p) for p in batch], extra_context)
|
||
results.extend(batch_results)
|
||
|
||
ok = len([r for r in results if "error" not in r])
|
||
print(f"\n[ImageAnalyzer] Completado: {ok}/{total} OK\n")
|
||
return results
|
||
|
||
@staticmethod
|
||
def save_json(results: list[dict], output_path: str):
|
||
with open(output_path, "w", encoding="utf-8") as f:
|
||
json.dump(results, f, ensure_ascii=False, indent=2)
|
||
print(f"[ImageAnalyzer] Guardado: {output_path} ({len(results)} registros)")
|