rss/download_models.py
2025-11-24 01:40:46 +01:00

40 lines
1.3 KiB
Python

import nltk
import logging
import ssl
# SECURITY NOTE: when the running interpreter exposes
# ``ssl._create_unverified_context``, install it as the ssl module's default
# HTTPS context factory. This turns off certificate verification for every
# stdlib HTTPS client in this process that relies on that default (PEP 476).
# Presumably a workaround so the NLTK downloads below succeed on hosts with
# missing or broken CA certificates -- confirm it is still required.
if hasattr(ssl, '_create_unverified_context'):
    _create_unverified_https_context = ssl._create_unverified_context
    ssl._create_default_https_context = _create_unverified_https_context

# Timestamped INFO-level logging on the root logger.
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
)

# NLTK resource identifiers that download_nltk_data() checks and fetches.
PACKAGES = [
    'punkt',
    'punkt_tab',
    'stopwords',
]
def download_nltk_data():
    """Make sure every NLTK resource listed in ``PACKAGES`` is installed.

    Each package is first looked up with ``nltk.data.find``: names starting
    with ``punkt`` are searched under ``tokenizers/``, all others under
    ``corpora/``. A package that cannot be found is fetched with
    ``nltk.download``.

    ``nltk.download`` reports most failures by returning ``False`` instead
    of raising, so its return value is checked explicitly; otherwise a
    failed download would be logged as a success and the script would exit
    with status 0.

    Raises:
        SystemExit: with exit status 1 as soon as one download fails.
    """
    # Function-scope import: the module does not import ``sys`` at top level.
    import sys

    for package in PACKAGES:
        category = 'tokenizers' if package.startswith('punkt') else 'corpora'
        logging.info(f"Verificando si el paquete '{package}' de NLTK está disponible...")

        # Keep the try body down to the one call that raises LookupError.
        try:
            nltk.data.find(f'{category}/{package}')
        except LookupError:
            logging.info(f"El paquete '{package}' no se encontró. Iniciando descarga...")
        else:
            logging.info(f"El paquete '{package}' ya está descargado.")
            continue

        try:
            # A falsy return value means the download did not complete.
            if not nltk.download(package, quiet=True):
                raise RuntimeError("nltk.download() devolvió False")
        except Exception as e:
            # Top-level boundary of the script: log the failure and abort.
            logging.error(f"Ocurrió un error durante la descarga del paquete '{package}': {e}")
            sys.exit(1)

        logging.info(f"Paquete '{package}' descargado con éxito.")
# Script entry point: only fetch the NLTK data when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    download_nltk_data()