# Python · 40 lines · 1.3 KiB
import logging
import ssl
import sys

import nltk
|
|
|
|
# Point the ssl module's default HTTPS context factory at the unverified one,
# when this Python build exposes it.
# NOTE(review): this turns off certificate verification for every caller of
# ssl._create_default_https_context in this process. Presumably a workaround
# for certificate errors during the NLTK download -- confirm it is still needed.
if hasattr(ssl, '_create_unverified_context'):
    _create_unverified_https_context = ssl._create_unverified_context
    ssl._create_default_https_context = _create_unverified_https_context

# Root logger: INFO and above, each record prefixed with its timestamp.
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
)

# NLTK package identifiers that download_nltk_data() checks for and fetches.
PACKAGES = [
    'punkt',
    'punkt_tab',
    'stopwords',
]
|
|
|
|
|
|
def _resource_path(package):
    """Return the ``nltk.data.find`` lookup path for *package*."""
    # Packages whose name starts with 'punkt' are looked up under tokenizers/;
    # everything else is looked up under corpora/.
    category = 'tokenizers' if package.startswith('punkt') else 'corpora'
    return f'{category}/{package}'


def download_nltk_data():
    """Make sure every NLTK package listed in ``PACKAGES`` is available.

    For each package, look it up with ``nltk.data.find``. If the lookup
    raises ``LookupError``, download the package with ``nltk.download``.

    Raises:
        SystemExit: with exit code 1 (via ``sys.exit``) as soon as one
            download fails; the remaining packages are not processed.
    """
    for package in PACKAGES:
        logging.info(f"Verificando si el paquete '{package}' de NLTK está disponible...")
        try:
            nltk.data.find(_resource_path(package))
        except LookupError:
            logging.info(f"El paquete '{package}' no se encontró. Iniciando descarga...")
        else:
            logging.info(f"El paquete '{package}' ya está descargado.")
            continue

        try:
            # By default nltk.download() signals failure by returning False
            # instead of raising, and quiet=True hides its error output.
            # raise_on_error=True surfaces the real error; the return value is
            # still checked so a False result is never reported as success.
            downloaded = nltk.download(package, quiet=True, raise_on_error=True)
            if not downloaded:
                raise RuntimeError("nltk.download() returned False")
        except Exception as e:
            # Top-level boundary of the script: log the cause and abort.
            logging.error(f"Ocurrió un error durante la descarga del paquete '{package}': {e}")
            sys.exit(1)

        logging.info(f"Paquete '{package}' descargado con éxito.")
|
|
|
|
|
|
# Script entry point: fetch the NLTK data only when this file is executed
# directly, not when it is imported.
if '__main__' == __name__:
    download_nltk_data()
|
|
|