go integration and wikipedia
This commit is contained in:
parent
47a252e339
commit
ee90335b92
7828 changed files with 1307913 additions and 20807 deletions
79
Dockerfile
79
Dockerfile
|
|
@ -1,67 +1,50 @@
|
|||
FROM python:3.11-slim
|
||||
|
||||
# CUDA o CPU
|
||||
ARG TORCH_CUDA=cu121
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# --------------------------------------------------------
|
||||
# Dependencias del sistema
|
||||
# --------------------------------------------------------
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libpq-dev \
|
||||
gcc \
|
||||
git \
|
||||
libcairo2 \
|
||||
libpango-1.0-0 \
|
||||
libpangocairo-1.0-0 \
|
||||
libgdk-pixbuf-2.0-0 \
|
||||
libffi-dev \
|
||||
shared-mime-info \
|
||||
libpq-dev gcc git curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
||||
TOKENIZERS_PARALLELISM=false \
|
||||
HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
|
||||
HF_HOME=/root/.cache/huggingface
|
||||
|
||||
# --------------------------------------------------------
|
||||
# Instalación de requirements
|
||||
# --------------------------------------------------------
|
||||
COPY requirements.txt .
|
||||
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel
|
||||
RUN pip install --no-cache-dir --upgrade pip
|
||||
|
||||
# Instalar PyTorch según GPU/CPU
|
||||
RUN if [ "$TORCH_CUDA" = "cu121" ]; then \
|
||||
pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cu121 \
|
||||
torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 ; \
|
||||
else \
|
||||
pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu \
|
||||
torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 ; \
|
||||
fi
|
||||
RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu121
|
||||
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
RUN pip install --no-cache-dir \
|
||||
ctranslate2 \
|
||||
sentencepiece \
|
||||
transformers==4.44.0 \
|
||||
protobuf==3.20.3 \
|
||||
"numpy<2" \
|
||||
psycopg2-binary \
|
||||
redis \
|
||||
requests \
|
||||
beautifulsoup4 \
|
||||
lxml \
|
||||
langdetect \
|
||||
nltk \
|
||||
scikit-learn \
|
||||
pandas \
|
||||
sentence-transformers \
|
||||
spacy
|
||||
|
||||
# Instalar ctranslate2 con soporte CUDA
|
||||
RUN if [ "$TORCH_CUDA" = "cu121" ]; then \
|
||||
pip install --no-cache-dir ctranslate2 ; \
|
||||
else \
|
||||
pip install --no-cache-dir ctranslate2 ; \
|
||||
fi
|
||||
RUN python -m spacy download es_core_news_lg
|
||||
|
||||
# Descargar modelo spaCy ES
|
||||
RUN python -m spacy download es_core_news_md || true
|
||||
COPY workers/ ./workers/
|
||||
COPY init-db/ ./init-db/
|
||||
COPY migrations/ ./migrations/
|
||||
COPY entity_config.json .
|
||||
|
||||
# --------------------------------------------------------
|
||||
# Copiar TODO el proyecto rss2/
|
||||
# --------------------------------------------------------
|
||||
COPY . .
|
||||
|
||||
# --------------------------------------------------------
|
||||
# Puede descargar modelos NLLB o Sentence-BERT si existe
|
||||
# --------------------------------------------------------
|
||||
RUN python download_models.py || true
|
||||
|
||||
EXPOSE 8000
|
||||
ENV DB_HOST=db
|
||||
ENV DB_PORT=5432
|
||||
ENV DB_NAME=rss
|
||||
ENV DB_USER=rss
|
||||
ENV DB_PASS=x
|
||||
|
||||
CMD ["python", "-m", "workers.embeddings_worker"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue