docker support

2026-01-17 10:24:00 +01:00
parent 8e2b7a5096
commit c4ab0ffad1
9 changed files with 1004 additions and 0 deletions
--- a/src/paddle_ocr/Dockerfile.gpu
+++ b/src/paddle_ocr/Dockerfile.gpu
@@ -0,0 +1,68 @@
+# Dockerfile.gpu - CUDA-enabled PaddleOCR REST API
+# Supports: x86_64 with NVIDIA GPU (CUDA 12.x)
+# For DGX Spark (ARM64 + CUDA): build natively on the device
+
+# Base image: NVIDIA CUDA 12.4.1 runtime with cuDNN on Ubuntu 22.04
+FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
+
+# Image metadata (both labels set in a single instruction)
+LABEL maintainer="Sergio Jimenez" \
+      description="PaddleOCR Tuning REST API - GPU/CUDA version"
+
+# Relative paths in the instructions below resolve against /app
+WORKDIR /app
+
+# Build-time only: keeps apt/dpkg non-interactive during RUN steps without
+# leaking DEBIAN_FRONTEND into the environment of the running container.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Runtime environment:
+#   PYTHONUNBUFFERED=1      - flush Python stdout/stderr immediately (live container logs)
+#   CUDA_VISIBLE_DEVICES=0  - default to the first GPU; override with `docker run -e`
+ENV PYTHONUNBUFFERED=1 \
+    CUDA_VISIBLE_DEVICES=0
+
+# Install Python 3.11 and the shared libraries needed at runtime, then create a
+# Python 3.11 virtual environment in /opt/venv.
+# Why a venv: on Ubuntu 22.04 the distro `python3-pip` belongs to the default
+# Python 3.10, so a bare `pip install` would place packages where the 3.11
+# interpreter cannot import them. The venv's own pip/python are both 3.11.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgl1 \
+    libglib2.0-0 \
+    libgomp1 \
+    libsm6 \
+    libxext6 \
+    libxrender1 \
+    python3.11 \
+    python3.11-venv \
+    && rm -rf /var/lib/apt/lists/* \
+    && python3.11 -m venv /opt/venv
+
+# Put the venv first on PATH so `python`, `pip` and installed console scripts
+# (e.g. uvicorn) resolve to the Python 3.11 environment in every later step
+# and at container runtime.
+ENV PATH="/opt/venv/bin:${PATH}"
+
+# Python packages: copy only the requirements file first, then install from it
+# without keeping pip's download cache in the layer
+COPY requirements-gpu.txt ./
+RUN pip install --no-cache-dir --requirement requirements-gpu.txt
+
+# Application modules, copied into the working directory
+COPY paddle_ocr_tuning_rest.py dataset_manager.py ./
+
+# Names of the detection / recognition models to bake into the image
+# (override at build time with --build-arg DET_MODEL=... / REC_MODEL=...)
+ARG DET_MODEL=PP-OCRv5_server_det
+ARG REC_MODEL=PP-OCRv5_server_rec
+
+# Export the selected names as environment variables; they can still be
+# overridden when the container is started
+ENV PADDLE_DET_MODEL=${DET_MODEL} \
+    PADDLE_REC_MODEL=${REC_MODEL}
+
+# Fetch the models at build time by instantiating PaddleOCR once, so the
+# container does not have to download them at runtime
+RUN python -c "\
+import os; \
+import paddleocr; \
+det_name = os.getenv('PADDLE_DET_MODEL', 'PP-OCRv5_server_det'); \
+rec_name = os.getenv('PADDLE_REC_MODEL', 'PP-OCRv5_server_rec'); \
+print(f'Downloading models: det={det_name}, rec={rec_name}'); \
+paddleocr.PaddleOCR(text_detection_model_name=det_name, text_recognition_model_name=rec_name); \
+print('Models downloaded successfully!')"
+
+# Mount points: the dataset directory and an optional additional model cache
+VOLUME /app/dataset /root/.paddlex
+
+# TCP port the API server listens on
+EXPOSE 8000/tcp
+
+# Health probe: request /health on the local API port; a failed request makes
+# the Python command exit non-zero, which is reported as exit status 1
+HEALTHCHECK --start-period=60s --interval=30s --timeout=10s --retries=3 \
+    CMD python -c "from urllib.request import urlopen; urlopen('http://localhost:8000/health')" || exit 1
+
+# Default process: uvicorn serving `app` from paddle_ocr_tuning_rest on all
+# interfaces, port 8000 (exec form, so no shell wraps the server)
+CMD ["uvicorn", "paddle_ocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]