Image update
All checks were successful
build_docker / essential (pull_request) Successful in 0s
build_docker / build_cpu (linux/amd64) (pull_request) Successful in 3m42s
build_docker / build_cpu (linux/arm64) (pull_request) Successful in 18m44s
build_docker / build_gpu (pull_request) Successful in 20m51s

2026-01-17 14:27:02 +01:00
parent 5459c9d660
commit 7ac0971153
3 changed files with 116 additions and 38 deletions

Dockerfile.cpu

@@ -1,12 +1,21 @@
-# Dockerfile.cpu - CPU-only PaddleOCR REST API
-# Multi-arch: supports both amd64 and arm64
-
-FROM python:3.11-slim
+# Dockerfile.cpu - Multi-stage CPU Dockerfile
+#
+# Build base only (push to registry, rarely changes):
+#   docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
+#
+# Build deploy (uses base, fast - code only):
+#   docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .
+#
+# Or build all at once:
+#   docker build -t paddle-ocr-api:cpu -f Dockerfile.cpu .
+
+# =============================================================================
+# STAGE 1: BASE - All dependencies (rarely changes)
+# =============================================================================
+FROM python:3.11-slim AS base
 
 LABEL maintainer="Sergio Jimenez"
-LABEL description="PaddleOCR Tuning REST API - CPU version"
+LABEL description="PaddleOCR Base Image - CPU dependencies"
+LABEL org.opencontainers.image.ref.name="python"
+LABEL org.opencontainers.image.version="3.11-slim"
 
 WORKDIR /app
 
@@ -20,15 +29,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libgomp1 \
     && rm -rf /var/lib/apt/lists/*
 
-# Install Python dependencies from requirements file
+# Install Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy application code
+# =============================================================================
+# STAGE 2: DEPLOY - Application code (changes frequently)
+# =============================================================================
+FROM base AS deploy
+
+LABEL description="PaddleOCR Tuning REST API - CPU version"
+
+WORKDIR /app
+
+# Copy application code (this is the only layer that changes frequently)
 COPY paddle_ocr_tuning_rest.py .
 COPY dataset_manager.py .
 
-# Build arguments for models to bake into image
+# Build arguments for models
 ARG DET_MODEL=PP-OCRv5_server_det
 ARG REC_MODEL=PP-OCRv5_server_rec
 
@@ -36,11 +54,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
 ENV PADDLE_DET_MODEL=${DET_MODEL}
 ENV PADDLE_REC_MODEL=${REC_MODEL}
 
-# Note: Models download at first runtime
-# First container start will take ~30s longer as models are fetched
-# Use paddlex-cache volume to persist models across container restarts
-# Volume for dataset and optional additional model cache
+# Volume for dataset and model cache
 VOLUME ["/app/dataset", "/root/.paddlex"]
 
 # Expose API port
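The base/deploy split above is what the CI timings reflect: the heavy dependency layers live in the base stage, so a code-only change rebuilds just the two COPY layers of the deploy stage. A sketch of the intended workflow, assuming the registry tags from the Dockerfile comments (the docker push and buildx commands are illustrative additions, not part of this commit):

# Rarely: rebuild the dependency base and push it to the registry
docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
docker push seryus.ddns.net/unir/paddle-ocr-cpu-base:latest

# On each code change: only the deploy-stage layers rebuild
docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .

# The amd64+arm64 variants the checks build could come from one buildx call (assumed)
docker buildx build --platform linux/amd64,linux/arm64 --target deploy \
    -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu --push .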

Dockerfile.gpu

@@ -1,21 +1,21 @@
-# Dockerfile.gpu - CUDA-enabled PaddleOCR REST API
+# Dockerfile.gpu - Multi-stage GPU Dockerfile
 #
-# Supports both architectures:
-# - x86_64: Uses paddlepaddle-gpu from PaddlePaddle's CUDA index
-# - ARM64: Uses local wheel from ./wheels/ (built on DGX Spark)
+# Build base only (push to registry, rarely changes):
+#   docker build --target base -t seryus.ddns.net/unir/paddle-ocr-gpu-base:latest -f Dockerfile.gpu .
 #
-# For ARM64 (DGX Spark), first build the wheel:
-#   docker compose --profile build run --rm build-paddle
-# Then build this image:
-#   docker compose build ocr-gpu
+# Build deploy (uses base, fast - code only):
+#   docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-gpu:latest -f Dockerfile.gpu .
 #
-# For x86_64, just build directly (no wheel needed):
-#   docker compose build ocr-gpu
-
-FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
+# Or build all at once:
+#   docker build -t paddle-ocr-api:gpu -f Dockerfile.gpu .
+
+# =============================================================================
+# STAGE 1: BASE - All dependencies (rarely changes)
+# =============================================================================
+FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
 
 LABEL maintainer="Sergio Jimenez"
-LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
+LABEL description="PaddleOCR Base Image - GPU/CUDA dependencies"
 
 WORKDIR /app
 
@@ -41,11 +41,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 # Copy local wheels directory (may contain ARM64 wheel from build-paddle)
 COPY wheels/ /tmp/wheels/
 
-# Copy requirements
-COPY requirements-gpu.txt .
-
 # Install paddlepaddle: prefer local wheel (ARM64), fallback to CUDA index (x86_64)
-# Use python -m pip to ensure packages install to Python 3.11 (not system Python 3.10)
 RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
         echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
         python -m pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
 
@@ -55,7 +51,7 @@ RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
     fi && \
     rm -rf /tmp/wheels
 
-# Install remaining dependencies explicitly
+# Install remaining dependencies
 RUN python -m pip install --no-cache-dir \
     paddleocr==3.3.2 \
     jiwer \
 
@@ -65,11 +61,20 @@ RUN python -m pip install --no-cache-dir \
     pydantic \
     Pillow
 
-# Copy application code
+# =============================================================================
+# STAGE 2: DEPLOY - Application code (changes frequently)
+# =============================================================================
+FROM base AS deploy
+
+LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
+
+WORKDIR /app
+
+# Copy application code (this is the only layer that changes frequently)
 COPY paddle_ocr_tuning_rest.py .
 COPY dataset_manager.py .
 
-# Build arguments for models to bake into image
+# Build arguments for models
 ARG DET_MODEL=PP-OCRv5_server_det
 ARG REC_MODEL=PP-OCRv5_server_rec
 
@@ -77,11 +82,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
 ENV PADDLE_DET_MODEL=${DET_MODEL}
 ENV PADDLE_REC_MODEL=${REC_MODEL}
 
-# Note: Models download at first runtime (CI runner has no GPU for build-time download)
-# First container start will take ~30s longer as models are fetched
-# Use paddlex-cache volume to persist models across container restarts
-# Volume for dataset and optional additional model cache
+# Volume for dataset and model cache
 VOLUME ["/app/dataset", "/root/.paddlex"]
 
 # Expose API port
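With no build-time model download, the first container start still fetches the models (the removed note put this at roughly 30s), so persisting /root/.paddlex is what keeps later starts fast. A minimal run sketch; the paddlex-cache volume name comes from the comment removed above, and the port mapping is an assumption since the diff is cut off before the EXPOSE line:

docker run --gpus all \
    -v paddlex-cache:/root/.paddlex \
    -v "$(pwd)/dataset:/app/dataset" \
    -p 8000:8000 \
    seryus.ddns.net/unir/paddle-ocr-gpu:latest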

paddle_ocr_tuning_rest.py

@@ -9,6 +9,7 @@ from typing import Optional
 from contextlib import asynccontextmanager
 
 import numpy as np
+import paddle
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel, Field
 
@@ -17,6 +18,37 @@ from jiwer import wer, cer
 from dataset_manager import ImageTextDataset
 
+
+def get_gpu_info() -> dict:
+    """Get GPU status information from PaddlePaddle."""
+    info = {
+        "cuda_available": paddle.device.is_compiled_with_cuda(),
+        "device": str(paddle.device.get_device()),
+        "gpu_count": 0,
+        "gpu_name": None,
+        "gpu_memory_total": None,
+        "gpu_memory_used": None,
+    }
+    if info["cuda_available"]:
+        try:
+            info["gpu_count"] = paddle.device.cuda.device_count()
+            if info["gpu_count"] > 0:
+                # Get GPU properties
+                props = paddle.device.cuda.get_device_properties(0)
+                info["gpu_name"] = props.name
+                info["gpu_memory_total"] = f"{props.total_memory / (1024**3):.2f} GB"
+                # Get current memory usage
+                mem_reserved = paddle.device.cuda.memory_reserved(0)
+                mem_allocated = paddle.device.cuda.memory_allocated(0)
+                info["gpu_memory_used"] = f"{mem_allocated / (1024**3):.2f} GB"
+                info["gpu_memory_reserved"] = f"{mem_reserved / (1024**3):.2f} GB"
+        except Exception as e:
+            info["gpu_error"] = str(e)
+    return info
+
+
 # Model configuration via environment variables (with defaults)
 DEFAULT_DET_MODEL = os.environ.get("PADDLE_DET_MODEL", "PP-OCRv5_server_det")
 DEFAULT_REC_MODEL = os.environ.get("PADDLE_REC_MODEL", "PP-OCRv5_server_rec")
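The paddle.device calls used by get_gpu_info() are ordinary PaddlePaddle APIs, so the same check can be run standalone inside a running container to verify the GPU image independently of the API (a one-off sketch; <container> is a placeholder, and it assumes python resolves to the image's Python):

docker exec -it <container> python -c \
    "import paddle; print(paddle.device.get_device(), paddle.device.is_compiled_with_cuda())"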
@@ -37,6 +69,19 @@ state = AppState()
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Load OCR model at startup."""
+    # Log GPU status
+    gpu_info = get_gpu_info()
+    print("=" * 50)
+    print("GPU STATUS")
+    print("=" * 50)
+    print(f"  CUDA available: {gpu_info['cuda_available']}")
+    print(f"  Device: {gpu_info['device']}")
+    if gpu_info['cuda_available']:
+        print(f"  GPU count: {gpu_info['gpu_count']}")
+        print(f"  GPU name: {gpu_info['gpu_name']}")
+        print(f"  GPU memory total: {gpu_info['gpu_memory_total']}")
+    print("=" * 50)
+
     print(f"Loading PaddleOCR models...")
     print(f"  Detection: {state.det_model}")
     print(f"  Recognition: {state.rec_model}")
 
@@ -44,6 +89,12 @@ async def lifespan(app: FastAPI):
         text_detection_model_name=state.det_model,
         text_recognition_model_name=state.rec_model,
     )
+
+    # Log GPU memory after model load
+    if gpu_info['cuda_available']:
+        gpu_after = get_gpu_info()
+        print(f"  GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")
+
     print("Model loaded successfully!")
     yield
     # Cleanup on shutdown
 
@@ -89,6 +140,12 @@ class HealthResponse(BaseModel):
     dataset_size: Optional[int] = None
     det_model: Optional[str] = None
     rec_model: Optional[str] = None
+    # GPU info
+    cuda_available: Optional[bool] = None
+    device: Optional[str] = None
+    gpu_name: Optional[str] = None
+    gpu_memory_used: Optional[str] = None
+    gpu_memory_total: Optional[str] = None
 
 
 def _normalize_box_xyxy(box):
 
@@ -179,6 +236,7 @@ def evaluate_text(reference: str, prediction: str) -> dict:
 @app.get("/health", response_model=HealthResponse)
 def health_check():
     """Check if the service is ready."""
+    gpu_info = get_gpu_info()
     return HealthResponse(
         status="ok" if state.ocr is not None else "initializing",
         model_loaded=state.ocr is not None,
 
@@ -186,6 +244,11 @@ def health_check():
         dataset_size=len(state.dataset) if state.dataset else None,
         det_model=state.det_model,
         rec_model=state.rec_model,
+        cuda_available=gpu_info.get("cuda_available"),
+        device=gpu_info.get("device"),
+        gpu_name=gpu_info.get("gpu_name"),
+        gpu_memory_used=gpu_info.get("gpu_memory_used"),
+        gpu_memory_total=gpu_info.get("gpu_memory_total"),
     )
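Once the container is up, the new fields make /health a quick way to confirm that CUDA is actually visible from inside the service (port assumed as in the run sketch above; field names are those of HealthResponse, values purely illustrative):

curl -s http://localhost:8000/health
# e.g. {"status": "ok", "model_loaded": true, ..., "cuda_available": true,
#       "device": "gpu:0", "gpu_name": "...", "gpu_memory_used": "...", "gpu_memory_total": "..."}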