PaddleOCR GPU support #4

Merged
Seryusjj merged 40 commits from gpu_support into main 2026-01-19 17:35:25 +00:00
3 changed files with 116 additions and 38 deletions
Showing only changes of commit 7ac0971153

Dockerfile.cpu

@@ -1,12 +1,21 @@
# Dockerfile.cpu - CPU-only PaddleOCR REST API
# Multi-arch: supports both amd64 and arm64
# Dockerfile.cpu - Multi-stage CPU Dockerfile
#
# Build base only (push to registry, rarely changes):
# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
#
# Build deploy (uses base, fast - code only):
# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .
#
# Or build all at once:
# docker build -t paddle-ocr-api:cpu -f Dockerfile.cpu .
FROM python:3.11-slim
# =============================================================================
# STAGE 1: BASE - All dependencies (rarely changes)
# =============================================================================
FROM python:3.11-slim AS base
LABEL maintainer="Sergio Jimenez"
LABEL description="PaddleOCR Tuning REST API - CPU version"
LABEL org.opencontainers.image.ref.name="python"
LABEL org.opencontainers.image.version="3.11-slim"
LABEL description="PaddleOCR Base Image - CPU dependencies"
WORKDIR /app
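The split means the expensive dependency layers get built and pushed once, while day-to-day builds only rebuild the thin deploy stage. A sketch of that workflow, using the registry tags from the header comments (the push step is implied by "push to registry", not shown in the diff):

docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
docker push seryus.ddns.net/unir/paddle-ocr-cpu-base:latest
# Later, code-only rebuilds are fast because the base layers are cached:
docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .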
@@ -20,15 +29,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*
# Install Python dependencies from requirements file
# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
# =============================================================================
# STAGE 2: DEPLOY - Application code (changes frequently)
# =============================================================================
FROM base AS deploy
LABEL description="PaddleOCR Tuning REST API - CPU version"
WORKDIR /app
# Copy application code (this is the only layer that changes frequently)
COPY paddle_ocr_tuning_rest.py .
COPY dataset_manager.py .
# Build arguments for models to bake into image
# Build arguments for models
ARG DET_MODEL=PP-OCRv5_server_det
ARG REC_MODEL=PP-OCRv5_server_rec
@@ -36,11 +54,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
ENV PADDLE_DET_MODEL=${DET_MODEL}
ENV PADDLE_REC_MODEL=${REC_MODEL}
# Note: Models download at first runtime
# First container start will take ~30s longer as models are fetched
# Use paddlex-cache volume to persist models across container restarts
# Volume for dataset and optional additional model cache
# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.paddlex"]
# Expose API port
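Because models download on first start, persisting /root/.paddlex across restarts avoids the ~30s re-fetch. A minimal run sketch; the container name and host paths are placeholders, and port 8000 is an assumption since the EXPOSE value is truncated in this hunk:

docker run -d --name ocr-cpu -p 8000:8000 \
    -v paddlex-cache:/root/.paddlex \
    -v "$(pwd)/dataset:/app/dataset" \
    paddle-ocr-api:cpu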

Dockerfile.gpu

@@ -1,21 +1,21 @@
# Dockerfile.gpu - CUDA-enabled PaddleOCR REST API
# Dockerfile.gpu - Multi-stage GPU Dockerfile
#
# Supports both architectures:
# - x86_64: Uses paddlepaddle-gpu from PaddlePaddle's CUDA index
# - ARM64: Uses local wheel from ./wheels/ (built on DGX Spark)
# Build base only (push to registry, rarely changes):
# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-gpu-base:latest -f Dockerfile.gpu .
#
# For ARM64 (DGX Spark), first build the wheel:
# docker compose --profile build run --rm build-paddle
# Then build this image:
# docker compose build ocr-gpu
# Build deploy (uses base, fast - code only):
# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-gpu:latest -f Dockerfile.gpu .
#
# For x86_64, just build directly (no wheel needed):
# docker compose build ocr-gpu
# Or build all at once:
# docker build -t paddle-ocr-api:gpu -f Dockerfile.gpu .
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
# =============================================================================
# STAGE 1: BASE - All dependencies (rarely changes)
# =============================================================================
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
LABEL maintainer="Sergio Jimenez"
LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
LABEL description="PaddleOCR Base Image - GPU/CUDA dependencies"
WORKDIR /app
@@ -41,11 +41,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
# Copy local wheels directory (may contain ARM64 wheel from build-paddle)
COPY wheels/ /tmp/wheels/
# Copy requirements
COPY requirements-gpu.txt .
# Install paddlepaddle: prefer local wheel (ARM64), fallback to CUDA index (x86_64)
# Use python -m pip to ensure packages install to Python 3.11 (not system Python 3.10)
RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
python -m pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
@@ -55,7 +51,7 @@ RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
    fi && \
    rm -rf /tmp/wheels
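The if-branch only fires when a wheel exists in ./wheels/ at build time; on x86_64 the directory can stay empty and the elided fallback installs from PaddlePaddle's CUDA index, per the comment above. On ARM64 the wheel has to be produced first; the removed header comments suggest the compose profile for that, which presumably still applies:

docker compose --profile build run --rm build-paddle   # writes ./wheels/paddlepaddle*.whl
docker compose build ocr-gpu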
# Install remaining dependencies explicitly
# Install remaining dependencies
RUN python -m pip install --no-cache-dir \
    paddleocr==3.3.2 \
    jiwer \
@@ -65,11 +61,20 @@ RUN python -m pip install --no-cache-dir \
    pydantic \
    Pillow
# Copy application code
# =============================================================================
# STAGE 2: DEPLOY - Application code (changes frequently)
# =============================================================================
FROM base AS deploy
LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
WORKDIR /app
# Copy application code (this is the only layer that changes frequently)
COPY paddle_ocr_tuning_rest.py .
COPY dataset_manager.py .
# Build arguments for models to bake into image
# Build arguments for models
ARG DET_MODEL=PP-OCRv5_server_det
ARG REC_MODEL=PP-OCRv5_server_rec
@@ -77,11 +82,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
ENV PADDLE_DET_MODEL=${DET_MODEL}
ENV PADDLE_REC_MODEL=${REC_MODEL}
# Note: Models download at first runtime (CI runner has no GPU for build-time download)
# First container start will take ~30s longer as models are fetched
# Use paddlex-cache volume to persist models across container restarts
# Volume for dataset and optional additional model cache
# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.paddlex"]
# Expose API port
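For the GPU image the container additionally needs the NVIDIA runtime, so assuming the NVIDIA Container Toolkit is installed on the host (and the same port assumption as the CPU variant):

docker run -d --name ocr-gpu --gpus all -p 8000:8000 \
    -v paddlex-cache:/root/.paddlex \
    -v "$(pwd)/dataset:/app/dataset" \
    paddle-ocr-api:gpu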

paddle_ocr_tuning_rest.py

@@ -9,6 +9,7 @@ from typing import Optional
from contextlib import asynccontextmanager
import numpy as np
import paddle
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
@@ -17,6 +18,37 @@ from jiwer import wer, cer
from dataset_manager import ImageTextDataset
def get_gpu_info() -> dict:
    """Get GPU status information from PaddlePaddle."""
    info = {
        "cuda_available": paddle.device.is_compiled_with_cuda(),
        "device": str(paddle.device.get_device()),
        "gpu_count": 0,
        "gpu_name": None,
        "gpu_memory_total": None,
        "gpu_memory_used": None,
    }
    if info["cuda_available"]:
        try:
            info["gpu_count"] = paddle.device.cuda.device_count()
            if info["gpu_count"] > 0:
                # Get GPU properties
                props = paddle.device.cuda.get_device_properties(0)
                info["gpu_name"] = props.name
                info["gpu_memory_total"] = f"{props.total_memory / (1024**3):.2f} GB"
                # Get current memory usage
                mem_reserved = paddle.device.cuda.memory_reserved(0)
                mem_allocated = paddle.device.cuda.memory_allocated(0)
                info["gpu_memory_used"] = f"{mem_allocated / (1024**3):.2f} GB"
                info["gpu_memory_reserved"] = f"{mem_reserved / (1024**3):.2f} GB"
        except Exception as e:
            info["gpu_error"] = str(e)
    return info
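The helper can be exercised on its own to confirm a container actually sees the GPU; importing the module does not trigger the model download, so this is a quick check. Container name taken from the run sketch above; output values are illustrative:

docker exec ocr-gpu python -c 'from paddle_ocr_tuning_rest import get_gpu_info; print(get_gpu_info())'
# Illustrative output on a CUDA build:
# {'cuda_available': True, 'device': 'gpu:0', 'gpu_count': 1, 'gpu_name': '...', ...}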
# Model configuration via environment variables (with defaults)
DEFAULT_DET_MODEL = os.environ.get("PADDLE_DET_MODEL", "PP-OCRv5_server_det")
DEFAULT_REC_MODEL = os.environ.get("PADDLE_REC_MODEL", "PP-OCRv5_server_rec")
@@ -37,6 +69,19 @@ state = AppState()
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Load OCR model at startup."""
# Log GPU status
gpu_info = get_gpu_info()
print("=" * 50)
print("GPU STATUS")
print("=" * 50)
print(f" CUDA available: {gpu_info['cuda_available']}")
print(f" Device: {gpu_info['device']}")
if gpu_info['cuda_available']:
print(f" GPU count: {gpu_info['gpu_count']}")
print(f" GPU name: {gpu_info['gpu_name']}")
print(f" GPU memory total: {gpu_info['gpu_memory_total']}")
print("=" * 50)
print(f"Loading PaddleOCR models...")
print(f" Detection: {state.det_model}")
print(f" Recognition: {state.rec_model}")
@@ -44,6 +89,12 @@ async def lifespan(app: FastAPI):
        text_detection_model_name=state.det_model,
        text_recognition_model_name=state.rec_model,
    )
    # Log GPU memory after model load
    if gpu_info['cuda_available']:
        gpu_after = get_gpu_info()
        print(f" GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")
    print("Model loaded successfully!")
    yield
    # Cleanup on shutdown
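With the banner in place, a docker logs check shows whether the deployment came up on GPU before any request is made. Roughly, with illustrative values:

docker logs ocr-gpu | head
# ==================================================
# GPU STATUS
# ==================================================
#  CUDA available: True
#  Device: gpu:0
# ...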
@@ -89,6 +140,12 @@ class HealthResponse(BaseModel):
    dataset_size: Optional[int] = None
    det_model: Optional[str] = None
    rec_model: Optional[str] = None
    # GPU info
    cuda_available: Optional[bool] = None
    device: Optional[str] = None
    gpu_name: Optional[str] = None
    gpu_memory_used: Optional[str] = None
    gpu_memory_total: Optional[str] = None
def _normalize_box_xyxy(box):
@@ -179,6 +236,7 @@ def evaluate_text(reference: str, prediction: str) -> dict:
@app.get("/health", response_model=HealthResponse)
def health_check():
"""Check if the service is ready."""
gpu_info = get_gpu_info()
return HealthResponse(
status="ok" if state.ocr is not None else "initializing",
model_loaded=state.ocr is not None,
@@ -186,6 +244,11 @@ def health_check():
        dataset_size=len(state.dataset) if state.dataset else None,
        det_model=state.det_model,
        rec_model=state.rec_model,
        cuda_available=gpu_info.get("cuda_available"),
        device=gpu_info.get("device"),
        gpu_name=gpu_info.get("gpu_name"),
        gpu_memory_used=gpu_info.get("gpu_memory_used"),
        gpu_memory_total=gpu_info.get("gpu_memory_total"),
    )
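The extended response makes GPU state visible to orchestration health checks without shelling into the container. Probing it, again assuming port 8000; the values shown are illustrative:

curl -s http://localhost:8000/health
# Illustrative response once the model is loaded on a GPU build:
# {"status": "ok", "model_loaded": true, "det_model": "PP-OCRv5_server_det",
#  "rec_model": "PP-OCRv5_server_rec", "cuda_available": true, "device": "gpu:0",
#  "gpu_name": "...", "gpu_memory_used": "1.02 GB", "gpu_memory_total": "..."}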