From 7ac0971153fca5c8d577ea6d09d0dd505de9bd2f Mon Sep 17 00:00:00 2001
From: Sergio Jimenez Jimenez
Date: Sat, 17 Jan 2026 14:27:02 +0100
Subject: [PATCH] Image update

---
 src/paddle_ocr/Dockerfile.cpu            | 42 ++++++++++------
 src/paddle_ocr/Dockerfile.gpu            | 49 +++++++++---------
 src/paddle_ocr/paddle_ocr_tuning_rest.py | 63 ++++++++++++++++++++++++
 3 files changed, 116 insertions(+), 38 deletions(-)

diff --git a/src/paddle_ocr/Dockerfile.cpu b/src/paddle_ocr/Dockerfile.cpu
index fe4d61f..e206caf 100644
--- a/src/paddle_ocr/Dockerfile.cpu
+++ b/src/paddle_ocr/Dockerfile.cpu
@@ -1,12 +1,21 @@
-# Dockerfile.cpu - CPU-only PaddleOCR REST API
-# Multi-arch: supports both amd64 and arm64
+# Dockerfile.cpu - Multi-stage CPU Dockerfile
+#
+# Build base only (push to registry, rarely changes):
+# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
+#
+# Build deploy (uses base, fast - code only):
+# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .
+#
+# Or build all at once:
+# docker build -t paddle-ocr-api:cpu -f Dockerfile.cpu .

-FROM python:3.11-slim
+# =============================================================================
+# STAGE 1: BASE - All dependencies (rarely changes)
+# =============================================================================
+FROM python:3.11-slim AS base

 LABEL maintainer="Sergio Jimenez"
-LABEL description="PaddleOCR Tuning REST API - CPU version"
-LABEL org.opencontainers.image.ref.name="python"
-LABEL org.opencontainers.image.version="3.11-slim"
+LABEL description="PaddleOCR Base Image - CPU dependencies"

 WORKDIR /app

@@ -20,15 +29,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libgomp1 \
     && rm -rf /var/lib/apt/lists/*

-# Install Python dependencies from requirements file
+# Install Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt

-# Copy application code
+# =============================================================================
+# STAGE 2: DEPLOY - Application code (changes frequently)
+# =============================================================================
+FROM base AS deploy
+
+LABEL description="PaddleOCR Tuning REST API - CPU version"
+
+WORKDIR /app
+
+# Copy application code (this is the only layer that changes frequently)
 COPY paddle_ocr_tuning_rest.py .
 COPY dataset_manager.py .

-# Build arguments for models to bake into image
+# Build arguments for models
 ARG DET_MODEL=PP-OCRv5_server_det
 ARG REC_MODEL=PP-OCRv5_server_rec

@@ -36,11 +54,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
 ENV PADDLE_DET_MODEL=${DET_MODEL}
 ENV PADDLE_REC_MODEL=${REC_MODEL}

-# Note: Models download at first runtime
-# First container start will take ~30s longer as models are fetched
-# Use paddlex-cache volume to persist models across container restarts
-
-# Volume for dataset and optional additional model cache
+# Volume for dataset and model cache
 VOLUME ["/app/dataset", "/root/.paddlex"]

 # Expose API port
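Reviewer note, not part of the patch: the deploy stage only re-copies the two Python files on top of the shared base, but the first start of a freshly built container still downloads the PP-OCRv5 models (the old comment about this was dropped, while the /root/.paddlex cache volume remains). A minimal readiness poll is sketched below; the localhost:8000 address is an assumption, since this hunk only shows the "# Expose API port" comment, not the EXPOSE line or the published port.

import json
import time
import urllib.request

def wait_until_ready(base_url="http://localhost:8000", timeout=120):
    """Poll /health until the service reports status "ok" (models loaded)."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(f"{base_url}/health", timeout=5) as resp:
                if json.load(resp).get("status") == "ok":
                    return True
        except OSError:
            pass  # container still starting or models still downloading
        time.sleep(2)
    raise TimeoutError("OCR service did not become ready in time")

if __name__ == "__main__":
    wait_until_ready()  # base_url and timeout are assumptions, adjust to the deployment
    print("deploy image is serving")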
diff --git a/src/paddle_ocr/Dockerfile.gpu b/src/paddle_ocr/Dockerfile.gpu
index 9e5f1a6..4f7b037 100644
--- a/src/paddle_ocr/Dockerfile.gpu
+++ b/src/paddle_ocr/Dockerfile.gpu
@@ -1,21 +1,21 @@
-# Dockerfile.gpu - CUDA-enabled PaddleOCR REST API
+# Dockerfile.gpu - Multi-stage GPU Dockerfile
 #
-# Supports both architectures:
-# - x86_64: Uses paddlepaddle-gpu from PaddlePaddle's CUDA index
-# - ARM64: Uses local wheel from ./wheels/ (built on DGX Spark)
+# Build base only (push to registry, rarely changes):
+# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-gpu-base:latest -f Dockerfile.gpu .
 #
-# For ARM64 (DGX Spark), first build the wheel:
-# docker compose --profile build run --rm build-paddle
-# Then build this image:
-# docker compose build ocr-gpu
+# Build deploy (uses base, fast - code only):
+# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-gpu:latest -f Dockerfile.gpu .
 #
-# For x86_64, just build directly (no wheel needed):
-# docker compose build ocr-gpu
+# Or build all at once:
+# docker build -t paddle-ocr-api:gpu -f Dockerfile.gpu .

-FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
+# =============================================================================
+# STAGE 1: BASE - All dependencies (rarely changes)
+# =============================================================================
+FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base

 LABEL maintainer="Sergio Jimenez"
-LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
+LABEL description="PaddleOCR Base Image - GPU/CUDA dependencies"

 WORKDIR /app

@@ -41,11 +41,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 # Copy local wheels directory (may contain ARM64 wheel from build-paddle)
 COPY wheels/ /tmp/wheels/

-# Copy requirements
-COPY requirements-gpu.txt .
-
 # Install paddlepaddle: prefer local wheel (ARM64), fallback to CUDA index (x86_64)
-# Use python -m pip to ensure packages install to Python 3.11 (not system Python 3.10)
 RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
     echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
     python -m pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
@@ -55,7 +51,7 @@ RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
     fi && \
     rm -rf /tmp/wheels

-# Install remaining dependencies explicitly
+# Install remaining dependencies
 RUN python -m pip install --no-cache-dir \
     paddleocr==3.3.2 \
     jiwer \
@@ -65,11 +61,20 @@ RUN python -m pip install --no-cache-dir \
     pydantic \
     Pillow

-# Copy application code
+# =============================================================================
+# STAGE 2: DEPLOY - Application code (changes frequently)
+# =============================================================================
+FROM base AS deploy
+
+LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
+
+WORKDIR /app
+
+# Copy application code (this is the only layer that changes frequently)
 COPY paddle_ocr_tuning_rest.py .
 COPY dataset_manager.py .

-# Build arguments for models to bake into image
+# Build arguments for models
 ARG DET_MODEL=PP-OCRv5_server_det
 ARG REC_MODEL=PP-OCRv5_server_rec

@@ -77,11 +82,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
 ENV PADDLE_DET_MODEL=${DET_MODEL}
 ENV PADDLE_REC_MODEL=${REC_MODEL}

-# Note: Models download at first runtime (CI runner has no GPU for build-time download)
-# First container start will take ~30s longer as models are fetched
-# Use paddlex-cache volume to persist models across container restarts
-
-# Volume for dataset and optional additional model cache
+# Volume for dataset and model cache
 VOLUME ["/app/dataset", "/root/.paddlex"]

 # Expose API port
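Reviewer note, not part of the patch: whether paddlepaddle came from the local ARM64 wheel or from the CUDA index, the result can be checked inside the built GPU image before it reaches CI. The sketch below uses the same paddle.device calls the REST API relies on in the next file; it is a quick sanity check, not an official test harness.

import paddle

print("compiled with CUDA:", paddle.device.is_compiled_with_cuda())
print("current device:", paddle.device.get_device())
if paddle.device.is_compiled_with_cuda():
    # A single GPU is assumed here, matching get_gpu_info() below which queries device 0.
    print("GPU count:", paddle.device.cuda.device_count())
    props = paddle.device.cuda.get_device_properties(0)
    print("GPU 0:", props.name, f"{props.total_memory / (1024**3):.2f} GB")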
diff --git a/src/paddle_ocr/paddle_ocr_tuning_rest.py b/src/paddle_ocr/paddle_ocr_tuning_rest.py
index 9a34c78..f345aba 100644
--- a/src/paddle_ocr/paddle_ocr_tuning_rest.py
+++ b/src/paddle_ocr/paddle_ocr_tuning_rest.py
@@ -9,6 +9,7 @@ from typing import Optional
 from contextlib import asynccontextmanager

 import numpy as np
+import paddle
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel, Field

@@ -17,6 +18,37 @@ from jiwer import wer, cer
 from dataset_manager import ImageTextDataset


+def get_gpu_info() -> dict:
+    """Get GPU status information from PaddlePaddle."""
+    info = {
+        "cuda_available": paddle.device.is_compiled_with_cuda(),
+        "device": str(paddle.device.get_device()),
+        "gpu_count": 0,
+        "gpu_name": None,
+        "gpu_memory_total": None,
+        "gpu_memory_used": None,
+    }
+
+    if info["cuda_available"]:
+        try:
+            info["gpu_count"] = paddle.device.cuda.device_count()
+            if info["gpu_count"] > 0:
+                # Get GPU properties
+                props = paddle.device.cuda.get_device_properties(0)
+                info["gpu_name"] = props.name
+                info["gpu_memory_total"] = f"{props.total_memory / (1024**3):.2f} GB"
+
+                # Get current memory usage
+                mem_reserved = paddle.device.cuda.memory_reserved(0)
+                mem_allocated = paddle.device.cuda.memory_allocated(0)
+                info["gpu_memory_used"] = f"{mem_allocated / (1024**3):.2f} GB"
+                info["gpu_memory_reserved"] = f"{mem_reserved / (1024**3):.2f} GB"
+        except Exception as e:
+            info["gpu_error"] = str(e)
+
+    return info
+
+
 # Model configuration via environment variables (with defaults)
 DEFAULT_DET_MODEL = os.environ.get("PADDLE_DET_MODEL", "PP-OCRv5_server_det")
 DEFAULT_REC_MODEL = os.environ.get("PADDLE_REC_MODEL", "PP-OCRv5_server_rec")
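Illustrative usage of the new helper, not part of the patch: on a CPU-only build cuda_available is False and the gpu_* fields keep their defaults, while gpu_memory_reserved and gpu_error are only added when they apply. The import path assumes the module is importable under its file name.

import json
from paddle_ocr_tuning_rest import get_gpu_info

# Dump the dict exactly as the REST API will see it.
print(json.dumps(get_gpu_info(), indent=2))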
@@ -37,6 +69,19 @@ state = AppState()
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Load OCR model at startup."""
+    # Log GPU status
+    gpu_info = get_gpu_info()
+    print("=" * 50)
+    print("GPU STATUS")
+    print("=" * 50)
+    print(f"  CUDA available: {gpu_info['cuda_available']}")
+    print(f"  Device: {gpu_info['device']}")
+    if gpu_info['cuda_available']:
+        print(f"  GPU count: {gpu_info['gpu_count']}")
+        print(f"  GPU name: {gpu_info['gpu_name']}")
+        print(f"  GPU memory total: {gpu_info['gpu_memory_total']}")
+    print("=" * 50)
+
     print(f"Loading PaddleOCR models...")
     print(f"  Detection: {state.det_model}")
     print(f"  Recognition: {state.rec_model}")
@@ -44,6 +89,12 @@ async def lifespan(app: FastAPI):
         text_detection_model_name=state.det_model,
         text_recognition_model_name=state.rec_model,
     )
+
+    # Log GPU memory after model load
+    if gpu_info['cuda_available']:
+        gpu_after = get_gpu_info()
+        print(f"  GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")
+
     print("Model loaded successfully!")
     yield
     # Cleanup on shutdown
@@ -89,6 +140,12 @@ class HealthResponse(BaseModel):
     dataset_size: Optional[int] = None
     det_model: Optional[str] = None
     rec_model: Optional[str] = None
+    # GPU info
+    cuda_available: Optional[bool] = None
+    device: Optional[str] = None
+    gpu_name: Optional[str] = None
+    gpu_memory_used: Optional[str] = None
+    gpu_memory_total: Optional[str] = None


 def _normalize_box_xyxy(box):
@@ -179,6 +236,7 @@ def evaluate_text(reference: str, prediction: str) -> dict:
 @app.get("/health", response_model=HealthResponse)
 def health_check():
     """Check if the service is ready."""
+    gpu_info = get_gpu_info()
     return HealthResponse(
         status="ok" if state.ocr is not None else "initializing",
         model_loaded=state.ocr is not None,
@@ -186,6 +244,11 @@ def health_check():
         dataset_size=len(state.dataset) if state.dataset else None,
         det_model=state.det_model,
         rec_model=state.rec_model,
+        cuda_available=gpu_info.get("cuda_available"),
+        device=gpu_info.get("device"),
+        gpu_name=gpu_info.get("gpu_name"),
+        gpu_memory_used=gpu_info.get("gpu_memory_used"),
+        gpu_memory_total=gpu_info.get("gpu_memory_total"),
     )
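A small client sketch, not part of the patch, showing how the new GPU fields surface through GET /health; the base URL and port are assumptions and should be adjusted to the actual deployment.

import json
import urllib.request

# Assumed address of a running container; adjust host/port as needed.
with urllib.request.urlopen("http://localhost:8000/health", timeout=10) as resp:
    health = json.load(resp)

# Fields added by this patch, alongside the existing status/model fields.
for key in ("status", "det_model", "rec_model", "cuda_available",
            "device", "gpu_name", "gpu_memory_used", "gpu_memory_total"):
    print(f"{key:>18}: {health.get(key)}")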