From 7ac0971153fca5c8d577ea6d09d0dd505de9bd2f Mon Sep 17 00:00:00 2001
From: Sergio Jimenez Jimenez
Date: Sat, 17 Jan 2026 14:27:02 +0100
Subject: [PATCH] Image update

---
 src/paddle_ocr/Dockerfile.cpu            | 42 ++++++++++------
 src/paddle_ocr/Dockerfile.gpu            | 49 +++++++++---------
 src/paddle_ocr/paddle_ocr_tuning_rest.py | 63 ++++++++++++++++++++++++
 3 files changed, 116 insertions(+), 38 deletions(-)

diff --git a/src/paddle_ocr/Dockerfile.cpu b/src/paddle_ocr/Dockerfile.cpu
index fe4d61f..e206caf 100644
--- a/src/paddle_ocr/Dockerfile.cpu
+++ b/src/paddle_ocr/Dockerfile.cpu
@@ -1,12 +1,21 @@
-# Dockerfile.cpu - CPU-only PaddleOCR REST API
-# Multi-arch: supports both amd64 and arm64
+# Dockerfile.cpu - Multi-stage CPU Dockerfile
+#
+# Build base only (push to registry, rarely changes):
+# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
+#
+# Build deploy (uses base, fast - code only):
+# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .
+#
+# Or build all at once:
+# docker build -t paddle-ocr-api:cpu -f Dockerfile.cpu .

-FROM python:3.11-slim
+# =============================================================================
+# STAGE 1: BASE - All dependencies (rarely changes)
+# =============================================================================
+FROM python:3.11-slim AS base

 LABEL maintainer="Sergio Jimenez"
-LABEL description="PaddleOCR Tuning REST API - CPU version"
-LABEL org.opencontainers.image.ref.name="python"
-LABEL org.opencontainers.image.version="3.11-slim"
+LABEL description="PaddleOCR Base Image - CPU dependencies"

 WORKDIR /app

@@ -20,15 +29,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libgomp1 \
     && rm -rf /var/lib/apt/lists/*

-# Install Python dependencies from requirements file
+# Install Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt

-# Copy application code
+# =============================================================================
+# STAGE 2: DEPLOY - Application code (changes frequently)
+# =============================================================================
+FROM base AS deploy
+
+LABEL description="PaddleOCR Tuning REST API - CPU version"
+
+WORKDIR /app
+
+# Copy application code (this is the only layer that changes frequently)
 COPY paddle_ocr_tuning_rest.py .
 COPY dataset_manager.py .

-# Build arguments for models to bake into image
+# Build arguments for models
 ARG DET_MODEL=PP-OCRv5_server_det
 ARG REC_MODEL=PP-OCRv5_server_rec

@@ -36,11 +54,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
 ENV PADDLE_DET_MODEL=${DET_MODEL}
 ENV PADDLE_REC_MODEL=${REC_MODEL}

-# Note: Models download at first runtime
-# First container start will take ~30s longer as models are fetched
-# Use paddlex-cache volume to persist models across container restarts
-
-# Volume for dataset and optional additional model cache
+# Volume for dataset and model cache
 VOLUME ["/app/dataset", "/root/.paddlex"]

 # Expose API port
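Reviewer note, not part of the patch: the deploy stage only re-copies the two Python files on top of the shared base, but the first start of a freshly built container still downloads the PP-OCRv5 models (the old comment about this was dropped, while the /root/.paddlex cache volume remains). A minimal readiness poll is sketched below; the localhost:8000 address is an assumption, since this hunk only shows the "# Expose API port" comment, not the EXPOSE line or the published port.

import json
import time
import urllib.request

def wait_until_ready(base_url="http://localhost:8000", timeout=120):
    """Poll /health until the service reports status "ok" (models loaded)."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(f"{base_url}/health", timeout=5) as resp:
                if json.load(resp).get("status") == "ok":
                    return True
        except OSError:
            pass  # container still starting or models still downloading
        time.sleep(2)
    raise TimeoutError("OCR service did not become ready in time")

if __name__ == "__main__":
    wait_until_ready()  # base_url and timeout are assumptions, adjust to the deployment
    print("deploy image is serving")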
diff --git a/src/paddle_ocr/Dockerfile.gpu b/src/paddle_ocr/Dockerfile.gpu
index 9e5f1a6..4f7b037 100644
--- a/src/paddle_ocr/Dockerfile.gpu
+++ b/src/paddle_ocr/Dockerfile.gpu
@@ -1,21 +1,21 @@
-# Dockerfile.gpu - CUDA-enabled PaddleOCR REST API
+# Dockerfile.gpu - Multi-stage GPU Dockerfile
 #
-# Supports both architectures:
-# - x86_64: Uses paddlepaddle-gpu from PaddlePaddle's CUDA index
-# - ARM64: Uses local wheel from ./wheels/ (built on DGX Spark)
+# Build base only (push to registry, rarely changes):
+# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-gpu-base:latest -f Dockerfile.gpu .
 #
-# For ARM64 (DGX Spark), first build the wheel:
-# docker compose --profile build run --rm build-paddle
-# Then build this image:
-# docker compose build ocr-gpu
+# Build deploy (uses base, fast - code only):
+# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-gpu:latest -f Dockerfile.gpu .
 #
-# For x86_64, just build directly (no wheel needed):
-# docker compose build ocr-gpu
+# Or build all at once:
+# docker build -t paddle-ocr-api:gpu -f Dockerfile.gpu .

-FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
+# =============================================================================
+# STAGE 1: BASE - All dependencies (rarely changes)
+# =============================================================================
+FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base

 LABEL maintainer="Sergio Jimenez"
-LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
+LABEL description="PaddleOCR Base Image - GPU/CUDA dependencies"

 WORKDIR /app

@@ -41,11 +41,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 # Copy local wheels directory (may contain ARM64 wheel from build-paddle)
 COPY wheels/ /tmp/wheels/

-# Copy requirements
-COPY requirements-gpu.txt .
-
 # Install paddlepaddle: prefer local wheel (ARM64), fallback to CUDA index (x86_64)
-# Use python -m pip to ensure packages install to Python 3.11 (not system Python 3.10)
 RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
     echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
     python -m pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
@@ -55,7 +51,7 @@ RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
     fi && \
     rm -rf /tmp/wheels

-# Install remaining dependencies explicitly
+# Install remaining dependencies
 RUN python -m pip install --no-cache-dir \
     paddleocr==3.3.2 \
     jiwer \
@@ -65,11 +61,20 @@ RUN python -m pip install --no-cache-dir \
     pydantic \
     Pillow

-# Copy application code
+# =============================================================================
+# STAGE 2: DEPLOY - Application code (changes frequently)
+# =============================================================================
+FROM base AS deploy
+
+LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
+
+WORKDIR /app
+
+# Copy application code (this is the only layer that changes frequently)
 COPY paddle_ocr_tuning_rest.py .
 COPY dataset_manager.py .

-# Build arguments for models to bake into image
+# Build arguments for models
 ARG DET_MODEL=PP-OCRv5_server_det
 ARG REC_MODEL=PP-OCRv5_server_rec

@@ -77,11 +82,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
 ENV PADDLE_DET_MODEL=${DET_MODEL}
 ENV PADDLE_REC_MODEL=${REC_MODEL}

-# Note: Models download at first runtime (CI runner has no GPU for build-time download)
-# First container start will take ~30s longer as models are fetched
-# Use paddlex-cache volume to persist models across container restarts
-
-# Volume for dataset and optional additional model cache
+# Volume for dataset and model cache
 VOLUME ["/app/dataset", "/root/.paddlex"]

 # Expose API port
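Reviewer note, not part of the patch: whether paddlepaddle came from the local ARM64 wheel or from the CUDA index, the result can be checked inside the built GPU image before it reaches CI. The sketch below uses the same paddle.device calls the REST API relies on in the next file; it is a quick sanity check, not an official test harness.

import paddle

print("compiled with CUDA:", paddle.device.is_compiled_with_cuda())
print("current device:", paddle.device.get_device())
if paddle.device.is_compiled_with_cuda():
    # A single GPU is assumed here, matching get_gpu_info() below which queries device 0.
    print("GPU count:", paddle.device.cuda.device_count())
    props = paddle.device.cuda.get_device_properties(0)
    print("GPU 0:", props.name, f"{props.total_memory / (1024**3):.2f} GB")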
diff --git a/src/paddle_ocr/paddle_ocr_tuning_rest.py b/src/paddle_ocr/paddle_ocr_tuning_rest.py
index 9a34c78..f345aba 100644
--- a/src/paddle_ocr/paddle_ocr_tuning_rest.py
+++ b/src/paddle_ocr/paddle_ocr_tuning_rest.py
@@ -9,6 +9,7 @@ from typing import Optional
 from contextlib import asynccontextmanager

 import numpy as np
+import paddle
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel, Field

@@ -17,6 +18,37 @@ from jiwer import wer, cer
 from dataset_manager import ImageTextDataset


+def get_gpu_info() -> dict:
+    """Get GPU status information from PaddlePaddle."""
+    info = {
+        "cuda_available": paddle.device.is_compiled_with_cuda(),
+        "device": str(paddle.device.get_device()),
+        "gpu_count": 0,
+        "gpu_name": None,
+        "gpu_memory_total": None,
+        "gpu_memory_used": None,
+    }
+
+    if info["cuda_available"]:
+        try:
+            info["gpu_count"] = paddle.device.cuda.device_count()
+            if info["gpu_count"] > 0:
+                # Get GPU properties
+                props = paddle.device.cuda.get_device_properties(0)
+                info["gpu_name"] = props.name
+                info["gpu_memory_total"] = f"{props.total_memory / (1024**3):.2f} GB"
+
+                # Get current memory usage
+                mem_reserved = paddle.device.cuda.memory_reserved(0)
+                mem_allocated = paddle.device.cuda.memory_allocated(0)
+                info["gpu_memory_used"] = f"{mem_allocated / (1024**3):.2f} GB"
+                info["gpu_memory_reserved"] = f"{mem_reserved / (1024**3):.2f} GB"
+        except Exception as e:
+            info["gpu_error"] = str(e)
+
+    return info
+
+
 # Model configuration via environment variables (with defaults)
 DEFAULT_DET_MODEL = os.environ.get("PADDLE_DET_MODEL", "PP-OCRv5_server_det")
 DEFAULT_REC_MODEL = os.environ.get("PADDLE_REC_MODEL", "PP-OCRv5_server_rec")
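Illustrative usage of the new helper, not part of the patch: on a CPU-only build cuda_available is False and the gpu_* fields keep their defaults, while gpu_memory_reserved and gpu_error are only added when they apply. The import path assumes the module is importable under its file name.

import json
from paddle_ocr_tuning_rest import get_gpu_info

# Dump the dict exactly as the REST API will see it.
print(json.dumps(get_gpu_info(), indent=2))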
@@ -37,6 +69,19 @@ state = AppState()
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Load OCR model at startup."""
+    # Log GPU status
+    gpu_info = get_gpu_info()
+    print("=" * 50)
+    print("GPU STATUS")
+    print("=" * 50)
+    print(f"  CUDA available: {gpu_info['cuda_available']}")
+    print(f"  Device: {gpu_info['device']}")
+    if gpu_info['cuda_available']:
+        print(f"  GPU count: {gpu_info['gpu_count']}")
+        print(f"  GPU name: {gpu_info['gpu_name']}")
+        print(f"  GPU memory total: {gpu_info['gpu_memory_total']}")
+    print("=" * 50)
+
     print(f"Loading PaddleOCR models...")
     print(f"  Detection: {state.det_model}")
     print(f"  Recognition: {state.rec_model}")
@@ -44,6 +89,12 @@ async def lifespan(app: FastAPI):
         text_detection_model_name=state.det_model,
         text_recognition_model_name=state.rec_model,
     )
+
+    # Log GPU memory after model load
+    if gpu_info['cuda_available']:
+        gpu_after = get_gpu_info()
+        print(f"  GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")
+
     print("Model loaded successfully!")
     yield
     # Cleanup on shutdown
@@ -89,6 +140,12 @@ class HealthResponse(BaseModel):
     dataset_size: Optional[int] = None
     det_model: Optional[str] = None
     rec_model: Optional[str] = None
+    # GPU info
+    cuda_available: Optional[bool] = None
+    device: Optional[str] = None
+    gpu_name: Optional[str] = None
+    gpu_memory_used: Optional[str] = None
+    gpu_memory_total: Optional[str] = None


 def _normalize_box_xyxy(box):
@@ -179,6 +236,7 @@ def evaluate_text(reference: str, prediction: str) -> dict:
 @app.get("/health", response_model=HealthResponse)
 def health_check():
     """Check if the service is ready."""
+    gpu_info = get_gpu_info()
     return HealthResponse(
         status="ok" if state.ocr is not None else "initializing",
         model_loaded=state.ocr is not None,
@@ -186,6 +244,11 @@ def health_check():
         dataset_size=len(state.dataset) if state.dataset else None,
         det_model=state.det_model,
         rec_model=state.rec_model,
+        cuda_available=gpu_info.get("cuda_available"),
+        device=gpu_info.get("device"),
+        gpu_name=gpu_info.get("gpu_name"),
+        gpu_memory_used=gpu_info.get("gpu_memory_used"),
+        gpu_memory_total=gpu_info.get("gpu_memory_total"),
     )
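A small client sketch, not part of the patch, showing how the new GPU fields surface through GET /health; the base URL and port are assumptions and should be adjusted to the actual deployment.

import json
import urllib.request

# Assumed address of a running container; adjust host/port as needed.
with urllib.request.urlopen("http://localhost:8000/health", timeout=10) as resp:
    health = json.load(resp)

# Fields added by this patch, alongside the existing status/model fields.
for key in ("status", "det_model", "rec_model", "cuda_available",
            "device", "gpu_name", "gpu_memory_used", "gpu_memory_total"):
    print(f"{key:>18}: {health.get(key)}")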