Image update
All checks were successful
build_docker / essential (pull_request) Successful in 0s
build_docker / build_cpu (linux/amd64) (pull_request) Successful in 3m42s
build_docker / build_cpu (linux/arm64) (pull_request) Successful in 18m44s
build_docker / build_gpu (pull_request) Successful in 20m51s

2026-01-17 14:27:02 +01:00
parent 5459c9d660
commit 7ac0971153
3 changed files with 116 additions and 38 deletions

Dockerfile.cpu

@@ -1,12 +1,21 @@
-# Dockerfile.cpu - CPU-only PaddleOCR REST API
-# Multi-arch: supports both amd64 and arm64
-
-FROM python:3.11-slim
+# Dockerfile.cpu - Multi-stage CPU Dockerfile
+#
+# Build base only (push to registry, rarely changes):
+#   docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
+#
+# Build deploy (uses base, fast - code only):
+#   docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .
+#
+# Or build all at once:
+#   docker build -t paddle-ocr-api:cpu -f Dockerfile.cpu .
+
+# =============================================================================
+# STAGE 1: BASE - All dependencies (rarely changes)
+# =============================================================================
+FROM python:3.11-slim AS base
 
 LABEL maintainer="Sergio Jimenez"
-LABEL description="PaddleOCR Tuning REST API - CPU version"
+LABEL description="PaddleOCR Base Image - CPU dependencies"
+LABEL org.opencontainers.image.ref.name="python"
+LABEL org.opencontainers.image.version="3.11-slim"
 
 WORKDIR /app
 
@@ -20,15 +29,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libgomp1 \
     && rm -rf /var/lib/apt/lists/*
 
-# Install Python dependencies from requirements file
+# Install Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy application code
+# =============================================================================
+# STAGE 2: DEPLOY - Application code (changes frequently)
+# =============================================================================
+FROM base AS deploy
+
+LABEL description="PaddleOCR Tuning REST API - CPU version"
+
+WORKDIR /app
+
+# Copy application code (this is the only layer that changes frequently)
 COPY paddle_ocr_tuning_rest.py .
 COPY dataset_manager.py .
 
-# Build arguments for models to bake into image
+# Build arguments for models
 ARG DET_MODEL=PP-OCRv5_server_det
 ARG REC_MODEL=PP-OCRv5_server_rec
 
@@ -36,11 +54,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
 ENV PADDLE_DET_MODEL=${DET_MODEL}
 ENV PADDLE_REC_MODEL=${REC_MODEL}
 
-# Note: Models download at first runtime
-# First container start will take ~30s longer as models are fetched
-# Use paddlex-cache volume to persist models across container restarts
-# Volume for dataset and optional additional model cache
+# Volume for dataset and model cache
 VOLUME ["/app/dataset", "/root/.paddlex"]
 
 # Expose API port
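The base/deploy split above is what the CI timings reflect: the heavy dependency layers live in the base stage, so a code-only change rebuilds just the two COPY layers of the deploy stage. A sketch of the intended workflow, assuming the registry tags from the Dockerfile comments (the docker push and buildx commands are illustrative additions, not part of this commit):

# Rarely: rebuild the dependency base and push it to the registry
docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
docker push seryus.ddns.net/unir/paddle-ocr-cpu-base:latest

# On each code change: only the deploy-stage layers rebuild
docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .

# The amd64+arm64 variants the checks build could come from one buildx call (assumed)
docker buildx build --platform linux/amd64,linux/arm64 --target deploy \
    -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu --push .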

Dockerfile.gpu

@@ -1,21 +1,21 @@
-# Dockerfile.gpu - CUDA-enabled PaddleOCR REST API
+# Dockerfile.gpu - Multi-stage GPU Dockerfile
 #
-# Supports both architectures:
-# - x86_64: Uses paddlepaddle-gpu from PaddlePaddle's CUDA index
-# - ARM64: Uses local wheel from ./wheels/ (built on DGX Spark)
+# Build base only (push to registry, rarely changes):
+#   docker build --target base -t seryus.ddns.net/unir/paddle-ocr-gpu-base:latest -f Dockerfile.gpu .
 #
-# For ARM64 (DGX Spark), first build the wheel:
-#   docker compose --profile build run --rm build-paddle
-# Then build this image:
-#   docker compose build ocr-gpu
+# Build deploy (uses base, fast - code only):
+#   docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-gpu:latest -f Dockerfile.gpu .
 #
-# For x86_64, just build directly (no wheel needed):
-#   docker compose build ocr-gpu
-
-FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
+# Or build all at once:
+#   docker build -t paddle-ocr-api:gpu -f Dockerfile.gpu .
+
+# =============================================================================
+# STAGE 1: BASE - All dependencies (rarely changes)
+# =============================================================================
+FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
 
 LABEL maintainer="Sergio Jimenez"
-LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
+LABEL description="PaddleOCR Base Image - GPU/CUDA dependencies"
 
 WORKDIR /app
 
@@ -41,11 +41,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 # Copy local wheels directory (may contain ARM64 wheel from build-paddle)
 COPY wheels/ /tmp/wheels/
 
-# Copy requirements
-COPY requirements-gpu.txt .
-
 # Install paddlepaddle: prefer local wheel (ARM64), fallback to CUDA index (x86_64)
-# Use python -m pip to ensure packages install to Python 3.11 (not system Python 3.10)
 RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
         echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
         python -m pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
 
@@ -55,7 +51,7 @@ RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
     fi && \
     rm -rf /tmp/wheels
 
-# Install remaining dependencies explicitly
+# Install remaining dependencies
 RUN python -m pip install --no-cache-dir \
     paddleocr==3.3.2 \
     jiwer \
 
@@ -65,11 +61,20 @@ RUN python -m pip install --no-cache-dir \
     pydantic \
     Pillow
 
-# Copy application code
+# =============================================================================
+# STAGE 2: DEPLOY - Application code (changes frequently)
+# =============================================================================
+FROM base AS deploy
+
+LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
+
+WORKDIR /app
+
+# Copy application code (this is the only layer that changes frequently)
 COPY paddle_ocr_tuning_rest.py .
 COPY dataset_manager.py .
 
-# Build arguments for models to bake into image
+# Build arguments for models
 ARG DET_MODEL=PP-OCRv5_server_det
 ARG REC_MODEL=PP-OCRv5_server_rec
 
@@ -77,11 +82,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
 ENV PADDLE_DET_MODEL=${DET_MODEL}
 ENV PADDLE_REC_MODEL=${REC_MODEL}
 
-# Note: Models download at first runtime (CI runner has no GPU for build-time download)
-# First container start will take ~30s longer as models are fetched
-# Use paddlex-cache volume to persist models across container restarts
-# Volume for dataset and optional additional model cache
+# Volume for dataset and model cache
 VOLUME ["/app/dataset", "/root/.paddlex"]
 
 # Expose API port
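With no build-time model download, the first container start still fetches the models (the removed note put this at roughly 30s), so persisting /root/.paddlex is what keeps later starts fast. A minimal run sketch; the paddlex-cache volume name comes from the comment removed above, and the port mapping is an assumption since the diff is cut off before the EXPOSE line:

docker run --gpus all \
    -v paddlex-cache:/root/.paddlex \
    -v "$(pwd)/dataset:/app/dataset" \
    -p 8000:8000 \
    seryus.ddns.net/unir/paddle-ocr-gpu:latest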

paddle_ocr_tuning_rest.py

@@ -9,6 +9,7 @@ from typing import Optional
 from contextlib import asynccontextmanager
 
 import numpy as np
+import paddle
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel, Field
 
@@ -17,6 +18,37 @@ from jiwer import wer, cer
 from dataset_manager import ImageTextDataset
 
+
+def get_gpu_info() -> dict:
+    """Get GPU status information from PaddlePaddle."""
+    info = {
+        "cuda_available": paddle.device.is_compiled_with_cuda(),
+        "device": str(paddle.device.get_device()),
+        "gpu_count": 0,
+        "gpu_name": None,
+        "gpu_memory_total": None,
+        "gpu_memory_used": None,
+    }
+    if info["cuda_available"]:
+        try:
+            info["gpu_count"] = paddle.device.cuda.device_count()
+            if info["gpu_count"] > 0:
+                # Get GPU properties
+                props = paddle.device.cuda.get_device_properties(0)
+                info["gpu_name"] = props.name
+                info["gpu_memory_total"] = f"{props.total_memory / (1024**3):.2f} GB"
+                # Get current memory usage
+                mem_reserved = paddle.device.cuda.memory_reserved(0)
+                mem_allocated = paddle.device.cuda.memory_allocated(0)
+                info["gpu_memory_used"] = f"{mem_allocated / (1024**3):.2f} GB"
+                info["gpu_memory_reserved"] = f"{mem_reserved / (1024**3):.2f} GB"
+        except Exception as e:
+            info["gpu_error"] = str(e)
+    return info
+
+
 # Model configuration via environment variables (with defaults)
 DEFAULT_DET_MODEL = os.environ.get("PADDLE_DET_MODEL", "PP-OCRv5_server_det")
 DEFAULT_REC_MODEL = os.environ.get("PADDLE_REC_MODEL", "PP-OCRv5_server_rec")
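The paddle.device calls used by get_gpu_info() are ordinary PaddlePaddle APIs, so the same check can be run standalone inside a running container to verify the GPU image independently of the API (a one-off sketch; <container> is a placeholder, and it assumes python resolves to the image's Python):

docker exec -it <container> python -c \
    "import paddle; print(paddle.device.get_device(), paddle.device.is_compiled_with_cuda())"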
@@ -37,6 +69,19 @@ state = AppState()
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Load OCR model at startup."""
+    # Log GPU status
+    gpu_info = get_gpu_info()
+    print("=" * 50)
+    print("GPU STATUS")
+    print("=" * 50)
+    print(f"  CUDA available: {gpu_info['cuda_available']}")
+    print(f"  Device: {gpu_info['device']}")
+    if gpu_info['cuda_available']:
+        print(f"  GPU count: {gpu_info['gpu_count']}")
+        print(f"  GPU name: {gpu_info['gpu_name']}")
+        print(f"  GPU memory total: {gpu_info['gpu_memory_total']}")
+    print("=" * 50)
+
     print(f"Loading PaddleOCR models...")
     print(f"  Detection: {state.det_model}")
     print(f"  Recognition: {state.rec_model}")
 
@@ -44,6 +89,12 @@ async def lifespan(app: FastAPI):
         text_detection_model_name=state.det_model,
         text_recognition_model_name=state.rec_model,
     )
+
+    # Log GPU memory after model load
+    if gpu_info['cuda_available']:
+        gpu_after = get_gpu_info()
+        print(f"  GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")
+
     print("Model loaded successfully!")
     yield
     # Cleanup on shutdown
 
@@ -89,6 +140,12 @@ class HealthResponse(BaseModel):
     dataset_size: Optional[int] = None
     det_model: Optional[str] = None
     rec_model: Optional[str] = None
+    # GPU info
+    cuda_available: Optional[bool] = None
+    device: Optional[str] = None
+    gpu_name: Optional[str] = None
+    gpu_memory_used: Optional[str] = None
+    gpu_memory_total: Optional[str] = None
 
 
 def _normalize_box_xyxy(box):
 
@@ -179,6 +236,7 @@ def evaluate_text(reference: str, prediction: str) -> dict:
 @app.get("/health", response_model=HealthResponse)
 def health_check():
     """Check if the service is ready."""
+    gpu_info = get_gpu_info()
     return HealthResponse(
         status="ok" if state.ocr is not None else "initializing",
         model_loaded=state.ocr is not None,
 
@@ -186,6 +244,11 @@ def health_check():
         dataset_size=len(state.dataset) if state.dataset else None,
         det_model=state.det_model,
         rec_model=state.rec_model,
+        cuda_available=gpu_info.get("cuda_available"),
+        device=gpu_info.get("device"),
+        gpu_name=gpu_info.get("gpu_name"),
+        gpu_memory_used=gpu_info.get("gpu_memory_used"),
+        gpu_memory_total=gpu_info.get("gpu_memory_total"),
     )
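Once the container is up, the new fields make /health a quick way to confirm that CUDA is actually visible from inside the service (port assumed as in the run sketch above; field names are those of HealthResponse, values purely illustrative):

curl -s http://localhost:8000/health
# e.g. {"status": "ok", "model_loaded": true, ..., "cuda_available": true,
#       "device": "gpu:0", "gpu_name": "...", "gpu_memory_used": "...", "gpu_memory_total": "..."}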