PaddleOCR GPU support #4

Merged
Seryusjj merged 40 commits from gpu_support into main 2026-01-19 17:35:25 +00:00
3 changed files with 116 additions and 38 deletions
Showing only changes of commit 7ac0971153

Dockerfile.cpu

@@ -1,12 +1,21 @@
# Dockerfile.cpu - CPU-only PaddleOCR REST API
# Multi-arch: supports both amd64 and arm64
# Dockerfile.cpu - Multi-stage CPU Dockerfile
#
# Build base only (push to registry, rarely changes):
# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
#
# Build deploy (uses base, fast - code only):
# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .
#
# Or build all at once:
# docker build -t paddle-ocr-api:cpu -f Dockerfile.cpu .
FROM python:3.11-slim
# =============================================================================
# STAGE 1: BASE - All dependencies (rarely changes)
# =============================================================================
FROM python:3.11-slim AS base
LABEL maintainer="Sergio Jimenez"
LABEL description="PaddleOCR Tuning REST API - CPU version"
LABEL org.opencontainers.image.ref.name="python"
LABEL org.opencontainers.image.version="3.11-slim"
LABEL description="PaddleOCR Base Image - CPU dependencies"
WORKDIR /app
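The split means the expensive dependency layers get built and pushed once, while day-to-day builds only rebuild the thin deploy stage. A sketch of that workflow, using the registry tags from the header comments (the push step is implied by "push to registry", not shown in the diff):

docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
docker push seryus.ddns.net/unir/paddle-ocr-cpu-base:latest
# Later, code-only rebuilds are fast because the base layers are cached:
docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .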
@@ -20,15 +29,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*
# Install Python dependencies from requirements file
# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
# =============================================================================
# STAGE 2: DEPLOY - Application code (changes frequently)
# =============================================================================
FROM base AS deploy
LABEL description="PaddleOCR Tuning REST API - CPU version"
WORKDIR /app
# Copy application code (this is the only layer that changes frequently)
COPY paddle_ocr_tuning_rest.py .
COPY dataset_manager.py .
# Build arguments for models to bake into image
# Build arguments for models
ARG DET_MODEL=PP-OCRv5_server_det
ARG REC_MODEL=PP-OCRv5_server_rec
@@ -36,11 +54,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
ENV PADDLE_DET_MODEL=${DET_MODEL}
ENV PADDLE_REC_MODEL=${REC_MODEL}
# Note: Models download at first runtime
# First container start will take ~30s longer as models are fetched
# Use paddlex-cache volume to persist models across container restarts
# Volume for dataset and optional additional model cache
# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.paddlex"]
# Expose API port
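Because models download on first start, persisting /root/.paddlex across restarts avoids the ~30s re-fetch. A minimal run sketch; the container name and host paths are placeholders, and port 8000 is an assumption since the EXPOSE value is truncated in this hunk:

docker run -d --name ocr-cpu -p 8000:8000 \
    -v paddlex-cache:/root/.paddlex \
    -v "$(pwd)/dataset:/app/dataset" \
    paddle-ocr-api:cpu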

Dockerfile.gpu

@@ -1,21 +1,21 @@
# Dockerfile.gpu - CUDA-enabled PaddleOCR REST API
# Dockerfile.gpu - Multi-stage GPU Dockerfile
#
# Supports both architectures:
# - x86_64: Uses paddlepaddle-gpu from PaddlePaddle's CUDA index
# - ARM64: Uses local wheel from ./wheels/ (built on DGX Spark)
# Build base only (push to registry, rarely changes):
# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-gpu-base:latest -f Dockerfile.gpu .
#
# For ARM64 (DGX Spark), first build the wheel:
# docker compose --profile build run --rm build-paddle
# Then build this image:
# docker compose build ocr-gpu
# Build deploy (uses base, fast - code only):
# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-gpu:latest -f Dockerfile.gpu .
#
# For x86_64, just build directly (no wheel needed):
# docker compose build ocr-gpu
# Or build all at once:
# docker build -t paddle-ocr-api:gpu -f Dockerfile.gpu .
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
# =============================================================================
# STAGE 1: BASE - All dependencies (rarely changes)
# =============================================================================
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
LABEL maintainer="Sergio Jimenez"
LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
LABEL description="PaddleOCR Base Image - GPU/CUDA dependencies"
WORKDIR /app
@@ -41,11 +41,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
# Copy local wheels directory (may contain ARM64 wheel from build-paddle)
COPY wheels/ /tmp/wheels/
# Copy requirements
COPY requirements-gpu.txt .
# Install paddlepaddle: prefer local wheel (ARM64), fallback to CUDA index (x86_64)
# Use python -m pip to ensure packages install to Python 3.11 (not system Python 3.10)
RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
python -m pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
@@ -55,7 +51,7 @@ RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
    fi && \
    rm -rf /tmp/wheels
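The if-branch only fires when a wheel exists in ./wheels/ at build time; on x86_64 the directory can stay empty and the elided fallback installs from PaddlePaddle's CUDA index, per the comment above. On ARM64 the wheel has to be produced first; the removed header comments suggest the compose profile for that, which presumably still applies:

docker compose --profile build run --rm build-paddle   # writes ./wheels/paddlepaddle*.whl
docker compose build ocr-gpu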
# Install remaining dependencies explicitly
# Install remaining dependencies
RUN python -m pip install --no-cache-dir \
    paddleocr==3.3.2 \
    jiwer \
@@ -65,11 +61,20 @@ RUN python -m pip install --no-cache-dir \
    pydantic \
    Pillow
# Copy application code
# =============================================================================
# STAGE 2: DEPLOY - Application code (changes frequently)
# =============================================================================
FROM base AS deploy
LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
WORKDIR /app
# Copy application code (this is the only layer that changes frequently)
COPY paddle_ocr_tuning_rest.py .
COPY dataset_manager.py .
# Build arguments for models to bake into image
# Build arguments for models
ARG DET_MODEL=PP-OCRv5_server_det
ARG REC_MODEL=PP-OCRv5_server_rec
@@ -77,11 +82,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
ENV PADDLE_DET_MODEL=${DET_MODEL}
ENV PADDLE_REC_MODEL=${REC_MODEL}
# Note: Models download at first runtime (CI runner has no GPU for build-time download)
# First container start will take ~30s longer as models are fetched
# Use paddlex-cache volume to persist models across container restarts
# Volume for dataset and optional additional model cache
# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.paddlex"]
# Expose API port
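For the GPU image the container additionally needs the NVIDIA runtime, so assuming the NVIDIA Container Toolkit is installed on the host (and the same port assumption as the CPU variant):

docker run -d --name ocr-gpu --gpus all -p 8000:8000 \
    -v paddlex-cache:/root/.paddlex \
    -v "$(pwd)/dataset:/app/dataset" \
    paddle-ocr-api:gpu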

paddle_ocr_tuning_rest.py

@@ -9,6 +9,7 @@ from typing import Optional
from contextlib import asynccontextmanager
import numpy as np
import paddle
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
@@ -17,6 +18,37 @@ from jiwer import wer, cer
from dataset_manager import ImageTextDataset
def get_gpu_info() -> dict:
    """Get GPU status information from PaddlePaddle."""
    info = {
        "cuda_available": paddle.device.is_compiled_with_cuda(),
        "device": str(paddle.device.get_device()),
        "gpu_count": 0,
        "gpu_name": None,
        "gpu_memory_total": None,
        "gpu_memory_used": None,
    }
    if info["cuda_available"]:
        try:
            info["gpu_count"] = paddle.device.cuda.device_count()
            if info["gpu_count"] > 0:
                # Get GPU properties
                props = paddle.device.cuda.get_device_properties(0)
                info["gpu_name"] = props.name
                info["gpu_memory_total"] = f"{props.total_memory / (1024**3):.2f} GB"
                # Get current memory usage
                mem_reserved = paddle.device.cuda.memory_reserved(0)
                mem_allocated = paddle.device.cuda.memory_allocated(0)
                info["gpu_memory_used"] = f"{mem_allocated / (1024**3):.2f} GB"
                info["gpu_memory_reserved"] = f"{mem_reserved / (1024**3):.2f} GB"
        except Exception as e:
            info["gpu_error"] = str(e)
    return info
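The helper can be exercised on its own to confirm a container actually sees the GPU; importing the module does not trigger the model download, so this is a quick check. Container name taken from the run sketch above; output values are illustrative:

docker exec ocr-gpu python -c 'from paddle_ocr_tuning_rest import get_gpu_info; print(get_gpu_info())'
# Illustrative output on a CUDA build:
# {'cuda_available': True, 'device': 'gpu:0', 'gpu_count': 1, 'gpu_name': '...', ...}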
# Model configuration via environment variables (with defaults)
DEFAULT_DET_MODEL = os.environ.get("PADDLE_DET_MODEL", "PP-OCRv5_server_det")
DEFAULT_REC_MODEL = os.environ.get("PADDLE_REC_MODEL", "PP-OCRv5_server_rec")
@@ -37,6 +69,19 @@ state = AppState()
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Load OCR model at startup."""
# Log GPU status
gpu_info = get_gpu_info()
print("=" * 50)
print("GPU STATUS")
print("=" * 50)
print(f" CUDA available: {gpu_info['cuda_available']}")
print(f" Device: {gpu_info['device']}")
if gpu_info['cuda_available']:
print(f" GPU count: {gpu_info['gpu_count']}")
print(f" GPU name: {gpu_info['gpu_name']}")
print(f" GPU memory total: {gpu_info['gpu_memory_total']}")
print("=" * 50)
print(f"Loading PaddleOCR models...")
print(f" Detection: {state.det_model}")
print(f" Recognition: {state.rec_model}")
@@ -44,6 +89,12 @@ async def lifespan(app: FastAPI):
        text_detection_model_name=state.det_model,
        text_recognition_model_name=state.rec_model,
    )
    # Log GPU memory after model load
    if gpu_info['cuda_available']:
        gpu_after = get_gpu_info()
        print(f" GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")
    print("Model loaded successfully!")
    yield
    # Cleanup on shutdown
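With the banner in place, a docker logs check shows whether the deployment came up on GPU before any request is made. Roughly, with illustrative values:

docker logs ocr-gpu | head
# ==================================================
# GPU STATUS
# ==================================================
#  CUDA available: True
#  Device: gpu:0
# ...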
@@ -89,6 +140,12 @@ class HealthResponse(BaseModel):
    dataset_size: Optional[int] = None
    det_model: Optional[str] = None
    rec_model: Optional[str] = None
    # GPU info
    cuda_available: Optional[bool] = None
    device: Optional[str] = None
    gpu_name: Optional[str] = None
    gpu_memory_used: Optional[str] = None
    gpu_memory_total: Optional[str] = None
def _normalize_box_xyxy(box):
@@ -179,6 +236,7 @@ def evaluate_text(reference: str, prediction: str) -> dict:
@app.get("/health", response_model=HealthResponse)
def health_check():
"""Check if the service is ready."""
gpu_info = get_gpu_info()
return HealthResponse(
status="ok" if state.ocr is not None else "initializing",
model_loaded=state.ocr is not None,
@@ -186,6 +244,11 @@ def health_check():
        dataset_size=len(state.dataset) if state.dataset else None,
        det_model=state.det_model,
        rec_model=state.rec_model,
        cuda_available=gpu_info.get("cuda_available"),
        device=gpu_info.get("device"),
        gpu_name=gpu_info.get("gpu_name"),
        gpu_memory_used=gpu_info.get("gpu_memory_used"),
        gpu_memory_total=gpu_info.get("gpu_memory_total"),
    )
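The extended response makes GPU state visible to orchestration health checks without shelling into the container. Probing it, again assuming port 8000; the values shown are illustrative:

curl -s http://localhost:8000/health
# Illustrative response once the model is loaded on a GPU build:
# {"status": "ok", "model_loaded": true, "det_model": "PP-OCRv5_server_det",
#  "rec_model": "PP-OCRv5_server_rec", "cuda_available": true, "device": "gpu:0",
#  "gpu_name": "...", "gpu_memory_used": "1.02 GB", "gpu_memory_total": "..."}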