Image update
All checks were successful
All checks were successful
This commit is contained in:
@@ -1,12 +1,21 @@
|
|||||||
# Dockerfile.cpu - CPU-only PaddleOCR REST API
|
# Dockerfile.cpu - Multi-stage CPU Dockerfile
|
||||||
# Multi-arch: supports both amd64 and arm64
|
#
|
||||||
|
# Build base only (push to registry, rarely changes):
|
||||||
|
# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
|
||||||
|
#
|
||||||
|
# Build deploy (uses base, fast - code only):
|
||||||
|
# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .
|
||||||
|
#
|
||||||
|
# Or build all at once:
|
||||||
|
# docker build -t paddle-ocr-api:cpu -f Dockerfile.cpu .
|
||||||
|
|
||||||
FROM python:3.11-slim
|
# =============================================================================
|
||||||
|
# STAGE 1: BASE - All dependencies (rarely changes)
|
||||||
|
# =============================================================================
|
||||||
|
FROM python:3.11-slim AS base
|
||||||
|
|
||||||
LABEL maintainer="Sergio Jimenez"
|
LABEL maintainer="Sergio Jimenez"
|
||||||
LABEL description="PaddleOCR Tuning REST API - CPU version"
|
LABEL description="PaddleOCR Base Image - CPU dependencies"
|
||||||
LABEL org.opencontainers.image.ref.name="python"
|
|
||||||
LABEL org.opencontainers.image.version="3.11-slim"
|
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
@@ -20,15 +29,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
libgomp1 \
|
libgomp1 \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Install Python dependencies from requirements file
|
# Install Python dependencies
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
# Copy application code
|
# =============================================================================
|
||||||
|
# STAGE 2: DEPLOY - Application code (changes frequently)
|
||||||
|
# =============================================================================
|
||||||
|
FROM base AS deploy
|
||||||
|
|
||||||
|
LABEL description="PaddleOCR Tuning REST API - CPU version"
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy application code (this is the only layer that changes frequently)
|
||||||
COPY paddle_ocr_tuning_rest.py .
|
COPY paddle_ocr_tuning_rest.py .
|
||||||
COPY dataset_manager.py .
|
COPY dataset_manager.py .
|
||||||
|
|
||||||
# Build arguments for models to bake into image
|
# Build arguments for models
|
||||||
ARG DET_MODEL=PP-OCRv5_server_det
|
ARG DET_MODEL=PP-OCRv5_server_det
|
||||||
ARG REC_MODEL=PP-OCRv5_server_rec
|
ARG REC_MODEL=PP-OCRv5_server_rec
|
||||||
|
|
||||||
@@ -36,11 +54,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
|
|||||||
ENV PADDLE_DET_MODEL=${DET_MODEL}
|
ENV PADDLE_DET_MODEL=${DET_MODEL}
|
||||||
ENV PADDLE_REC_MODEL=${REC_MODEL}
|
ENV PADDLE_REC_MODEL=${REC_MODEL}
|
||||||
|
|
||||||
# Note: Models download at first runtime
|
# Volume for dataset and model cache
|
||||||
# First container start will take ~30s longer as models are fetched
|
|
||||||
# Use paddlex-cache volume to persist models across container restarts
|
|
||||||
|
|
||||||
# Volume for dataset and optional additional model cache
|
|
||||||
VOLUME ["/app/dataset", "/root/.paddlex"]
|
VOLUME ["/app/dataset", "/root/.paddlex"]
|
||||||
|
|
||||||
# Expose API port
|
# Expose API port
|
||||||
|
|||||||
@@ -1,21 +1,21 @@
|
|||||||
# Dockerfile.gpu - CUDA-enabled PaddleOCR REST API
|
# Dockerfile.gpu - Multi-stage GPU Dockerfile
|
||||||
#
|
#
|
||||||
# Supports both architectures:
|
# Build base only (push to registry, rarely changes):
|
||||||
# - x86_64: Uses paddlepaddle-gpu from PaddlePaddle's CUDA index
|
# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-gpu-base:latest -f Dockerfile.gpu .
|
||||||
# - ARM64: Uses local wheel from ./wheels/ (built on DGX Spark)
|
|
||||||
#
|
#
|
||||||
# For ARM64 (DGX Spark), first build the wheel:
|
# Build deploy (uses base, fast - code only):
|
||||||
# docker compose --profile build run --rm build-paddle
|
# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-gpu:latest -f Dockerfile.gpu .
|
||||||
# Then build this image:
|
|
||||||
# docker compose build ocr-gpu
|
|
||||||
#
|
#
|
||||||
# For x86_64, just build directly (no wheel needed):
|
# Or build all at once:
|
||||||
# docker compose build ocr-gpu
|
# docker build -t paddle-ocr-api:gpu -f Dockerfile.gpu .
|
||||||
|
|
||||||
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
|
# =============================================================================
|
||||||
|
# STAGE 1: BASE - All dependencies (rarely changes)
|
||||||
|
# =============================================================================
|
||||||
|
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
|
||||||
|
|
||||||
LABEL maintainer="Sergio Jimenez"
|
LABEL maintainer="Sergio Jimenez"
|
||||||
LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
|
LABEL description="PaddleOCR Base Image - GPU/CUDA dependencies"
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
@@ -41,11 +41,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
# Copy local wheels directory (may contain ARM64 wheel from build-paddle)
|
# Copy local wheels directory (may contain ARM64 wheel from build-paddle)
|
||||||
COPY wheels/ /tmp/wheels/
|
COPY wheels/ /tmp/wheels/
|
||||||
|
|
||||||
# Copy requirements
|
|
||||||
COPY requirements-gpu.txt .
|
|
||||||
|
|
||||||
# Install paddlepaddle: prefer local wheel (ARM64), fallback to CUDA index (x86_64)
|
# Install paddlepaddle: prefer local wheel (ARM64), fallback to CUDA index (x86_64)
|
||||||
# Use python -m pip to ensure packages install to Python 3.11 (not system Python 3.10)
|
|
||||||
RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
|
RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
|
||||||
echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
|
echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
|
||||||
python -m pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
|
python -m pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
|
||||||
@@ -55,7 +51,7 @@ RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
|
|||||||
fi && \
|
fi && \
|
||||||
rm -rf /tmp/wheels
|
rm -rf /tmp/wheels
|
||||||
|
|
||||||
# Install remaining dependencies explicitly
|
# Install remaining dependencies
|
||||||
RUN python -m pip install --no-cache-dir \
|
RUN python -m pip install --no-cache-dir \
|
||||||
paddleocr==3.3.2 \
|
paddleocr==3.3.2 \
|
||||||
jiwer \
|
jiwer \
|
||||||
@@ -65,11 +61,20 @@ RUN python -m pip install --no-cache-dir \
|
|||||||
pydantic \
|
pydantic \
|
||||||
Pillow
|
Pillow
|
||||||
|
|
||||||
# Copy application code
|
# =============================================================================
|
||||||
|
# STAGE 2: DEPLOY - Application code (changes frequently)
|
||||||
|
# =============================================================================
|
||||||
|
FROM base AS deploy
|
||||||
|
|
||||||
|
LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy application code (this is the only layer that changes frequently)
|
||||||
COPY paddle_ocr_tuning_rest.py .
|
COPY paddle_ocr_tuning_rest.py .
|
||||||
COPY dataset_manager.py .
|
COPY dataset_manager.py .
|
||||||
|
|
||||||
# Build arguments for models to bake into image
|
# Build arguments for models
|
||||||
ARG DET_MODEL=PP-OCRv5_server_det
|
ARG DET_MODEL=PP-OCRv5_server_det
|
||||||
ARG REC_MODEL=PP-OCRv5_server_rec
|
ARG REC_MODEL=PP-OCRv5_server_rec
|
||||||
|
|
||||||
@@ -77,11 +82,7 @@ ARG REC_MODEL=PP-OCRv5_server_rec
|
|||||||
ENV PADDLE_DET_MODEL=${DET_MODEL}
|
ENV PADDLE_DET_MODEL=${DET_MODEL}
|
||||||
ENV PADDLE_REC_MODEL=${REC_MODEL}
|
ENV PADDLE_REC_MODEL=${REC_MODEL}
|
||||||
|
|
||||||
# Note: Models download at first runtime (CI runner has no GPU for build-time download)
|
# Volume for dataset and model cache
|
||||||
# First container start will take ~30s longer as models are fetched
|
|
||||||
# Use paddlex-cache volume to persist models across container restarts
|
|
||||||
|
|
||||||
# Volume for dataset and optional additional model cache
|
|
||||||
VOLUME ["/app/dataset", "/root/.paddlex"]
|
VOLUME ["/app/dataset", "/root/.paddlex"]
|
||||||
|
|
||||||
# Expose API port
|
# Expose API port
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ from typing import Optional
|
|||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import paddle
|
||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, HTTPException
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
@@ -17,6 +18,37 @@ from jiwer import wer, cer
|
|||||||
from dataset_manager import ImageTextDataset
|
from dataset_manager import ImageTextDataset
|
||||||
|
|
||||||
|
|
||||||
|
def get_gpu_info() -> dict:
    """Collect GPU status information from PaddlePaddle.

    Returns:
        dict: Always contains the keys
            - ``cuda_available``: value of ``paddle.device.is_compiled_with_cuda()``
            - ``device``: ``str(paddle.device.get_device())``
            - ``gpu_count``: ``paddle.device.cuda.device_count()``, or 0 when
              CUDA is not available or the query failed
            - ``gpu_name``, ``gpu_memory_total``, ``gpu_memory_used``,
              ``gpu_memory_reserved``: details of device 0 (memory values are
              strings formatted like ``"1.00 GB"``), or ``None`` when unknown
        and additionally ``gpu_error`` (the exception message) when querying
        the CUDA device raised.

    Only device index 0 is inspected. Failures while querying CUDA are
    reported through ``gpu_error`` instead of being raised, so callers such
    as a health endpoint keep working on machines without a usable GPU.
    """

    def _as_gb(num_bytes) -> str:
        # Byte count rendered in units of 1024**3 bytes with two decimals.
        return f"{num_bytes / (1024**3):.2f} GB"

    info = {
        "cuda_available": paddle.device.is_compiled_with_cuda(),
        "device": str(paddle.device.get_device()),
        "gpu_count": 0,
        "gpu_name": None,
        "gpu_memory_total": None,
        "gpu_memory_used": None,
        # Pre-seeded so the result has the same keys on every code path;
        # previously this key only appeared after a successful GPU query.
        "gpu_memory_reserved": None,
    }

    # Nothing more to query when CUDA is not available.
    if not info["cuda_available"]:
        return info

    try:
        info["gpu_count"] = paddle.device.cuda.device_count()
        if info["gpu_count"] > 0:
            # Static properties of the first device
            props = paddle.device.cuda.get_device_properties(0)
            info["gpu_name"] = props.name
            info["gpu_memory_total"] = _as_gb(props.total_memory)

            # Current memory usage of the first device
            mem_reserved = paddle.device.cuda.memory_reserved(0)
            mem_allocated = paddle.device.cuda.memory_allocated(0)
            info["gpu_memory_used"] = _as_gb(mem_allocated)
            info["gpu_memory_reserved"] = _as_gb(mem_reserved)
    except Exception as e:
        # Best effort: surface the failure in the payload instead of raising.
        info["gpu_error"] = str(e)

    return info
|
||||||
|
|
||||||
|
|
||||||
# Model configuration via environment variables (with defaults)
|
# Model configuration via environment variables (with defaults)
|
||||||
DEFAULT_DET_MODEL = os.environ.get("PADDLE_DET_MODEL", "PP-OCRv5_server_det")
|
DEFAULT_DET_MODEL = os.environ.get("PADDLE_DET_MODEL", "PP-OCRv5_server_det")
|
||||||
DEFAULT_REC_MODEL = os.environ.get("PADDLE_REC_MODEL", "PP-OCRv5_server_rec")
|
DEFAULT_REC_MODEL = os.environ.get("PADDLE_REC_MODEL", "PP-OCRv5_server_rec")
|
||||||
@@ -37,6 +69,19 @@ state = AppState()
|
|||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
"""Load OCR model at startup."""
|
"""Load OCR model at startup."""
|
||||||
|
# Log GPU status
|
||||||
|
gpu_info = get_gpu_info()
|
||||||
|
print("=" * 50)
|
||||||
|
print("GPU STATUS")
|
||||||
|
print("=" * 50)
|
||||||
|
print(f" CUDA available: {gpu_info['cuda_available']}")
|
||||||
|
print(f" Device: {gpu_info['device']}")
|
||||||
|
if gpu_info['cuda_available']:
|
||||||
|
print(f" GPU count: {gpu_info['gpu_count']}")
|
||||||
|
print(f" GPU name: {gpu_info['gpu_name']}")
|
||||||
|
print(f" GPU memory total: {gpu_info['gpu_memory_total']}")
|
||||||
|
print("=" * 50)
|
||||||
|
|
||||||
print(f"Loading PaddleOCR models...")
|
print(f"Loading PaddleOCR models...")
|
||||||
print(f" Detection: {state.det_model}")
|
print(f" Detection: {state.det_model}")
|
||||||
print(f" Recognition: {state.rec_model}")
|
print(f" Recognition: {state.rec_model}")
|
||||||
@@ -44,6 +89,12 @@ async def lifespan(app: FastAPI):
|
|||||||
text_detection_model_name=state.det_model,
|
text_detection_model_name=state.det_model,
|
||||||
text_recognition_model_name=state.rec_model,
|
text_recognition_model_name=state.rec_model,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Log GPU memory after model load
|
||||||
|
if gpu_info['cuda_available']:
|
||||||
|
gpu_after = get_gpu_info()
|
||||||
|
print(f" GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")
|
||||||
|
|
||||||
print("Model loaded successfully!")
|
print("Model loaded successfully!")
|
||||||
yield
|
yield
|
||||||
# Cleanup on shutdown
|
# Cleanup on shutdown
|
||||||
@@ -89,6 +140,12 @@ class HealthResponse(BaseModel):
|
|||||||
dataset_size: Optional[int] = None
|
dataset_size: Optional[int] = None
|
||||||
det_model: Optional[str] = None
|
det_model: Optional[str] = None
|
||||||
rec_model: Optional[str] = None
|
rec_model: Optional[str] = None
|
||||||
|
# GPU info
|
||||||
|
cuda_available: Optional[bool] = None
|
||||||
|
device: Optional[str] = None
|
||||||
|
gpu_name: Optional[str] = None
|
||||||
|
gpu_memory_used: Optional[str] = None
|
||||||
|
gpu_memory_total: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
def _normalize_box_xyxy(box):
|
def _normalize_box_xyxy(box):
|
||||||
@@ -179,6 +236,7 @@ def evaluate_text(reference: str, prediction: str) -> dict:
|
|||||||
@app.get("/health", response_model=HealthResponse)
|
@app.get("/health", response_model=HealthResponse)
|
||||||
def health_check():
|
def health_check():
|
||||||
"""Check if the service is ready."""
|
"""Check if the service is ready."""
|
||||||
|
gpu_info = get_gpu_info()
|
||||||
return HealthResponse(
|
return HealthResponse(
|
||||||
status="ok" if state.ocr is not None else "initializing",
|
status="ok" if state.ocr is not None else "initializing",
|
||||||
model_loaded=state.ocr is not None,
|
model_loaded=state.ocr is not None,
|
||||||
@@ -186,6 +244,11 @@ def health_check():
|
|||||||
dataset_size=len(state.dataset) if state.dataset else None,
|
dataset_size=len(state.dataset) if state.dataset else None,
|
||||||
det_model=state.det_model,
|
det_model=state.det_model,
|
||||||
rec_model=state.rec_model,
|
rec_model=state.rec_model,
|
||||||
|
cuda_available=gpu_info.get("cuda_available"),
|
||||||
|
device=gpu_info.get("device"),
|
||||||
|
gpu_name=gpu_info.get("gpu_name"),
|
||||||
|
gpu_memory_used=gpu_info.get("gpu_memory_used"),
|
||||||
|
gpu_memory_total=gpu_info.get("gpu_memory_total"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user