PaddleOCR, EasyOCR and docTR GPU support. (#4)
All checks were successful
build_docker / essential (push) Successful in 0s
build_docker / build_cpu (push) Successful in 5m0s
build_docker / build_gpu (push) Successful in 22m55s
build_docker / build_easyocr (push) Successful in 18m47s
build_docker / build_easyocr_gpu (push) Successful in 19m0s
build_docker / build_raytune (push) Successful in 3m27s
build_docker / build_doctr (push) Successful in 19m42s
build_docker / build_doctr_gpu (push) Successful in 14m49s
This commit was merged in pull request #4.
213
src/paddle_ocr/Dockerfile.build-paddle
Normal file
@@ -0,0 +1,213 @@
# Dockerfile.build-paddle - Build PaddlePaddle GPU wheel for ARM64
#
# This Dockerfile compiles PaddlePaddle from source with CUDA support for ARM64.
# The resulting wheel can be used in Dockerfile.gpu for ARM64 GPU acceleration.
#
# Build time: ~1-2 hours with caching, 2-4 hours first build
# Output: /output/paddlepaddle_gpu-*.whl
#
# Usage:
#   CUDA_ARCH=90 docker compose --profile build run --rm build-paddle
#
# Features:
#   - ccache for compiler caching (survives rebuilds)
#   - Split build stages for better layer caching
#   - ARM64 -m64 patch applied automatically

# syntax=docker/dockerfile:1.4
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

LABEL maintainer="Sergio Jimenez"
LABEL description="PaddlePaddle GPU wheel builder for ARM64"

# Build arguments
ARG PADDLE_VERSION=v3.0.0
ARG PYTHON_VERSION=3.11
ARG CUDA_ARCH=90

# Environment setup
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV CCACHE_DIR=/ccache
ENV PATH="/usr/lib/ccache:${PATH}"

# Install build dependencies + ccache
RUN apt-get update && apt-get install -y --no-install-recommends \
    python${PYTHON_VERSION} \
    python${PYTHON_VERSION}-dev \
    python${PYTHON_VERSION}-venv \
    python3-pip \
    build-essential \
    cmake \
    ninja-build \
    git \
    wget \
    curl \
    pkg-config \
    ccache \
    libssl-dev \
    libffi-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    liblzma-dev \
    libncurses5-dev \
    libncursesw5-dev \
    libgflags-dev \
    libgoogle-glog-dev \
    libprotobuf-dev \
    protobuf-compiler \
    patchelf \
    libopenblas-dev \
    liblapack-dev \
    swig \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3

# Setup ccache symlinks for CUDA
RUN mkdir -p /usr/lib/ccache && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/nvcc && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/gcc && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/g++ && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/cc && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/c++

# Upgrade pip and install Python build dependencies
RUN python -m pip install --upgrade pip setuptools wheel && \
    python -m pip install numpy protobuf pyyaml requests packaging astor decorator paddle-bfloat opt-einsum

WORKDIR /build

# Clone PaddlePaddle repository
RUN git clone --depth 1 --branch ${PADDLE_VERSION} https://github.com/PaddlePaddle/Paddle.git

WORKDIR /build/Paddle

# Patch for ARM64: Remove -m64 flag (x86_64 specific, causes build failure on aarch64)
RUN sed -i 's/-m64//g' cmake/flags.cmake && \
    sed -i 's/-m64//g' CMakeLists.txt 2>/dev/null || true && \
    find . -name "*.cmake" -exec sed -i 's/-m64//g' {} \; 2>/dev/null || true && \
    echo "Patched -m64 flag for ARM64 compatibility"

# Patch for ARM64: Install sse2neon to translate x86 SSE intrinsics to ARM NEON
# sse2neon provides drop-in replacements for x86 SIMD headers
RUN git clone --depth 1 https://github.com/DLTcollab/sse2neon.git /tmp/sse2neon && \
    mkdir -p /usr/local/include/sse2neon && \
    cp /tmp/sse2neon/sse2neon.h /usr/local/include/sse2neon/ && \
    rm -rf /tmp/sse2neon && \
    echo "Installed sse2neon for x86->ARM NEON translation"

# Create wrapper headers that use sse2neon for ARM64
RUN mkdir -p /usr/local/include/x86_stubs && \
    for h in immintrin xmmintrin emmintrin pmmintrin smmintrin; do \
        echo "#ifndef __x86_64__" > /usr/local/include/x86_stubs/${h}.h && \
        echo "#include <sse2neon/sse2neon.h>" >> /usr/local/include/x86_stubs/${h}.h && \
        echo "#else" >> /usr/local/include/x86_stubs/${h}.h && \
        echo "#include_next <${h}.h>" >> /usr/local/include/x86_stubs/${h}.h && \
        echo "#endif" >> /usr/local/include/x86_stubs/${h}.h; \
    done && \
    echo "Created x86 intrinsic wrapper headers for ARM64 using sse2neon"

# Install additional Python requirements for building
RUN pip install -r python/requirements.txt || true

# Create build directory
RUN mkdir -p build
WORKDIR /build/Paddle/build

# Configure CMake for ARM64 + CUDA build
# Note: -Wno-class-memaccess fixes Eigen NEON warning on ARM64
RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}" && \
    cmake .. \
    -GNinja \
    -DCMAKE_BUILD_TYPE=Release \
    -DPY_VERSION=${PYTHON_VERSION} \
    -DWITH_GPU=ON \
    -DWITH_TESTING=OFF \
    -DWITH_DISTRIBUTE=OFF \
    -DWITH_NCCL=OFF \
    -DWITH_MKL=OFF \
    -DWITH_MKLDNN=OFF \
    -DON_INFER=OFF \
    -DWITH_PYTHON=ON \
    -DWITH_AVX=OFF \
    -DCUDA_ARCH_NAME=Manual \
    -DCUDA_ARCH_BIN="${CUDA_ARCH}" \
    -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
    -DCMAKE_C_COMPILER_LAUNCHER=ccache \
    -DCMAKE_CXX_FLAGS="-Wno-class-memaccess -Wno-error=class-memaccess -I/usr/local/include/x86_stubs" \
    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON

# Build external dependencies first (cacheable layer)
RUN --mount=type=cache,target=/ccache \
    ninja extern_gflags extern_glog extern_protobuf extern_zlib extern_eigen3

# Build flashattn (heaviest dependency, separate layer for caching)
RUN --mount=type=cache,target=/ccache \
    ninja extern_flashattn

# Build remaining external dependencies
RUN --mount=type=cache,target=/ccache \
    ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo extern_xbyak

# Build main PaddlePaddle (with ccache, fallback to fewer jobs if OOM)
RUN --mount=type=cache,target=/ccache \
    ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4

# Build the Python wheel
RUN ninja paddle_python || true

# Create output directory
RUN mkdir -p /output

# Build wheel package - try multiple methods since PaddlePaddle build structure varies
WORKDIR /build/Paddle
RUN echo "=== Looking for wheel build method ===" ; \
    ls -la python/ 2>/dev/null ; \
    ls -la build/python/ 2>/dev/null ; \
    if [ -f build/python/setup.py ]; then \
        echo "Using build/python/setup.py" && \
        cd build/python && python setup.py bdist_wheel; \
    elif [ -f python/setup.py ]; then \
        echo "Using python/setup.py" && \
        cd python && python setup.py bdist_wheel; \
    else \
        echo "Looking for existing wheel..." && \
        find /build -name "paddlepaddle*.whl" -type f 2>/dev/null; \
    fi

# Copy wheel to output
RUN find /build -name "paddlepaddle*.whl" -type f -exec cp {} /output/ \; && \
    ls -la /output/ && \
    if [ ! "$(ls -A /output/*.whl 2>/dev/null)" ]; then \
        echo "ERROR: No wheel found!" && exit 1; \
    fi

# List what was built
RUN ls -la /output/ && \
    echo "=== Build complete ===" && \
    find /build -name "*.whl" -type f 2>/dev/null

# Default command: copy wheel to mounted volume
CMD ["sh", "-c", "cp /output/*.whl /wheels/ 2>/dev/null && echo 'Wheel copied to /wheels/' && ls -la /wheels/ || echo 'No wheel found in /output, checking other locations...' && find /build -name '*.whl' -exec cp {} /wheels/ \\; && ls -la /wheels/"]
149
src/paddle_ocr/Dockerfile.build-paddle-cpu
Normal file
@@ -0,0 +1,149 @@
# Dockerfile.build-paddle-cpu - Build PaddlePaddle CPU wheel for ARM64
#
# Required because PyPI wheels don't work on ARM64 (x86 SSE instructions).
#
# Build time: ~1-2 hours
# Output: /output/paddlepaddle-*.whl
#
# Usage:
#   docker build -t paddle-builder:cpu-arm64 -f Dockerfile.build-paddle-cpu .
#   docker run --rm -v ./wheels:/wheels paddle-builder:cpu-arm64

# syntax=docker/dockerfile:1.4
FROM ubuntu:22.04

LABEL maintainer="Sergio Jimenez"
LABEL description="PaddlePaddle CPU wheel builder for ARM64"

ARG PADDLE_VERSION=v3.0.0
ARG PYTHON_VERSION=3.11

ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV CCACHE_DIR=/ccache
ENV PATH="/usr/lib/ccache:${PATH}"

# Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    python${PYTHON_VERSION} \
    python${PYTHON_VERSION}-dev \
    python${PYTHON_VERSION}-venv \
    python3-pip \
    build-essential \
    cmake \
    ninja-build \
    git \
    wget \
    curl \
    pkg-config \
    ccache \
    libssl-dev \
    libffi-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    liblzma-dev \
    libncurses5-dev \
    libncursesw5-dev \
    libgflags-dev \
    libgoogle-glog-dev \
    libprotobuf-dev \
    protobuf-compiler \
    patchelf \
    libopenblas-dev \
    liblapack-dev \
    swig \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3

# Setup ccache
RUN mkdir -p /usr/lib/ccache && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/gcc && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/g++ && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/cc && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/c++

RUN python -m pip install --upgrade pip setuptools wheel && \
    python -m pip install numpy protobuf pyyaml requests packaging astor decorator paddle-bfloat opt-einsum

WORKDIR /build
RUN git clone --depth 1 --branch ${PADDLE_VERSION} https://github.com/PaddlePaddle/Paddle.git

WORKDIR /build/Paddle

# Patch -m64 flag (x86_64 specific)
RUN sed -i 's/-m64//g' cmake/flags.cmake && \
    sed -i 's/-m64//g' CMakeLists.txt 2>/dev/null || true && \
    find . -name "*.cmake" -exec sed -i 's/-m64//g' {} \; 2>/dev/null || true

# Install sse2neon for x86 SSE -> ARM NEON translation
RUN git clone --depth 1 https://github.com/DLTcollab/sse2neon.git /tmp/sse2neon && \
    mkdir -p /usr/local/include/sse2neon && \
    cp /tmp/sse2neon/sse2neon.h /usr/local/include/sse2neon/ && \
    rm -rf /tmp/sse2neon

# Create x86 intrinsic wrapper headers
RUN mkdir -p /usr/local/include/x86_stubs && \
    for h in immintrin xmmintrin emmintrin pmmintrin smmintrin; do \
        echo "#ifndef __x86_64__" > /usr/local/include/x86_stubs/${h}.h && \
        echo "#include <sse2neon/sse2neon.h>" >> /usr/local/include/x86_stubs/${h}.h && \
        echo "#else" >> /usr/local/include/x86_stubs/${h}.h && \
        echo "#include_next <${h}.h>" >> /usr/local/include/x86_stubs/${h}.h && \
        echo "#endif" >> /usr/local/include/x86_stubs/${h}.h; \
    done

RUN pip install -r python/requirements.txt || true

RUN mkdir -p build
WORKDIR /build/Paddle/build

# Configure for CPU-only ARM64 build
# WITH_ARM=ON enables ARM NEON optimizations and disables x86-specific code (XBYAK, MKL)
RUN cmake .. \
    -GNinja \
    -DCMAKE_BUILD_TYPE=Release \
    -DPY_VERSION=${PYTHON_VERSION} \
    -DWITH_GPU=OFF \
    -DWITH_ARM=ON \
    -DWITH_TESTING=OFF \
    -DWITH_DISTRIBUTE=OFF \
    -DWITH_NCCL=OFF \
    -DWITH_MKL=OFF \
    -DWITH_MKLDNN=OFF \
    -DWITH_XBYAK=OFF \
    -DON_INFER=OFF \
    -DWITH_PYTHON=ON \
    -DWITH_AVX=OFF \
    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
    -DCMAKE_C_COMPILER_LAUNCHER=ccache \
    -DCMAKE_CXX_FLAGS="-Wno-class-memaccess -Wno-error=class-memaccess -I/usr/local/include/x86_stubs"

# Build external dependencies
RUN --mount=type=cache,target=/ccache \
    ninja extern_gflags extern_glog extern_protobuf extern_zlib extern_eigen3

# Note: extern_xbyak excluded - it's x86-only and disabled with WITH_ARM=ON
RUN --mount=type=cache,target=/ccache \
    ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo

# Build PaddlePaddle
RUN --mount=type=cache,target=/ccache \
    ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4

RUN ninja paddle_python || true

RUN mkdir -p /output

WORKDIR /build/Paddle
RUN if [ -f build/python/setup.py ]; then \
        cd build/python && python setup.py bdist_wheel; \
    elif [ -f python/setup.py ]; then \
        cd python && python setup.py bdist_wheel; \
    fi

RUN find /build -name "paddlepaddle*.whl" -type f -exec cp {} /output/ \; && \
    ls -la /output/

CMD ["sh", "-c", "cp /output/*.whl /wheels/ && ls -la /wheels/"]
81
src/paddle_ocr/Dockerfile.cpu
Normal file
@@ -0,0 +1,81 @@
# Dockerfile.cpu - Multi-stage CPU Dockerfile
#
# Build base only (push to registry, rarely changes):
#   docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
#
# Build deploy (uses base, fast - code only):
#   docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .
#
# Or build all at once:
#   docker build -t paddle-ocr-api:cpu -f Dockerfile.cpu .

# =============================================================================
# STAGE 1: BASE - All dependencies (rarely changes)
# =============================================================================
FROM python:3.11-slim AS base

LABEL maintainer="Sergio Jimenez"
LABEL description="PaddleOCR Base Image - CPU dependencies"

WORKDIR /app

# Install system dependencies for OpenCV and PaddleOCR
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Copy local wheels directory (may contain ARM64 wheel from build-paddle-cpu)
COPY wheels/ /tmp/wheels/

# Install paddlepaddle: prefer local wheel (ARM64), fallback to PyPI (x86_64)
RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
        echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
        pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
    else \
        echo "=== Installing PaddlePaddle from PyPI (x86_64) ===" && \
        pip install --no-cache-dir paddlepaddle==3.0.0; \
    fi && \
    rm -rf /tmp/wheels

# Install remaining Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# =============================================================================
# STAGE 2: DEPLOY - Application code (changes frequently)
# =============================================================================
FROM base AS deploy

LABEL description="PaddleOCR Tuning REST API - CPU version"

WORKDIR /app

# Copy application code (this is the only layer that changes frequently)
COPY paddle_ocr_tuning_rest.py .
COPY dataset_manager.py .

# Build arguments for models
ARG DET_MODEL=PP-OCRv5_server_det
ARG REC_MODEL=PP-OCRv5_server_rec

# Set as environment variables (can be overridden at runtime)
ENV PADDLE_DET_MODEL=${DET_MODEL}
ENV PADDLE_REC_MODEL=${REC_MODEL}

# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.paddlex"]

# Expose API port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "paddle_ocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
105
src/paddle_ocr/Dockerfile.gpu
Normal file
@@ -0,0 +1,105 @@
# Dockerfile.gpu - Multi-stage GPU Dockerfile
#
# Build base only (push to registry, rarely changes):
#   docker build --target base -t seryus.ddns.net/unir/paddle-ocr-gpu-base:latest -f Dockerfile.gpu .
#
# Build deploy (uses base, fast - code only):
#   docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-gpu:latest -f Dockerfile.gpu .
#
# Or build all at once:
#   docker build -t paddle-ocr-api:gpu -f Dockerfile.gpu .

# =============================================================================
# STAGE 1: BASE - All dependencies (rarely changes)
# =============================================================================
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base

LABEL maintainer="Sergio Jimenez"
LABEL description="PaddleOCR Base Image - GPU/CUDA dependencies"

WORKDIR /app

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV CUDA_VISIBLE_DEVICES=0

# Install Python 3.11 and system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3.11 \
    python3.11-venv \
    python3-pip \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.11 /usr/bin/python

# Fix cuDNN library path for ARM64 only (PaddlePaddle looks in /usr/local/cuda/lib64)
# x86_64 doesn't need this - PyPI wheel handles paths correctly
RUN if [ "$(uname -m)" = "aarch64" ]; then \
        mkdir -p /usr/local/cuda/lib64 && \
        ln -sf /usr/lib/aarch64-linux-gnu/libcudnn*.so* /usr/local/cuda/lib64/ && \
        ln -sf /usr/lib/aarch64-linux-gnu/libcudnn.so.9 /usr/local/cuda/lib64/libcudnn.so && \
        ldconfig; \
    fi

# Copy local wheels directory (may contain ARM64 wheel from build-paddle)
COPY wheels/ /tmp/wheels/

# Install paddlepaddle: prefer local wheel (ARM64), fallback to CUDA index (x86_64)
RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
        echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
        python -m pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
    else \
        echo "=== Installing PaddlePaddle from CUDA index (x86_64) ===" && \
        python -m pip install --no-cache-dir paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/; \
    fi && \
    rm -rf /tmp/wheels

# Install remaining dependencies
RUN python -m pip install --no-cache-dir \
    paddleocr==3.3.2 \
    jiwer \
    numpy \
    fastapi \
    "uvicorn[standard]" \
    pydantic \
    Pillow

# =============================================================================
# STAGE 2: DEPLOY - Application code (changes frequently)
# =============================================================================
FROM base AS deploy

LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"

WORKDIR /app

# Copy application code (this is the only layer that changes frequently)
COPY paddle_ocr_tuning_rest.py .
COPY dataset_manager.py .

# Build arguments for models
ARG DET_MODEL=PP-OCRv5_server_det
ARG REC_MODEL=PP-OCRv5_server_rec

# Set as environment variables (can be overridden at runtime)
ENV PADDLE_DET_MODEL=${DET_MODEL}
ENV PADDLE_REC_MODEL=${REC_MODEL}

# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.paddlex"]

# Expose API port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "paddle_ocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
824
src/paddle_ocr/README.md
Normal file
@@ -0,0 +1,824 @@
# PaddleOCR Tuning REST API

REST API service for PaddleOCR hyperparameter evaluation. Keeps the model loaded in memory for fast repeated evaluations during hyperparameter search.

## Quick Start with Docker Compose

Docker Compose manages building and running containers. The `docker-compose.yml` defines two services:
- `ocr-cpu` - CPU-only version (works everywhere)
- `ocr-gpu` - GPU version (requires NVIDIA GPU + Container Toolkit)

### Run CPU Version

```bash
cd src/paddle_ocr

# Build and start (first time takes ~2-3 min to build, ~30s to load model)
docker compose up ocr-cpu

# Or run in background (detached)
docker compose up -d ocr-cpu

# View logs
docker compose logs -f ocr-cpu

# Stop
docker compose down
```

### Run GPU Version

```bash
# Requires: NVIDIA GPU + nvidia-container-toolkit installed
docker compose up ocr-gpu
```

### Test the API

Once running, test with:
```bash
# Check health
curl http://localhost:8000/health

# Or use the test script
pip install requests
python test.py --url http://localhost:8000
```

### What Docker Compose Does

```
docker compose up ocr-cpu
  │
  ├─► Builds image from Dockerfile.cpu (if not exists)
  ├─► Creates container "paddle-ocr-cpu"
  ├─► Mounts ../dataset → /app/dataset (your PDF images)
  ├─► Mounts paddlex-cache volume (persists downloaded models)
  ├─► Exposes port 8000
  └─► Runs: uvicorn paddle_ocr_tuning_rest:app --host 0.0.0.0 --port 8000
```

## Files

| File | Description |
|------|-------------|
| `paddle_ocr_tuning_rest.py` | FastAPI REST service |
| `dataset_manager.py` | Dataset loader |
| `test.py` | API test client |
| `Dockerfile.cpu` | CPU-only image (x86_64 + ARM64 with local wheel) |
| `Dockerfile.gpu` | GPU/CUDA image (x86_64 + ARM64 with local wheel) |
| `Dockerfile.build-paddle` | PaddlePaddle GPU wheel builder for ARM64 |
| `Dockerfile.build-paddle-cpu` | PaddlePaddle CPU wheel builder for ARM64 |
| `docker-compose.yml` | Service orchestration |
| `docker-compose.cpu-registry.yml` | Pull CPU image from registry |
| `docker-compose.gpu-registry.yml` | Pull GPU image from registry |
| `wheels/` | Local PaddlePaddle wheels (created by build-paddle) |

## API Endpoints

### `GET /health`
Check if service is ready.

```json
{"status": "ok", "model_loaded": true, "dataset_loaded": true, "dataset_size": 24}
```
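
For scripted use, you can poll `/health` until the model is loaded before firing evaluations. A minimal sketch (host/port assumed to match the compose setup above):

```python
import time

import requests

# Poll /health until the model is loaded; the first start can take ~30s
# (plus the one-time model download).
for _ in range(60):
    try:
        if requests.get("http://localhost:8000/health", timeout=5).json().get("model_loaded"):
            break
    except requests.RequestException:
        pass  # service not up yet
    time.sleep(5)
else:
    raise RuntimeError("OCR service did not become healthy in time")
```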

### `POST /evaluate`
Run OCR evaluation with given hyperparameters.

**Request:**
```json
{
  "pdf_folder": "/app/dataset",
  "textline_orientation": true,
  "use_doc_orientation_classify": false,
  "use_doc_unwarping": false,
  "text_det_thresh": 0.469,
  "text_det_box_thresh": 0.5412,
  "text_det_unclip_ratio": 0.0,
  "text_rec_score_thresh": 0.635,
  "start_page": 5,
  "end_page": 10
}
```

**Response:**
```json
{"CER": 0.0115, "WER": 0.0989, "TIME": 330.5, "PAGES": 5, "TIME_PER_PAGE": 66.1}
```
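
A minimal Python client sketch for this endpoint (field values are illustrative; omitted fields fall back to the service defaults):

```python
import requests

# Evaluate pages 5-10 with a couple of detection thresholds overridden.
payload = {
    "pdf_folder": "/app/dataset",
    "text_det_thresh": 0.469,
    "text_det_box_thresh": 0.5412,
    "start_page": 5,
    "end_page": 10,
}

resp = requests.post("http://localhost:8000/evaluate", json=payload, timeout=600)
resp.raise_for_status()
print(resp.json())  # e.g. {"CER": 0.0115, "WER": 0.0989, ...}
```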

### `POST /evaluate_full`
Same as `/evaluate` but runs on ALL pages (ignores `start_page`/`end_page`).

## Debug Output (debugset)

The `debugset` folder allows saving OCR predictions for debugging and analysis. When `"save_output": true` is passed to `/evaluate`, predictions are written to `/app/debugset`.

### Enable Debug Output

```json
{
  "pdf_folder": "/app/dataset",
  "save_output": true,
  "start_page": 5,
  "end_page": 10
}
```

### Output Structure

```
debugset/
├── doc1/
│   └── paddle_ocr/
│       ├── page_0005.txt
│       ├── page_0006.txt
│       └── ...
├── doc2/
│   └── paddle_ocr/
│       └── ...
```

Each `.txt` file contains the OCR-extracted text for that page.

### Docker Mount

The `debugset` folder is mounted read-write in docker-compose:

```yaml
volumes:
  - ../debugset:/app/debugset:rw
```

### Use Cases

- **Compare OCR engines**: Run the same pages through PaddleOCR, DocTR, EasyOCR with `"save_output": true`, then diff the results (see the sketch after this list)
- **Debug hyperparameters**: See how different settings affect text extraction
- **Ground truth comparison**: Compare predictions against expected output
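
A minimal comparison sketch, assuming two engines already wrote to `debugset/` as shown above (the `doctr_ocr` subdirectory name is hypothetical; use whatever `output_subdir` each service actually writes):

```python
from pathlib import Path

import jiwer  # already a service dependency

paddle_dir = Path("debugset/doc1/paddle_ocr")
doctr_dir = Path("debugset/doc1/doctr_ocr")  # hypothetical subdir for DocTR output

for page in sorted(paddle_dir.glob("page_*.txt")):
    other = doctr_dir / page.name
    if other.exists():
        # CER between the two engines' outputs for the same page
        cer = jiwer.cer(page.read_text(), other.read_text())
        print(f"{page.name}: inter-engine CER = {cer:.4f}")
```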

## Building Images

### CPU Image (Multi-Architecture)

```bash
# Local build (current architecture)
docker build -f Dockerfile.cpu -t paddle-ocr-api:cpu .

# Multi-arch build with buildx (amd64 + arm64)
docker buildx create --name multiarch --use
docker buildx build -f Dockerfile.cpu \
  --platform linux/amd64,linux/arm64 \
  -t paddle-ocr-api:cpu \
  --push .
```

### GPU Image (x86_64 + ARM64 with local wheel)

```bash
docker build -f Dockerfile.gpu -t paddle-ocr-api:gpu .
```

> **Note:** PaddlePaddle GPU 3.x packages are **not on PyPI**. The Dockerfile installs from PaddlePaddle's official CUDA index (`paddlepaddle.org.cn/packages/stable/cu126/`). This is handled automatically during build.

## Running

### CPU (Any machine)

```bash
docker run -d -p 8000:8000 \
  -v $(pwd)/../dataset:/app/dataset:ro \
  -v paddlex-cache:/root/.paddlex \
  paddle-ocr-api:cpu
```

### GPU (NVIDIA)

```bash
docker run -d -p 8000:8000 --gpus all \
  -v $(pwd)/../dataset:/app/dataset:ro \
  -v paddlex-cache:/root/.paddlex \
  paddle-ocr-api:gpu
```

## GPU Support Analysis

### Host System Reference (DGX Spark)

This section documents GPU support findings based on testing on an NVIDIA DGX Spark:

| Component | Value |
|-----------|-------|
| Architecture | ARM64 (aarch64) |
| CPU | NVIDIA Grace (ARM) |
| GPU | NVIDIA GB10 |
| CUDA Version | 13.0 |
| Driver | 580.95.05 |
| OS | Ubuntu 24.04 LTS |
| Container Toolkit | nvidia-container-toolkit 1.18.1 |
| Docker | 28.5.1 |
| Docker Compose | v2.40.0 |

### PaddlePaddle GPU Platform Support

**Note:** PaddlePaddle-GPU does NOT have prebuilt ARM64 wheels on PyPI, but ARM64 support is available via custom-built wheels.

| Platform | CPU | GPU |
|----------|-----|-----|
| Linux x86_64 | ✅ | ✅ CUDA 10.2/11.x/12.x |
| Windows x64 | ✅ | ✅ CUDA 10.2/11.x/12.x |
| macOS x64 | ✅ | ❌ |
| macOS ARM64 (M1/M2) | ✅ | ❌ |
| Linux ARM64 (Jetson/DGX) | ✅ | ⚠️ Limited - see Blackwell note |

**Source:** [PaddlePaddle-GPU PyPI](https://pypi.org/project/paddlepaddle-gpu/) - only `manylinux_x86_64` and `win_amd64` wheels are available on PyPI. ARM64 wheels must be built from source or downloaded from Gitea packages.

### ARM64 GPU Support

ARM64 GPU support is available but requires custom-built wheels:

1. **No prebuilt PyPI wheels**: `pip install paddlepaddle-gpu` fails on ARM64 - no compatible wheels exist on PyPI
2. **Custom wheels work**: This project provides Dockerfiles to build ARM64 GPU wheels from source
3. **CI/CD builds ARM64 GPU images**: Pre-built wheels are available from Gitea packages

**To use GPU on ARM64:**
- Use the pre-built images from the container registry, or
- Build the wheel locally using `Dockerfile.build-paddle` (see Option 2 below), or
- Download the wheel from Gitea packages: `wheels/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl`

### ⚠️ Known Limitation: Blackwell GPU (sm_121 / GB10)

**Status: GPU inference does NOT work on NVIDIA Blackwell GPUs (DGX Spark, GB200, etc.)**

#### Symptoms

When running PaddleOCR on Blackwell GPUs:
- CUDA loads successfully ✅
- Basic tensor operations work ✅
- **Detection model outputs constant values** ❌
- 0 text regions detected
- CER/WER = 100% (nothing recognized)

#### Root Cause

**Confirmed:** PaddlePaddle's entire CUDA backend does NOT support Blackwell (sm_121). This is NOT just an inference model problem - even basic operations fail.

**Test Results (January 2026):**

1. **PTX JIT Test** (`CUDA_FORCE_PTX_JIT=1`):
   ```
   OSError: CUDA error(209), no kernel image is available for execution on the device.
   [Hint: 'cudaErrorNoKernelImageForDevice']
   ```
   → Confirmed: No PTX code exists in PaddlePaddle binaries

2. **Dynamic Graph Mode Test** (bypassing inference models):
   ```
   Conv2D + BatchNorm output:
     Output min: 0.0000
     Output max: 0.0000
     Output mean: 0.0000
   Dynamic graph mode: BROKEN (constant output)
   ```
   → Confirmed: Even simple nn.Conv2D produces zeros on Blackwell

**Conclusion:** The issue is PaddlePaddle's compiled CUDA kernels (cubins), not just the inference models. The entire framework was compiled without sm_121 support and without PTX for JIT compilation.

**Why building PaddlePaddle from source doesn't fix it:**

1. ⚠️ Building with `CUDA_ARCH=121` requires CUDA 13.0+ (PaddlePaddle only supports up to CUDA 12.6)
2. ❌ Even if you could build it, PaddleOCR models contain pre-compiled CUDA ops
3. ❌ These model files were exported/compiled targeting sm_80/sm_90 architectures
4. ❌ The model kernels execute on GPU but produce garbage output on sm_121

**To truly fix this**, the PaddlePaddle team would need to:
1. Add sm_121 to their model export pipeline
2. Re-export all PaddleOCR models (PP-OCRv4, PP-OCRv5, etc.) with Blackwell support
3. Release new model versions

This is tracked in [GitHub Issue #17327](https://github.com/PaddlePaddle/PaddleOCR/issues/17327).

#### Debug Script

Use the included debug script to verify this issue:

```bash
docker exec paddle-ocr-gpu python /app/scripts/debug_gpu_detection.py /app/dataset/0/img/page_0001.png
```

Expected output showing the problem:
```
OUTPUT ANALYSIS:
  Shape: (1, 1, 640, 640)
  Min: 0.000010
  Max: 0.000010   # <-- Same as min = constant output
  Mean: 0.000010

DIAGNOSIS:
  PROBLEM: Output is constant - model inference is broken!
  This typically indicates GPU compute capability mismatch.
```

#### Workarounds

1. **Use CPU mode** (recommended):
   ```bash
   docker compose up ocr-cpu
   ```
   The ARM Grace CPU is fast (~2-5 sec/page). This is the reliable option.

2. **Use EasyOCR or DocTR with GPU**:
   These use PyTorch, which has official ARM64 CUDA wheels (cu128 index):
   ```bash
   # EasyOCR with GPU on DGX Spark
   docker build -f ../easyocr_service/Dockerfile.gpu -t easyocr-gpu ../easyocr_service
   docker run --gpus all -p 8002:8000 easyocr-gpu
   ```

3. **Wait for PaddlePaddle Blackwell support**:
   Track [GitHub Issue #17327](https://github.com/PaddlePaddle/PaddleOCR/issues/17327) for updates.

#### GPU Support Matrix (Updated)

| GPU Architecture | Compute | CPU | GPU |
|------------------|---------|-----|-----|
| Ampere (A100, A10) | sm_80 | ✅ | ✅ |
| Hopper (H100, H200) | sm_90 | ✅ | ✅ |
| **Blackwell (GB10, GB200)** | sm_121 | ✅ | ❌ Not supported |

#### FAQ: Why Doesn't CUDA Backward Compatibility Work?

**Q: CUDA normally runs older kernels on newer GPUs. Why doesn't this work for Blackwell?**

Per the [NVIDIA Blackwell Compatibility Guide](https://docs.nvidia.com/cuda/blackwell-compatibility-guide/):

CUDA **can** run older code on newer GPUs via **PTX JIT compilation**:
1. PTX (Parallel Thread Execution) is NVIDIA's intermediate representation
2. If an app includes PTX code, the driver JIT-compiles it for the target GPU
3. This allows sm_80 code to run on sm_121

**The problem**: PaddleOCR inference models contain only pre-compiled **cubins** (SASS binary), not PTX. Without PTX, there's nothing to JIT-compile.

We tested PTX JIT (January 2026):
```bash
# Force PTX JIT compilation
docker run --gpus all -e CUDA_FORCE_PTX_JIT=1 paddle-ocr-gpu \
  python /app/scripts/debug_gpu_detection.py /app/dataset/0/img/page_0001.png

# Result:
# OSError: CUDA error(209), no kernel image is available for execution on the device.
```
**Confirmed: No PTX exists** in PaddlePaddle binaries. The CUDA kernels are cubins-only (SASS binary), compiled for sm_80/sm_90 without PTX fallback.

**Note on sm_121**: Per NVIDIA docs, "sm_121 is the same as sm_120 since the only difference is physically integrated CPU+GPU memory of Spark." The issue is general Blackwell (sm_12x) support, not Spark-specific.
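
To see what your own GPU reports from inside the container, a quick sketch (assumes `paddle.device.cuda.get_device_capability` is available, as in recent Paddle releases):

```python
import paddle

# Compare the driver-reported compute capability against the architectures
# the wheel was built for (sm_80/sm_90 in the official packages).
major, minor = paddle.device.cuda.get_device_capability()
print(f"GPU compute capability: sm_{major}{minor}")  # sm_121 on DGX Spark / GB10
print("Compiled with CUDA:", paddle.device.is_compiled_with_cuda())
```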

#### FAQ: Does Dynamic Graph Mode Work on Blackwell?

**Q: Can I bypass inference models and use PaddlePaddle's dynamic graph mode?**

**No.** We tested dynamic graph mode (January 2026):
```bash
# Test script runs: paddle.nn.Conv2D + paddle.nn.BatchNorm2D
python /app/scripts/test_dynamic_mode.py

# Result:
# Input shape:  [1, 3, 224, 224]
# Output shape: [1, 64, 112, 112]
# Output min:  0.0000
# Output max:  0.0000   # <-- All zeros!
# Output mean: 0.0000
# Dynamic graph mode: BROKEN (constant output)
```

**Conclusion:** The problem isn't limited to inference models. PaddlePaddle's core CUDA kernels (Conv2D, BatchNorm, etc.) produce garbage on sm_121. The entire framework lacks Blackwell support.
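
A minimal reproduction sketch of that check (the shipped `/app/scripts/test_dynamic_mode.py` may differ in detail):

```python
import paddle

paddle.device.set_device("gpu")

# Plain dynamic-graph ops, no inference model involved.
conv = paddle.nn.Conv2D(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3)
bn = paddle.nn.BatchNorm2D(64)

x = paddle.randn([1, 3, 224, 224])
y = bn(conv(x))

# Healthy GPU: min != max. On Blackwell (sm_121) both print 0.0.
print("min:", float(y.min()), "max:", float(y.max()), "mean:", float(y.mean()))
```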

#### FAQ: Can I Run AMD64 Containers on ARM64 DGX Spark?

**Q: Can I just run the working x86_64 GPU image via emulation?**

**Short answer: Yes for CPU, No for GPU.**

You can run amd64 containers via QEMU emulation:
```bash
# Install QEMU
sudo apt-get install qemu binfmt-support qemu-user-static
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes

# Run amd64 container
docker run --platform linux/amd64 paddle-ocr-gpu:amd64 ...
```

**But GPU doesn't work:**
- QEMU emulates CPU instructions (x86 → ARM)
- **QEMU user-mode does NOT support GPU passthrough**
- GPU calls from emulated x86 code cannot reach the ARM64 GPU

So even though the amd64 image works on real x86_64 hardware, under QEMU on ARM64:
- ❌ No GPU access through QEMU
- ❌ CPU emulation is 10-100x slower than native ARM64
- ❌ Defeats the purpose entirely

| Approach | CPU | GPU | Speed |
|----------|-----|-----|-------|
| ARM64 native (CPU) | ✅ | N/A | Fast (~2-5s/page) |
| ARM64 native (GPU) | ✅ | ❌ Blackwell issue | - |
| AMD64 via QEMU | ⚠️ Works | ❌ No passthrough | 10-100x slower |

### Options for ARM64 Systems

#### Option 1: CPU-Only (Recommended)

Use `Dockerfile.cpu`, which works on ARM64:

```bash
# On DGX Spark
docker compose up ocr-cpu

# Or build directly
docker build -f Dockerfile.cpu -t paddle-ocr-api:cpu .
```

**Performance:** CPU inference on ARM64 Grace is surprisingly fast due to the high core count. Expect ~2-5 seconds per page.

#### Option 2: Build PaddlePaddle from Source (Docker-based)

Use the included Docker builder to compile PaddlePaddle GPU for ARM64:

```bash
cd src/paddle_ocr

# Step 1: Build the PaddlePaddle GPU wheel (one-time, 2-4 hours)
docker compose --profile build run --rm build-paddle

# Verify wheel was created
ls -la wheels/paddlepaddle*.whl

# Step 2: Build the GPU image (uses local wheel)
docker compose build ocr-gpu

# Step 3: Run with GPU
docker compose up ocr-gpu

# Verify GPU is working
docker compose exec ocr-gpu python -c "import paddle; print(paddle.device.is_compiled_with_cuda())"
```

**What this does:**
1. `build-paddle` compiles PaddlePaddle from source inside a CUDA container
2. The wheel is saved to the `./wheels/` directory
3. `Dockerfile.gpu` detects the local wheel and uses it instead of PyPI

**Caveats:**
- Build takes 2-4 hours on first run
- Requires ~20GB disk space during build
- Not officially supported by the PaddlePaddle team
- May need adjustments for future PaddlePaddle versions

See: [GitHub Issue #17327](https://github.com/PaddlePaddle/PaddleOCR/issues/17327)

#### Option 3: Alternative OCR Engines

For ARM64 GPU acceleration, consider alternatives:

| Engine | ARM64 GPU | Notes |
|--------|-----------|-------|
| **Tesseract** | ❌ CPU-only | Good fallback, widely available |
| **EasyOCR** | ⚠️ Via PyTorch | PyTorch has ARM64 GPU support |
| **TrOCR** | ⚠️ Via Transformers | Hugging Face Transformers + PyTorch |
| **docTR** | ⚠️ Via TensorFlow/PyTorch | Both backends have ARM64 support |

EasyOCR with PyTorch is a viable alternative:
```bash
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install easyocr
```

### x86_64 GPU Setup (Working)

For x86_64 systems with an NVIDIA GPU, the GPU Docker image works:

```bash
# Verify GPU is accessible
nvidia-smi

# Verify Docker GPU access
docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi

# Build and run GPU version
docker compose up ocr-gpu
```

### GPU Docker Compose Configuration

The `docker-compose.yml` configures GPU access via:

```yaml
deploy:
  resources:
    reservations:
      devices:
        - driver: nvidia
          count: 1
          capabilities: [gpu]
```

This requires Docker Compose v2 and nvidia-container-toolkit.

## DGX Spark / ARM64 Quick Start

For ARM64 systems (DGX Spark, Jetson, Graviton), use CPU-only:

```bash
cd src/paddle_ocr

# Build ARM64-native CPU image
docker build -f Dockerfile.cpu -t paddle-ocr-api:arm64 .

# Run
docker run -d -p 8000:8000 \
  -v $(pwd)/../dataset:/app/dataset:ro \
  paddle-ocr-api:arm64

# Test
curl http://localhost:8000/health
```

### Cross-Compile from x86_64

Build ARM64 images from an x86_64 machine:

```bash
# Setup buildx for multi-arch
docker buildx create --name mybuilder --use

# Build ARM64 image from x86_64 machine
docker buildx build -f Dockerfile.cpu \
  --platform linux/arm64 \
  -t paddle-ocr-api:arm64 \
  --load .

# Save and transfer to DGX Spark
docker save paddle-ocr-api:arm64 | gzip > paddle-ocr-arm64.tar.gz
scp paddle-ocr-arm64.tar.gz dgx-spark:~/

# On DGX Spark:
docker load < paddle-ocr-arm64.tar.gz
```

## Using with Ray Tune

### Multi-Worker Setup for Parallel Trials

Run multiple workers for parallel hyperparameter tuning:

```bash
cd src/paddle_ocr

# Start 2 CPU workers (ports 8001-8002)
sudo docker compose -f docker-compose.workers.yml --profile cpu up -d

# Or for GPU workers (if supported)
sudo docker compose -f docker-compose.workers.yml --profile gpu up -d

# Check workers are healthy
curl http://localhost:8001/health
curl http://localhost:8002/health
```

Then run the notebook with `max_concurrent_trials=2` to use both workers in parallel, distributing trials across them as sketched below.
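
One way to spread trials across the workers is to pick a URL per trial. A sketch, assuming workers on ports 8001-8002 as started above (the PID-based pick is a simple heuristic for illustration, not part of the project code):

```python
import os

import requests
from ray import tune

WORKER_URLS = [
    "http://localhost:8001/evaluate",
    "http://localhost:8002/evaluate",
]

def trainable_paddle_ocr_multi(config):
    # Each Ray trial runs in its own worker process, so the PID is a cheap
    # way to spread concurrent trials across the OCR workers.
    url = WORKER_URLS[os.getpid() % len(WORKER_URLS)]
    payload = {"pdf_folder": "/app/dataset", **config}
    response = requests.post(url, json=payload, timeout=600)
    response.raise_for_status()
    tune.report(response.json())
```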

### Single Worker Setup

Update your notebook's `trainable_paddle_ocr` function:

```python
import requests
from ray import tune

API_URL = "http://localhost:8000/evaluate"

def trainable_paddle_ocr(config):
    """Call OCR API instead of subprocess."""
    payload = {
        "pdf_folder": "/app/dataset",
        "use_doc_orientation_classify": config.get("use_doc_orientation_classify", False),
        "use_doc_unwarping": config.get("use_doc_unwarping", False),
        "textline_orientation": config.get("textline_orientation", True),
        "text_det_thresh": config.get("text_det_thresh", 0.0),
        "text_det_box_thresh": config.get("text_det_box_thresh", 0.0),
        "text_det_unclip_ratio": config.get("text_det_unclip_ratio", 1.5),
        "text_rec_score_thresh": config.get("text_rec_score_thresh", 0.0),
    }

    try:
        response = requests.post(API_URL, json=payload, timeout=600)
        response.raise_for_status()
        metrics = response.json()
        tune.report(metrics)
    except Exception as e:
        tune.report({"CER": 1.0, "WER": 1.0, "ERROR": str(e)[:500]})
```

## Architecture: Model Lifecycle

The model is loaded **once** at container startup and stays in memory for all requests:

```mermaid
flowchart TB
    subgraph Container["Docker Container Lifecycle"]
        Start([Container Start]) --> Load[Load PaddleOCR Models<br/>~10-30s one-time cost]
        Load --> Ready[API Ready<br/>Models in RAM ~500MB]

        subgraph Requests["Incoming Requests - Models Stay Loaded"]
            Ready --> R1[Request 1] --> Ready
            Ready --> R2[Request 2] --> Ready
            Ready --> RN[Request N...] --> Ready
        end

        Ready --> Stop([Container Stop])
        Stop --> Free[Models Freed]
    end

    style Load fill:#f9f,stroke:#333
    style Ready fill:#9f9,stroke:#333
    style Requests fill:#e8f4ea,stroke:#090
```

**Subprocess vs REST API comparison:**

```mermaid
flowchart LR
    subgraph Subprocess["❌ Subprocess Approach"]
        direction TB
        S1[Trial 1] --> L1[Load Model ~10s]
        L1 --> E1[Evaluate ~60s]
        E1 --> U1[Unload]
        U1 --> S2[Trial 2]
        S2 --> L2[Load Model ~10s]
        L2 --> E2[Evaluate ~60s]
    end

    subgraph REST["✅ REST API Approach"]
        direction TB
        Start2[Start Container] --> Load2[Load Model ~10s]
        Load2 --> Ready2[Model in Memory]
        Ready2 --> T1[Trial 1 ~60s]
        T1 --> Ready2
        Ready2 --> T2[Trial 2 ~60s]
        T2 --> Ready2
        Ready2 --> TN[Trial N ~60s]
    end

    style L1 fill:#faa
    style L2 fill:#faa
    style Load2 fill:#afa
    style Ready2 fill:#afa
```

## Performance Comparison

| Approach | Model Load | Per-Trial Overhead | 64 Trials |
|----------|------------|-------------------|-----------|
| Subprocess (original) | Every trial (~10s) | ~10s | ~7 hours |
| Docker per trial | Every trial (~10s) | ~12-15s | ~7.5 hours |
| **REST API** | **Once** | **~0.1s** | **~5.8 hours** |

The REST API saves over an hour by loading the model only once.

## Troubleshooting

### Model download slow on first run
The first run downloads ~500MB of models. Use the `paddlex-cache` volume to persist them.

### Out of memory
Reduce `max_concurrent_trials` in Ray Tune, or increase container memory:
```bash
docker run --memory=8g ...
```

### GPU not detected
Ensure the NVIDIA Container Toolkit is installed:
```bash
nvidia-smi  # Should work
docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi  # Should work
```

### PaddlePaddle GPU installation fails
PaddlePaddle 3.x GPU packages are **not available on PyPI**. They must be installed from PaddlePaddle's official index:
```bash
# For CUDA 12.x
pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/

# For CUDA 11.8
pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
```
`Dockerfile.gpu` handles this automatically.

## CI/CD Pipeline

The project includes a Gitea Actions workflow (`.gitea/workflows/ci.yaml`) for automated builds.

### What CI Builds

| Image | Architecture | Source |
|-------|--------------|--------|
| `paddle-ocr-cpu:amd64` | amd64 | PyPI paddlepaddle |
| `paddle-ocr-cpu:arm64` | arm64 | Pre-built wheel from Gitea packages |
| `paddle-ocr-gpu:amd64` | amd64 | PyPI paddlepaddle-gpu |
| `paddle-ocr-gpu:arm64` | arm64 | Pre-built wheel from Gitea packages |

### ARM64 Wheel Workflow

Since PyPI wheels don't work on ARM64 (x86 SSE instructions), wheels must be built from source using sse2neon:

1. Built manually on an ARM64 machine (one-time)
2. Uploaded to Gitea generic packages
3. Downloaded by CI when building ARM64 images

#### Step 1: Build ARM64 Wheels (One-time, on ARM64 machine)

```bash
cd src/paddle_ocr

# Build GPU wheel (requires NVIDIA GPU, takes 1-2 hours)
sudo docker build -t paddle-builder:gpu-arm64 -f Dockerfile.build-paddle .
sudo docker run --rm -v ./wheels:/wheels paddle-builder:gpu-arm64

# Build CPU wheel (no GPU required, takes 1-2 hours)
sudo docker build -t paddle-builder:cpu-arm64 -f Dockerfile.build-paddle-cpu .
sudo docker run --rm -v ./wheels:/wheels paddle-builder:cpu-arm64

# Verify wheels were created
ls -la wheels/paddlepaddle*.whl
# paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl  (GPU)
# paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl      (CPU)
```

#### Step 2: Upload Wheels to Gitea Packages

```bash
export GITEA_TOKEN="your-token-here"

# Upload GPU wheel
curl -X PUT \
  -H "Authorization: token $GITEA_TOKEN" \
  --upload-file wheels/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl \
  "https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-gpu-arm64/3.0.0/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl"

# Upload CPU wheel
curl -X PUT \
  -H "Authorization: token $GITEA_TOKEN" \
  --upload-file wheels/paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl \
  "https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-cpu-arm64/3.0.0/paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl"
```

Wheels are then available at:
```
https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-gpu-arm64/3.0.0/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl
https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-cpu-arm64/3.0.0/paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl
```

#### Step 3: CI Builds Images

CI automatically:
1. Downloads ARM64 wheels from Gitea packages (for arm64 builds only)
2. Builds both CPU and GPU images for amd64 and arm64
3. Pushes to the registry with arch-specific tags

### Required CI Secrets

Configure these in the Gitea repository settings:

| Secret | Description |
|--------|-------------|
| `CI_READWRITE` | Gitea token with registry read/write access |

### Manual Image Push

```bash
# Login to registry
docker login seryus.ddns.net

# Build and push CPU (multi-arch)
docker buildx build -f Dockerfile.cpu \
  --platform linux/amd64,linux/arm64 \
  -t seryus.ddns.net/unir/paddle-ocr-api:cpu \
  --push .

# Build and push GPU (x86_64)
docker build -f Dockerfile.gpu -t seryus.ddns.net/unir/paddle-ocr-api:gpu-amd64 .
docker push seryus.ddns.net/unir/paddle-ocr-api:gpu-amd64

# Build and push GPU (ARM64) - requires wheel in wheels/
docker buildx build -f Dockerfile.gpu \
  --platform linux/arm64 \
  -t seryus.ddns.net/unir/paddle-ocr-api:gpu-arm64 \
  --push .
```

### Updating the ARM64 Wheels

When PaddlePaddle releases a new version:

1. Update `PADDLE_VERSION` in `Dockerfile.build-paddle` and `Dockerfile.build-paddle-cpu`
2. Rebuild both wheels on an ARM64 machine
3. Upload to Gitea packages with the new version
4. Update `PADDLE_VERSION` in `.gitea/workflows/ci.yaml`
74
src/paddle_ocr/dataset_manager.py
Normal file
@@ -0,0 +1,74 @@
# Imports
import os
from PIL import Image


class ImageTextDataset:
    def __init__(self, root):
        self.samples = []

        for folder in sorted(os.listdir(root)):
            sub = os.path.join(root, folder)
            img_dir = os.path.join(sub, "img")
            txt_dir = os.path.join(sub, "txt")

            if not (os.path.isdir(img_dir) and os.path.isdir(txt_dir)):
                continue

            for fname in sorted(os.listdir(img_dir)):
                if not fname.lower().endswith((".png", ".jpg", ".jpeg")):
                    continue

                img_path = os.path.join(img_dir, fname)

                # text file must have same name but .txt
                txt_name = os.path.splitext(fname)[0] + ".txt"
                txt_path = os.path.join(txt_dir, txt_name)

                if not os.path.exists(txt_path):
                    continue

                self.samples.append((img_path, txt_path))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, txt_path = self.samples[idx]

        # Load image
        image = Image.open(img_path).convert("RGB")

        # Load text
        with open(txt_path, "r", encoding="utf-8") as f:
            text = f.read()

        return image, text

    def get_output_path(self, idx, output_subdir, debugset_root="/app/debugset"):
        """Get output path for saving OCR result to debugset folder.

        Args:
            idx: Sample index
            output_subdir: Subdirectory name (e.g., 'paddle_text', 'doctr_text')
            debugset_root: Root folder for debug output (default: /app/debugset)

        Returns:
            Path like /app/debugset/doc1/{output_subdir}/page_001.txt
        """
        img_path, _ = self.samples[idx]
        # img_path: /app/dataset/doc1/img/page_001.png
        # Extract relative path: doc1/img/page_001.png
        parts = img_path.split("/dataset/", 1)
        if len(parts) == 2:
            rel_path = parts[1]  # doc1/img/page_001.png
        else:
            rel_path = os.path.basename(img_path)

        # Replace /img/ with /{output_subdir}/; if there is no /img/ component
        # (e.g., rel_path is a bare filename), fall back to the debugset root.
        rel_parts = rel_path.rsplit("/img/", 1)
        if len(rel_parts) == 2:
            doc_folder, page_name = rel_parts  # doc1, page_001.png
        else:
            doc_folder, page_name = "", rel_parts[0]
        fname = os.path.splitext(page_name)[0] + ".txt"  # page_001.txt

        out_dir = os.path.join(debugset_root, doc_folder, output_subdir)
        os.makedirs(out_dir, exist_ok=True)
        return os.path.join(out_dir, fname)
26
src/paddle_ocr/docker-compose.cpu-registry.yml
Normal file
@@ -0,0 +1,26 @@
# docker-compose.cpu-registry.yml - Pull CPU image from registry
# Usage: docker compose -f docker-compose.cpu-registry.yml up

services:
  ocr-cpu:
    image: seryus.ddns.net/unir/paddle-ocr-cpu:latest
    container_name: paddle-ocr-cpu-registry
    ports:
      - "8001:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

volumes:
  paddlex-cache:
    name: paddlex-model-cache
39
src/paddle_ocr/docker-compose.gpu-registry.yml
Normal file
@@ -0,0 +1,39 @@
# docker-compose.gpu-registry.yml - Pull GPU image from registry
# Usage: docker compose -f docker-compose.gpu-registry.yml up
#
# Requires: NVIDIA GPU + nvidia-container-toolkit installed

services:
  ocr-gpu:
    image: seryus.ddns.net/unir/paddle-ocr-gpu:latest
    container_name: paddle-ocr-gpu-registry
    ports:
      - "8002:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
      - ./scripts:/app/scripts:ro
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

volumes:
  paddlex-cache:
    name: paddlex-model-cache
140
src/paddle_ocr/docker-compose.workers.yml
Normal file
@@ -0,0 +1,140 @@
# docker-compose.workers.yml - Multiple PaddleOCR workers for parallel Ray Tune
#
# Usage:
#   GPU (5 workers sharing one GPU):
#     docker compose -f docker-compose.workers.yml --profile gpu up
#
#   CPU (5 workers):
#     docker compose -f docker-compose.workers.yml --profile cpu up
#
#   Scale workers (e.g., 8 workers):
#     NUM_WORKERS=8 docker compose -f docker-compose.workers.yml up
#
# Each worker runs on a separate port: 8001, 8002, 8003, 8004, ...

x-ocr-gpu-common: &ocr-gpu-common
  image: seryus.ddns.net/unir/paddle-ocr-gpu:latest
  volumes:
    - ../dataset:/app/dataset:ro
    - ../debugset:/app/debugset:rw
    - paddlex-cache:/root/.paddlex
  environment:
    - PYTHONUNBUFFERED=1
    - CUDA_VISIBLE_DEVICES=0
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: 1
            capabilities: [gpu]
  restart: unless-stopped
  healthcheck:
    test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 120s

x-ocr-cpu-common: &ocr-cpu-common
  image: seryus.ddns.net/unir/paddle-ocr-cpu:latest
  volumes:
    - ../dataset:/app/dataset:ro
    - ../debugset:/app/debugset:rw
    - paddlex-cache:/root/.paddlex
  environment:
    - PYTHONUNBUFFERED=1
  restart: unless-stopped
  healthcheck:
    test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 120s

services:
  # GPU workers (gpu profile) - share a single GPU
  ocr-worker-1:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-1
    ports:
      - "8001:8000"
    profiles:
      - gpu

  ocr-worker-2:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-2
    ports:
      - "8002:8000"
    profiles:
      - gpu

  ocr-worker-3:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-3
    ports:
      - "8003:8000"
    profiles:
      - gpu

  ocr-worker-4:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-4
    ports:
      - "8004:8000"
    profiles:
      - gpu

  ocr-worker-5:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-5
    ports:
      - "8005:8000"
    profiles:
      - gpu

  # CPU workers (cpu profile) - for systems without a GPU
  ocr-cpu-worker-1:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-1
    ports:
      - "8001:8000"
    profiles:
      - cpu

  ocr-cpu-worker-2:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-2
    ports:
      - "8002:8000"
    profiles:
      - cpu

  ocr-cpu-worker-3:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-3
    ports:
      - "8003:8000"
    profiles:
      - cpu

  ocr-cpu-worker-4:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-4
    ports:
      - "8004:8000"
    profiles:
      - cpu

  ocr-cpu-worker-5:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-5
    ports:
      - "8005:8000"
    profiles:
      - cpu

volumes:
  paddlex-cache:
    name: paddlex-model-cache
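Each worker exposes the same `/evaluate` endpoint on its own host port, so a tuning driver can fan trials out across them. A minimal round-robin sketch; the `itertools.cycle` dispatch and the trial payloads are illustrative, not part of this repo:

```python
# dispatch_sketch.py - fan /evaluate calls across the workers above
import itertools

import requests

WORKERS = [f"http://localhost:{port}" for port in range(8001, 8006)]
next_worker = itertools.cycle(WORKERS)

# Hypothetical trial configs; unset fields fall back to the API defaults
trials = [
    {"text_det_thresh": 0.3, "text_det_box_thresh": 0.5},
    {"text_det_thresh": 0.4, "text_det_box_thresh": 0.6},
]

for trial in trials:
    url = f"{next(next_worker)}/evaluate"
    result = requests.post(url, json=trial, timeout=600).json()
    print(url, "->", {"CER": result["CER"], "WER": result["WER"]})
```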
111
src/paddle_ocr/docker-compose.yml
Normal file
@@ -0,0 +1,111 @@
# docker-compose.yml - PaddleOCR REST API
# Usage:
#   CPU:   docker compose up ocr-cpu
#   GPU:   docker compose up ocr-gpu
#   Test:  docker compose run --rm test
#   Build: CUDA_ARCH=120 docker compose --profile build run --rm build-paddle
#
# Auto-detect CUDA arch before building:
#   export CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -1 | tr -d '.')
#   docker compose --profile build run --rm build-paddle

services:
  # PaddlePaddle GPU wheel builder (ARM64 only, one-time build)
  # Creates ./wheels/paddlepaddle_gpu-*.whl for ARM64 GPU support
  # CUDA_ARCH env var controls target GPU architecture (default: 120 for Blackwell base)
  build-paddle:
    build:
      context: .
      dockerfile: Dockerfile.build-paddle
      args:
        CUDA_ARCH: ${CUDA_ARCH:-120}
    volumes:
      - ./wheels:/wheels
    profiles:
      - build
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # CPU-only service (works on any architecture)
  ocr-cpu:
    build:
      context: .
      dockerfile: Dockerfile.cpu
      args:
        # Models to bake into the image (change before building):
        DET_MODEL: PP-OCRv5_server_det
        REC_MODEL: PP-OCRv5_server_rec
    image: paddle-ocr-api:cpu
    container_name: paddle-ocr-cpu
    ports:
      - "8000:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw   # Your dataset
      - paddlex-cache:/root/.paddlex   # For additional models at runtime
    environment:
      - PYTHONUNBUFFERED=1
      # Override models at runtime (uncomment to use different models):
      # - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      # - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # GPU service (requires NVIDIA Container Toolkit)
  ocr-gpu:
    build:
      context: .
      dockerfile: Dockerfile.gpu
      args:
        DET_MODEL: PP-OCRv5_server_det
        REC_MODEL: PP-OCRv5_server_rec
    image: paddle-ocr-api:gpu
    container_name: paddle-ocr-gpu
    ports:
      - "8000:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      # Override models at runtime:
      # - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      # - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped

  # Test client (runs once and exits)
  test:
    image: python:3.11-slim
    container_name: paddle-ocr-test
    depends_on:
      ocr-cpu:
        condition: service_healthy
    volumes:
      - ./test.py:/app/test.py:ro
    working_dir: /app
    command: >
      sh -c "pip install -q requests && python test.py --url http://ocr-cpu:8000 --dataset /app/dataset"
    network_mode: "service:ocr-cpu"

volumes:
  paddlex-cache:
    name: paddlex-model-cache
340
src/paddle_ocr/paddle_ocr_tuning_rest.py
Normal file
@@ -0,0 +1,340 @@
# paddle_ocr_tuning_rest.py
# FastAPI REST service for PaddleOCR hyperparameter evaluation
# Usage: uvicorn paddle_ocr_tuning_rest:app --host 0.0.0.0 --port 8000

import os
import re
import time
import threading
from typing import Optional
from contextlib import asynccontextmanager

import numpy as np
import paddle
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field

from paddleocr import PaddleOCR
from jiwer import wer, cer
from dataset_manager import ImageTextDataset


def get_gpu_info() -> dict:
    """Get GPU status information from PaddlePaddle."""
    info = {
        "cuda_available": paddle.device.is_compiled_with_cuda(),
        "device": str(paddle.device.get_device()),
        "gpu_count": 0,
        "gpu_name": None,
        "gpu_memory_total": None,
        "gpu_memory_used": None,
    }

    if info["cuda_available"]:
        try:
            info["gpu_count"] = paddle.device.cuda.device_count()
            if info["gpu_count"] > 0:
                # Get GPU properties
                props = paddle.device.cuda.get_device_properties(0)
                info["gpu_name"] = props.name
                info["gpu_memory_total"] = f"{props.total_memory / (1024**3):.2f} GB"

                # Get current memory usage
                mem_reserved = paddle.device.cuda.memory_reserved(0)
                mem_allocated = paddle.device.cuda.memory_allocated(0)
                info["gpu_memory_used"] = f"{mem_allocated / (1024**3):.2f} GB"
                info["gpu_memory_reserved"] = f"{mem_reserved / (1024**3):.2f} GB"
        except Exception as e:
            info["gpu_error"] = str(e)

    return info


# Model configuration via environment variables (with defaults)
DEFAULT_DET_MODEL = os.environ.get("PADDLE_DET_MODEL", "PP-OCRv5_server_det")
DEFAULT_REC_MODEL = os.environ.get("PADDLE_REC_MODEL", "PP-OCRv5_server_rec")


# Global state for model and dataset
class AppState:
    ocr: Optional[PaddleOCR] = None
    dataset: Optional[ImageTextDataset] = None
    dataset_path: Optional[str] = None
    det_model: str = DEFAULT_DET_MODEL
    rec_model: str = DEFAULT_REC_MODEL
    lock: threading.Lock  # Protects the OCR model from concurrent access

    def __init__(self):
        self.lock = threading.Lock()


state = AppState()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the OCR model at startup."""
    # Log GPU status
    gpu_info = get_gpu_info()
    print("=" * 50)
    print("GPU STATUS")
    print("=" * 50)
    print(f"  CUDA available: {gpu_info['cuda_available']}")
    print(f"  Device: {gpu_info['device']}")
    if gpu_info['cuda_available']:
        print(f"  GPU count: {gpu_info['gpu_count']}")
        print(f"  GPU name: {gpu_info['gpu_name']}")
        print(f"  GPU memory total: {gpu_info['gpu_memory_total']}")
    print("=" * 50)

    print("Loading PaddleOCR models...")
    print(f"  Detection: {state.det_model}")
    print(f"  Recognition: {state.rec_model}")
    state.ocr = PaddleOCR(
        text_detection_model_name=state.det_model,
        text_recognition_model_name=state.rec_model,
    )

    # Log GPU memory after model load
    if gpu_info['cuda_available']:
        gpu_after = get_gpu_info()
        print(f"  GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")

    print("Model loaded successfully!")
    yield
    # Cleanup on shutdown
    state.ocr = None
    state.dataset = None


app = FastAPI(
    title="PaddleOCR Tuning API",
    description="REST API for OCR hyperparameter evaluation",
    version="1.0.0",
    lifespan=lifespan,
)


class EvaluateRequest(BaseModel):
    """Request schema matching the CLI arguments."""
    pdf_folder: str = Field("/app/dataset", description="Path to dataset folder")
    use_doc_orientation_classify: bool = Field(False, description="Use document orientation classification")
    use_doc_unwarping: bool = Field(False, description="Use document unwarping")
    textline_orientation: bool = Field(True, description="Use textline orientation classification")
    text_det_thresh: float = Field(0.0, ge=0.0, le=1.0, description="Detection pixel threshold")
    text_det_box_thresh: float = Field(0.0, ge=0.0, le=1.0, description="Detection box threshold")
    text_det_unclip_ratio: float = Field(1.5, ge=0.0, description="Text detection expansion coefficient")
    text_rec_score_thresh: float = Field(0.0, ge=0.0, le=1.0, description="Recognition score threshold")
    start_page: int = Field(5, ge=0, description="Start page index (inclusive)")
    end_page: int = Field(10, ge=1, description="End page index (exclusive)")
    save_output: bool = Field(False, description="Save OCR predictions to debugset folder")


class EvaluateResponse(BaseModel):
    """Response schema matching the CLI output."""
    CER: float
    WER: float
    TIME: float
    PAGES: int
    TIME_PER_PAGE: float


class HealthResponse(BaseModel):
    status: str
    model_loaded: bool
    dataset_loaded: bool
    dataset_size: Optional[int] = None
    det_model: Optional[str] = None
    rec_model: Optional[str] = None
    # GPU info
    cuda_available: Optional[bool] = None
    device: Optional[str] = None
    gpu_name: Optional[str] = None
    gpu_memory_used: Optional[str] = None
    gpu_memory_total: Optional[str] = None


def _normalize_box_xyxy(box):
    """Normalize a bounding box to (x0, y0, x1, y1) format."""
    if isinstance(box, (list, tuple)) and box and isinstance(box[0], (list, tuple)):
        xs = [p[0] for p in box]
        ys = [p[1] for p in box]
        return min(xs), min(ys), max(xs), max(ys)

    if isinstance(box, (list, tuple)):
        if len(box) == 4:
            x0, y0, x1, y1 = box
            return min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1)
        if len(box) == 8:
            xs = box[0::2]
            ys = box[1::2]
            return min(xs), min(ys), max(xs), max(ys)

    raise ValueError(f"Unrecognized box format: {box!r}")


def assemble_from_paddle_result(paddleocr_predict, min_score=0.0, line_tol_factor=0.6):
    """
    Robust line grouping for PaddleOCR outputs.
    Normalizes boxes, groups them into lines, and returns the assembled text.
    """
    boxes_all = []
    for item in paddleocr_predict:
        res = item.json.get("res", {})
        boxes = res.get("rec_boxes", []) or []
        texts = res.get("rec_texts", []) or []
        scores = res.get("rec_scores", None)

        for i, (box, text) in enumerate(zip(boxes, texts)):
            try:
                x0, y0, x1, y1 = _normalize_box_xyxy(box)
            except Exception:
                continue

            y_mid = 0.5 * (y0 + y1)
            score = float(scores[i]) if (scores is not None and i < len(scores)) else 1.0

            t = re.sub(r"\s+", " ", str(text)).strip()
            if not t:
                continue

            boxes_all.append((x0, y0, x1, y1, y_mid, t, score))

    if min_score > 0:
        boxes_all = [b for b in boxes_all if b[6] >= min_score]

    if not boxes_all:
        return ""

    # Adaptive line tolerance
    heights = [b[3] - b[1] for b in boxes_all]
    median_h = float(np.median(heights)) if heights else 20.0
    line_tol = max(8.0, line_tol_factor * median_h)

    # Sort by vertical mid, then x0
    boxes_all.sort(key=lambda b: (b[4], b[0]))

    # Group into lines
    lines, cur, last_y = [], [], None
    for x0, y0, x1, y1, y_mid, text, score in boxes_all:
        if last_y is None or abs(y_mid - last_y) <= line_tol:
            cur.append((x0, text))
        else:
            cur.sort(key=lambda t: t[0])
            lines.append(" ".join(t[1] for t in cur))
            cur = [(x0, text)]
        last_y = y_mid

    if cur:
        cur.sort(key=lambda t: t[0])
        lines.append(" ".join(t[1] for t in cur))

    res = "\n".join(lines)
    res = re.sub(r"\s+\n", "\n", res).strip()
    return res


def evaluate_text(reference: str, prediction: str) -> dict:
    """Calculate WER and CER metrics."""
    return {"WER": wer(reference, prediction), "CER": cer(reference, prediction)}


@app.get("/health", response_model=HealthResponse)
def health_check():
    """Check if the service is ready."""
    gpu_info = get_gpu_info()
    return HealthResponse(
        status="ok" if state.ocr is not None else "initializing",
        model_loaded=state.ocr is not None,
        dataset_loaded=state.dataset is not None,
        dataset_size=len(state.dataset) if state.dataset else None,
        det_model=state.det_model,
        rec_model=state.rec_model,
        cuda_available=gpu_info.get("cuda_available"),
        device=gpu_info.get("device"),
        gpu_name=gpu_info.get("gpu_name"),
        gpu_memory_used=gpu_info.get("gpu_memory_used"),
        gpu_memory_total=gpu_info.get("gpu_memory_total"),
    )


@app.post("/evaluate", response_model=EvaluateResponse)
def evaluate(request: EvaluateRequest):
    """
    Evaluate OCR with the given hyperparameters.
    Returns CER, WER, and timing metrics.
    """
    if state.ocr is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    # Load or reload the dataset if the path changed
    if state.dataset is None or state.dataset_path != request.pdf_folder:
        if not os.path.isdir(request.pdf_folder):
            raise HTTPException(status_code=400, detail=f"Dataset folder not found: {request.pdf_folder}")
        state.dataset = ImageTextDataset(request.pdf_folder)
        state.dataset_path = request.pdf_folder

    if len(state.dataset) == 0:
        raise HTTPException(status_code=400, detail="Dataset is empty")

    # Validate page range
    start = request.start_page
    end = min(request.end_page, len(state.dataset))
    if start >= end:
        raise HTTPException(status_code=400, detail=f"Invalid page range: {start}-{end}")

    cer_list, wer_list = [], []
    time_per_page_list = []
    t0 = time.time()

    # Lock to prevent concurrent OCR access (the model is not thread-safe)
    with state.lock:
        for idx in range(start, end):
            img, ref = state.dataset[idx]
            arr = np.array(img)

            tp0 = time.time()
            out = state.ocr.predict(
                arr,
                use_doc_orientation_classify=request.use_doc_orientation_classify,
                use_doc_unwarping=request.use_doc_unwarping,
                use_textline_orientation=request.textline_orientation,
                text_det_thresh=request.text_det_thresh,
                text_det_box_thresh=request.text_det_box_thresh,
                text_det_unclip_ratio=request.text_det_unclip_ratio,
                text_rec_score_thresh=request.text_rec_score_thresh,
            )

            pred = assemble_from_paddle_result(out)
            time_per_page_list.append(float(time.time() - tp0))

            # Save the prediction to debugset if requested
            if request.save_output:
                out_path = state.dataset.get_output_path(idx, "paddle_text")
                with open(out_path, "w", encoding="utf-8") as f:
                    f.write(pred)

            m = evaluate_text(ref, pred)
            cer_list.append(m["CER"])
            wer_list.append(m["WER"])

    return EvaluateResponse(
        CER=float(np.mean(cer_list)) if cer_list else 1.0,
        WER=float(np.mean(wer_list)) if wer_list else 1.0,
        TIME=float(time.time() - t0),
        PAGES=len(cer_list),
        TIME_PER_PAGE=float(np.mean(time_per_page_list)) if time_per_page_list else 0.0,
    )


@app.post("/evaluate_full", response_model=EvaluateResponse)
def evaluate_full(request: EvaluateRequest):
    """Evaluate on ALL pages (ignores start_page/end_page)."""
    request.start_page = 0
    request.end_page = 9999  # Will be clamped to the dataset size
    return evaluate(request)


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
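`assemble_from_paddle_result` only relies on each result item exposing a `.json` dict with `res.rec_boxes` / `rec_texts` / `rec_scores`, so its grouping behaviour can be checked without running a model. A small self-contained sketch; the `SimpleNamespace` stub and the box coordinates are purely illustrative:

```python
# grouping_demo.py - exercise assemble_from_paddle_result without a model
from types import SimpleNamespace

from paddle_ocr_tuning_rest import assemble_from_paddle_result

fake = SimpleNamespace(json={"res": {
    # Two fragments on one visual line, one fragment on the next line
    "rec_boxes": [[10, 100, 80, 120], [90, 102, 160, 122], [10, 150, 120, 170]],
    "rec_texts": ["Hello", "world", "second line"],
    "rec_scores": [0.99, 0.98, 0.97],
}})

print(assemble_from_paddle_result([fake]))
# Expected output:
# Hello world
# second line
```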
22
src/paddle_ocr/requirements-gpu.txt
Normal file
@@ -0,0 +1,22 @@
# PaddleOCR REST API - GPU Requirements
# Install: pip install -r requirements-gpu.txt

# PaddlePaddle (GPU version with CUDA)
paddlepaddle-gpu==3.2.0

# PaddleOCR
paddleocr==3.3.2

# OCR evaluation metrics
jiwer

# Numerical computing
numpy

# REST API framework
fastapi
uvicorn[standard]
pydantic

# Image processing
Pillow
22
src/paddle_ocr/requirements.txt
Normal file
@@ -0,0 +1,22 @@
# PaddleOCR REST API - CPU Requirements
# Install: pip install -r requirements.txt

# PaddlePaddle (CPU version)
paddlepaddle==3.2.2

# PaddleOCR
paddleocr==3.3.2

# OCR evaluation metrics
jiwer

# Numerical computing
numpy

# REST API framework
fastapi
uvicorn[standard]
pydantic

# Image processing (pulled in by paddleocr, but listed explicitly)
Pillow
199
src/paddle_ocr/scripts/debug_gpu_detection.py
Normal file
@@ -0,0 +1,199 @@
#!/usr/bin/env python3
"""
Debug script for GPU OCR detection issues.

This script tests the raw inference output from PaddlePaddle detection models
to diagnose why detection might fail on certain GPU architectures (e.g., Blackwell/sm_121).

Usage:
    docker exec paddle-ocr-gpu python /app/debug_gpu_detection.py [image_path]

Expected behavior:
- Working GPU: output stats should show min close to 0, max close to 1, mean ~0.1-0.5
- Broken GPU: output stats show constant values (e.g., min=max=mean=0.00001)
"""

import os
import sys

os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'

import numpy as np
import paddle
from PIL import Image


def check_gpu_status():
    """Check GPU availability and properties."""
    print("=" * 60)
    print("GPU STATUS")
    print("=" * 60)
    print(f"Device: {paddle.device.get_device()}")
    print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")

    if paddle.device.is_compiled_with_cuda():
        print(f"GPU count: {paddle.device.cuda.device_count()}")
        if paddle.device.cuda.device_count() > 0:
            props = paddle.device.cuda.get_device_properties(0)
            print(f"GPU name: {props.name}")
            print(f"Compute capability: {props.major}.{props.minor}")
            print(f"Total memory: {props.total_memory / (1024**3):.2f} GB")
    print()


def test_basic_ops():
    """Test basic GPU tensor operations."""
    print("=" * 60)
    print("BASIC GPU OPERATIONS")
    print("=" * 60)

    # Test tensor creation
    x = paddle.randn([2, 3])
    print(f"Tensor place: {x.place}")

    # Test conv2d
    x = paddle.randn([1, 3, 64, 64])
    conv = paddle.nn.Conv2D(3, 16, 3, padding=1)
    y = conv(x)
    print(f"Conv2d output shape: {y.shape}, place: {y.place}")

    # Test softmax
    s = paddle.nn.functional.softmax(y, axis=1)
    print(f"Softmax output shape: {s.shape}")
    print("Basic operations: OK")
    print()


def test_detection_model(image_path: str):
    """Test detection model raw output."""
    print("=" * 60)
    print("DETECTION MODEL TEST")
    print("=" * 60)

    from paddle.inference import Config, create_predictor

    model_dir = '/root/.paddlex/official_models/PP-OCRv4_mobile_det'
    inference_file = f'{model_dir}/inference.json'
    params_file = f'{model_dir}/inference.pdiparams'

    if not os.path.exists(inference_file):
        print(f"Model not found at {model_dir}")
        print("Run PaddleOCR once to download models first.")
        return

    # Create config
    config = Config()
    config.set_prog_file(inference_file)
    config.set_params_file(params_file)
    config.enable_use_gpu(1024, 0)

    print("Creating predictor...")
    predictor = create_predictor(config)

    # Get input/output names
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    print(f"Input names: {input_names}")
    print(f"Output names: {output_names}")

    # Load and preprocess image
    img = Image.open(image_path)
    img = img.resize((640, 640))
    arr = np.array(img).astype('float32')
    arr = arr / 255.0
    arr = arr.transpose(2, 0, 1)[np.newaxis, ...]  # NCHW
    print(f"Input tensor shape: {arr.shape}")

    # Set input
    input_handle = predictor.get_input_handle(input_names[0])
    input_handle.reshape(arr.shape)
    input_handle.copy_from_cpu(arr)

    # Run prediction
    print("Running inference...")
    predictor.run()

    # Get output
    output_handle = predictor.get_output_handle(output_names[0])
    output = output_handle.copy_to_cpu()

    print()
    print("OUTPUT ANALYSIS:")
    print(f"  Shape: {output.shape}")
    print(f"  Min: {output.min():.6f}")
    print(f"  Max: {output.max():.6f}")
    print(f"  Mean: {output.mean():.6f}")
    print(f"  Std: {output.std():.6f}")
    print(f"  Has NaN: {np.isnan(output).any()}")
    print(f"  Has Inf: {np.isinf(output).any()}")

    # Diagnosis
    print()
    print("DIAGNOSIS:")
    if output.min() == output.max():
        print("  PROBLEM: Output is constant - model inference is broken!")
        print("  This typically indicates GPU compute capability mismatch.")
        print("  GB10 (sm_121) may need CUDA 13.0+ for native support.")
    elif output.max() < 0.01:
        print("  PROBLEM: Output values too low - detection will find nothing.")
    elif np.isnan(output).any() or np.isinf(output).any():
        print("  PROBLEM: Output contains NaN/Inf - numerical instability.")
    else:
        print("  OK: Output values look reasonable.")
        print(f"  Detection threshold typically 0.3-0.6, max output is {output.max():.3f}")


def test_paddleocr_output(image_path: str):
    """Test the full PaddleOCR pipeline."""
    print()
    print("=" * 60)
    print("PADDLEOCR PIPELINE TEST")
    print("=" * 60)

    from paddleocr import PaddleOCR

    ocr = PaddleOCR(
        text_detection_model_name='PP-OCRv4_mobile_det',
        text_recognition_model_name='PP-OCRv4_mobile_rec',
    )

    img = Image.open(image_path)
    arr = np.array(img)

    out = ocr.predict(arr)
    res = out[0].json['res']

    dt_polys = res.get('dt_polys', [])
    rec_texts = res.get('rec_texts', [])

    print(f"Detection polygons: {len(dt_polys)}")
    print(f"Recognition texts: {len(rec_texts)}")

    if rec_texts:
        print(f"Sample texts: {rec_texts[:5]}")
    else:
        print("No text detected!")


def main():
    # Default test image
    image_path = '/app/dataset/0/img/page_0001.png'
    if len(sys.argv) > 1:
        image_path = sys.argv[1]

    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        print("Usage: python debug_gpu_detection.py [image_path]")
        sys.exit(1)

    print(f"Testing with image: {image_path}")
    print()

    check_gpu_status()
    test_basic_ops()
    test_detection_model(image_path)
    test_paddleocr_output(image_path)


if __name__ == '__main__':
    main()
207
src/paddle_ocr/scripts/test_dynamic_mode.py
Normal file
@@ -0,0 +1,207 @@
#!/usr/bin/env python3
"""
Test PaddleOCR in dynamic graph mode (not inference mode).

Dynamic mode compiles kernels at runtime, which may work on Blackwell.
Inference mode uses pre-compiled kernels, which fail on sm_121.

Usage:
    python test_dynamic_mode.py [image_path]
"""

import os
import sys

os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
# Force dynamic graph mode
os.environ['FLAGS_enable_pir_api'] = '0'

import numpy as np
import paddle
from PIL import Image


def check_gpu():
    """Check GPU status."""
    print("=" * 60)
    print("GPU STATUS")
    print("=" * 60)
    print(f"Device: {paddle.device.get_device()}")
    print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")

    if paddle.device.is_compiled_with_cuda() and paddle.device.cuda.device_count() > 0:
        props = paddle.device.cuda.get_device_properties(0)
        print(f"GPU: {props.name} (sm_{props.major}{props.minor})")
        print(f"Memory: {props.total_memory / (1024**3):.1f} GB")
    print()


def test_paddleocr_dynamic(image_path: str):
    """Test PaddleOCR with dynamic execution."""
    print("=" * 60)
    print("PADDLEOCR DYNAMIC MODE TEST")
    print("=" * 60)

    # Import PaddleOCR
    from paddleocr import PaddleOCR

    # Try to force dynamic mode by setting use_static=False if available,
    # or by using the model in eval mode directly

    print("Creating PaddleOCR instance...")
    print("(This may download models on first run)")

    try:
        # Create OCR instance - this might still use inference internally
        ocr = PaddleOCR(
            text_detection_model_name='PP-OCRv4_mobile_det',
            text_recognition_model_name='PP-OCRv4_mobile_rec',
            use_angle_cls=False,  # Simplify
            lang='es',
        )

        # Load image
        img = Image.open(image_path)
        arr = np.array(img)
        print(f"Image shape: {arr.shape}")

        # Run prediction
        print("Running OCR prediction...")
        result = ocr.predict(arr)

        # Parse results
        res = result[0].json['res']
        dt_polys = res.get('dt_polys', [])
        rec_texts = res.get('rec_texts', [])

        print()
        print("RESULTS:")
        print(f"  Detected boxes: {len(dt_polys)}")
        print(f"  Recognized texts: {len(rec_texts)}")

        if rec_texts:
            print(f"  First 5 texts: {rec_texts[:5]}")
            return True
        else:
            print("  WARNING: No text recognized!")
            return False

    except Exception as e:
        print(f"ERROR: {e}")
        return False


def test_paddle_dynamic_model():
    """Test loading a paddle model in dynamic graph mode."""
    print()
    print("=" * 60)
    print("PADDLE DYNAMIC GRAPH TEST")
    print("=" * 60)

    # Ensure we're in dynamic mode
    paddle.disable_static()

    # Test a simple model forward pass
    print("Testing dynamic graph execution...")

    # Create a simple ResNet-like block
    x = paddle.randn([1, 3, 224, 224])

    # Conv -> BN -> ReLU
    conv = paddle.nn.Conv2D(3, 64, 7, stride=2, padding=3)
    bn = paddle.nn.BatchNorm2D(64)

    # Forward pass (dynamic mode - compiles at runtime)
    y = conv(x)
    y = bn(y)
    y = paddle.nn.functional.relu(y)

    print(f"Input shape: {x.shape}")
    print(f"Output shape: {y.shape}")
    print(f"Output min: {y.min().item():.4f}")
    print(f"Output max: {y.max().item():.4f}")
    print(f"Output mean: {y.mean().item():.4f}")

    if y.min() != y.max():
        print("Dynamic graph mode: WORKING")
        return True
    else:
        print("Dynamic graph mode: BROKEN (constant output)")
        return False


def test_ppocr_model_direct():
    """Try loading the PP-OCR model directly in dynamic mode."""
    print()
    print("=" * 60)
    print("PPOCR MODEL DIRECT LOAD TEST")
    print("=" * 60)

    try:
        # Try to import ppocr modules directly
        # This bypasses the inference predictor
        from paddleocr.ppocr.modeling.architectures import build_model
        from paddleocr.ppocr.postprocess import build_post_process
        from paddleocr.ppocr.utils.save_load import load_model

        print("Direct model import available")

        # Note: This approach requires model config files
        # which may or may not be bundled with paddleocr

    except ImportError as e:
        print(f"Direct model import not available: {e}")
        print("PaddleOCR may only support inference mode")

    return False


def main():
    # Default test image
    image_path = '/app/dataset/0/img/page_0001.png'
    if len(sys.argv) > 1:
        image_path = sys.argv[1]

    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        sys.exit(1)

    print(f"Testing with image: {image_path}")
    print()

    check_gpu()

    # Test 1: Basic dynamic graph
    dynamic_works = test_paddle_dynamic_model()

    if not dynamic_works:
        print("\nDynamic graph mode is broken - GPU likely unsupported")
        sys.exit(1)

    # Test 2: Direct model load
    test_ppocr_model_direct()

    # Test 3: PaddleOCR pipeline
    ocr_works = test_paddleocr_dynamic(image_path)

    print()
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"Dynamic graph mode: {'WORKS' if dynamic_works else 'BROKEN'}")
    print(f"PaddleOCR pipeline: {'WORKS' if ocr_works else 'BROKEN'}")

    if dynamic_works and not ocr_works:
        print()
        print("DIAGNOSIS: Dynamic mode works but PaddleOCR fails.")
        print("This means PaddleOCR internally uses the inference predictor,")
        print("which has pre-compiled kernels without Blackwell support.")
        print()
        print("Potential solutions:")
        print("1. Modify PaddleOCR to use dynamic mode")
        print("2. Use ONNX export + ONNXRuntime")
        print("3. Wait for PaddlePaddle Blackwell support")


if __name__ == '__main__':
    main()
69
src/paddle_ocr/scripts/upload-wheel.sh
Executable file
@@ -0,0 +1,69 @@
#!/bin/bash
# Upload PaddlePaddle ARM64 wheel to Gitea generic packages
#
# Usage:
#   ./scripts/upload-wheel.sh [wheel_file] [token]
#
# Environment variables (alternative to arguments):
#   GITEA_TOKEN - Gitea API token
#   WHEEL_FILE  - Path to wheel file (default: auto-detect in wheels/)

set -e

GITEA_URL="https://seryus.ddns.net"
GITEA_ORG="unir"
PACKAGE_NAME="paddlepaddle-gpu-arm64"

# Get wheel file
WHEEL_FILE="${1:-${WHEEL_FILE:-$(ls wheels/paddlepaddle*.whl 2>/dev/null | head -1)}}"
if [ -z "$WHEEL_FILE" ] || [ ! -f "$WHEEL_FILE" ]; then
    echo "Error: No wheel file found"
    echo "Usage: $0 [wheel_file] [token]"
    echo "  or set WHEEL_FILE environment variable"
    exit 1
fi

# Get token
TOKEN="${2:-${GITEA_TOKEN}}"
if [ -z "$TOKEN" ]; then
    echo "Error: No token provided"
    echo "Usage: $0 [wheel_file] [token]"
    echo "  or set GITEA_TOKEN environment variable"
    exit 1
fi

# Extract version from wheel filename
# Format: paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl
FILENAME=$(basename "$WHEEL_FILE")
VERSION=$(echo "$FILENAME" | sed -E 's/paddlepaddle[_-]gpu-([0-9.]+)-.*/\1/')

if [ -z "$VERSION" ]; then
    echo "Error: Could not extract version from filename: $FILENAME"
    exit 1
fi

echo "Uploading wheel to Gitea packages..."
echo "  File: $WHEEL_FILE"
echo "  Package: $PACKAGE_NAME"
echo "  Version: $VERSION"
echo "  URL: $GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$FILENAME"

# Upload using a PUT request
HTTP_CODE=$(curl -sS -w "%{http_code}" -o /tmp/upload_response.txt \
    -X PUT \
    -H "Authorization: token $TOKEN" \
    -H "Content-Type: application/octet-stream" \
    --data-binary "@$WHEEL_FILE" \
    "$GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$FILENAME")

if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
    echo "Success! Wheel uploaded."
    echo "Download URL: $GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$FILENAME"
elif [ "$HTTP_CODE" = "409" ]; then
    echo "Package version already exists (HTTP 409)"
    echo "To update, delete the existing version first in the Gitea UI"
else
    echo "Error: Upload failed with HTTP $HTTP_CODE"
    cat /tmp/upload_response.txt
    exit 1
fi
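The download URL echoed on success is a plain generic-package GET, so the wheel can be fetched from anywhere before an ARM64 GPU image build. A minimal Python sketch; the version and filename below are placeholders for whatever the script actually uploaded, and a private registry may additionally require the same token header:

```python
# fetch_wheel_sketch.py - download a previously uploaded wheel
import requests

GITEA_URL = "https://seryus.ddns.net"
ORG, PACKAGE = "unir", "paddlepaddle-gpu-arm64"
VERSION = "3.0.0"  # placeholder: use the version that was uploaded
FILENAME = f"paddlepaddle_gpu-{VERSION}-cp311-cp311-linux_aarch64.whl"

url = f"{GITEA_URL}/api/packages/{ORG}/generic/{PACKAGE}/{VERSION}/{FILENAME}"
resp = requests.get(url, timeout=300)
resp.raise_for_status()
with open(f"wheels/{FILENAME}", "wb") as f:
    f.write(resp.content)
print(f"Saved wheels/{FILENAME} ({len(resp.content) / 1e6:.1f} MB)")
```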
114
src/paddle_ocr/test.py
Normal file
@@ -0,0 +1,114 @@
# test.py - Simple client to test the PaddleOCR REST API
# Usage: python test.py [--url URL] [--dataset PATH]

import argparse
import requests
import time
import sys


def wait_for_health(url: str, timeout: int = 120) -> bool:
    """Wait for the API to be ready."""
    health_url = f"{url}/health"
    start = time.time()

    print(f"Waiting for API at {health_url}...")
    while time.time() - start < timeout:
        try:
            resp = requests.get(health_url, timeout=5)
            if resp.status_code == 200:
                data = resp.json()
                if data.get("model_loaded"):
                    print(f"API ready! Model loaded in {time.time() - start:.1f}s")
                    return True
                print(f"  Model loading... ({time.time() - start:.0f}s)")
        except requests.exceptions.ConnectionError:
            print(f"  Connecting... ({time.time() - start:.0f}s)")
        except Exception as e:
            print(f"  Error: {e}")
        time.sleep(2)

    print("Timeout waiting for API")
    return False


def test_evaluate(url: str, config: dict) -> dict:
    """Run an evaluation with the given config."""
    eval_url = f"{url}/evaluate"

    print(f"\nTesting config: {config}")
    start = time.time()

    resp = requests.post(eval_url, json=config, timeout=600)
    resp.raise_for_status()

    result = resp.json()
    elapsed = time.time() - start

    print(f"Results (took {elapsed:.1f}s):")
    print(f"  CER: {result['CER']:.4f} ({result['CER']*100:.2f}%)")
    print(f"  WER: {result['WER']:.4f} ({result['WER']*100:.2f}%)")
    print(f"  Pages: {result['PAGES']}")
    print(f"  Time/page: {result['TIME_PER_PAGE']:.2f}s")

    return result


def main():
    parser = argparse.ArgumentParser(description="Test PaddleOCR REST API")
    parser.add_argument("--url", default="http://localhost:8001", help="API base URL")
    parser.add_argument("--dataset", default="/app/dataset", help="Dataset path (inside container)")
    parser.add_argument("--skip-health", action="store_true", help="Skip health check wait")
    args = parser.parse_args()

    # Wait for the API to be ready
    if not args.skip_health:
        if not wait_for_health(args.url):
            sys.exit(1)

    # Test 1: Baseline config (default PaddleOCR)
    print("\n" + "=" * 50)
    print("TEST 1: Baseline Configuration")
    print("=" * 50)
    baseline = test_evaluate(args.url, {
        "pdf_folder": args.dataset,
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "textline_orientation": False,  # Baseline: disabled
        "text_det_thresh": 0.0,
        "text_det_box_thresh": 0.0,
        "text_det_unclip_ratio": 1.5,
        "text_rec_score_thresh": 0.0,
        "start_page": 5,
        "end_page": 10,
    })

    # Test 2: Optimized config (from Ray Tune results)
    print("\n" + "=" * 50)
    print("TEST 2: Optimized Configuration")
    print("=" * 50)
    optimized = test_evaluate(args.url, {
        "pdf_folder": args.dataset,
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "textline_orientation": True,  # KEY: enabled
        "text_det_thresh": 0.4690,
        "text_det_box_thresh": 0.5412,
        "text_det_unclip_ratio": 0.0,
        "text_rec_score_thresh": 0.6350,
        "start_page": 5,
        "end_page": 10,
    })

    # Summary
    print("\n" + "=" * 50)
    print("SUMMARY")
    print("=" * 50)
    cer_reduction = (1 - optimized["CER"] / baseline["CER"]) * 100 if baseline["CER"] > 0 else 0
    print(f"Baseline CER:  {baseline['CER']*100:.2f}%")
    print(f"Optimized CER: {optimized['CER']*100:.2f}%")
    print(f"Improvement: {cer_reduction:.1f}% reduction in errors")


if __name__ == "__main__":
    main()
0
src/paddle_ocr/wheels/.gitkeep
Normal file