From 78fe3e8c81fd88952109298b57775cc367cd4e22 Mon Sep 17 00:00:00 2001
From: Sergio Jimenez Jimenez
Date: Sat, 17 Jan 2026 10:46:36 +0100
Subject: [PATCH] gpu dgx

---
 src/paddle_ocr/Dockerfile.build-paddle | 141 ++++++++++++++++++
 src/paddle_ocr/Dockerfile.gpu          |  39 ++++-
 src/paddle_ocr/README.md               | 197 ++++++++++++++++++++-----
 src/paddle_ocr/docker-compose.yml      |  26 +++-
 src/paddle_ocr/wheels/.gitkeep         |   0
 5 files changed, 358 insertions(+), 45 deletions(-)
 create mode 100644 src/paddle_ocr/Dockerfile.build-paddle
 create mode 100644 src/paddle_ocr/wheels/.gitkeep

diff --git a/src/paddle_ocr/Dockerfile.build-paddle b/src/paddle_ocr/Dockerfile.build-paddle
new file mode 100644
index 0000000..e5caf69
--- /dev/null
+++ b/src/paddle_ocr/Dockerfile.build-paddle
@@ -0,0 +1,141 @@
+# Dockerfile.build-paddle - Build PaddlePaddle GPU wheel for ARM64
+#
+# This Dockerfile compiles PaddlePaddle from source with CUDA support for ARM64.
+# The resulting wheel can be used in Dockerfile.gpu for ARM64 GPU acceleration.
+#
+# Build time: 2-4 hours depending on hardware
+# Output: /output/paddlepaddle_gpu-*.whl
+#
+# Usage:
+#   docker compose run build-paddle
+#   # or
+#   docker build -f Dockerfile.build-paddle -t paddle-builder .
+#   docker run -v ./wheels:/output paddle-builder
+
+FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
+
+LABEL maintainer="Sergio Jimenez"
+LABEL description="PaddlePaddle GPU wheel builder for ARM64"
+
+# Build arguments
+ARG PADDLE_VERSION=v3.0.0
+ARG PYTHON_VERSION=3.11
+
+# Environment setup
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+
+# Install build dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    # Python
+    python${PYTHON_VERSION} \
+    python${PYTHON_VERSION}-dev \
+    python${PYTHON_VERSION}-venv \
+    python3-pip \
+    # Build tools
+    build-essential \
+    cmake \
+    ninja-build \
+    git \
+    wget \
+    curl \
+    pkg-config \
+    # Libraries
+    libssl-dev \
+    libffi-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    liblzma-dev \
+    libncurses5-dev \
+    libncursesw5-dev \
+    libgflags-dev \
+    libgoogle-glog-dev \
+    libprotobuf-dev \
+    protobuf-compiler \
+    patchelf \
+    # Additional dependencies for Paddle
+    libopenblas-dev \
+    liblapack-dev \
+    swig \
+    && rm -rf /var/lib/apt/lists/* \
+    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
+    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
+
+# Upgrade pip and install Python build dependencies
+RUN python -m pip install --upgrade pip setuptools wheel \
+    && python -m pip install \
+    numpy \
+    protobuf \
+    pyyaml \
+    requests \
+    packaging \
+    astor \
+    decorator \
+    paddle-bfloat \
+    opt-einsum
+
+WORKDIR /build
+
+# Clone PaddlePaddle repository
+RUN git clone --depth 1 --branch ${PADDLE_VERSION} \
+    https://github.com/PaddlePaddle/Paddle.git
+
+WORKDIR /build/Paddle
+
+# Install additional Python requirements for building
+RUN pip install -r python/requirements.txt || true
+
+# Create build directory
+RUN mkdir -p build
+WORKDIR /build/Paddle/build
+
+# Configure CMake for ARM64 + CUDA build
+# Note: Adjust CUDA_ARCH_NAME based on your GPU architecture
+# Common values: Auto, Ampere, Ada, Hopper
+RUN cmake .. \
+    -GNinja \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DPY_VERSION=${PYTHON_VERSION} \
+    -DWITH_GPU=ON \
+    -DWITH_TESTING=OFF \
+    -DWITH_DISTRIBUTE=OFF \
+    -DWITH_NCCL=OFF \
+    -DWITH_MKL=OFF \
+    -DWITH_MKLDNN=OFF \
+    -DON_INFER=OFF \
+    -DWITH_PYTHON=ON \
+    -DWITH_AVX=OFF \
+    -DCUDA_ARCH_NAME=Auto \
+    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
+
+# Build PaddlePaddle (this takes 2-4 hours)
+RUN ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4
+
+# Build the Python wheel
+WORKDIR /build/Paddle/build
+RUN ninja paddle_python
+
+# Create output directory and copy wheel
+RUN mkdir -p /output
+
+# The wheel should be in python/dist/
+WORKDIR /build/Paddle
+
+# Build wheel package
+RUN cd python && python setup.py bdist_wheel
+
+# Copy wheel to output
+RUN cp python/dist/*.whl /output/ 2>/dev/null || \
+    cp build/python/dist/*.whl /output/ 2>/dev/null || \
+    echo "Wheel location may vary, checking build artifacts..."
+
+# List what was built
+RUN ls -la /output/ && \
+    echo "=== Build complete ===" && \
+    echo "Wheel files:" && \
+    find /build -name "*.whl" -type f 2>/dev/null
+
+# Default command: copy wheel to mounted volume
+CMD ["sh", "-c", "if cp /output/*.whl /wheels/ 2>/dev/null; then echo 'Wheel copied to /wheels/'; else echo 'No wheel found in /output, checking other locations...'; find /build -name '*.whl' -exec cp {} /wheels/ \\; ; fi; ls -la /wheels/"]
diff --git a/src/paddle_ocr/Dockerfile.gpu b/src/paddle_ocr/Dockerfile.gpu
index 5c3ca27..df0e4df 100644
--- a/src/paddle_ocr/Dockerfile.gpu
+++ b/src/paddle_ocr/Dockerfile.gpu
@@ -1,6 +1,15 @@
 # Dockerfile.gpu - CUDA-enabled PaddleOCR REST API
-# Supports: x86_64 with NVIDIA GPU (CUDA 12.x)
-# For DGX Spark (ARM64 + CUDA): build natively on the device
+#
+# Supports:
+#   - x86_64: Uses prebuilt paddlepaddle-gpu wheel from PyPI
+#   - ARM64:  Uses locally compiled wheel from ./wheels/ directory
+#
+# For ARM64, first build the wheel:
+#   docker compose run build-paddle
+# Then build this image:
+#   docker compose build ocr-gpu
+#
+# See README.md for detailed ARM64 GPU build instructions.

 FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04

@@ -28,9 +37,31 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     && rm -rf /var/lib/apt/lists/* \
     && ln -sf /usr/bin/python3.11 /usr/bin/python

-# Install Python dependencies from requirements file
+# Copy local wheels directory (may be empty or contain an ARM64 wheel)
+# The wheels/ directory is created by: docker compose run build-paddle
+COPY wheels/ /tmp/wheels/
+
+# Install Python dependencies
+# Strategy:
+#   1. If a local paddlepaddle wheel exists (ARM64), install it first
+#   2. Then install the remaining dependencies (excluding paddlepaddle-gpu from requirements)
 COPY requirements-gpu.txt .
-RUN pip install --no-cache-dir -r requirements-gpu.txt
+
+# Install paddlepaddle: prefer the local wheel, fall back to pip
+RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
+        echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
+        pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
+    else \
+        echo "=== Installing PaddlePaddle from PyPI (x86_64) ===" && \
+        pip install --no-cache-dir paddlepaddle-gpu==3.0.0; \
+    fi
+
+# Install remaining dependencies (skip the paddlepaddle-gpu line)
+RUN grep -v "paddlepaddle-gpu" requirements-gpu.txt > /tmp/requirements-no-paddle.txt && \
+    pip install --no-cache-dir -r /tmp/requirements-no-paddle.txt
+
+# Cleanup
+RUN rm -rf /tmp/wheels /tmp/requirements-no-paddle.txt

 # Copy application code
 COPY paddle_ocr_tuning_rest.py .
diff --git a/src/paddle_ocr/README.md b/src/paddle_ocr/README.md
index 1012a2b..113298d 100644
--- a/src/paddle_ocr/README.md
+++ b/src/paddle_ocr/README.md
@@ -66,8 +66,10 @@ docker compose up ocr-cpu
 | `dataset_manager.py` | Dataset loader |
 | `test.py` | API test client |
 | `Dockerfile.cpu` | CPU-only image (multi-arch) |
-| `Dockerfile.gpu` | GPU/CUDA image (x86_64) |
+| `Dockerfile.gpu` | GPU/CUDA image (x86_64 + ARM64 with local wheel) |
+| `Dockerfile.build-paddle` | PaddlePaddle GPU wheel builder for ARM64 |
 | `docker-compose.yml` | Service orchestration |
+| `wheels/` | Local PaddlePaddle wheels (created by build-paddle) |

 ## API Endpoints

@@ -147,54 +149,172 @@ docker run -d -p 8000:8000 --gpus all \
   paddle-ocr-api:gpu
 ```

-## DGX Spark (ARM64 + CUDA)
+## GPU Support Analysis

-DGX Spark uses ARM64 (Grace CPU) with NVIDIA Hopper GPU. You have two options:
+### Host System Reference (DGX Spark)

-### Option 1: Native ARM64 Build (Recommended)
+This section documents GPU support findings based on testing on an NVIDIA DGX Spark:

-PaddlePaddle has ARM64 support. Build natively:
+| Component | Value |
+|-----------|-------|
+| Architecture | ARM64 (aarch64) |
+| CPU | NVIDIA Grace (ARM) |
+| GPU | NVIDIA GB10 |
+| CUDA Version | 13.0 |
+| Driver | 580.95.05 |
+| OS | Ubuntu 24.04 LTS |
+| Container Toolkit | nvidia-container-toolkit 1.18.1 |
+| Docker | 28.5.1 |
+| Docker Compose | v2.40.0 |
+
+### PaddlePaddle GPU Platform Support
+
+**Critical Finding:** PaddlePaddle-GPU does **NOT** support the ARM64/aarch64 architecture.
+
+| Platform | CPU | GPU |
+|----------|-----|-----|
+| Linux x86_64 | ✅ | ✅ CUDA 10.2/11.x/12.x |
+| Windows x64 | ✅ | ✅ CUDA 10.2/11.x/12.x |
+| macOS x64 | ✅ | ❌ |
+| macOS ARM64 (M1/M2) | ✅ | ❌ |
+| Linux ARM64 (Jetson/DGX) | ✅ | ❌ No wheels |
+
+**Source:** [PaddlePaddle-GPU PyPI](https://pypi.org/project/paddlepaddle-gpu/) - only `manylinux_x86_64` and `win_amd64` wheels are available.
+
+### Why GPU Doesn't Work on ARM64
+
+1. **No prebuilt wheels**: `pip install paddlepaddle-gpu` fails on ARM64 because no compatible wheels exist
+2. **Not a CUDA issue**: The NVIDIA CUDA base images work fine on ARM64 (`nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04`)
+3. **Not a container toolkit issue**: `nvidia-container-toolkit` is installed and functional
+4. **PaddlePaddle limitation**: The Paddle team does not publish GPU wheels for ARM64
+
+When you run `pip install paddlepaddle-gpu` on ARM64:
+```
+ERROR: No matching distribution found for paddlepaddle-gpu
+```
+
+### Options for ARM64 Systems
+
+#### Option 1: CPU-Only (Recommended)
+
+Use `Dockerfile.cpu`, which works on ARM64:

 ```bash
-# On DGX Spark or ARM64 machine
+# On DGX Spark
+docker compose up ocr-cpu
+
+# Or build directly
+docker build -f Dockerfile.cpu -t paddle-ocr-api:cpu .
+```
+
+**Performance:** CPU inference on the ARM64 Grace CPU is fast thanks to its high core count. Expect ~2-5 seconds per page.
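+
+To confirm the container is actually using the CPU build of Paddle, a quick check (a sketch; it assumes the `ocr-cpu` service from `docker-compose.yml` is already running and that `python` is on the container's PATH):
+
+```bash
+# Should print "cpu" when the CPU-only paddlepaddle package is installed
+docker compose exec ocr-cpu python -c "import paddle; print(paddle.device.get_device())"
+```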
+
+#### Option 2: Build PaddlePaddle from Source (Docker-based)
+
+Use the included Docker builder to compile PaddlePaddle GPU for ARM64:
+
+```bash
+cd src/paddle_ocr
+
+# Step 1: Build the PaddlePaddle GPU wheel (one-time, 2-4 hours)
+docker compose --profile build run --rm build-paddle
+
+# Verify the wheel was created
+ls -la wheels/paddlepaddle*.whl
+
+# Step 2: Build the GPU image (uses the local wheel)
+docker compose build ocr-gpu
+
+# Step 3: Run with GPU
+docker compose up ocr-gpu
+
+# Verify GPU is working
+docker compose exec ocr-gpu python -c "import paddle; print(paddle.device.is_compiled_with_cuda())"
+```
+
+**What this does:**
+1. `build-paddle` compiles PaddlePaddle from source inside a CUDA container
+2. The wheel is saved to the `./wheels/` directory
+3. `Dockerfile.gpu` detects the local wheel and installs it instead of pulling from PyPI
+
+**Caveats:**
+- The build takes 2-4 hours on the first run
+- Requires ~20GB of disk space during the build
+- Not officially supported by the PaddlePaddle team
+- May need adjustments for future PaddlePaddle versions
+
+See: [GitHub Issue #17327](https://github.com/PaddlePaddle/PaddleOCR/issues/17327)
+
+#### Option 3: Alternative OCR Engines
+
+For ARM64 GPU acceleration, consider these alternatives:
+
+| Engine | ARM64 GPU | Notes |
+|--------|-----------|-------|
+| **Tesseract** | ❌ CPU-only | Good fallback, widely available |
+| **EasyOCR** | ⚠️ Via PyTorch | PyTorch has ARM64 GPU support |
+| **TrOCR** | ⚠️ Via Transformers | Hugging Face Transformers + PyTorch |
+| **docTR** | ⚠️ Via TensorFlow/PyTorch | Both backends have ARM64 support |
+
+EasyOCR with PyTorch is a viable alternative:
+```bash
+pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
+pip install easyocr
+```
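+
+A minimal usage sketch for the EasyOCR route (illustrative only, not part of this service; `gpu=True` assumes a CUDA-enabled PyTorch build, and the image path is hypothetical):
+
+```python
+import easyocr
+
+# Build a reader for English; EasyOCR falls back to CPU with a warning if CUDA is unavailable.
+reader = easyocr.Reader(["en"], gpu=True)
+
+# readtext returns a list of (bounding_box, text, confidence) tuples.
+for bbox, text, confidence in reader.readtext("sample_page.png"):
+    print(f"{confidence:.2f}  {text}")
+```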
+
+### x86_64 GPU Setup (Working)
+
+For x86_64 systems with an NVIDIA GPU, the GPU image works out of the box:
+
+```bash
+# Verify the GPU is accessible
+nvidia-smi
+
+# Verify Docker GPU access
+docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
+
+# Build and run the GPU version
+docker compose up ocr-gpu
+```
+
+### GPU Docker Compose Configuration
+
+The `docker-compose.yml` configures GPU access via:
+
+```yaml
+deploy:
+  resources:
+    reservations:
+      devices:
+        - driver: nvidia
+          count: 1
+          capabilities: [gpu]
+```
+
+This requires Docker Compose v2 and the nvidia-container-toolkit package.
+
+## DGX Spark / ARM64 Quick Start
+
+For ARM64 systems (DGX Spark, Jetson, Graviton), use the CPU-only image:
+
+```bash
+cd src/paddle_ocr
+
+# Build the ARM64-native CPU image
 docker build -f Dockerfile.cpu -t paddle-ocr-api:arm64 .
-```
-For GPU acceleration on ARM64, you'll need to modify `Dockerfile.gpu` to use ARM-compatible base image:
-
-```dockerfile
-# Change this line in Dockerfile.gpu:
-FROM nvcr.io/nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
-
-# To ARM64-compatible version:
-FROM nvcr.io/nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
-# (same image works on ARM64 when pulled on ARM machine)
-```
-
-Then build on the DGX Spark:
-```bash
-docker build -f Dockerfile.gpu -t paddle-ocr-api:gpu-arm64 .
-```
-
-### Option 2: x86_64 Emulation via QEMU (Slow)
-
-You CAN run x86_64 images on ARM via emulation, but it's ~10-20x slower:
-
-```bash
-# On DGX Spark, enable QEMU emulation
-docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
-
-# Run x86_64 image with emulation
-docker run --platform linux/amd64 -p 8000:8000 \
+docker run -d -p 8000:8000 \
   -v $(pwd)/../dataset:/app/dataset:ro \
-  paddle-ocr-api:cpu
+  paddle-ocr-api:arm64
+
+# Test
+curl http://localhost:8000/health
 ```

-**Not recommended** for production due to severe performance penalty.
+### Cross-Compile from x86_64

-### Option 3: Cross-compile from x86_64
-
-Build ARM64 images from your x86_64 machine:
+Build ARM64 images from an x86_64 machine:

 ```bash
 # Setup buildx for multi-arch
@@ -209,6 +329,7 @@ docker buildx build -f Dockerfile.cpu \
 # Save and transfer to DGX Spark
 docker save paddle-ocr-api:arm64 | gzip > paddle-ocr-arm64.tar.gz
 scp paddle-ocr-arm64.tar.gz dgx-spark:~/
+
 # On DGX Spark:
 docker load < paddle-ocr-arm64.tar.gz
 ```
diff --git a/src/paddle_ocr/docker-compose.yml b/src/paddle_ocr/docker-compose.yml
index 1bbd6e0..5f27afd 100644
--- a/src/paddle_ocr/docker-compose.yml
+++ b/src/paddle_ocr/docker-compose.yml
@@ -1,10 +1,30 @@
 # docker-compose.yml - PaddleOCR REST API
 # Usage:
-#   CPU:  docker compose up ocr-cpu
-#   GPU:  docker compose up ocr-gpu
-#   Test: docker compose run --rm test
+#   CPU:   docker compose up ocr-cpu
+#   GPU:   docker compose up ocr-gpu
+#   Test:  docker compose run --rm test
+#   Build: docker compose run --rm build-paddle  (ARM64 GPU wheel, one-time)

 services:
+  # PaddlePaddle GPU wheel builder (ARM64 only, one-time build)
+  # Creates ./wheels/paddlepaddle_gpu-*.whl for ARM64 GPU support
+  # Run once: docker compose run --rm build-paddle
+  build-paddle:
+    build:
+      context: .
+      dockerfile: Dockerfile.build-paddle
+    volumes:
+      - ./wheels:/wheels
+    profiles:
+      - build
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+
   # CPU-only service (works on any architecture)
   ocr-cpu:
     build:
diff --git a/src/paddle_ocr/wheels/.gitkeep b/src/paddle_ocr/wheels/.gitkeep
new file mode 100644
index 0000000..e69de29