This commit is contained in:
2026-01-17 10:46:36 +01:00
parent 27609a0ed0
commit 78fe3e8c81
5 changed files with 358 additions and 45 deletions

View File

@@ -0,0 +1,141 @@
# syntax=docker/dockerfile:1
# Dockerfile.build-paddle - Build PaddlePaddle GPU wheel for ARM64
#
# This Dockerfile compiles PaddlePaddle from source with CUDA support for ARM64.
# The resulting wheel can be used in Dockerfile.gpu for ARM64 GPU acceleration.
#
# Build time: 2-4 hours depending on hardware
# Output: /output/paddlepaddle_gpu-*.whl (copied into the mounted /wheels volume)
#
# Usage:
#   docker compose run build-paddle
#   # or
#   docker build -f Dockerfile.build-paddle -t paddle-builder .
#   docker run -v ./wheels:/wheels paddle-builder

FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

LABEL maintainer="Sergio Jimenez"
LABEL description="PaddlePaddle GPU wheel builder for ARM64"

# Build arguments
ARG PADDLE_VERSION=v3.0.0
ARG PYTHON_VERSION=3.11
# Build-time only; deliberately ARG (not ENV) so it does not leak into the
# runtime environment of containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive

# Environment setup
ENV PYTHONUNBUFFERED=1

# Install build dependencies. update + install + cleanup stay in ONE layer
# so the apt package lists never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        # Python
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
        python3-pip \
        # Build tools
        build-essential \
        cmake \
        ninja-build \
        git \
        wget \
        curl \
        pkg-config \
        # Libraries
        libssl-dev \
        libffi-dev \
        zlib1g-dev \
        libbz2-dev \
        libreadline-dev \
        libsqlite3-dev \
        liblzma-dev \
        libncurses5-dev \
        libncursesw5-dev \
        libgflags-dev \
        libgoogle-glog-dev \
        libprotobuf-dev \
        protobuf-compiler \
        patchelf \
        # Additional dependencies for Paddle
        libopenblas-dev \
        liblapack-dev \
        swig \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3

# Upgrade pip and install Python build dependencies.
# --no-cache-dir keeps the pip download cache out of the image layers.
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel \
    && python -m pip install --no-cache-dir \
        numpy \
        protobuf \
        pyyaml \
        requests \
        packaging \
        astor \
        decorator \
        paddle-bfloat \
        opt-einsum

WORKDIR /build

# Clone PaddlePaddle repository (shallow clone of the pinned release tag)
RUN git clone --depth 1 --branch ${PADDLE_VERSION} \
    https://github.com/PaddlePaddle/Paddle.git

WORKDIR /build/Paddle

# Install additional Python requirements for building.
# Best-effort on purpose (|| true): some pinned packages have no ARM64
# wheels; the core build dependencies were already installed above.
RUN pip install --no-cache-dir -r python/requirements.txt || true

# WORKDIR creates the directory if it does not exist - no mkdir needed.
WORKDIR /build/Paddle/build

# Configure CMake for ARM64 + CUDA build
# Note: Adjust CUDA_ARCH_NAME based on your GPU architecture
# Common values: Auto, Ampere, Ada, Hopper
RUN cmake .. \
    -GNinja \
    -DCMAKE_BUILD_TYPE=Release \
    -DPY_VERSION=${PYTHON_VERSION} \
    -DWITH_GPU=ON \
    -DWITH_TESTING=OFF \
    -DWITH_DISTRIBUTE=OFF \
    -DWITH_NCCL=OFF \
    -DWITH_MKL=OFF \
    -DWITH_MKLDNN=OFF \
    -DON_INFER=OFF \
    -DWITH_PYTHON=ON \
    -DWITH_AVX=OFF \
    -DCUDA_ARCH_NAME=Auto \
    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON

# Build PaddlePaddle (this takes 2-4 hours).
# Retries with fewer parallel jobs in case the fully-parallel build is
# killed by the OOM killer on memory-constrained hosts.
RUN ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4

# Build the Python wheel target (still in /build/Paddle/build)
RUN ninja paddle_python

# Create output directory for the finished wheel
RUN mkdir -p /output

# Build wheel package. Use WORKDIR instead of `RUN cd ...` so the working
# directory is explicit and lint-clean.
WORKDIR /build/Paddle/python
RUN python setup.py bdist_wheel

WORKDIR /build/Paddle

# Copy wheel to output - the dist location varies between Paddle versions,
# so try both known locations before giving up with a diagnostic message.
RUN cp python/dist/*.whl /output/ 2>/dev/null || \
    cp build/python/dist/*.whl /output/ 2>/dev/null || \
    echo "Wheel location may vary, checking build artifacts..."

# List what was built (build-time diagnostics only)
RUN ls -la /output/ && \
    echo "=== Build complete ===" && \
    echo "Wheel files:" && \
    find /build -name "*.whl" -type f 2>/dev/null

# Default command: copy the wheel into the mounted /wheels volume.
# Written as an explicit if/else: the previous "a && b || c && d" chain
# evaluated left-to-right, so the fallback find-and-copy ran even after a
# successful copy from /output.
CMD ["sh", "-c", "\
if ls /output/*.whl >/dev/null 2>&1; then \
  cp /output/*.whl /wheels/ && echo 'Wheel copied to /wheels/'; \
else \
  echo 'No wheel found in /output, checking other locations...'; \
  find /build -name '*.whl' -exec cp {} /wheels/ \\; ; \
fi; \
ls -la /wheels/"]

View File

@@ -1,6 +1,15 @@
# Dockerfile.gpu - CUDA-enabled PaddleOCR REST API
# Supports: x86_64 with NVIDIA GPU (CUDA 12.x)
# For DGX Spark (ARM64 + CUDA): build natively on the device
#
# Supports:
# - x86_64: Uses prebuilt paddlepaddle-gpu wheel from PyPI
# - ARM64: Uses locally compiled wheel from ./wheels/ directory
#
# For ARM64, first build the wheel:
# docker compose run build-paddle
# Then build this image:
# docker compose build ocr-gpu
#
# See README.md for detailed ARM64 GPU build instructions.
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
@@ -28,9 +37,31 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& rm -rf /var/lib/apt/lists/* \
&& ln -sf /usr/bin/python3.11 /usr/bin/python
# Copy local wheels directory (may be empty or contain ARM64 wheel)
# The wheels/ directory is created by: docker compose run build-paddle
COPY wheels/ /tmp/wheels/

# Install Python dependencies
# Strategy:
# 1. If local paddlepaddle wheel exists (ARM64), install it first
# 2. Then install remaining dependencies, with paddlepaddle-gpu filtered out
#    of requirements so pip never tries to fetch the non-existent ARM64 wheel
COPY requirements-gpu.txt .

# Install paddlepaddle: prefer local wheel, fall back to PyPI.
# NOTE: requirements-gpu.txt must NOT be installed unfiltered before this
# point - doing so would pull paddlepaddle-gpu from PyPI ahead of the
# local-wheel logic (and fail outright on ARM64, where no wheel exists).
RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
        echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
        pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
    else \
        echo "=== Installing PaddlePaddle from PyPI (x86_64) ===" && \
        pip install --no-cache-dir paddlepaddle-gpu==3.0.0; \
    fi

# Install remaining dependencies (skip the paddlepaddle-gpu line)
RUN grep -v "paddlepaddle-gpu" requirements-gpu.txt > /tmp/requirements-no-paddle.txt && \
    pip install --no-cache-dir -r /tmp/requirements-no-paddle.txt

# Cleanup build-time artifacts.
# NOTE(review): rm in a later layer does not shrink earlier layers - the
# wheel still occupies space in the COPY layer above. Acceptable here since
# the wheel is needed across several RUN steps; a multi-stage build would
# eliminate it entirely.
RUN rm -rf /tmp/wheels /tmp/requirements-no-paddle.txt

# Copy application code
COPY paddle_ocr_tuning_rest.py .

View File

@@ -66,8 +66,10 @@ docker compose up ocr-cpu
| `dataset_manager.py` | Dataset loader |
| `test.py` | API test client |
| `Dockerfile.cpu` | CPU-only image (multi-arch) |
| `Dockerfile.gpu` | GPU/CUDA image (x86_64 + ARM64 with local wheel) |
| `Dockerfile.build-paddle` | PaddlePaddle GPU wheel builder for ARM64 |
| `docker-compose.yml` | Service orchestration |
| `wheels/` | Local PaddlePaddle wheels (created by build-paddle) |
## API Endpoints
@@ -147,54 +149,172 @@ docker run -d -p 8000:8000 --gpus all \
paddle-ocr-api:gpu
```
## DGX Spark (ARM64 + CUDA)
## GPU Support Analysis
DGX Spark uses ARM64 (Grace CPU) with NVIDIA Hopper GPU. You have two options:
### Host System Reference (DGX Spark)
### Option 1: Native ARM64 Build (Recommended)
This section documents GPU support findings based on testing on an NVIDIA DGX Spark:
PaddlePaddle has ARM64 support. Build natively:
| Component | Value |
|-----------|-------|
| Architecture | ARM64 (aarch64) |
| CPU | NVIDIA Grace (ARM) |
| GPU | NVIDIA GB10 |
| CUDA Version | 13.0 |
| Driver | 580.95.05 |
| OS | Ubuntu 24.04 LTS |
| Container Toolkit | nvidia-container-toolkit 1.18.1 |
| Docker | 28.5.1 |
| Docker Compose | v2.40.0 |
### PaddlePaddle GPU Platform Support
**Critical Finding:** PaddlePaddle-GPU does **NOT** support ARM64/aarch64 architecture.
| Platform | CPU | GPU |
|----------|-----|-----|
| Linux x86_64 | ✅ | ✅ CUDA 10.2/11.x/12.x |
| Windows x64 | ✅ | ✅ CUDA 10.2/11.x/12.x |
| macOS x64 | ✅ | ❌ |
| macOS ARM64 (M1/M2) | ✅ | ❌ |
| Linux ARM64 (Jetson/DGX) | ✅ | ❌ No wheels |
**Source:** [PaddlePaddle-GPU PyPI](https://pypi.org/project/paddlepaddle-gpu/) - only `manylinux_x86_64` and `win_amd64` wheels available.
### Why GPU Doesn't Work on ARM64
1. **No prebuilt wheels**: `pip install paddlepaddle-gpu` fails on ARM64 - no compatible wheels exist
2. **Not a CUDA issue**: The NVIDIA CUDA base images work fine on ARM64 (`nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04`)
3. **Not a container toolkit issue**: `nvidia-container-toolkit` is installed and functional
4. **PaddlePaddle limitation**: The Paddle team hasn't compiled GPU wheels for ARM64
When you run `pip install paddlepaddle-gpu` on ARM64:
```
ERROR: No matching distribution found for paddlepaddle-gpu
```
### Options for ARM64 Systems
#### Option 1: CPU-Only (Recommended)
Use `Dockerfile.cpu` which works on ARM64:
```bash
# On DGX Spark
docker compose up ocr-cpu
# Or build directly
docker build -f Dockerfile.cpu -t paddle-ocr-api:cpu .
```
**Performance:** CPU inference on ARM64 Grace is surprisingly fast due to high core count. Expect ~2-5 seconds per page.
#### Option 2: Build PaddlePaddle from Source (Docker-based)
Use the included Docker builder to compile PaddlePaddle GPU for ARM64:
```bash
cd src/paddle_ocr
# Step 1: Build the PaddlePaddle GPU wheel (one-time, 2-4 hours)
docker compose --profile build run --rm build-paddle
# Verify wheel was created
ls -la wheels/paddlepaddle*.whl
# Step 2: Build the GPU image (uses local wheel)
docker compose build ocr-gpu
# Step 3: Run with GPU
docker compose up ocr-gpu
# Verify GPU is working
docker compose exec ocr-gpu python -c "import paddle; print(paddle.device.is_compiled_with_cuda())"
```
**What this does:**
1. `build-paddle` compiles PaddlePaddle from source inside a CUDA container
2. The wheel is saved to `./wheels/` directory
3. `Dockerfile.gpu` detects the local wheel and uses it instead of PyPI
**Caveats:**
- Build takes 2-4 hours on first run
- Requires ~20GB disk space during build
- Not officially supported by PaddlePaddle team
- May need adjustments for future PaddlePaddle versions
See: [GitHub Issue #17327](https://github.com/PaddlePaddle/PaddleOCR/issues/17327)
#### Option 3: Alternative OCR Engines
For ARM64 GPU acceleration, consider alternatives:
| Engine | ARM64 GPU | Notes |
|--------|-----------|-------|
| **Tesseract** | ❌ CPU-only | Good fallback, widely available |
| **EasyOCR** | ⚠️ Via PyTorch | PyTorch has ARM64 GPU support |
| **TrOCR** | ⚠️ Via Transformers | Hugging Face Transformers + PyTorch |
| **docTR** | ⚠️ Via TensorFlow/PyTorch | Both backends have ARM64 support |
EasyOCR with PyTorch is a viable alternative:
```bash
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install easyocr
```
### x86_64 GPU Setup (Working)
For x86_64 systems with NVIDIA GPU, the GPU Docker works:
```bash
# Verify GPU is accessible
nvidia-smi
# Verify Docker GPU access
docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
# Build and run GPU version
docker compose up ocr-gpu
```
### GPU Docker Compose Configuration
The `docker-compose.yml` configures GPU access via:
```yaml
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
```
This requires Docker Compose v2 and nvidia-container-toolkit.
## DGX Spark / ARM64 Quick Start
For ARM64 systems (DGX Spark, Jetson, Graviton), use CPU-only:
```bash
cd src/paddle_ocr
# Build ARM64-native CPU image
docker build -f Dockerfile.cpu -t paddle-ocr-api:arm64 .
```
For GPU acceleration on ARM64, no base-image change is needed in `Dockerfile.gpu`:
```dockerfile
# This line in Dockerfile.gpu already works on ARM64:
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
# The image is multi-arch - Docker automatically pulls the ARM64 variant
# when the build runs on an ARM machine.
```
Then build on the DGX Spark:
```bash
docker build -f Dockerfile.gpu -t paddle-ocr-api:gpu-arm64 .
```
### Option 2: x86_64 Emulation via QEMU (Slow)
You CAN run x86_64 images on ARM via emulation, but it's ~10-20x slower:
```bash
# On DGX Spark, enable QEMU emulation
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
# Run x86_64 image with emulation
docker run --platform linux/amd64 -p 8000:8000 \
# Run
docker run -d -p 8000:8000 \
-v $(pwd)/../dataset:/app/dataset:ro \
  paddle-ocr-api:arm64
# Test
curl http://localhost:8000/health
```
**Not recommended** for production due to severe performance penalty.
### Option 3: Cross-compile from x86_64
Build ARM64 images from an x86_64 machine:
```bash
# Setup buildx for multi-arch
@@ -209,6 +329,7 @@ docker buildx build -f Dockerfile.cpu \
# Save and transfer to DGX Spark
docker save paddle-ocr-api:arm64 | gzip > paddle-ocr-arm64.tar.gz
scp paddle-ocr-arm64.tar.gz dgx-spark:~/
# On DGX Spark:
docker load < paddle-ocr-arm64.tar.gz
```

View File

@@ -3,8 +3,28 @@
# CPU: docker compose up ocr-cpu
# GPU: docker compose up ocr-gpu
# Test: docker compose run --rm test
# Build: docker compose run --rm build-paddle (ARM64 GPU wheel, one-time)
services:
# PaddlePaddle GPU wheel builder (ARM64 only, one-time build)
# Creates ./wheels/paddlepaddle_gpu-*.whl for ARM64 GPU support
# Run once: docker compose run --rm build-paddle
build-paddle:
build:
context: .
dockerfile: Dockerfile.build-paddle
# The builder image's CMD copies the finished wheel into /wheels inside the
# container; this bind mount makes it land in ./wheels/ on the host.
volumes:
- ./wheels:/wheels
# Gated behind the "build" profile so a plain `docker compose up` never
# triggers the multi-hour compile.
profiles:
- build
# Reserve one GPU for the build container (requires Docker Compose v2 and
# nvidia-container-toolkit). NOTE(review): compiling the wheel may not
# strictly need a GPU at build time - confirm whether this reservation is
# required, since it makes the service fail on GPU-less hosts.
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
# CPU-only service (works on any architecture)
ocr-cpu:
build:

View File