From b96dc1ed91bd340896239c27a25fe74b9b149c5b Mon Sep 17 00:00:00 2001 From: Sergio Jimenez Jimenez Date: Sat, 17 Jan 2026 17:25:05 +0100 Subject: [PATCH] build multi arch --- .gitea/workflows/ci.yaml | 42 +++++++++++++++++++ src/paddle_ocr/Dockerfile.build-paddle-cpu | 8 +++- src/paddle_ocr/README.md | 27 ++++++------ .../docker-compose.cpu-registry.yml | 2 +- .../docker-compose.gpu-registry.yml | 2 +- 5 files changed, 64 insertions(+), 17 deletions(-) diff --git a/.gitea/workflows/ci.yaml b/.gitea/workflows/ci.yaml index 12f3712..c3aa000 100644 --- a/.gitea/workflows/ci.yaml +++ b/.gitea/workflows/ci.yaml @@ -137,3 +137,45 @@ jobs: tags: | ${{ needs.essential.outputs.image_gpu }}:${{ needs.essential.outputs.Version }}-${{ steps.arch.outputs.suffix }} ${{ needs.essential.outputs.image_gpu }}:${{ steps.arch.outputs.suffix }} + + # Create multi-arch manifest for CPU image + manifest_cpu: + runs-on: ubuntu-latest + needs: [essential, build_cpu] + steps: + - name: Login to Gitea Registry + uses: docker/login-action@v3 + with: + registry: ${{ needs.essential.outputs.repo }} + username: username + password: ${{ secrets.CI_READWRITE }} + + - name: Create multi-arch manifest (CPU) + run: | + docker buildx imagetools create -t ${{ needs.essential.outputs.image_cpu }}:latest \ + ${{ needs.essential.outputs.image_cpu }}:amd64 \ + ${{ needs.essential.outputs.image_cpu }}:arm64 + docker buildx imagetools create -t ${{ needs.essential.outputs.image_cpu }}:${{ needs.essential.outputs.Version }} \ + ${{ needs.essential.outputs.image_cpu }}:${{ needs.essential.outputs.Version }}-amd64 \ + ${{ needs.essential.outputs.image_cpu }}:${{ needs.essential.outputs.Version }}-arm64 + + # Create multi-arch manifest for GPU image + manifest_gpu: + runs-on: ubuntu-latest + needs: [essential, build_gpu] + steps: + - name: Login to Gitea Registry + uses: docker/login-action@v3 + with: + registry: ${{ needs.essential.outputs.repo }} + username: username + password: ${{ secrets.CI_READWRITE }} + + - name: Create multi-arch manifest (GPU) + run: | + docker buildx imagetools create -t ${{ needs.essential.outputs.image_gpu }}:latest \ + ${{ needs.essential.outputs.image_gpu }}:amd64 \ + ${{ needs.essential.outputs.image_gpu }}:arm64 + docker buildx imagetools create -t ${{ needs.essential.outputs.image_gpu }}:${{ needs.essential.outputs.Version }} \ + ${{ needs.essential.outputs.image_gpu }}:${{ needs.essential.outputs.Version }}-amd64 \ + ${{ needs.essential.outputs.image_gpu }}:${{ needs.essential.outputs.Version }}-arm64 diff --git a/src/paddle_ocr/Dockerfile.build-paddle-cpu b/src/paddle_ocr/Dockerfile.build-paddle-cpu index 688c465..d95b89a 100644 --- a/src/paddle_ocr/Dockerfile.build-paddle-cpu +++ b/src/paddle_ocr/Dockerfile.build-paddle-cpu @@ -99,17 +99,20 @@ RUN pip install -r python/requirements.txt || true RUN mkdir -p build WORKDIR /build/Paddle/build -# Configure for CPU-only build +# Configure for CPU-only ARM64 build +# WITH_ARM=ON enables ARM NEON optimizations and disables x86-specific code (XBYAK, MKL) RUN cmake .. \ -GNinja \ -DCMAKE_BUILD_TYPE=Release \ -DPY_VERSION=${PYTHON_VERSION} \ -DWITH_GPU=OFF \ + -DWITH_ARM=ON \ -DWITH_TESTING=OFF \ -DWITH_DISTRIBUTE=OFF \ -DWITH_NCCL=OFF \ -DWITH_MKL=OFF \ -DWITH_MKLDNN=OFF \ + -DWITH_XBYAK=OFF \ -DON_INFER=OFF \ -DWITH_PYTHON=ON \ -DWITH_AVX=OFF \ @@ -121,8 +124,9 @@ RUN cmake .. \ RUN --mount=type=cache,target=/ccache \ ninja extern_gflags extern_glog extern_protobuf extern_zlib extern_eigen3 +# Note: extern_xbyak excluded - it's x86-only and disabled with WITH_ARM=ON RUN --mount=type=cache,target=/ccache \ - ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo extern_xbyak + ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo # Build PaddlePaddle RUN --mount=type=cache,target=/ccache \ diff --git a/src/paddle_ocr/README.md b/src/paddle_ocr/README.md index 492d23f..99c3ebf 100644 --- a/src/paddle_ocr/README.md +++ b/src/paddle_ocr/README.md @@ -126,7 +126,7 @@ docker buildx build -f Dockerfile.cpu \ --push . ``` -### GPU Image (x86_64 only) +### GPU Image (x86_64 + ARM64 with local wheel) ```bash docker build -f Dockerfile.gpu -t paddle-ocr-api:gpu . @@ -174,7 +174,7 @@ This section documents GPU support findings based on testing on an NVIDIA DGX Sp ### PaddlePaddle GPU Platform Support -**Critical Finding:** PaddlePaddle-GPU does **NOT** support ARM64/aarch64 architecture. +**Note:** PaddlePaddle-GPU does NOT have prebuilt ARM64 wheels on PyPI, but ARM64 support is available via custom-built wheels. | Platform | CPU | GPU | |----------|-----|-----| @@ -182,21 +182,22 @@ This section documents GPU support findings based on testing on an NVIDIA DGX Sp | Windows x64 | ✅ | ✅ CUDA 10.2/11.x/12.x | | macOS x64 | ✅ | ❌ | | macOS ARM64 (M1/M2) | ✅ | ❌ | -| Linux ARM64 (Jetson/DGX) | ✅ | ❌ No wheels | +| Linux ARM64 (Jetson/DGX) | ✅ | ✅ Custom wheel required | -**Source:** [PaddlePaddle-GPU PyPI](https://pypi.org/project/paddlepaddle-gpu/) - only `manylinux_x86_64` and `win_amd64` wheels available. +**Source:** [PaddlePaddle-GPU PyPI](https://pypi.org/project/paddlepaddle-gpu/) - only `manylinux_x86_64` and `win_amd64` wheels available on PyPI. ARM64 wheels must be built from source or downloaded from Gitea packages. -### Why GPU Doesn't Work on ARM64 +### ARM64 GPU Support -1. **No prebuilt wheels**: `pip install paddlepaddle-gpu` fails on ARM64 - no compatible wheels exist -2. **Not a CUDA issue**: The NVIDIA CUDA base images work fine on ARM64 (`nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04`) -3. **Not a container toolkit issue**: `nvidia-container-toolkit` is installed and functional -4. **PaddlePaddle limitation**: The Paddle team hasn't compiled GPU wheels for ARM64 +ARM64 GPU support is available but requires custom-built wheels: -When you run `pip install paddlepaddle-gpu` on ARM64: -``` -ERROR: No matching distribution found for paddlepaddle-gpu -``` +1. **No prebuilt PyPI wheels**: `pip install paddlepaddle-gpu` fails on ARM64 - no compatible wheels exist on PyPI +2. **Custom wheels work**: This project provides Dockerfiles to build ARM64 GPU wheels from source +3. **CI/CD builds ARM64 GPU images**: Pre-built wheels are available from Gitea packages + +**To use GPU on ARM64:** +- Use the pre-built images from the container registry, or +- Build the wheel locally using `Dockerfile.build-paddle` (see Option 2 below), or +- Download the wheel from Gitea packages: `wheels/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl` ### Options for ARM64 Systems diff --git a/src/paddle_ocr/docker-compose.cpu-registry.yml b/src/paddle_ocr/docker-compose.cpu-registry.yml index a9d67b0..1d9246f 100644 --- a/src/paddle_ocr/docker-compose.cpu-registry.yml +++ b/src/paddle_ocr/docker-compose.cpu-registry.yml @@ -3,7 +3,7 @@ services: ocr-cpu: - image: seryus.ddns.net/unir/paddle-ocr-cpu:arm64 + image: seryus.ddns.net/unir/paddle-ocr-cpu:latest container_name: paddle-ocr-cpu-registry ports: - "8001:8000" diff --git a/src/paddle_ocr/docker-compose.gpu-registry.yml b/src/paddle_ocr/docker-compose.gpu-registry.yml index c1629d9..ed37626 100644 --- a/src/paddle_ocr/docker-compose.gpu-registry.yml +++ b/src/paddle_ocr/docker-compose.gpu-registry.yml @@ -5,7 +5,7 @@ services: ocr-gpu: - image: seryus.ddns.net/unir/paddle-ocr-gpu:arm64 + image: seryus.ddns.net/unir/paddle-ocr-gpu:latest container_name: paddle-ocr-gpu-registry ports: - "8002:8000"