2026-01-19 17:35:25 +00:00
5 changed files with 64 additions and 17 deletions
--- a/.gitea/workflows/ci.yaml
+++ b/.gitea/workflows/ci.yaml
@@ -137,3 +137,45 @@ jobs:
          tags: |
            ${{ needs.essential.outputs.image_gpu }}:${{ needs.essential.outputs.Version }}-${{ steps.arch.outputs.suffix }}
            ${{ needs.essential.outputs.image_gpu }}:${{ steps.arch.outputs.suffix }}
+
+  # Multi-arch manifest for the CPU image: joins the amd64/arm64 tags under `latest` and `<Version>` (runs after build_cpu)
+  manifest_cpu:
+    runs-on: ubuntu-latest
+    needs: [essential, build_cpu]
+    steps:
+      - name: Login to Gitea Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ needs.essential.outputs.repo }}
+          username: username
+          password: ${{ secrets.CI_READWRITE }}
+
+      - name: Create multi-arch manifest (CPU)
+        run: |
+          docker buildx imagetools create -t ${{ needs.essential.outputs.image_cpu }}:latest \
+            ${{ needs.essential.outputs.image_cpu }}:amd64 \
+            ${{ needs.essential.outputs.image_cpu }}:arm64
+          docker buildx imagetools create -t ${{ needs.essential.outputs.image_cpu }}:${{ needs.essential.outputs.Version }} \
+            ${{ needs.essential.outputs.image_cpu }}:${{ needs.essential.outputs.Version }}-amd64 \
+            ${{ needs.essential.outputs.image_cpu }}:${{ needs.essential.outputs.Version }}-arm64
+
+  # Multi-arch manifest for the GPU image: joins the amd64/arm64 tags under `latest` and `<Version>` (runs after build_gpu)
+  manifest_gpu:
+    runs-on: ubuntu-latest
+    needs: [essential, build_gpu]
+    steps:
+      - name: Login to Gitea Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ needs.essential.outputs.repo }}
+          username: username
+          password: ${{ secrets.CI_READWRITE }}
+
+      - name: Create multi-arch manifest (GPU)
+        run: |
+          docker buildx imagetools create -t ${{ needs.essential.outputs.image_gpu }}:latest \
+            ${{ needs.essential.outputs.image_gpu }}:amd64 \
+            ${{ needs.essential.outputs.image_gpu }}:arm64
+          docker buildx imagetools create -t ${{ needs.essential.outputs.image_gpu }}:${{ needs.essential.outputs.Version }} \
+            ${{ needs.essential.outputs.image_gpu }}:${{ needs.essential.outputs.Version }}-amd64 \
+            ${{ needs.essential.outputs.image_gpu }}:${{ needs.essential.outputs.Version }}-arm64
--- a/src/paddle_ocr/Dockerfile.build-paddle-cpu
+++ b/src/paddle_ocr/Dockerfile.build-paddle-cpu
@@ -99,17 +99,20 @@ RUN pip install -r python/requirements.txt || true
 RUN mkdir -p build
 WORKDIR /build/Paddle/build

-# Configure for CPU-only build
+# Configure for CPU-only ARM64 build
+# WITH_ARM=ON enables ARM NEON optimizations and disables x86-specific code (XBYAK, MKL)
 RUN cmake .. \
    -GNinja \
    -DCMAKE_BUILD_TYPE=Release \
    -DPY_VERSION=${PYTHON_VERSION} \
    -DWITH_GPU=OFF \
+    -DWITH_ARM=ON \
    -DWITH_TESTING=OFF \
    -DWITH_DISTRIBUTE=OFF \
    -DWITH_NCCL=OFF \
    -DWITH_MKL=OFF \
    -DWITH_MKLDNN=OFF \
+    -DWITH_XBYAK=OFF \
    -DON_INFER=OFF \
    -DWITH_PYTHON=ON \
    -DWITH_AVX=OFF \
@@ -121,8 +124,9 @@ RUN cmake .. \
 RUN --mount=type=cache,target=/ccache \
    ninja extern_gflags extern_glog extern_protobuf extern_zlib extern_eigen3

+# Note: extern_xbyak is excluded - xbyak is x86-only and is turned off above via WITH_ARM=ON / WITH_XBYAK=OFF
 RUN --mount=type=cache,target=/ccache \
-    ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo extern_xbyak
+    ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo

 # Build PaddlePaddle
 RUN --mount=type=cache,target=/ccache \
--- a/src/paddle_ocr/README.md
+++ b/src/paddle_ocr/README.md
@@ -126,7 +126,7 @@ docker buildx build -f Dockerfile.cpu \
  --push .
 ```

-### GPU Image (x86_64 only)
+### GPU Image (x86_64 + ARM64 with local wheel)

 ```bash
 docker build -f Dockerfile.gpu -t paddle-ocr-api:gpu .
@@ -174,7 +174,7 @@ This section documents GPU support findings based on testing on an NVIDIA DGX Sp

 ### PaddlePaddle GPU Platform Support

-**Critical Finding:** PaddlePaddle-GPU does **NOT** support ARM64/aarch64 architecture.
+**Note:** PaddlePaddle-GPU does NOT have prebuilt ARM64 wheels on PyPI, but ARM64 support is available via custom-built wheels.

 | Platform | CPU | GPU |
 |----------|-----|-----|
@@ -182,21 +182,22 @@ This section documents GPU support findings based on testing on an NVIDIA DGX Sp
 | Windows x64 | ✅ | ✅ CUDA 10.2/11.x/12.x |
 | macOS x64 | ✅ | ❌ |
 | macOS ARM64 (M1/M2) | ✅ | ❌ |
-| Linux ARM64 (Jetson/DGX) | ✅ | ❌ No wheels |
+| Linux ARM64 (Jetson/DGX) | ✅ | ✅ Custom wheel required |

-**Source:** [PaddlePaddle-GPU PyPI](https://pypi.org/project/paddlepaddle-gpu/) - only `manylinux_x86_64` and `win_amd64` wheels available.
+**Source:** [PaddlePaddle-GPU PyPI](https://pypi.org/project/paddlepaddle-gpu/) - only `manylinux_x86_64` and `win_amd64` wheels are available on PyPI. ARM64 wheels must be built from source or downloaded from Gitea packages.

-### Why GPU Doesn't Work on ARM64
+### ARM64 GPU Support

-1. **No prebuilt wheels**: `pip install paddlepaddle-gpu` fails on ARM64 - no compatible wheels exist
-2. **Not a CUDA issue**: The NVIDIA CUDA base images work fine on ARM64 (`nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04`)
-3. **Not a container toolkit issue**: `nvidia-container-toolkit` is installed and functional
-4. **PaddlePaddle limitation**: The Paddle team hasn't compiled GPU wheels for ARM64
+ARM64 GPU support is available but requires custom-built wheels:

-When you run `pip install paddlepaddle-gpu` on ARM64:
-```
-ERROR: No matching distribution found for paddlepaddle-gpu
-```
+1. **No prebuilt PyPI wheels**: `pip install paddlepaddle-gpu` fails on ARM64 - no compatible wheels exist on PyPI
+2. **Custom wheels work**: This project provides Dockerfiles to build ARM64 GPU wheels from source
+3. **CI/CD builds ARM64 GPU images**: Pre-built images are published to the container registry, and pre-built wheels are available from Gitea packages
+
+**To use GPU on ARM64:**
+- Use the pre-built images from the container registry, or
+- Build the wheel locally using `Dockerfile.build-paddle` (see Option 2 below), or
+- Download the wheel from Gitea packages: `wheels/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl`

 ### Options for ARM64 Systems

--- a/src/paddle_ocr/docker-compose.cpu-registry.yml
+++ b/src/paddle_ocr/docker-compose.cpu-registry.yml
@@ -3,7 +3,7 @@

 services:
  ocr-cpu:
-    image: seryus.ddns.net/unir/paddle-ocr-cpu:arm64
+    image: seryus.ddns.net/unir/paddle-ocr-cpu:latest
    container_name: paddle-ocr-cpu-registry
    ports:
      - "8001:8000"
--- a/src/paddle_ocr/docker-compose.gpu-registry.yml
+++ b/src/paddle_ocr/docker-compose.gpu-registry.yml
@@ -5,7 +5,7 @@

 services:
  ocr-gpu:
-    image: seryus.ddns.net/unir/paddle-ocr-gpu:arm64
+    image: seryus.ddns.net/unir/paddle-ocr-gpu:latest
    container_name: paddle-ocr-gpu-registry
    ports:
      - "8002:8000"