2026-01-19 17:35:25 +00:00
9 changed files with 525 additions and 22 deletions
--- a/.gitea/workflows/ci.yaml
+++ b/.gitea/workflows/ci.yaml
@@ -9,6 +9,11 @@ on:
  push:
    branches:
      - main
      - gpu_support
 env:
  PADDLE_VERSION: "3.0.0"
  WHEEL_BASE_URL: "https://seryus.ddns.net/api/packages/unir/generic"
 jobs:
  essential:
@@ -25,7 +30,7 @@ jobs:
          echo "Version: 1.0.${{ gitea.run_number }}" >> $GITHUB_STEP_SUMMARY
          echo "Event: ${{ gitea.event_name }}" >> $GITHUB_STEP_SUMMARY
-  # CPU image: Matrix build for amd64 and arm64 (each pushes as soon as done)
+  # CPU image: Matrix build for amd64 and arm64
  build_cpu:
    runs-on: ubuntu-latest
    needs: essential
@@ -60,6 +65,14 @@ jobs:
            echo "suffix=arm64" >> $GITHUB_OUTPUT
          fi
      # Fetch the pre-built ARM64 CPU wheel into src/paddle_ocr/wheels.
      # Skipped for the amd64 matrix entry.
      - name: Download ARM64 wheel from Gitea packages
        if: matrix.platform == 'linux/arm64'
        run: |
          mkdir -p src/paddle_ocr/wheels
          # --fail makes curl exit non-zero on an HTTP error (e.g. 404) instead
          # of saving the server's error page under the .whl file name.
          curl --fail -L -o src/paddle_ocr/wheels/paddlepaddle-${{ env.PADDLE_VERSION }}-cp311-cp311-linux_aarch64.whl \
            "${{ env.WHEEL_BASE_URL }}/paddlepaddle-cpu-arm64/${{ env.PADDLE_VERSION }}/paddlepaddle-${{ env.PADDLE_VERSION }}-cp311-cp311-linux_aarch64.whl"
          ls -la src/paddle_ocr/wheels/
      - name: Build and push CPU image (${{ matrix.platform }})
        uses: docker/build-push-action@v5
        with:
@@ -71,29 +84,56 @@ jobs:
            ${{ needs.essential.outputs.image_cpu }}:${{ needs.essential.outputs.Version }}-${{ steps.arch.outputs.suffix }}
            ${{ needs.essential.outputs.image_cpu }}:${{ steps.arch.outputs.suffix }}
-  # GPU image: x86_64 only (PaddlePaddle GPU doesn't support ARM64)
+  # GPU image: Matrix build for amd64 and arm64
  build_gpu:
    runs-on: ubuntu-latest
    needs: essential
    strategy:
      matrix:
        platform:
          - linux/amd64
          - linux/arm64
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Gitea Registry
-        run: |
+        uses: docker/login-action@v3
-          echo ${{ secrets.CI_READWRITE }} | docker login \
+        with:
-            -u username \
+          registry: ${{ needs.essential.outputs.repo }}
-            --password-stdin ${{ needs.essential.outputs.repo }}
+          username: username
          password: ${{ secrets.CI_READWRITE }}
-      - name: Build GPU image (x86_64)
+      - name: Get arch suffix
        id: arch
        run: |
-          docker build \
+          if [ "${{ matrix.platform }}" = "linux/amd64" ]; then
-            -f src/paddle_ocr/Dockerfile.gpu \
+            echo "suffix=amd64" >> $GITHUB_OUTPUT
-            -t ${{ needs.essential.outputs.image_gpu }}:${{ needs.essential.outputs.Version }} \
+          else
-            -t ${{ needs.essential.outputs.image_gpu }}:latest \
+            echo "suffix=arm64" >> $GITHUB_OUTPUT
-            src/paddle_ocr/
+          fi
-      - name: Push GPU image
+      - name: Download ARM64 GPU wheel from Gitea packages
        if: matrix.platform == 'linux/arm64'
        run: |
-          docker push ${{ needs.essential.outputs.image_gpu }}:${{ needs.essential.outputs.Version }}
+          mkdir -p src/paddle_ocr/wheels
-          docker push ${{ needs.essential.outputs.image_gpu }}:latest
+          # --fail makes curl exit non-zero on an HTTP error (e.g. 404) instead
+          # of saving the server's error page under the .whl file name.
+          curl --fail -L -o src/paddle_ocr/wheels/paddlepaddle_gpu-${{ env.PADDLE_VERSION }}-cp311-cp311-linux_aarch64.whl \
            "${{ env.WHEEL_BASE_URL }}/paddlepaddle-gpu-arm64/${{ env.PADDLE_VERSION }}/paddlepaddle_gpu-${{ env.PADDLE_VERSION }}-cp311-cp311-linux_aarch64.whl"
          ls -la src/paddle_ocr/wheels/
      # Build Dockerfile.gpu for the current matrix platform and push it under
      # two arch-suffixed tags: "<Version>-<suffix>" and "<suffix>", where the
      # suffix comes from the "arch" step output.
      - name: Build and push GPU image (${{ matrix.platform }})
        uses: docker/build-push-action@v5
        with:
          context: src/paddle_ocr
          file: src/paddle_ocr/Dockerfile.gpu
          platforms: ${{ matrix.platform }}
          push: true
          tags: |
            ${{ needs.essential.outputs.image_gpu }}:${{ needs.essential.outputs.Version }}-${{ steps.arch.outputs.suffix }}
            ${{ needs.essential.outputs.image_gpu }}:${{ steps.arch.outputs.suffix }}
--- a/src/paddle_ocr/Dockerfile.build-paddle
+++ b/src/paddle_ocr/Dockerfile.build-paddle
@@ -91,6 +91,43 @@ RUN sed -i 's/-m64//g' cmake/flags.cmake && \
    find . -name "*.cmake" -exec sed -i 's/-m64//g' {} \; 2>/dev/null || true && \
    echo "Patched -m64 flag for ARM64 compatibility"
 # Patch for ARM64: Install sse2neon to translate x86 SSE intrinsics to ARM NEON
 # sse2neon provides drop-in replacements for x86 SIMD headers
 RUN git clone --depth 1 https://github.com/DLTcollab/sse2neon.git /tmp/sse2neon && \
    mkdir -p /usr/local/include/sse2neon && \
    cp /tmp/sse2neon/sse2neon.h /usr/local/include/sse2neon/ && \
    rm -rf /tmp/sse2neon && \
    echo "Installed sse2neon for x86->ARM NEON translation"
 # Create wrapper headers that use sse2neon for ARM64.
 # One stub is generated per x86 intrinsics header. On non-x86_64 targets the
 # stub includes sse2neon.h; on x86_64 it defers to the compiler's own header
 # through #include_next. The stubs are picked up through the
 # -I/usr/local/include/x86_stubs entry in CMAKE_CXX_FLAGS.
 # "|| exit 1" keeps the fail-fast behaviour of a plain && chain inside the loop.
 RUN mkdir -p /usr/local/include/x86_stubs && \
    for h in immintrin xmmintrin emmintrin pmmintrin smmintrin; do \
        echo "#ifndef __x86_64__" > /usr/local/include/x86_stubs/${h}.h && \
        echo "#include <sse2neon/sse2neon.h>" >> /usr/local/include/x86_stubs/${h}.h && \
        echo "#else" >> /usr/local/include/x86_stubs/${h}.h && \
        echo "#include_next <${h}.h>" >> /usr/local/include/x86_stubs/${h}.h && \
        echo "#endif" >> /usr/local/include/x86_stubs/${h}.h || exit 1; \
    done && \
    echo "Created x86 intrinsic wrapper headers for ARM64 using sse2neon"
 # Install additional Python requirements for building
 RUN pip install -r python/requirements.txt || true
@@ -99,6 +136,7 @@ RUN mkdir -p build
 WORKDIR /build/Paddle/build
 # Configure CMake for ARM64 + CUDA build
 # Note: -Wno-class-memaccess fixes Eigen NEON warning on ARM64
 RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}" && \
    cmake .. \
    -GNinja \
@@ -118,6 +156,7 @@ RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}" && \
    -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
    -DCMAKE_C_COMPILER_LAUNCHER=ccache \
    -DCMAKE_CXX_FLAGS="-Wno-class-memaccess -Wno-error=class-memaccess -I/usr/local/include/x86_stubs" \
    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
 # Build external dependencies first (cacheable layer)
@@ -142,14 +181,28 @@ RUN ninja paddle_python || true
 # Create output directory
 RUN mkdir -p /output
-# Build wheel package
+# Build wheel package - try multiple methods since PaddlePaddle build structure varies
 WORKDIR /build/Paddle
-RUN cd python && python setup.py bdist_wheel || pip wheel . -w dist/
+# The two directory listings are diagnostics only. "|| true" keeps a missing
+# python/ or build/python/ directory from short-circuiting the && chain before
+# any of the wheel build methods below has been tried.
+RUN echo "=== Looking for wheel build method ===" && \
    (ls -la python/ 2>/dev/null || true) && \
    (ls -la build/python/ 2>/dev/null || true) && \
    if [ -f build/python/setup.py ]; then \
        echo "Using build/python/setup.py" && \
        cd build/python && python setup.py bdist_wheel; \
    elif [ -f python/setup.py ]; then \
        echo "Using python/setup.py" && \
        cd python && python setup.py bdist_wheel; \
    else \
        echo "Looking for existing wheel..." && \
        find /build -name "paddlepaddle*.whl" -type f 2>/dev/null; \
    fi
 # Copy wheel to output: collect every paddlepaddle*.whl found under /build
 # into /output, list the result, and fail the image build if no wheel is there.
-RUN cp python/dist/*.whl /output/ 2>/dev/null || \
+RUN find /build -name "paddlepaddle*.whl" -type f -exec cp {} /output/ \; && \
-    cp build/python/dist/*.whl /output/ 2>/dev/null || \
+    ls -la /output/ && \
-    find /build -name "paddlepaddle*.whl" -exec cp {} /output/ \;
+    if [ ! "$(ls -A /output/*.whl 2>/dev/null)" ]; then \
        echo "ERROR: No wheel found!" && exit 1; \
    fi
 # List what was built
 RUN ls -la /output/ && \
--- a/src/paddle_ocr/Dockerfile.build-paddle-cpu
+++ b/src/paddle_ocr/Dockerfile.build-paddle-cpu
@@ -0,0 +1,145 @@
 # syntax=docker/dockerfile:1.4
 # NOTE: the syntax directive above has to stay on the very first line; once
 # any other comment has been read it is treated as an ordinary comment.
 #
 # Dockerfile.build-paddle-cpu - Build PaddlePaddle CPU wheel for ARM64
 #
 # Required because PyPI wheels don't work on ARM64 (x86 SSE instructions).
 #
 # Build time: ~1-2 hours
 # Output: /output/paddlepaddle-*.whl
 #
 # Usage:
 #   docker build -t paddle-builder:cpu-arm64 -f Dockerfile.build-paddle-cpu .
 #   docker run --rm -v ./wheels:/wheels paddle-builder:cpu-arm64
 FROM ubuntu:22.04
 LABEL maintainer="Sergio Jimenez"
 LABEL description="PaddlePaddle CPU wheel builder for ARM64"
 ARG PADDLE_VERSION=v3.0.0
 ARG PYTHON_VERSION=3.11
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1
 ENV CCACHE_DIR=/ccache
 ENV PATH="/usr/lib/ccache:${PATH}"
 # Install build dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
    python${PYTHON_VERSION} \
    python${PYTHON_VERSION}-dev \
    python${PYTHON_VERSION}-venv \
    python3-pip \
    build-essential \
    cmake \
    ninja-build \
    git \
    wget \
    curl \
    pkg-config \
    ccache \
    libssl-dev \
    libffi-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    liblzma-dev \
    libncurses5-dev \
    libncursesw5-dev \
    libgflags-dev \
    libgoogle-glog-dev \
    libprotobuf-dev \
    protobuf-compiler \
    patchelf \
    libopenblas-dev \
    liblapack-dev \
    swig \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
 # Setup ccache
 RUN mkdir -p /usr/lib/ccache && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/gcc && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/g++ && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/cc && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/c++
 RUN python -m pip install --upgrade pip setuptools wheel && \
    python -m pip install numpy protobuf pyyaml requests packaging astor decorator paddle-bfloat opt-einsum
 WORKDIR /build
 RUN git clone --depth 1 --branch ${PADDLE_VERSION} https://github.com/PaddlePaddle/Paddle.git
 WORKDIR /build/Paddle
 # Patch -m64 flag (x86_64 specific)
 RUN sed -i 's/-m64//g' cmake/flags.cmake && \
    sed -i 's/-m64//g' CMakeLists.txt 2>/dev/null || true && \
    find . -name "*.cmake" -exec sed -i 's/-m64//g' {} \; 2>/dev/null || true
 # Install sse2neon for x86 SSE -> ARM NEON translation
 RUN git clone --depth 1 https://github.com/DLTcollab/sse2neon.git /tmp/sse2neon && \
    mkdir -p /usr/local/include/sse2neon && \
    cp /tmp/sse2neon/sse2neon.h /usr/local/include/sse2neon/ && \
    rm -rf /tmp/sse2neon
 # Generate one shim per x86 intrinsics header: sse2neon on non-x86_64 targets,
 # the compiler's own header (via #include_next) on x86_64.
 RUN mkdir -p /usr/local/include/x86_stubs && \
    for hdr in immintrin xmmintrin emmintrin pmmintrin smmintrin; do \
        { echo "#ifndef __x86_64__"; \
          echo "#include <sse2neon/sse2neon.h>"; \
          echo "#else"; \
          echo "#include_next <${hdr}.h>"; \
          echo "#endif"; } > "/usr/local/include/x86_stubs/${hdr}.h"; \
    done
 RUN pip install -r python/requirements.txt || true
 RUN mkdir -p build
 WORKDIR /build/Paddle/build
 # Configure for CPU-only build
 RUN cmake .. \
    -GNinja \
    -DCMAKE_BUILD_TYPE=Release \
    -DPY_VERSION=${PYTHON_VERSION} \
    -DWITH_GPU=OFF \
    -DWITH_TESTING=OFF \
    -DWITH_DISTRIBUTE=OFF \
    -DWITH_NCCL=OFF \
    -DWITH_MKL=OFF \
    -DWITH_MKLDNN=OFF \
    -DON_INFER=OFF \
    -DWITH_PYTHON=ON \
    -DWITH_AVX=OFF \
    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
    -DCMAKE_C_COMPILER_LAUNCHER=ccache \
    -DCMAKE_CXX_FLAGS="-Wno-class-memaccess -Wno-error=class-memaccess -I/usr/local/include/x86_stubs"
 # Build external dependencies
 RUN --mount=type=cache,target=/ccache \
    ninja extern_gflags extern_glog extern_protobuf extern_zlib extern_eigen3
 RUN --mount=type=cache,target=/ccache \
    ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo extern_xbyak
 # Build PaddlePaddle
 RUN --mount=type=cache,target=/ccache \
    ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4
 RUN ninja paddle_python || true
 RUN mkdir -p /output
 WORKDIR /build/Paddle
 RUN if [ -f build/python/setup.py ]; then \
        cd build/python && python setup.py bdist_wheel; \
    elif [ -f python/setup.py ]; then \
        cd python && python setup.py bdist_wheel; \
    fi
 # Collect the wheel and fail the image build when none was produced, so the
 # problem surfaces here rather than later, when the container's CMD runs
 # "cp /output/*.whl /wheels/" against an empty directory.
 RUN find /build -name "paddlepaddle*.whl" -type f -exec cp {} /output/ \; && \
    ls -la /output/ && \
    if ! ls /output/*.whl >/dev/null 2>&1; then \
        echo "ERROR: No wheel found!" && exit 1; \
    fi
 CMD ["sh", "-c", "cp /output/*.whl /wheels/ && ls -la /wheels/"]
--- a/src/paddle_ocr/Dockerfile.cpu
+++ b/src/paddle_ocr/Dockerfile.cpu
@@ -29,7 +29,20 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*
-# Install Python dependencies
+# Copy local wheels directory (may contain ARM64 wheel from build-paddle-cpu)
 COPY wheels/ /tmp/wheels/
 # Install paddlepaddle: prefer local wheel (ARM64), fallback to PyPI (x86_64).
 # PADDLE_VERSION selects the PyPI fallback release. The default keeps the
 # previous pin; override with --build-arg PADDLE_VERSION=<version>.
 ARG PADDLE_VERSION=3.0.0
 RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
        echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
        pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
    else \
        echo "=== Installing PaddlePaddle from PyPI (x86_64) ===" && \
        pip install --no-cache-dir "paddlepaddle==${PADDLE_VERSION}"; \
    fi && \
    rm -rf /tmp/wheels
 # Install remaining Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
--- a/src/paddle_ocr/Dockerfile.gpu
+++ b/src/paddle_ocr/Dockerfile.gpu
@@ -38,6 +38,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.11 /usr/bin/python
 # Fix cuDNN library path for ARM64 only (PaddlePaddle looks in /usr/local/cuda/lib64)
 # x86_64 doesn't need this - PyPI wheel handles paths correctly
 # NOTE(review): the unversioned libcudnn.so link below is pinned to
 # libcudnn.so.9; confirm that matches the cuDNN major version in the base image.
 RUN if [ "$(uname -m)" = "aarch64" ]; then \
        mkdir -p /usr/local/cuda/lib64 && \
        ln -sf /usr/lib/aarch64-linux-gnu/libcudnn*.so* /usr/local/cuda/lib64/ && \
        ln -sf /usr/lib/aarch64-linux-gnu/libcudnn.so.9 /usr/local/cuda/lib64/libcudnn.so && \
        ldconfig; \
    fi
 # Copy local wheels directory (may contain ARM64 wheel from build-paddle)
 COPY wheels/ /tmp/wheels/
--- a/src/paddle_ocr/README.md
+++ b/src/paddle_ocr/README.md
@@ -65,10 +65,13 @@ docker compose up ocr-cpu
 | `paddle_ocr_tuning_rest.py` | FastAPI REST service |
 | `dataset_manager.py` | Dataset loader |
 | `test.py` | API test client |
-| `Dockerfile.cpu` | CPU-only image (multi-arch) |
+| `Dockerfile.cpu` | CPU-only image (x86_64 + ARM64 with local wheel) |
 | `Dockerfile.gpu` | GPU/CUDA image (x86_64 + ARM64 with local wheel) |
 | `Dockerfile.build-paddle` | PaddlePaddle GPU wheel builder for ARM64 |
 | `Dockerfile.build-paddle-cpu` | PaddlePaddle CPU wheel builder for ARM64 |
 | `docker-compose.yml` | Service orchestration |
 | `docker-compose.cpu-registry.yml` | Pull CPU image from registry |
 | `docker-compose.gpu-registry.yml` | Pull GPU image from registry |
 | `wheels/` | Local PaddlePaddle wheels (created by build-paddle and build-paddle-cpu) |
 ## API Endpoints
@@ -461,3 +464,114 @@ pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/
 pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
 ```
 The Dockerfile.gpu handles this automatically.
 ## CI/CD Pipeline
 The project includes a Gitea Actions workflow (`.gitea/workflows/ci.yaml`) for automated builds.
 ### What CI Builds
 | Image | Architecture | Source |
 |-------|--------------|--------|
 | `paddle-ocr-cpu:amd64` | amd64 | PyPI paddlepaddle |
 | `paddle-ocr-cpu:arm64` | arm64 | Pre-built wheel from Gitea packages |
 | `paddle-ocr-gpu:amd64` | amd64 | PyPI paddlepaddle-gpu |
 | `paddle-ocr-gpu:arm64` | arm64 | Pre-built wheel from Gitea packages |
 ### ARM64 Wheel Workflow
 Since PyPI wheels don't work on ARM64 (x86 SSE instructions), the ARM64 wheels must be built from source using sse2neon. They are:
 1. Built manually on an ARM64 machine (one-time)
 2. Uploaded to Gitea generic packages
 3. Downloaded by CI when building ARM64 images
 #### Step 1: Build ARM64 Wheels (One-time, on ARM64 machine)
 ```bash
 cd src/paddle_ocr
 # Build GPU wheel (requires NVIDIA GPU, takes 1-2 hours)
 sudo docker build -t paddle-builder:gpu-arm64 -f Dockerfile.build-paddle .
 sudo docker run --rm -v ./wheels:/wheels paddle-builder:gpu-arm64
 # Build CPU wheel (no GPU required, takes 1-2 hours)
 sudo docker build -t paddle-builder:cpu-arm64 -f Dockerfile.build-paddle-cpu .
 sudo docker run --rm -v ./wheels:/wheels paddle-builder:cpu-arm64
 # Verify wheels were created
 ls -la wheels/paddlepaddle*.whl
 # paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl (GPU)
 # paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl (CPU)
 ```
 #### Step 2: Upload Wheels to Gitea Packages
 ```bash
 export GITEA_TOKEN="your-token-here"
 # Upload GPU wheel
 curl -X PUT \
  -H "Authorization: token $GITEA_TOKEN" \
  --upload-file wheels/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl \
  "https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-gpu-arm64/3.0.0/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl"
 # Upload CPU wheel
 curl -X PUT \
  -H "Authorization: token $GITEA_TOKEN" \
  --upload-file wheels/paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl \
  "https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-cpu-arm64/3.0.0/paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl"
 ```
 Wheels available at:
 ```
 https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-gpu-arm64/3.0.0/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl
 https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-cpu-arm64/3.0.0/paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl
 ```
 #### Step 3: CI Builds Images
 CI automatically:
 1. Downloads ARM64 wheels from Gitea packages (for arm64 builds only)
 2. Builds both CPU and GPU images for amd64 and arm64
 3. Pushes to registry with arch-specific tags
 ### Required CI Secrets
 Configure these in Gitea repository settings:
 | Secret | Description |
 |--------|-------------|
 | `CI_READWRITE` | Gitea token with registry read/write access |
 ### Manual Image Push
 ```bash
 # Login to registry
 docker login seryus.ddns.net
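 # Build and push CPU (multi-arch)
 # NOTE: Dockerfile.cpu installs any paddlepaddle*.whl found in wheels/ on every
 # platform, so wheels/ must not contain the ARM64 wheel for the amd64 leg of
 # this build. CI avoids the problem by building each architecture separately.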
 docker buildx build -f Dockerfile.cpu \
  --platform linux/amd64,linux/arm64 \
  -t seryus.ddns.net/unir/paddle-ocr-api:cpu \
  --push .
 # Build and push GPU (x86_64)
 docker build -f Dockerfile.gpu -t seryus.ddns.net/unir/paddle-ocr-api:gpu-amd64 .
 docker push seryus.ddns.net/unir/paddle-ocr-api:gpu-amd64
 # Build and push GPU (ARM64) - requires wheel in wheels/
 docker buildx build -f Dockerfile.gpu \
  --platform linux/arm64 \
  -t seryus.ddns.net/unir/paddle-ocr-api:gpu-arm64 \
  --push .
 ```
 ### Updating the ARM64 Wheels
 When PaddlePaddle releases a new version:
 1. Update `PADDLE_VERSION` in `Dockerfile.build-paddle` and `Dockerfile.build-paddle-cpu`
 2. Rebuild both wheels on an ARM64 machine
 3. Upload to Gitea packages with new version
 4. Update `PADDLE_VERSION` in `.gitea/workflows/ci.yaml` and the PyPI fallback version (`paddlepaddle==3.0.0`) in `Dockerfile.cpu`
--- a/src/paddle_ocr/docker-compose.cpu-registry.yml
+++ b/src/paddle_ocr/docker-compose.cpu-registry.yml
@@ -0,0 +1,25 @@
 # docker-compose.cpu-registry.yml - Pull CPU image from registry
 # Usage: docker compose -f docker-compose.cpu-registry.yml up
 #
 # The image tag defaults to the arm64 build. To pull the amd64 tag instead:
 #   IMAGE_ARCH=amd64 docker compose -f docker-compose.cpu-registry.yml up
 services:
  ocr-cpu:
    image: seryus.ddns.net/unir/paddle-ocr-cpu:${IMAGE_ARCH:-arm64}
    container_name: paddle-ocr-cpu-registry
    ports:
      - "8001:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
 volumes:
  paddlex-cache:
    name: paddlex-model-cache
--- a/src/paddle_ocr/docker-compose.gpu-registry.yml
+++ b/src/paddle_ocr/docker-compose.gpu-registry.yml
@@ -0,0 +1,35 @@
 # docker-compose.gpu-registry.yml - Pull GPU image from registry
 # Usage: docker compose -f docker-compose.gpu-registry.yml up
 #
 # The image tag defaults to the arm64 build. To pull the amd64 tag instead:
 #   IMAGE_ARCH=amd64 docker compose -f docker-compose.gpu-registry.yml up
 #
 # Requires: NVIDIA GPU + nvidia-container-toolkit installed
 services:
  ocr-gpu:
    image: seryus.ddns.net/unir/paddle-ocr-gpu:${IMAGE_ARCH:-arm64}
    container_name: paddle-ocr-gpu-registry
    ports:
      - "8002:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
 volumes:
  paddlex-cache:
    name: paddlex-model-cache
--- a/src/paddle_ocr/scripts/upload-wheel.sh
+++ b/src/paddle_ocr/scripts/upload-wheel.sh
@@ -0,0 +1,69 @@
 #!/bin/bash
 # Upload a PaddlePaddle ARM64 wheel to Gitea generic packages
 #
 # The target package is chosen from the wheel filename:
 #   paddlepaddle_gpu-<version>-*.whl -> paddlepaddle-gpu-arm64
 #   paddlepaddle-<version>-*.whl     -> paddlepaddle-cpu-arm64
 #
 # Usage:
 #   ./scripts/upload-wheel.sh [wheel_file] [token]
 #
 # Environment variables (alternative to arguments):
 #   GITEA_TOKEN - Gitea API token
 #   WHEEL_FILE  - Path to wheel file (default: first match in wheels/)
 #
 # Exit status: 0 on upload (HTTP 200/201) or when the version already exists
 # (HTTP 409); 1 on missing input, unrecognised filename or any other HTTP code.
 set -e
 GITEA_URL="https://seryus.ddns.net"
 GITEA_ORG="unir"
 # Get wheel file: argument, then $WHEEL_FILE, then the first wheel in wheels/
 WHEEL_FILE="${1:-${WHEEL_FILE:-$(ls wheels/paddlepaddle*.whl 2>/dev/null | head -1)}}"
 if [ -z "$WHEEL_FILE" ] || [ ! -f "$WHEEL_FILE" ]; then
    echo "Error: No wheel file found"
    echo "Usage: $0 [wheel_file] [token]"
    echo "  or set WHEEL_FILE environment variable"
    exit 1
 fi
 # Get token
 TOKEN="${2:-${GITEA_TOKEN}}"
 if [ -z "$TOKEN" ]; then
    echo "Error: No token provided"
    echo "Usage: $0 [wheel_file] [token]"
    echo "  or set GITEA_TOKEN environment variable"
    exit 1
 fi
 # Pick the package from the wheel flavour. The GPU pattern must come first
 # because "paddlepaddle-*" would also match "paddlepaddle-gpu-*".
 # Formats: paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl (GPU)
 #          paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl     (CPU)
 FILENAME=$(basename "$WHEEL_FILE")
 case "$FILENAME" in
    paddlepaddle[_-]gpu-*)
        PACKAGE_NAME="paddlepaddle-gpu-arm64"
        ;;
    paddlepaddle-*)
        PACKAGE_NAME="paddlepaddle-cpu-arm64"
        ;;
    *)
        echo "Error: Not a PaddlePaddle wheel filename: $FILENAME"
        exit 1
        ;;
 esac
 # Extract version from wheel filename.
 # sed -n with the p flag prints only on a match, so an unexpected filename
 # yields an empty VERSION instead of echoing the whole name back.
 VERSION=$(echo "$FILENAME" | sed -nE 's/^paddlepaddle([_-]gpu)?-([0-9][0-9A-Za-z.]*)-.*\.whl$/\2/p')
 if [ -z "$VERSION" ]; then
    echo "Error: Could not extract version from filename: $FILENAME"
    exit 1
 fi
 UPLOAD_URL="$GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$FILENAME"
 echo "Uploading wheel to Gitea packages..."
 echo "  File: $WHEEL_FILE"
 echo "  Package: $PACKAGE_NAME"
 echo "  Version: $VERSION"
 echo "  URL: $UPLOAD_URL"
 # Keep the server response in a private temp file and remove it on exit
 RESPONSE_FILE=$(mktemp)
 trap 'rm -f "$RESPONSE_FILE"' EXIT
 # Upload using PUT request
 HTTP_CODE=$(curl -sS -w "%{http_code}" -o "$RESPONSE_FILE" \
    -X PUT \
    -H "Authorization: token $TOKEN" \
    -H "Content-Type: application/octet-stream" \
    --data-binary "@$WHEEL_FILE" \
    "$UPLOAD_URL")
 if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
    echo "Success! Wheel uploaded."
    echo "Download URL: $UPLOAD_URL"
 elif [ "$HTTP_CODE" = "409" ]; then
    echo "Package version already exists (HTTP 409)"
    echo "To update, delete the existing version first in Gitea UI"
 else
    echo "Error: Upload failed with HTTP $HTTP_CODE"
    cat "$RESPONSE_FILE"
    exit 1
 fi