src/paddle_ocr/Dockerfile.build-paddle

# Dockerfile.build-paddle - Build PaddlePaddle GPU wheel for ARM64
#
# This Dockerfile compiles PaddlePaddle from source with CUDA support for ARM64.
# The resulting wheel can be used in Dockerfile.gpu for ARM64 GPU acceleration.
#
# Build time: 2-4 hours depending on hardware
# Output: /output/paddlepaddle_gpu-*.whl
#
# Usage:
#   docker compose run build-paddle
#   # or
#   docker build -f Dockerfile.build-paddle -t paddle-builder .
#   docker run -v ./wheels:/wheels paddle-builder

# CUDA 12.4.1 + cuDNN development image (Ubuntu 22.04) used as the build base.
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

LABEL maintainer="Sergio Jimenez" \
      description="PaddlePaddle GPU wheel builder for ARM64"

# Build arguments
#   PADDLE_VERSION - git branch/tag of PaddlePaddle to check out
#   PYTHON_VERSION - Python minor version installed and passed to CMake
ARG PADDLE_VERSION=v3.0.0
ARG PYTHON_VERSION=3.11

# DEBIAN_FRONTEND is only needed while packages are installed during the build,
# so it is declared as ARG (visible to RUN steps) instead of ENV, which would
# leak it into the environment of every container started from this image.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment
ENV PYTHONUNBUFFERED=1

# System packages required to compile PaddlePaddle, grouped by purpose and
# sorted alphabetically within each group. The apt package index is removed in
# the same layer, and `python` / `python3` are pointed at the selected
# interpreter version.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        # Python interpreter, headers, venv support and pip
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
        python3-pip \
        # Compilers and build tooling
        build-essential \
        cmake \
        curl \
        git \
        ninja-build \
        patchelf \
        pkg-config \
        swig \
        wget \
        # Development libraries
        libbz2-dev \
        libffi-dev \
        libgflags-dev \
        libgoogle-glog-dev \
        liblzma-dev \
        libncurses5-dev \
        libncursesw5-dev \
        libprotobuf-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        protobuf-compiler \
        zlib1g-dev \
        # Linear algebra backends
        liblapack-dev \
        libopenblas-dev \
    && rm -rf /var/lib/apt/lists/* \
    && for alias in python python3; do \
           ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/${alias}; \
       done

# Upgrade the packaging toolchain, then install the Python packages needed by
# the build. --no-cache-dir keeps pip's download cache out of the image layer.
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel \
    && python -m pip install --no-cache-dir \
        numpy \
        protobuf \
        pyyaml \
        requests \
        packaging \
        astor \
        decorator \
        paddle-bfloat \
        opt-einsum

# Everything is built under /build.
WORKDIR /build

# Shallow checkout (no history) of the requested PaddlePaddle branch/tag
# into /build/Paddle.
RUN git clone \
        --branch ${PADDLE_VERSION} \
        --depth 1 \
        https://github.com/PaddlePaddle/Paddle.git \
        Paddle

# Subsequent steps run from the root of the checkout.
WORKDIR /build/Paddle

# Install the Python requirements shipped with the Paddle checkout.
# This step is best-effort: a failure does not abort the build, but it is
# reported on stderr instead of being silently discarded. `python -m pip` is
# used so the packages land in the same interpreter as the earlier pip step.
RUN python -m pip install --no-cache-dir -r python/requirements.txt \
    || echo "WARNING: installing python/requirements.txt failed; continuing with the packages already installed" >&2

# Out-of-tree build directory; WORKDIR creates it when it does not exist yet.
WORKDIR /build/Paddle/build

# CMake configuration for the ARM64 + CUDA build.
#
# CUDA_ARCH selects the single GPU architecture that is compiled. It is meant
# to be detected on the host and supplied through docker-compose. To look up
# the host value:
#   nvidia-smi --query-gpu=compute_cap --format=csv,noheader
# Mapping examples: 9.0 -> "90"; 12.1 -> "90" (Hopper, the closest supported).
#
# Compiling one architecture takes ~30-60 min, versus 2-4 hours for all of them.

ARG CUDA_ARCH=90
RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}"

RUN cmake .. \
    # Generator and build type
    -GNinja \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
    # Python bindings
    -DWITH_PYTHON=ON \
    -DPY_VERSION=${PYTHON_VERSION} \
    # GPU target: exactly the architecture given by CUDA_ARCH
    -DWITH_GPU=ON \
    -DCUDA_ARCH_NAME=Manual \
    -DCUDA_ARCH_BIN="${CUDA_ARCH}" \
    -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
    # Features switched off for this build
    -DON_INFER=OFF \
    -DWITH_AVX=OFF \
    -DWITH_DISTRIBUTE=OFF \
    -DWITH_MKL=OFF \
    -DWITH_MKLDNN=OFF \
    -DWITH_NCCL=OFF \
    -DWITH_TESTING=OFF

# Build PaddlePaddle (this takes 2-4 hours).
#
# The build is attempted up to three times with decreasing parallelism:
# all cores, then half of them, then at most 4 jobs. Each retry continues from
# the targets that already finished.
#   - `half` is clamped to >= 1: on a single-core builder $(nproc)/2 is 0, and
#     ninja interprets -j0 as "unlimited jobs".
#   - `low` is capped at `half` so the last attempt never uses more jobs than
#     the one before it (a plain -j4 would on machines with fewer than 8 cores).
RUN jobs="$(nproc)" \
    && half=$(( jobs / 2 )) \
    && if [ "${half}" -lt 1 ]; then half=1; fi \
    && low=4 \
    && if [ "${low}" -gt "${half}" ]; then low="${half}"; fi \
    && { ninja -j"${jobs}" || ninja -j"${half}" || ninja -j"${low}"; }

# Build the Python wheel
WORKDIR /build/Paddle/build
RUN ninja paddle_python

# Create the directory that will hold the finished wheel(s).
RUN mkdir -p /output

# Build the wheel package from the python/ directory of the checkout.
# NOTE(review): this assumes python/setup.py exists in the source tree at this
# point - confirm for the selected PADDLE_VERSION.
WORKDIR /build/Paddle/python
RUN python setup.py bdist_wheel

# Back to the checkout root; the paths below are relative to it.
WORKDIR /build/Paddle

# Copy the wheel to /output. python/dist is preferred, then build/python/dist;
# if neither holds a wheel, every wheel found under /build is collected.
RUN cp python/dist/*.whl /output/ 2>/dev/null \
    || cp build/python/dist/*.whl /output/ 2>/dev/null \
    || { echo "Wheel location may vary, checking build artifacts..." \
         && find /build -name "*.whl" -type f -exec cp {} /output/ \; ; }

# Fail the image build when no wheel was produced, instead of shipping an
# image with an empty /output, then list what was built.
RUN { ls /output/*.whl >/dev/null 2>&1 \
      || { echo "ERROR: no wheel file was produced under /build" >&2; exit 1; }; } \
    && ls -la /output/ \
    && echo "=== Build complete ===" \
    && echo "Wheel files:" \
    && find /build -name "*.whl" -type f 2>/dev/null

# Default command: copy the wheel to the volume mounted at /wheels.
#
# The script first verifies that /wheels exists (i.e. a host directory was
# mounted there). It then copies the wheel(s) from /output; only when that
# fails does it fall back to copying every wheel found under /build. An
# explicit if/else is used because in an `a && b || c && d` chain the trailing
# `&& d` part also runs after the success path.
CMD ["sh", "-c", "[ -d /wheels ] || { echo 'ERROR: /wheels does not exist; mount a host directory there (-v ./wheels:/wheels)' >&2; exit 1; }; if cp /output/*.whl /wheels/ 2>/dev/null; then echo 'Wheel copied to /wheels/'; else echo 'No wheel found in /output, checking other locations...'; find /build -name '*.whl' -exec cp {} /wheels/ \\; ; fi; ls -la /wheels/"]
gpu dgx 2026-01-17 10:46:36 +01:00			`# Dockerfile.build-paddle - Build PaddlePaddle GPU wheel for ARM64`
			`#`
			`# This Dockerfile compiles PaddlePaddle from source with CUDA support for ARM64.`
			`# The resulting wheel can be used in Dockerfile.gpu for ARM64 GPU acceleration.`
			`#`
			`# Build time: 2-4 hours depending on hardware`
			`# Output: /output/paddlepaddle_gpu-*.whl`
			`#`
			`# Usage:`
			`# docker compose run build-paddle`
			`# # or`
			`# docker build -f Dockerfile.build-paddle -t paddle-builder .`
			`# docker run -v ./wheels:/output paddle-builder`

			`FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04`

			`LABEL maintainer="Sergio Jimenez"`
			`LABEL description="PaddlePaddle GPU wheel builder for ARM64"`

			`# Build arguments`
			`ARG PADDLE_VERSION=v3.0.0`
			`ARG PYTHON_VERSION=3.11`

			`# Environment setup`
			`ENV DEBIAN_FRONTEND=noninteractive`
			`ENV PYTHONUNBUFFERED=1`

			`# Install build dependencies`
			`RUN apt-get update && apt-get install -y --no-install-recommends \`
			`# Python`
			`python${PYTHON_VERSION} \`
			`python${PYTHON_VERSION}-dev \`
			`python${PYTHON_VERSION}-venv \`
			`python3-pip \`
			`# Build tools`
			`build-essential \`
			`cmake \`
			`ninja-build \`
			`git \`
			`wget \`
			`curl \`
			`pkg-config \`
			`# Libraries`
			`libssl-dev \`
			`libffi-dev \`
			`zlib1g-dev \`
			`libbz2-dev \`
			`libreadline-dev \`
			`libsqlite3-dev \`
			`liblzma-dev \`
			`libncurses5-dev \`
			`libncursesw5-dev \`
			`libgflags-dev \`
			`libgoogle-glog-dev \`
			`libprotobuf-dev \`
			`protobuf-compiler \`
			`patchelf \`
			`# Additional dependencies for Paddle`
			`libopenblas-dev \`
			`liblapack-dev \`
			`swig \`
			`&& rm -rf /var/lib/apt/lists/* \`
			`&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \`
			`&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3`

			`# Upgrade pip and install Python build dependencies`
			`RUN python -m pip install --upgrade pip setuptools wheel \`
			`&& python -m pip install \`
			`numpy \`
			`protobuf \`
			`pyyaml \`
			`requests \`
			`packaging \`
			`astor \`
			`decorator \`
			`paddle-bfloat \`
			`opt-einsum`

			`WORKDIR /build`

			`# Clone PaddlePaddle repository`
			`RUN git clone --depth 1 --branch ${PADDLE_VERSION} \`
			`https://github.com/PaddlePaddle/Paddle.git`

			`WORKDIR /build/Paddle`

			`# Install additional Python requirements for building`
			`RUN pip install -r python/requirements.txt \|\| true`

			`# Create build directory`
			`RUN mkdir -p build`
			`WORKDIR /build/Paddle/build`

			`# Configure CMake for ARM64 + CUDA build`
docker gpu arm update 2026-01-17 11:04:25 +01:00			`#`
			`# CUDA_ARCH is auto-detected from host GPU and passed via docker-compose.`
			`# To detect: nvidia-smi --query-gpu=compute_cap --format=csv,noheader`
			`# Example: 12.1 -> use "90" (Hopper, closest supported), 9.0 -> use "90"`
			`#`
			`# Build time: ~30-60 min with single arch vs 2-4 hours with all archs`

			`ARG CUDA_ARCH=90`
			`RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}"`

gpu dgx 2026-01-17 10:46:36 +01:00			`RUN cmake .. \`
			`-GNinja \`
			`-DCMAKE_BUILD_TYPE=Release \`
			`-DPY_VERSION=${PYTHON_VERSION} \`
			`-DWITH_GPU=ON \`
			`-DWITH_TESTING=OFF \`
			`-DWITH_DISTRIBUTE=OFF \`
			`-DWITH_NCCL=OFF \`
			`-DWITH_MKL=OFF \`
			`-DWITH_MKLDNN=OFF \`
			`-DON_INFER=OFF \`
			`-DWITH_PYTHON=ON \`
			`-DWITH_AVX=OFF \`
docker gpu arm update 2026-01-17 11:04:25 +01:00			`-DCUDA_ARCH_NAME=Manual \`
			`-DCUDA_ARCH_BIN="${CUDA_ARCH}" \`
			`-DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \`
gpu dgx 2026-01-17 10:46:36 +01:00			`-DCMAKE_EXPORT_COMPILE_COMMANDS=ON`

			`# Build PaddlePaddle (this takes 2-4 hours)`
			`RUN ninja -j$(nproc) \|\| ninja -j$(($(nproc)/2)) \|\| ninja -j4`

			`# Build the Python wheel`
			`WORKDIR /build/Paddle/build`
			`RUN ninja paddle_python`

			`# Create output directory and copy wheel`
			`RUN mkdir -p /output`

			`# The wheel should be in python/dist/`
			`WORKDIR /build/Paddle`

			`# Build wheel package`
			`RUN cd python && python setup.py bdist_wheel`

			`# Copy wheel to output`
			`RUN cp python/dist/*.whl /output/ 2>/dev/null \|\| \`
			`cp build/python/dist/*.whl /output/ 2>/dev/null \|\| \`
			`echo "Wheel location may vary, checking build artifacts..."`

			`# List what was built`
			`RUN ls -la /output/ && \`
			`echo "=== Build complete ===" && \`
			`echo "Wheel files:" && \`
			`find /build -name "*.whl" -type f 2>/dev/null`

			`# Default command: copy wheel to mounted volume`
			`CMD ["sh", "-c", "cp /output/*.whl /wheels/ 2>/dev/null && echo 'Wheel copied to /wheels/' && ls -la /wheels/ \|\| echo 'No wheel found in /output, checking other locations...' && find /build -name '*.whl' -exec cp {} /wheels/ \\; && ls -la /wheels/"]`