# syntax=docker/dockerfile:1.4
# NOTE: the syntax directive above must stay on the very first line; a parser
# directive that appears after any comment or instruction is treated as a
# plain comment and ignored.
#
# Dockerfile.build-paddle - Build PaddlePaddle GPU wheel for ARM64
#
# This Dockerfile compiles PaddlePaddle from source with CUDA support for ARM64.
# The resulting wheel can be used in Dockerfile.gpu for ARM64 GPU acceleration.
#
# Build time: ~1-2 hours with caching, 2-4 hours first build
# Output: /output/paddlepaddle_gpu-*.whl
#
# Usage:
#   CUDA_ARCH=90 docker compose --profile build run --rm build-paddle
#
# Features:
#   - ccache for compiler caching (survives rebuilds)
#   - Split build stages for better layer caching
#   - ARM64 -m64 patch applied automatically

FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

LABEL maintainer="Sergio Jimenez" \
      description="PaddlePaddle GPU wheel builder for ARM64"

# Build arguments
#   PADDLE_VERSION - git branch/tag of PaddlePaddle to clone
#   PYTHON_VERSION - Python interpreter version installed from apt and passed to CMake
#   CUDA_ARCH      - CUDA compute capability (e.g. 90 -> sm_90) to compile for
ARG PADDLE_VERSION=v3.0.0
ARG PYTHON_VERSION=3.11
ARG CUDA_ARCH=90

# Build-time only: keeps apt non-interactive without leaking into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive

# Environment setup.
# /usr/lib/ccache is prepended to PATH so compiler names resolve to the ccache
# symlinks created below; CCACHE_DIR matches the cache mounts used by the
# ninja steps.
ENV PYTHONUNBUFFERED=1 \
    CCACHE_DIR=/ccache \
    PATH="/usr/lib/ccache:${PATH}"

# Install build dependencies + ccache, then make `python`/`python3` point at
# the requested interpreter. ca-certificates is listed explicitly because
# --no-install-recommends is in effect and the HTTPS git clone below needs it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ccache \
        cmake \
        curl \
        git \
        libbz2-dev \
        libffi-dev \
        libgflags-dev \
        libgoogle-glog-dev \
        liblapack-dev \
        liblzma-dev \
        libncurses5-dev \
        libncursesw5-dev \
        libopenblas-dev \
        libprotobuf-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        ninja-build \
        patchelf \
        pkg-config \
        protobuf-compiler \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
        python3-pip \
        swig \
        wget \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3

# Setup ccache symlinks for CUDA and the host compilers
RUN mkdir -p /usr/lib/ccache \
    && for tool in nvcc gcc g++ cc c++; do \
           ln -sf /usr/bin/ccache "/usr/lib/ccache/${tool}" || exit 1; \
       done

# Upgrade pip and install Python build dependencies
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel \
    && python -m pip install --no-cache-dir \
        numpy protobuf pyyaml requests packaging astor decorator paddle-bfloat opt-einsum

WORKDIR /build

# Clone PaddlePaddle repository (shallow clone of the requested branch/tag)
RUN git clone --depth 1 --branch ${PADDLE_VERSION} https://github.com/PaddlePaddle/Paddle.git

WORKDIR /build/Paddle

# Patch for ARM64: Remove -m64 flag (x86_64 specific, causes build failure on aarch64).
# The find pass covers every *.cmake file (including cmake/flags.cmake); the
# top-level CMakeLists.txt is patched only if present. Genuine sed errors now
# fail the build instead of being swallowed by `|| true`.
RUN find . -type f -name "*.cmake" -exec sed -i 's/-m64//g' {} + \
    && if [ -f CMakeLists.txt ]; then sed -i 's/-m64//g' CMakeLists.txt; fi \
    && echo "Patched -m64 flag for ARM64 compatibility"

# Install additional Python requirements for building.
# Deliberately best-effort, but a failure is reported instead of hidden.
RUN python -m pip install --no-cache-dir -r python/requirements.txt \
    || echo "WARNING: could not install python/requirements.txt; continuing" >&2

# Build directory (WORKDIR creates it if missing)
WORKDIR /build/Paddle/build

# Configure CMake for ARM64 + CUDA build
RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}" \
    && cmake .. \
        -GNinja \
        -DCMAKE_BUILD_TYPE=Release \
        -DPY_VERSION=${PYTHON_VERSION} \
        -DWITH_GPU=ON \
        -DWITH_TESTING=OFF \
        -DWITH_DISTRIBUTE=OFF \
        -DWITH_NCCL=OFF \
        -DWITH_MKL=OFF \
        -DWITH_MKLDNN=OFF \
        -DON_INFER=OFF \
        -DWITH_PYTHON=ON \
        -DWITH_AVX=OFF \
        -DCUDA_ARCH_NAME=Manual \
        -DCUDA_ARCH_BIN="${CUDA_ARCH}" \
        -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON

# Build external dependencies first (cacheable layer)
RUN --mount=type=cache,target=/ccache \
    ninja extern_gflags extern_glog extern_protobuf extern_zlib extern_eigen3

# Build flashattn (heaviest dependency, separate layer for caching)
RUN --mount=type=cache,target=/ccache \
    ninja extern_flashattn

# Build remaining external dependencies
RUN --mount=type=cache,target=/ccache \
    ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml \
          extern_cryptopp extern_warpctc extern_warprnnt extern_gloo extern_xbyak

# Build main PaddlePaddle (with ccache, fallback to fewer jobs if OOM).
# The half-parallelism retry is clamped to >= 1: on a single-CPU builder
# nproc/2 is 0, and `ninja -j0` means unlimited jobs - the opposite of what an
# out-of-memory fallback wants.
RUN --mount=type=cache,target=/ccache \
    jobs="$(nproc)" \
    && half="$(( jobs / 2 ))" \
    && if [ "${half}" -lt 1 ]; then half=1; fi \
    && { ninja -j"${jobs}" || ninja -j"${half}" || ninja -j4; }

# Build the Python wheel target. Best-effort (a missing wheel is caught by the
# collection step below); uses the ccache mount so compiler cache data does not
# end up inside an image layer.
RUN --mount=type=cache,target=/ccache \
    ninja paddle_python \
    || echo "WARNING: 'ninja paddle_python' failed; relying on later packaging steps" >&2

# Build wheel package from the python/ source directory, falling back to
# `pip wheel`. If both fail, the step still passes when the ninja build has
# already left a wheel in build/python/dist (which the collection step accepts).
WORKDIR /build/Paddle/python
RUN python setup.py bdist_wheel \
    || python -m pip wheel . -w dist/ \
    || ls -la ../build/python/dist/*.whl

# Copy wheel to output: try the known locations in order, then search /build.
# `find` exits 0 even when nothing matches, so the presence of a wheel in
# /output is verified explicitly; otherwise the image would build "successfully"
# with nothing to export.
WORKDIR /build/Paddle
RUN mkdir -p /output \
    && { cp python/dist/*.whl /output/ 2>/dev/null \
         || cp build/python/dist/*.whl /output/ 2>/dev/null \
         || find /build -name "paddlepaddle*.whl" -exec cp {} /output/ \; ; } \
    && if ! ls /output/*.whl >/dev/null 2>&1; then \
           echo "ERROR: no PaddlePaddle wheel was produced" >&2; \
           exit 1; \
       fi

# List what was built
RUN ls -la /output/ \
    && echo "=== Build complete ===" \
    && find /build -name "*.whl" -type f 2>/dev/null

# Default command: copy wheel to mounted volume (/wheels).
# Written as if/else so the "search /build" fallback runs only when the copy
# from /output fails; the previous `a && b || c && d` chain also ran the
# fallback after a successful copy.
CMD ["sh", "-c", "if cp /output/*.whl /wheels/ 2>/dev/null; then echo 'Wheel copied to /wheels/'; else echo 'No wheel found in /output, checking other locations...'; find /build -name '*.whl' -exec cp {} /wheels/ \\; ; fi; ls -la /wheels/"]