Dockerfile.build-paddle: ccache and staged ninja targets for the ARM64 wheel build

What this patch does to the Dockerfile:
  * installs ccache, sets CCACHE_DIR=/ccache, prepends /usr/lib/ccache to PATH
    and symlinks nvcc/gcc/g++/cc/c++ there to /usr/bin/ccache;
  * strips the -m64 flag from the cloned Paddle CMake files;
  * folds the architecture echo into the cmake configure step and passes
    ccache as the C and C++ compiler launcher;
  * builds the extern_* targets in separate RUN steps, each with a BuildKit
    cache mount on /ccache, ahead of the main ninja build;
  * adds fallbacks to the wheel build and wheel copy steps.

Review fixes folded into this revision:
  * The "syntax" parser directive is inserted as line 1 of the Dockerfile.
    BuildKit only honours parser directives that come before any comment,
    blank line or instruction; placed after the header comment block it was
    treated as an ordinary comment.
  * "ARG CUDA_ARCH=90" stays directly above the cmake step instead of moving
    to the top of the file. Every RUN that follows an ARG inherits it, so an
    early declaration would rebuild the apt, pip, clone and patch layers each
    time a different architecture is requested.

Caveats for whoever applies this:
  * Leading indentation and comment-internal spacing were reconstructed
    (4 spaces for continuation lines, 3 spaces after "#" in the Usage and
    Features lists). Verify against the pre-image or apply with
    whitespace-insensitive matching (git apply --ignore-whitespace, patch -l).
  * The first hunk carries no context because lines 1-2 of the pre-image are
    not part of the reviewed material; git apply is expected to need
    --unidiff-zero for it. Regenerate the patch from a real checkout before
    merging.
  * The "index" line was dropped because the post-image hash no longer
    matches. Pre-image blob: 3a48270, mode 100644.
---
diff --git a/src/paddle_ocr/Dockerfile.build-paddle b/src/paddle_ocr/Dockerfile.build-paddle
--- a/src/paddle_ocr/Dockerfile.build-paddle
+++ b/src/paddle_ocr/Dockerfile.build-paddle
@@ -0,0 +1 @@
+# syntax=docker/dockerfile:1.4
@@ -3,15 +4,17 @@
 # This Dockerfile compiles PaddlePaddle from source with CUDA support for ARM64.
 # The resulting wheel can be used in Dockerfile.gpu for ARM64 GPU acceleration.
 #
-# Build time: 2-4 hours depending on hardware
+# Build time: ~1-2 hours with caching, 2-4 hours first build
 # Output: /output/paddlepaddle_gpu-*.whl
 #
 # Usage:
-#   docker compose run build-paddle
-#   # or
-#   docker build -f Dockerfile.build-paddle -t paddle-builder .
-#   docker run -v ./wheels:/output paddle-builder
+#   CUDA_ARCH=90 docker compose --profile build run --rm build-paddle
+#
+# Features:
+#   - ccache for compiler caching (survives rebuilds)
+#   - Split build stages for better layer caching
+#   - ARM64 -m64 patch applied automatically
 
 FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
 
 LABEL maintainer="Sergio Jimenez"
@@ -20,19 +23,19 @@ LABEL description="PaddlePaddle GPU wheel builder for ARM64"
 # Build arguments
 ARG PADDLE_VERSION=v3.0.0
 ARG PYTHON_VERSION=3.11
 
 # Environment setup
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1
+ENV CCACHE_DIR=/ccache
+ENV PATH="/usr/lib/ccache:${PATH}"
 
-# Install build dependencies
+# Install build dependencies + ccache
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    # Python
     python${PYTHON_VERSION} \
     python${PYTHON_VERSION}-dev \
     python${PYTHON_VERSION}-venv \
     python3-pip \
-    # Build tools
     build-essential \
     cmake \
     ninja-build \
@@ -40,7 +43,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     wget \
     curl \
     pkg-config \
-    # Libraries
+    ccache \
     libssl-dev \
     libffi-dev \
     zlib1g-dev \
@@ -55,7 +58,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libprotobuf-dev \
     protobuf-compiler \
     patchelf \
-    # Additional dependencies for Paddle
     libopenblas-dev \
     liblapack-dev \
     swig \
@@ -63,27 +65,31 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
     && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
 
+# Setup ccache symlinks for CUDA
+RUN mkdir -p /usr/lib/ccache && \
+    ln -sf /usr/bin/ccache /usr/lib/ccache/nvcc && \
+    ln -sf /usr/bin/ccache /usr/lib/ccache/gcc && \
+    ln -sf /usr/bin/ccache /usr/lib/ccache/g++ && \
+    ln -sf /usr/bin/ccache /usr/lib/ccache/cc && \
+    ln -sf /usr/bin/ccache /usr/lib/ccache/c++
+
 # Upgrade pip and install Python build dependencies
-RUN python -m pip install --upgrade pip setuptools wheel \
-    && python -m pip install \
-    numpy \
-    protobuf \
-    pyyaml \
-    requests \
-    packaging \
-    astor \
-    decorator \
-    paddle-bfloat \
-    opt-einsum
+RUN python -m pip install --upgrade pip setuptools wheel && \
+    python -m pip install numpy protobuf pyyaml requests packaging astor decorator paddle-bfloat opt-einsum
 
 WORKDIR /build
 
 # Clone PaddlePaddle repository
-RUN git clone --depth 1 --branch ${PADDLE_VERSION} \
-    https://github.com/PaddlePaddle/Paddle.git
+RUN git clone --depth 1 --branch ${PADDLE_VERSION} https://github.com/PaddlePaddle/Paddle.git
 
 WORKDIR /build/Paddle
 
+# Patch for ARM64: Remove -m64 flag (x86_64 specific, causes build failure on aarch64)
+RUN sed -i 's/-m64//g' cmake/flags.cmake && \
+    sed -i 's/-m64//g' CMakeLists.txt 2>/dev/null || true && \
+    find . -name "*.cmake" -exec sed -i 's/-m64//g' {} \; 2>/dev/null || true && \
+    echo "Patched -m64 flag for ARM64 compatibility"
+
 # Install additional Python requirements for building
 RUN pip install -r python/requirements.txt || true
 
@@ -92,17 +98,11 @@ RUN mkdir -p build
 WORKDIR /build/Paddle/build
 
 # Configure CMake for ARM64 + CUDA build
-#
-# CUDA_ARCH is auto-detected from host GPU and passed via docker-compose.
-# To detect: nvidia-smi --query-gpu=compute_cap --format=csv,noheader
-# Example: 12.1 -> use "90" (Hopper, closest supported), 9.0 -> use "90"
-#
-# Build time: ~30-60 min with single arch vs 2-4 hours with all archs
-
+# Declare CUDA_ARCH right before first use: RUN steps after an ARG inherit it,
+# so an earlier declaration would bust the cache of every dependency layer.
 ARG CUDA_ARCH=90
-RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}"
-
-RUN cmake .. \
+RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}" && \
+    cmake .. \
     -GNinja \
     -DCMAKE_BUILD_TYPE=Release \
     -DPY_VERSION=${PYTHON_VERSION} \
@@ -118,33 +118,44 @@ RUN cmake .. \
     -DCUDA_ARCH_NAME=Manual \
     -DCUDA_ARCH_BIN="${CUDA_ARCH}" \
     -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
+    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+    -DCMAKE_C_COMPILER_LAUNCHER=ccache \
     -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
 
-# Build PaddlePaddle (this takes 2-4 hours)
-RUN ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4
+# Build external dependencies first (cacheable layer)
+RUN --mount=type=cache,target=/ccache \
+    ninja extern_gflags extern_glog extern_protobuf extern_zlib extern_eigen3
+
+# Build flashattn (heaviest dependency, separate layer for caching)
+RUN --mount=type=cache,target=/ccache \
+    ninja extern_flashattn
+
+# Build remaining external dependencies
+RUN --mount=type=cache,target=/ccache \
+    ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo extern_xbyak
+
+# Build main PaddlePaddle (with ccache, fallback to fewer jobs if OOM)
+RUN --mount=type=cache,target=/ccache \
+    ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4
 
 # Build the Python wheel
-WORKDIR /build/Paddle/build
-RUN ninja paddle_python
+RUN ninja paddle_python || true
 
-# Create output directory and copy wheel
+# Create output directory
 RUN mkdir -p /output
 
-# The wheel should be in python/dist/
-WORKDIR /build/Paddle
-
 # Build wheel package
-RUN cd python && python setup.py bdist_wheel
+WORKDIR /build/Paddle
+RUN cd python && python setup.py bdist_wheel || pip wheel . -w dist/
 
 # Copy wheel to output
 RUN cp python/dist/*.whl /output/ 2>/dev/null || \
     cp build/python/dist/*.whl /output/ 2>/dev/null || \
-    echo "Wheel location may vary, checking build artifacts..."
+    find /build -name "paddlepaddle*.whl" -exec cp {} /output/ \;
 
 # List what was built
 RUN ls -la /output/ && \
     echo "=== Build complete ===" && \
-    echo "Wheel files:" && \
     find /build -name "*.whl" -type f 2>/dev/null
 
 # Default command: copy wheel to mounted volume