Files
MastersThesis/src/paddle_ocr/Dockerfile.build-paddle
Sergio Jimenez Jimenez a89ddd2d13
Some checks failed
build_docker / build_cpu (linux/arm64) (pull_request) Has been cancelled
build_docker / build_gpu (linux/amd64) (pull_request) Has been cancelled
build_docker / build_gpu (linux/arm64) (pull_request) Has been cancelled
build_docker / build_cpu (linux/amd64) (pull_request) Has been cancelled
build_docker / essential (push) Successful in 0s
build_docker / build_gpu (linux/amd64) (push) Has been cancelled
build_docker / build_gpu (linux/arm64) (push) Has been cancelled
build_docker / build_cpu (linux/amd64) (push) Has been cancelled
build_docker / build_cpu (linux/arm64) (push) Has been cancelled
build_docker / essential (pull_request) Successful in 1s
ci update
2026-01-17 16:15:53 +01:00

214 lines
8.4 KiB
Docker

# syntax=docker/dockerfile:1.4

# Dockerfile.build-paddle - Build PaddlePaddle GPU wheel for ARM64
#
# NOTE: the "syntax" parser directive above must remain the very first line of
# this file. BuildKit stops looking for parser directives as soon as it sees an
# ordinary comment, a blank line or an instruction, so placing it any lower
# turns it into a plain comment that is silently ignored.
#
# This Dockerfile compiles PaddlePaddle from source with CUDA support for ARM64.
# The resulting wheel can be used in Dockerfile.gpu for ARM64 GPU acceleration.
#
# Build time: ~1-2 hours with caching, 2-4 hours first build
# Output: /output/paddlepaddle_gpu-*.whl
#
# Usage:
#   CUDA_ARCH=90 docker compose --profile build run --rm build-paddle
#
# Features:
#   - ccache for compiler caching (survives rebuilds)
#   - Split build stages for better layer caching
#   - ARM64 -m64 patch applied automatically
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
LABEL maintainer="Sergio Jimenez"
LABEL description="PaddlePaddle GPU wheel builder for ARM64"
# Build-time knobs (override with --build-arg):
#   CUDA_ARCH      - forwarded to CMake as CUDA_ARCH_BIN / CMAKE_CUDA_ARCHITECTURES
#   PADDLE_VERSION - branch or tag of the PaddlePaddle repository to clone
#   PYTHON_VERSION - selects the apt python packages and is forwarded as PY_VERSION
ARG CUDA_ARCH=90
ARG PADDLE_VERSION=v3.0.0
ARG PYTHON_VERSION=3.11

# Environment shared by every following step:
#   CCACHE_DIR matches the cache mount target used by the ninja steps, and
#   /usr/lib/ccache is put first on PATH so the compiler symlinks there win.
ENV CCACHE_DIR=/ccache \
    DEBIAN_FRONTEND=noninteractive \
    PATH="/usr/lib/ccache:${PATH}" \
    PYTHONUNBUFFERED=1
# System toolchain, Python ${PYTHON_VERSION} and the development libraries
# needed for the source build (package list kept in alphabetical order).
# The apt lists are removed in the same layer, and both `python` and `python3`
# are pointed at the requested interpreter version.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ccache \
        cmake \
        curl \
        git \
        libbz2-dev \
        libffi-dev \
        libgflags-dev \
        libgoogle-glog-dev \
        liblapack-dev \
        liblzma-dev \
        libncurses5-dev \
        libncursesw5-dev \
        libopenblas-dev \
        libprotobuf-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        ninja-build \
        patchelf \
        pkg-config \
        protobuf-compiler \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
        python3-pip \
        swig \
        wget \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/* \
    && for alias in python python3; do \
           ln -sf "/usr/bin/python${PYTHON_VERSION}" "/usr/bin/${alias}" || exit 1; \
       done
# Make ccache masquerade as each compiler driver (including nvcc) by placing
# same-named symlinks in /usr/lib/ccache, which is first on PATH.
RUN set -e; \
    mkdir -p /usr/lib/ccache; \
    for tool in nvcc gcc g++ cc c++; do \
        ln -sf /usr/bin/ccache "/usr/lib/ccache/${tool}"; \
    done
# Refresh the packaging tools first, then install the Python modules used
# while building (one package per line, alphabetical).
RUN python -m pip install --upgrade \
        pip \
        setuptools \
        wheel \
    && python -m pip install \
        astor \
        decorator \
        numpy \
        opt-einsum \
        packaging \
        paddle-bfloat \
        protobuf \
        pyyaml \
        requests
WORKDIR /build
# Shallow-clone only the requested branch/tag of PaddlePaddle into /build/Paddle,
# then continue from inside the checkout.
RUN git clone \
        --branch "${PADDLE_VERSION}" \
        --depth 1 \
        https://github.com/PaddlePaddle/Paddle.git \
        Paddle
WORKDIR /build/Paddle
# ARM64 patch: strip the x86_64-only -m64 flag, which breaks the aarch64 build.
# The flag is removed from cmake/flags.cmake, CMakeLists.txt and every *.cmake
# file in the tree. The whole step is best-effort: a failing sed/find never
# aborts the build, and the confirmation message is always printed.
RUN { sed -i 's/-m64//g' cmake/flags.cmake && \
      sed -i 's/-m64//g' CMakeLists.txt 2>/dev/null; } || true; \
    find . -name "*.cmake" -exec sed -i 's/-m64//g' {} \; 2>/dev/null || true; \
    echo "Patched -m64 flag for ARM64 compatibility"
# ARM64 patch: install the sse2neon header, which maps x86 SSE intrinsics onto
# ARM NEON so sources that include x86 SIMD headers can still compile.
# The temporary checkout is deleted in the same layer.
RUN set -e; \
    checkout=/tmp/sse2neon; \
    include_dir=/usr/local/include/sse2neon; \
    git clone --depth 1 https://github.com/DLTcollab/sse2neon.git "${checkout}"; \
    mkdir -p "${include_dir}"; \
    cp "${checkout}/sse2neon.h" "${include_dir}/"; \
    rm -rf "${checkout}"; \
    echo "Installed sse2neon for x86->ARM NEON translation"
# Generate one wrapper per x86 intrinsics header in /usr/local/include/x86_stubs.
# Each wrapper includes sse2neon when __x86_64__ is not defined and otherwise
# defers to the real header via #include_next.
# Every printf argument starts with a quote so that no continuation line begins
# with '#', which the Dockerfile parser would drop as a comment.
RUN set -e; \
    stub_dir=/usr/local/include/x86_stubs; \
    mkdir -p "${stub_dir}"; \
    for hdr in immintrin xmmintrin emmintrin pmmintrin smmintrin; do \
        printf '%s\n' \
            '#ifndef __x86_64__' \
            '#include <sse2neon/sse2neon.h>' \
            '#else' \
            "#include_next <${hdr}.h>" \
            '#endif' \
            > "${stub_dir}/${hdr}.h"; \
    done; \
    echo "Created x86 intrinsic wrapper headers for ARM64 using sse2neon"
# Install Paddle's own Python build requirements.
# This stays best-effort (a failure does not abort the image build), but the
# failure is now reported on stderr instead of being swallowed silently.
# `python -m pip` is used, as in the earlier pip step, so the packages land in
# the interpreter that `python` points to.
RUN python -m pip install -r python/requirements.txt || \
    echo "WARNING: installing python/requirements.txt failed; continuing with previously installed packages" >&2
# Out-of-tree build directory. WORKDIR creates it when missing, so no separate
# `mkdir -p build` step is required.
WORKDIR /build/Paddle/build
# Configure the Ninja build tree for a GPU-enabled Python build.
# Flags are grouped as: generator/build type, Python, enabled features,
# disabled features, CUDA architecture (taken from the CUDA_ARCH build arg),
# ccache launchers, and compiler flags.
# The C++ flags silence -Wclass-memaccess (Eigen NEON warning on ARM64) and put
# the generated x86 stub headers on the include path.
RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}" && \
    cmake .. \
        -GNinja \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
        -DPY_VERSION=${PYTHON_VERSION} \
        -DWITH_PYTHON=ON \
        -DWITH_GPU=ON \
        -DON_INFER=OFF \
        -DWITH_AVX=OFF \
        -DWITH_DISTRIBUTE=OFF \
        -DWITH_MKL=OFF \
        -DWITH_MKLDNN=OFF \
        -DWITH_NCCL=OFF \
        -DWITH_TESTING=OFF \
        -DCUDA_ARCH_NAME=Manual \
        -DCUDA_ARCH_BIN="${CUDA_ARCH}" \
        -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
        -DCMAKE_CXX_FLAGS="-Wno-class-memaccess -Wno-error=class-memaccess -I/usr/local/include/x86_stubs"
# Third-party dependencies are built in three separate layers so that each one
# can be reused from the layer cache independently. Every step mounts the
# persistent ccache directory.

# Layer 1: small, commonly needed libraries.
RUN --mount=type=cache,target=/ccache \
    ninja \
        extern_gflags \
        extern_glog \
        extern_protobuf \
        extern_zlib \
        extern_eigen3

# Layer 2: flashattn on its own, since it is the heaviest dependency.
RUN --mount=type=cache,target=/ccache \
    ninja extern_flashattn

# Layer 3: everything else that is built as an external project.
RUN --mount=type=cache,target=/ccache \
    ninja \
        extern_openblas \
        extern_pybind \
        extern_utf8proc \
        extern_xxhash \
        extern_yaml \
        extern_cryptopp \
        extern_warpctc \
        extern_warprnnt \
        extern_gloo \
        extern_xbyak
# Build main PaddlePaddle with ccache, retrying with fewer parallel jobs when an
# attempt fails (typically because the compiler ran out of memory).
#
# The retry ladder never increases parallelism:
#   1. all cores
#   2. half the cores, but at least 1
#   3. a quarter of the cores, clamped to the range 1..4
# The lower bound of 1 matters because ninja interprets -j0 as "unlimited",
# which is what plain $(nproc)/2 would produce on a single-core builder.
RUN --mount=type=cache,target=/ccache \
    full="$(nproc)"; \
    half=$(( full / 2 )); \
    if [ "${half}" -lt 1 ]; then half=1; fi; \
    low=$(( full / 4 )); \
    if [ "${low}" -lt 1 ]; then low=1; fi; \
    if [ "${low}" -gt 4 ]; then low=4; fi; \
    ninja -j"${full}" || ninja -j"${half}" || ninja -j"${low}"
# Build the Python packaging target. This is best-effort (a failure does not
# abort the image build) but is reported on stderr rather than hidden. The
# ccache mount is added so this ninja step uses the same cache as the others.
RUN --mount=type=cache,target=/ccache \
    ninja paddle_python || \
    echo "WARNING: 'ninja paddle_python' failed; continuing to wheel packaging" >&2
# Create output directory
RUN mkdir -p /output
# Build wheel package - try multiple methods since PaddlePaddle build structure varies
WORKDIR /build/Paddle
# The two directory listings are diagnostics only, so they must not gate the
# build: a missing build/python/ previously failed the whole step before the
# python/setup.py and "existing wheel" fallbacks could run.
RUN echo "=== Looking for wheel build method ===" && \
    { ls -la python/ 2>/dev/null || true; } && \
    { ls -la build/python/ 2>/dev/null || true; } && \
    if [ -f build/python/setup.py ]; then \
        echo "Using build/python/setup.py" && \
        cd build/python && python setup.py bdist_wheel; \
    elif [ -f python/setup.py ]; then \
        echo "Using python/setup.py" && \
        cd python && python setup.py bdist_wheel; \
    else \
        echo "Looking for existing wheel..." && \
        find /build -name "paddlepaddle*.whl" -type f 2>/dev/null; \
    fi
# Gather every paddlepaddle wheel found under /build into /output, show the
# directory, and abort the build when no *.whl file ended up there.
RUN find /build -type f -name "paddlepaddle*.whl" -exec cp {} /output/ \; && \
    ls -la /output/ && \
    { ls /output/*.whl >/dev/null 2>&1 || { echo "ERROR: No wheel found!"; exit 1; }; }
# Final report: contents of /output followed by every wheel present under /build.
RUN set -e; \
    ls -la /output/; \
    echo "=== Build complete ==="; \
    find /build -type f -name "*.whl" 2>/dev/null
# Default command: copy the wheel(s) from /output to the volume mounted at /wheels.
# Only when that copy fails does it fall back to copying any wheel found under
# /build. The explicit if/else is required: in the former
# `a && b && c || d && e && f` chain the fallback copy and the second listing
# also ran after a successful copy, because && and || have equal precedence and
# associate left to right.
# The directory listing runs once at the end in both cases.
CMD ["sh", "-c", "if cp /output/*.whl /wheels/ 2>/dev/null; then echo 'Wheel copied to /wheels/'; else echo 'No wheel found in /output, checking other locations...' && find /build -name '*.whl' -exec cp {} /wheels/ \\; ; fi && ls -la /wheels/"]