Paddle ocr gpu support. #4
@@ -3,15 +3,18 @@
|
|||||||
# This Dockerfile compiles PaddlePaddle from source with CUDA support for ARM64.
|
# This Dockerfile compiles PaddlePaddle from source with CUDA support for ARM64.
|
||||||
# The resulting wheel can be used in Dockerfile.gpu for ARM64 GPU acceleration.
|
# The resulting wheel can be used in Dockerfile.gpu for ARM64 GPU acceleration.
|
||||||
#
|
#
|
||||||
# Build time: 2-4 hours depending on hardware
|
# Build time: ~1-2 hours with caching, 2-4 hours first build
|
||||||
# Output: /output/paddlepaddle_gpu-*.whl
|
# Output: /output/paddlepaddle_gpu-*.whl
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# docker compose run build-paddle
|
# CUDA_ARCH=90 docker compose --profile build run --rm build-paddle
|
||||||
# # or
|
#
|
||||||
# docker build -f Dockerfile.build-paddle -t paddle-builder .
|
# Features:
|
||||||
# docker run -v ./wheels:/output paddle-builder
|
# - ccache for compiler caching (survives rebuilds)
|
||||||
|
# - Split build stages for better layer caching
|
||||||
|
# - ARM64 -m64 patch applied automatically
|
||||||
|
|
||||||
|
# NOTE(review): BuildKit only recognizes parser directives that appear before
# any other comment, blank line or instruction. This line sits below the header
# comments, so it is parsed as a plain comment; move it to line 1 to take effect.
# syntax=docker/dockerfile:1.4
|
||||||
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
|
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
|
||||||
|
|
||||||
LABEL maintainer="Sergio Jimenez"
|
LABEL maintainer="Sergio Jimenez"
|
||||||
@@ -20,19 +23,20 @@ LABEL description="PaddlePaddle GPU wheel builder for ARM64"
|
|||||||
# Build arguments
|
# Build arguments
|
||||||
ARG PADDLE_VERSION=v3.0.0
|
ARG PADDLE_VERSION=v3.0.0
|
||||||
ARG PYTHON_VERSION=3.11
|
ARG PYTHON_VERSION=3.11
|
||||||
|
ARG CUDA_ARCH=90
|
||||||
|
|
||||||
# Environment setup
|
# Environment setup
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
ENV PYTHONUNBUFFERED=1
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
ENV CCACHE_DIR=/ccache
|
||||||
|
ENV PATH="/usr/lib/ccache:${PATH}"
|
||||||
|
|
||||||
# Install build dependencies
|
# Install build dependencies + ccache
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
# Python
|
|
||||||
python${PYTHON_VERSION} \
|
python${PYTHON_VERSION} \
|
||||||
python${PYTHON_VERSION}-dev \
|
python${PYTHON_VERSION}-dev \
|
||||||
python${PYTHON_VERSION}-venv \
|
python${PYTHON_VERSION}-venv \
|
||||||
python3-pip \
|
python3-pip \
|
||||||
# Build tools
|
|
||||||
build-essential \
|
build-essential \
|
||||||
cmake \
|
cmake \
|
||||||
ninja-build \
|
ninja-build \
|
||||||
@@ -40,7 +44,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
wget \
|
wget \
|
||||||
curl \
|
curl \
|
||||||
pkg-config \
|
pkg-config \
|
||||||
# Libraries
|
ccache \
|
||||||
libssl-dev \
|
libssl-dev \
|
||||||
libffi-dev \
|
libffi-dev \
|
||||||
zlib1g-dev \
|
zlib1g-dev \
|
||||||
@@ -55,7 +59,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
libprotobuf-dev \
|
libprotobuf-dev \
|
||||||
protobuf-compiler \
|
protobuf-compiler \
|
||||||
patchelf \
|
patchelf \
|
||||||
# Additional dependencies for Paddle
|
|
||||||
libopenblas-dev \
|
libopenblas-dev \
|
||||||
liblapack-dev \
|
liblapack-dev \
|
||||||
swig \
|
swig \
|
||||||
@@ -63,27 +66,31 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
|
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
|
||||||
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
|
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
|
||||||
|
|
||||||
|
# Setup ccache symlinks for CUDA
|
||||||
|
RUN mkdir -p /usr/lib/ccache && \
|
||||||
|
ln -sf /usr/bin/ccache /usr/lib/ccache/nvcc && \
|
||||||
|
ln -sf /usr/bin/ccache /usr/lib/ccache/gcc && \
|
||||||
|
ln -sf /usr/bin/ccache /usr/lib/ccache/g++ && \
|
||||||
|
ln -sf /usr/bin/ccache /usr/lib/ccache/cc && \
|
||||||
|
ln -sf /usr/bin/ccache /usr/lib/ccache/c++
|
||||||
|
|
||||||
# Upgrade pip and install Python build dependencies
|
# Upgrade pip and install Python build dependencies
|
||||||
RUN python -m pip install --upgrade pip setuptools wheel \
|
RUN python -m pip install --upgrade pip setuptools wheel && \
|
||||||
&& python -m pip install \
|
python -m pip install numpy protobuf pyyaml requests packaging astor decorator paddle-bfloat opt-einsum
|
||||||
numpy \
|
|
||||||
protobuf \
|
|
||||||
pyyaml \
|
|
||||||
requests \
|
|
||||||
packaging \
|
|
||||||
astor \
|
|
||||||
decorator \
|
|
||||||
paddle-bfloat \
|
|
||||||
opt-einsum
|
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
# Clone PaddlePaddle repository
|
# Clone PaddlePaddle repository
|
||||||
RUN git clone --depth 1 --branch ${PADDLE_VERSION} \
|
RUN git clone --depth 1 --branch ${PADDLE_VERSION} https://github.com/PaddlePaddle/Paddle.git
|
||||||
https://github.com/PaddlePaddle/Paddle.git
|
|
||||||
|
|
||||||
WORKDIR /build/Paddle
|
WORKDIR /build/Paddle
|
||||||
|
|
||||||
|
# Patch for ARM64: Remove -m64 flag (x86_64 specific, causes build failure on aarch64)
|
||||||
|
RUN sed -i 's/-m64//g' cmake/flags.cmake && \
|
||||||
|
sed -i 's/-m64//g' CMakeLists.txt 2>/dev/null || true && \
|
||||||
|
find . -name "*.cmake" -exec sed -i 's/-m64//g' {} \; 2>/dev/null || true && \
|
||||||
|
echo "Patched -m64 flag for ARM64 compatibility"
|
||||||
|
|
||||||
# Install additional Python requirements for building
|
# Install additional Python requirements for building
|
||||||
RUN pip install -r python/requirements.txt || true
|
RUN pip install -r python/requirements.txt || true
|
||||||
|
|
||||||
@@ -92,17 +99,8 @@ RUN mkdir -p build
|
|||||||
WORKDIR /build/Paddle/build
|
WORKDIR /build/Paddle/build
|
||||||
|
|
||||||
# Configure CMake for ARM64 + CUDA build
|
# Configure CMake for ARM64 + CUDA build
|
||||||
#
|
RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}" && \
|
||||||
# CUDA_ARCH is auto-detected from host GPU and passed via docker-compose.
|
cmake .. \
|
||||||
# To detect: nvidia-smi --query-gpu=compute_cap --format=csv,noheader
|
|
||||||
# Example: 12.1 -> use "90" (Hopper, closest supported), 9.0 -> use "90"
|
|
||||||
#
|
|
||||||
# Build time: ~30-60 min with single arch vs 2-4 hours with all archs
|
|
||||||
|
|
||||||
ARG CUDA_ARCH=90
|
|
||||||
RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}"
|
|
||||||
|
|
||||||
RUN cmake .. \
|
|
||||||
-GNinja \
|
-GNinja \
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
-DPY_VERSION=${PYTHON_VERSION} \
|
-DPY_VERSION=${PYTHON_VERSION} \
|
||||||
@@ -118,33 +116,44 @@ RUN cmake .. \
|
|||||||
-DCUDA_ARCH_NAME=Manual \
|
-DCUDA_ARCH_NAME=Manual \
|
||||||
-DCUDA_ARCH_BIN="${CUDA_ARCH}" \
|
-DCUDA_ARCH_BIN="${CUDA_ARCH}" \
|
||||||
-DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
|
-DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
|
||||||
|
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||||
|
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||||
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
|
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
|
||||||
|
|
||||||
# Build PaddlePaddle (this takes 2-4 hours)
|
# Build external dependencies first (cacheable layer)
|
||||||
RUN ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4
|
RUN --mount=type=cache,target=/ccache \
|
||||||
|
ninja extern_gflags extern_glog extern_protobuf extern_zlib extern_eigen3
|
||||||
|
|
||||||
|
# Build flashattn (heaviest dependency, separate layer for caching)
|
||||||
|
RUN --mount=type=cache,target=/ccache \
|
||||||
|
ninja extern_flashattn
|
||||||
|
|
||||||
|
# Build remaining external dependencies
|
||||||
|
RUN --mount=type=cache,target=/ccache \
|
||||||
|
ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo extern_xbyak
|
||||||
|
|
||||||
|
# Build main PaddlePaddle (with ccache, fallback to fewer jobs if OOM)
|
||||||
|
RUN --mount=type=cache,target=/ccache \
|
||||||
|
ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4
|
||||||
|
|
||||||
# Build the Python wheel
|
# Build the Python wheel
|
||||||
WORKDIR /build/Paddle/build
|
# Failure of this target is ignored (`|| true`), so a broken wheel target does
# not stop the build at this step.
# NOTE(review): unlike the ninja steps before it, this RUN has no /ccache cache
# mount - confirm that is intended.
RUN ninja paddle_python || true
|
||||||
RUN ninja paddle_python
|
|
||||||
|
|
||||||
# Create output directory and copy wheel
|
# Create output directory
|
||||||
RUN mkdir -p /output
|
RUN mkdir -p /output
|
||||||
|
|
||||||
# The wheel should be in python/dist/
|
|
||||||
WORKDIR /build/Paddle
|
|
||||||
|
|
||||||
# Build wheel package
|
# Build wheel package
|
||||||
RUN cd python && python setup.py bdist_wheel
|
WORKDIR /build/Paddle
|
||||||
|
# Try setup.py's bdist_wheel first; if that fails, fall back to `pip wheel`.
# --no-deps stops the fallback from also building a wheel for every dependency
# into dist/, which the following `cp python/dist/*.whl /output/` would otherwise
# copy into the output directory alongside the PaddlePaddle wheel.
RUN cd python && python setup.py bdist_wheel || pip wheel . --no-deps -w dist/
|
||||||
|
|
||||||
# Copy wheel to output
|
# Copy wheel to output
|
||||||
RUN cp python/dist/*.whl /output/ 2>/dev/null || \
|
RUN cp python/dist/*.whl /output/ 2>/dev/null || \
|
||||||
cp build/python/dist/*.whl /output/ 2>/dev/null || \
|
cp build/python/dist/*.whl /output/ 2>/dev/null || \
|
||||||
echo "Wheel location may vary, checking build artifacts..."
|
# Last resort: search the whole build tree for the wheel. find exits 0 even when
# nothing matches, so finish by checking that /output really holds a wheel and
# fail the build otherwise (the check runs whichever copy attempt succeeded).
find /build -name "paddlepaddle*.whl" -exec cp {} /output/ \; && \
ls /output/*.whl > /dev/null
|
||||||
|
|
||||||
# List what was built
|
# List what was built
|
||||||
RUN ls -la /output/ && \
|
RUN ls -la /output/ && \
|
||||||
echo "=== Build complete ===" && \
|
echo "=== Build complete ===" && \
|
||||||
echo "Wheel files:" && \
|
|
||||||
find /build -name "*.whl" -type f 2>/dev/null
|
find /build -name "*.whl" -type f 2>/dev/null
|
||||||
|
|
||||||
# Default command: copy wheel to mounted volume
|
# Default command: copy wheel to mounted volume
|
||||||
|
|||||||
Reference in New Issue
Block a user