diff --git a/.gitea/workflows/ci.yaml b/.gitea/workflows/ci.yaml index cccd2ca..37df5e8 100644 --- a/.gitea/workflows/ci.yaml +++ b/.gitea/workflows/ci.yaml @@ -9,7 +9,6 @@ on: push: branches: - main - - gpu_support env: PADDLE_VERSION: "3.0.0" @@ -24,7 +23,9 @@ jobs: image_cpu: seryus.ddns.net/unir/paddle-ocr-cpu image_gpu: seryus.ddns.net/unir/paddle-ocr-gpu image_easyocr: seryus.ddns.net/unir/easyocr-cpu + image_easyocr_gpu: seryus.ddns.net/unir/easyocr-gpu image_doctr: seryus.ddns.net/unir/doctr-cpu + image_doctr_gpu: seryus.ddns.net/unir/doctr-gpu steps: - name: Output version info run: | @@ -315,3 +316,139 @@ jobs: docker buildx imagetools create -t ${{ needs.essential.outputs.image_doctr }}:${{ needs.essential.outputs.Version }} \ ${{ needs.essential.outputs.image_doctr }}:${{ needs.essential.outputs.Version }}-amd64 \ ${{ needs.essential.outputs.image_doctr }}:${{ needs.essential.outputs.Version }}-arm64 + + # EasyOCR GPU image: Matrix build for amd64 and arm64 + # PyTorch cu128 has wheels for both architectures + build_easyocr_gpu: + runs-on: ubuntu-latest + needs: essential + strategy: + matrix: + platform: + - linux/amd64 + - linux/arm64 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Gitea Registry + uses: docker/login-action@v3 + with: + registry: ${{ needs.essential.outputs.repo }} + username: username + password: ${{ secrets.CI_READWRITE }} + + - name: Get arch suffix + id: arch + run: | + if [ "${{ matrix.platform }}" = "linux/amd64" ]; then + echo "suffix=amd64" >> $GITHUB_OUTPUT + else + echo "suffix=arm64" >> $GITHUB_OUTPUT + fi + + - name: Build and push EasyOCR GPU image (${{ matrix.platform }}) + uses: docker/build-push-action@v5 + with: + context: src/easyocr_service + file: src/easyocr_service/Dockerfile.gpu + platforms: ${{ matrix.platform }} + push: true + tags: | + ${{ 
needs.essential.outputs.image_easyocr_gpu }}:${{ needs.essential.outputs.Version }}-${{ steps.arch.outputs.suffix }} + ${{ needs.essential.outputs.image_easyocr_gpu }}:${{ steps.arch.outputs.suffix }} + + # DocTR GPU image: Matrix build for amd64 and arm64 + # PyTorch cu128 has wheels for both architectures + build_doctr_gpu: + runs-on: ubuntu-latest + needs: essential + strategy: + matrix: + platform: + - linux/amd64 + - linux/arm64 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Gitea Registry + uses: docker/login-action@v3 + with: + registry: ${{ needs.essential.outputs.repo }} + username: username + password: ${{ secrets.CI_READWRITE }} + + - name: Get arch suffix + id: arch + run: | + if [ "${{ matrix.platform }}" = "linux/amd64" ]; then + echo "suffix=amd64" >> $GITHUB_OUTPUT + else + echo "suffix=arm64" >> $GITHUB_OUTPUT + fi + + - name: Build and push DocTR GPU image (${{ matrix.platform }}) + uses: docker/build-push-action@v5 + with: + context: src/doctr_service + file: src/doctr_service/Dockerfile.gpu + platforms: ${{ matrix.platform }} + push: true + tags: | + ${{ needs.essential.outputs.image_doctr_gpu }}:${{ needs.essential.outputs.Version }}-${{ steps.arch.outputs.suffix }} + ${{ needs.essential.outputs.image_doctr_gpu }}:${{ steps.arch.outputs.suffix }} + + # Create multi-arch manifest for EasyOCR GPU image + manifest_easyocr_gpu: + runs-on: ubuntu-latest + needs: [essential, build_easyocr_gpu] + steps: + - name: Login to Gitea Registry + uses: docker/login-action@v3 + with: + registry: ${{ needs.essential.outputs.repo }} + username: username + password: ${{ secrets.CI_READWRITE }} + + - name: Create multi-arch manifest (EasyOCR GPU) + run: | + docker buildx imagetools create -t ${{ needs.essential.outputs.image_easyocr_gpu }}:latest \ + ${{ needs.essential.outputs.image_easyocr_gpu }}:amd64 \ 
+ ${{ needs.essential.outputs.image_easyocr_gpu }}:arm64 + docker buildx imagetools create -t ${{ needs.essential.outputs.image_easyocr_gpu }}:${{ needs.essential.outputs.Version }} \ + ${{ needs.essential.outputs.image_easyocr_gpu }}:${{ needs.essential.outputs.Version }}-amd64 \ + ${{ needs.essential.outputs.image_easyocr_gpu }}:${{ needs.essential.outputs.Version }}-arm64 + + # Create multi-arch manifest for DocTR GPU image + manifest_doctr_gpu: + runs-on: ubuntu-latest + needs: [essential, build_doctr_gpu] + steps: + - name: Login to Gitea Registry + uses: docker/login-action@v3 + with: + registry: ${{ needs.essential.outputs.repo }} + username: username + password: ${{ secrets.CI_READWRITE }} + + - name: Create multi-arch manifest (DocTR GPU) + run: | + docker buildx imagetools create -t ${{ needs.essential.outputs.image_doctr_gpu }}:latest \ + ${{ needs.essential.outputs.image_doctr_gpu }}:amd64 \ + ${{ needs.essential.outputs.image_doctr_gpu }}:arm64 + docker buildx imagetools create -t ${{ needs.essential.outputs.image_doctr_gpu }}:${{ needs.essential.outputs.Version }} \ + ${{ needs.essential.outputs.image_doctr_gpu }}:${{ needs.essential.outputs.Version }}-amd64 \ + ${{ needs.essential.outputs.image_doctr_gpu }}:${{ needs.essential.outputs.Version }}-arm64 diff --git a/src/doctr_service/Dockerfile.gpu b/src/doctr_service/Dockerfile.gpu new file mode 100644 index 0000000..a79ff09 --- /dev/null +++ b/src/doctr_service/Dockerfile.gpu @@ -0,0 +1,68 @@ +# Dockerfile.gpu - DocTR GPU Dockerfile for amd64/arm64 +# +# Build: +# docker build -t doctr-gpu:latest -f Dockerfile.gpu . 
+#
+# Run:
+#   docker run --gpus all -p 8003:8000 -v ./dataset:/app/dataset doctr-gpu:latest
+
+# CUDA 13.0 for Blackwell (sm_121) and GH200/GB200 support
+FROM nvidia/cuda:13.0.2-cudnn-runtime-ubuntu24.04
+
+LABEL maintainer="Sergio Jimenez"
+LABEL description="DocTR Tuning REST API - GPU/CUDA version"
+
+WORKDIR /app
+
+# Set environment variables (DOCTR_* pick the detection/recognition architectures)
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+ENV CUDA_VISIBLE_DEVICES=0
+ENV DOCTR_DET_ARCH=db_resnet50
+ENV DOCTR_RECO_ARCH=crnn_vgg16_bn
+
+# Install Python 3.12 and system dependencies; expose the interpreter as `python`
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3.12 \
+    python3.12-venv \
+    python3-pip \
+    libgl1 \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender1 \
+    libgomp1 \
+    && rm -rf /var/lib/apt/lists/* \
+    && ln -sf /usr/bin/python3.12 /usr/bin/python
+
+# Install PyTorch with CUDA support (cu128 index has both amd64 and arm64 wheels)
+# --break-system-packages: Ubuntu 24.04's apt Python is externally managed (PEP 668)
+RUN python -m pip install --no-cache-dir --break-system-packages \
+    torch torchvision --index-url https://download.pytorch.org/whl/cu128
+
+# Install DocTR and other deps; specifiers are quoted so sh does not treat '>' as a redirect
+RUN python -m pip install --no-cache-dir --break-system-packages \
+    "python-doctr[torch]>=0.8.0" \
+    "fastapi>=0.104.0" \
+    "uvicorn[standard]" \
+    "pydantic>=2.0.0" \
+    "jiwer>=3.0.0" \
+    "numpy>=1.24.0" \
+    "pillow>=10.0.0"
+
+# Copy application code
+COPY doctr_tuning_rest.py .
+COPY dataset_manager.py .
+ +# Volume for dataset and model cache +VOLUME ["/app/dataset", "/root/.cache/doctr"] + +# Expose API port +EXPOSE 8000 + +# Health check (longer start period for model download) +HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1 + +# Run the API server +CMD ["uvicorn", "doctr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/src/doctr_service/README.md b/src/doctr_service/README.md new file mode 100644 index 0000000..a06059d --- /dev/null +++ b/src/doctr_service/README.md @@ -0,0 +1,212 @@ +# DocTR Tuning REST API + +REST API service for DocTR (Document Text Recognition) hyperparameter evaluation. Keeps the model loaded in memory for fast repeated evaluations during hyperparameter search. + +## Quick Start + +### CPU Version + +```bash +cd src/doctr_service + +# Build +docker build -t doctr-api:cpu . + +# Run +docker run -d -p 8003:8000 \ + -v $(pwd)/../dataset:/app/dataset:ro \ + -v doctr-cache:/root/.cache/doctr \ + doctr-api:cpu + +# Test +curl http://localhost:8003/health +``` + +### GPU Version + +```bash +# Build GPU image +docker build -f Dockerfile.gpu -t doctr-api:gpu . + +# Run with GPU +docker run -d -p 8003:8000 --gpus all \ + -v $(pwd)/../dataset:/app/dataset:ro \ + -v doctr-cache:/root/.cache/doctr \ + doctr-api:gpu +``` + +## Files + +| File | Description | +|------|-------------| +| `doctr_tuning_rest.py` | FastAPI REST service with 9 tunable hyperparameters | +| `dataset_manager.py` | Dataset loader (shared with other services) | +| `Dockerfile` | CPU-only image (amd64 + arm64) | +| `Dockerfile.gpu` | GPU/CUDA image (amd64 + arm64) | +| `requirements.txt` | Python dependencies | + +## API Endpoints + +### `GET /health` + +Check if service is ready. 
+ +```json +{ + "status": "ok", + "model_loaded": true, + "dataset_loaded": true, + "dataset_size": 24, + "det_arch": "db_resnet50", + "reco_arch": "crnn_vgg16_bn", + "cuda_available": true, + "device": "cuda", + "gpu_name": "NVIDIA GB10" +} +``` + +### `POST /evaluate` + +Run OCR evaluation with given hyperparameters. + +**Request (9 tunable parameters):** +```json +{ + "pdf_folder": "/app/dataset", + "assume_straight_pages": true, + "straighten_pages": false, + "preserve_aspect_ratio": true, + "symmetric_pad": true, + "disable_page_orientation": false, + "disable_crop_orientation": false, + "resolve_lines": true, + "resolve_blocks": false, + "paragraph_break": 0.035, + "start_page": 5, + "end_page": 10 +} +``` + +**Response:** +```json +{ + "CER": 0.0189, + "WER": 0.1023, + "TIME": 52.3, + "PAGES": 5, + "TIME_PER_PAGE": 10.46, + "model_reinitialized": false +} +``` + +**Note:** `model_reinitialized` indicates if the model was reloaded due to changed processing flags (adds ~2-5s overhead). + +## Hyperparameters + +### Processing Flags (Require Model Reinitialization) + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `assume_straight_pages` | true | Skip rotation handling for straight documents | +| `straighten_pages` | false | Pre-straighten pages before detection | +| `preserve_aspect_ratio` | true | Maintain document proportions during resize | +| `symmetric_pad` | true | Use symmetric padding when preserving aspect ratio | + +**Note:** Changing these flags requires model reinitialization (~2-5s). 
+ +### Orientation Flags + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `disable_page_orientation` | false | Skip page orientation classification | +| `disable_crop_orientation` | false | Skip crop orientation detection | + +### Output Grouping + +| Parameter | Default | Range | Description | +|-----------|---------|-------|-------------| +| `resolve_lines` | true | bool | Group words into lines | +| `resolve_blocks` | false | bool | Group lines into blocks | +| `paragraph_break` | 0.035 | 0.0-1.0 | Minimum space ratio separating paragraphs | + +## Model Architecture + +DocTR uses a two-stage pipeline: + +1. **Detection** (`det_arch`): Localizes text regions + - Default: `db_resnet50` (DBNet with ResNet-50 backbone) + - Alternatives: `linknet_resnet18`, `db_mobilenet_v3_large` + +2. **Recognition** (`reco_arch`): Recognizes characters + - Default: `crnn_vgg16_bn` (CRNN with VGG-16 backbone) + - Alternatives: `sar_resnet31`, `master`, `vitstr_small` + +Architecture is set via environment variables (fixed at startup). + +## GPU Support + +### Platform Support + +| Platform | CPU | GPU | +|----------|-----|-----| +| Linux x86_64 (amd64) | ✅ | ✅ PyTorch CUDA | +| Linux ARM64 (GH200/GB200/DGX Spark) | ✅ | ✅ PyTorch CUDA (cu128 index) | +| macOS ARM64 (M1/M2) | ✅ | ❌ | + +### PyTorch CUDA on ARM64 + +Unlike PaddlePaddle, PyTorch provides **official ARM64 CUDA wheels** on the cu128 index: + +```bash +pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128 +``` + +This works on both amd64 and arm64 platforms with CUDA support. 
+ +### GPU Detection + +DocTR automatically uses GPU when available: + +```python +import torch +print(torch.cuda.is_available()) # True if GPU available + +# DocTR model moves to GPU +model = ocr_predictor(pretrained=True) +if torch.cuda.is_available(): + model = model.cuda() +``` + +The `/health` endpoint shows GPU status: +```json +{ + "cuda_available": true, + "device": "cuda", + "gpu_name": "NVIDIA GB10", + "gpu_memory_total": "128.00 GB" +} +``` + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `DOCTR_DET_ARCH` | `db_resnet50` | Detection architecture | +| `DOCTR_RECO_ARCH` | `crnn_vgg16_bn` | Recognition architecture | +| `CUDA_VISIBLE_DEVICES` | `0` | GPU device selection | + +## CI/CD + +Built images available from registry: + +| Image | Architecture | +|-------|--------------| +| `seryus.ddns.net/unir/doctr-cpu:latest` | amd64, arm64 | +| `seryus.ddns.net/unir/doctr-gpu:latest` | amd64, arm64 | + +## Sources + +- [DocTR Documentation](https://mindee.github.io/doctr/) +- [DocTR GitHub](https://github.com/mindee/doctr) +- [DocTR Model Usage](https://mindee.github.io/doctr/latest/using_doctr/using_models.html) +- [PyTorch ARM64 CUDA Wheels](https://github.com/pytorch/pytorch/issues/160162) diff --git a/src/easyocr_service/Dockerfile.gpu b/src/easyocr_service/Dockerfile.gpu new file mode 100644 index 0000000..9b731cc --- /dev/null +++ b/src/easyocr_service/Dockerfile.gpu @@ -0,0 +1,67 @@ +# Dockerfile.gpu - EasyOCR GPU Dockerfile for amd64/arm64 +# +# Build: +# docker build -t easyocr-gpu:latest -f Dockerfile.gpu . 
+#
+# Run:
+#   docker run --gpus all -p 8002:8000 -v ./dataset:/app/dataset easyocr-gpu:latest
+
+# CUDA 13.0 for Blackwell (sm_121) and GH200/GB200 support
+FROM nvidia/cuda:13.0.2-cudnn-runtime-ubuntu24.04
+
+LABEL maintainer="Sergio Jimenez"
+LABEL description="EasyOCR Tuning REST API - GPU/CUDA version"
+
+WORKDIR /app
+
+# Set environment variables (EASYOCR_LANGUAGES is a comma-separated language list)
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+ENV CUDA_VISIBLE_DEVICES=0
+ENV EASYOCR_LANGUAGES=es,en
+
+# Install Python 3.12 and system dependencies; expose the interpreter as `python`
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3.12 \
+    python3.12-venv \
+    python3-pip \
+    libgl1 \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender1 \
+    libgomp1 \
+    && rm -rf /var/lib/apt/lists/* \
+    && ln -sf /usr/bin/python3.12 /usr/bin/python
+
+# Install PyTorch with CUDA support (cu128 index has both amd64 and arm64 wheels)
+# --break-system-packages: Ubuntu 24.04's apt Python is externally managed (PEP 668)
+RUN python -m pip install --no-cache-dir --break-system-packages \
+    torch torchvision --index-url https://download.pytorch.org/whl/cu128
+
+# Install EasyOCR and other deps; specifiers are quoted so sh does not treat '>' as a redirect
+RUN python -m pip install --no-cache-dir --break-system-packages \
+    "easyocr>=1.7.0" \
+    "fastapi>=0.104.0" \
+    "uvicorn[standard]" \
+    "pydantic>=2.0.0" \
+    "jiwer>=3.0.0" \
+    "numpy>=1.24.0" \
+    "pillow>=10.0.0"
+
+# Copy application code
+COPY easyocr_tuning_rest.py .
+COPY dataset_manager.py .
+ +# Volume for dataset and model cache +VOLUME ["/app/dataset", "/root/.EasyOCR"] + +# Expose API port +EXPOSE 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1 + +# Run the API server +CMD ["uvicorn", "easyocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/src/easyocr_service/README.md b/src/easyocr_service/README.md new file mode 100644 index 0000000..d91fb9f --- /dev/null +++ b/src/easyocr_service/README.md @@ -0,0 +1,199 @@ +# EasyOCR Tuning REST API + +REST API service for EasyOCR hyperparameter evaluation. Keeps the model loaded in memory for fast repeated evaluations during hyperparameter search. + +## Quick Start + +### CPU Version + +```bash +cd src/easyocr_service + +# Build +docker build -t easyocr-api:cpu . + +# Run +docker run -d -p 8002:8000 \ + -v $(pwd)/../dataset:/app/dataset:ro \ + -v easyocr-cache:/root/.EasyOCR \ + easyocr-api:cpu + +# Test +curl http://localhost:8002/health +``` + +### GPU Version + +```bash +# Build GPU image +docker build -f Dockerfile.gpu -t easyocr-api:gpu . + +# Run with GPU +docker run -d -p 8002:8000 --gpus all \ + -v $(pwd)/../dataset:/app/dataset:ro \ + -v easyocr-cache:/root/.EasyOCR \ + easyocr-api:gpu +``` + +## Files + +| File | Description | +|------|-------------| +| `easyocr_tuning_rest.py` | FastAPI REST service with 14 tunable hyperparameters | +| `dataset_manager.py` | Dataset loader (shared with other services) | +| `Dockerfile` | CPU-only image (amd64 + arm64) | +| `Dockerfile.gpu` | GPU/CUDA image (amd64 + arm64) | +| `requirements.txt` | Python dependencies | + +## API Endpoints + +### `GET /health` + +Check if service is ready. 
+ +```json +{ + "status": "ok", + "model_loaded": true, + "dataset_loaded": true, + "dataset_size": 24, + "languages": ["es", "en"], + "cuda_available": true, + "device": "cuda", + "gpu_name": "NVIDIA GB10" +} +``` + +### `POST /evaluate` + +Run OCR evaluation with given hyperparameters. + +**Request (14 tunable parameters):** +```json +{ + "pdf_folder": "/app/dataset", + "text_threshold": 0.7, + "low_text": 0.4, + "link_threshold": 0.4, + "slope_ths": 0.1, + "ycenter_ths": 0.5, + "height_ths": 0.5, + "width_ths": 0.5, + "add_margin": 0.1, + "contrast_ths": 0.1, + "adjust_contrast": 0.5, + "decoder": "greedy", + "beamWidth": 5, + "min_size": 10, + "rotation_info": null, + "start_page": 5, + "end_page": 10 +} +``` + +**Response:** +```json +{"CER": 0.0234, "WER": 0.1156, "TIME": 45.2, "PAGES": 5, "TIME_PER_PAGE": 9.04} +``` + +## Hyperparameters + +### Detection (CRAFT Algorithm) + +| Parameter | Default | Range | Description | +|-----------|---------|-------|-------------| +| `text_threshold` | 0.7 | 0.0-1.0 | Text confidence threshold | +| `low_text` | 0.4 | 0.0-1.0 | Text lower-bound score | +| `link_threshold` | 0.4 | 0.0-1.0 | Link confidence threshold | + +### Bounding Box Merging + +| Parameter | Default | Range | Description | +|-----------|---------|-------|-------------| +| `slope_ths` | 0.1 | 0.0-1.0 | Max slope for merging | +| `ycenter_ths` | 0.5 | 0.0-2.0 | Max vertical shift | +| `height_ths` | 0.5 | 0.0-2.0 | Max height variance | +| `width_ths` | 0.5 | 0.0-2.0 | Max horizontal distance | +| `add_margin` | 0.1 | 0.0-1.0 | Bounding box extension | + +### Contrast + +| Parameter | Default | Range | Description | +|-----------|---------|-------|-------------| +| `contrast_ths` | 0.1 | 0.0-1.0 | Contrast threshold for dual-pass | +| `adjust_contrast` | 0.5 | 0.0-1.0 | Target contrast level | + +### Decoder + +| Parameter | Default | Options | Description | +|-----------|---------|---------|-------------| +| `decoder` | "greedy" | greedy, beamsearch, 
wordbeamsearch | Decoding method | +| `beamWidth` | 5 | 1-20 | Beam width (for beam search) | + +### Other + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `min_size` | 10 | Minimum text box pixels | +| `rotation_info` | null | Rotation angles to try: [90, 180, 270] | + +## GPU Support + +### Platform Support + +| Platform | CPU | GPU | +|----------|-----|-----| +| Linux x86_64 (amd64) | ✅ | ✅ PyTorch CUDA | +| Linux ARM64 (GH200/GB200/DGX Spark) | ✅ | ✅ PyTorch CUDA (cu128 index) | +| macOS ARM64 (M1/M2) | ✅ | ❌ | + +### PyTorch CUDA on ARM64 + +Unlike PaddlePaddle, PyTorch provides **official ARM64 CUDA wheels** on the cu128 index: + +```bash +pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128 +``` + +This works on both amd64 and arm64 platforms with CUDA support. + +### GPU Detection + +EasyOCR automatically uses GPU when PyTorch CUDA is available: + +```python +import torch +print(torch.cuda.is_available()) # True if GPU available +``` + +The `/health` endpoint shows GPU status: +```json +{ + "cuda_available": true, + "device": "cuda", + "gpu_name": "NVIDIA GB10", + "gpu_memory_total": "128.00 GB" +} +``` + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `EASYOCR_LANGUAGES` | `es,en` | Comma-separated language codes | +| `CUDA_VISIBLE_DEVICES` | `0` | GPU device selection | + +## CI/CD + +Built images available from registry: + +| Image | Architecture | +|-------|--------------| +| `seryus.ddns.net/unir/easyocr-cpu:latest` | amd64, arm64 | +| `seryus.ddns.net/unir/easyocr-gpu:latest` | amd64, arm64 | + +## Sources + +- [EasyOCR Documentation](https://www.jaided.ai/easyocr/documentation/) +- [EasyOCR GitHub](https://github.com/JaidedAI/EasyOCR) +- [PyTorch ARM64 CUDA Wheels](https://github.com/pytorch/pytorch/issues/160162)