Paddle ocr gpu support. #4
@@ -9,7 +9,6 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- gpu_support
|
||||
|
||||
env:
|
||||
PADDLE_VERSION: "3.0.0"
|
||||
@@ -24,7 +23,9 @@ jobs:
|
||||
image_cpu: seryus.ddns.net/unir/paddle-ocr-cpu
|
||||
image_gpu: seryus.ddns.net/unir/paddle-ocr-gpu
|
||||
image_easyocr: seryus.ddns.net/unir/easyocr-cpu
|
||||
image_easyocr_gpu: seryus.ddns.net/unir/easyocr-gpu
|
||||
image_doctr: seryus.ddns.net/unir/doctr-cpu
|
||||
image_doctr_gpu: seryus.ddns.net/unir/doctr-gpu
|
||||
steps:
|
||||
- name: Output version info
|
||||
run: |
|
||||
@@ -315,3 +316,139 @@ jobs:
|
||||
docker buildx imagetools create -t ${{ needs.essential.outputs.image_doctr }}:${{ needs.essential.outputs.Version }} \
|
||||
${{ needs.essential.outputs.image_doctr }}:${{ needs.essential.outputs.Version }}-amd64 \
|
||||
${{ needs.essential.outputs.image_doctr }}:${{ needs.essential.outputs.Version }}-arm64
|
||||
|
||||
# EasyOCR GPU image: matrix build, one job per target architecture.
# Each job pushes two per-arch tags (<Version>-<arch> and <arch>).
# PyTorch cu128 has wheels for both architectures.
build_easyocr_gpu:
  runs-on: ubuntu-latest
  needs: essential
  strategy:
    matrix:
      platform:
        - linux/amd64
        - linux/arm64
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Login to Gitea Registry
      uses: docker/login-action@v3
      with:
        registry: ${{ needs.essential.outputs.repo }}
        username: username
        password: ${{ secrets.CI_READWRITE }}

    # Map the matrix platform to the tag suffix used below.
    # The platform is passed through the environment rather than being
    # interpolated into the script text, and unknown platforms fail the job
    # instead of silently being tagged as arm64.
    - name: Get arch suffix
      id: arch
      env:
        PLATFORM: ${{ matrix.platform }}
      run: |
        case "$PLATFORM" in
          linux/amd64) suffix=amd64 ;;
          linux/arm64) suffix=arm64 ;;
          *)
            echo "Unsupported platform: $PLATFORM" >&2
            exit 1
            ;;
        esac
        echo "suffix=$suffix" >> "$GITHUB_OUTPUT"

    - name: Build and push EasyOCR GPU image (${{ matrix.platform }})
      uses: docker/build-push-action@v5
      with:
        context: src/easyocr_service
        file: src/easyocr_service/Dockerfile.gpu
        platforms: ${{ matrix.platform }}
        push: true
        tags: |
          ${{ needs.essential.outputs.image_easyocr_gpu }}:${{ needs.essential.outputs.Version }}-${{ steps.arch.outputs.suffix }}
          ${{ needs.essential.outputs.image_easyocr_gpu }}:${{ steps.arch.outputs.suffix }}
|
||||
|
||||
# DocTR GPU image: matrix build, one job per target architecture.
# Each job pushes two per-arch tags (<Version>-<arch> and <arch>).
# PyTorch cu128 has wheels for both architectures.
build_doctr_gpu:
  runs-on: ubuntu-latest
  needs: essential
  strategy:
    matrix:
      platform:
        - linux/amd64
        - linux/arm64
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Login to Gitea Registry
      uses: docker/login-action@v3
      with:
        registry: ${{ needs.essential.outputs.repo }}
        username: username
        password: ${{ secrets.CI_READWRITE }}

    # Map the matrix platform to the tag suffix used below.
    # The platform is passed through the environment rather than being
    # interpolated into the script text, and unknown platforms fail the job
    # instead of silently being tagged as arm64.
    - name: Get arch suffix
      id: arch
      env:
        PLATFORM: ${{ matrix.platform }}
      run: |
        case "$PLATFORM" in
          linux/amd64) suffix=amd64 ;;
          linux/arm64) suffix=arm64 ;;
          *)
            echo "Unsupported platform: $PLATFORM" >&2
            exit 1
            ;;
        esac
        echo "suffix=$suffix" >> "$GITHUB_OUTPUT"

    - name: Build and push DocTR GPU image (${{ matrix.platform }})
      uses: docker/build-push-action@v5
      with:
        context: src/doctr_service
        file: src/doctr_service/Dockerfile.gpu
        platforms: ${{ matrix.platform }}
        push: true
        tags: |
          ${{ needs.essential.outputs.image_doctr_gpu }}:${{ needs.essential.outputs.Version }}-${{ steps.arch.outputs.suffix }}
          ${{ needs.essential.outputs.image_doctr_gpu }}:${{ steps.arch.outputs.suffix }}
|
||||
|
||||
# Multi-arch manifest for the EasyOCR GPU image.
# Runs after build_easyocr_gpu and combines the per-arch tags into
# ":latest" and ":<Version>".
manifest_easyocr_gpu:
  runs-on: ubuntu-latest
  needs:
    - essential
    - build_easyocr_gpu
  steps:
    - name: Login to Gitea Registry
      uses: docker/login-action@v3
      with:
        registry: ${{ needs.essential.outputs.repo }}
        username: username
        password: ${{ secrets.CI_READWRITE }}

    - name: Create multi-arch manifest (EasyOCR GPU)
      env:
        IMAGE: ${{ needs.essential.outputs.image_easyocr_gpu }}
        VERSION: ${{ needs.essential.outputs.Version }}
      run: |
        # Floating tag: latest <- amd64 + arm64
        docker buildx imagetools create -t "${IMAGE}:latest" \
          "${IMAGE}:amd64" \
          "${IMAGE}:arm64"
        # Versioned tag: <Version> <- <Version>-amd64 + <Version>-arm64
        docker buildx imagetools create -t "${IMAGE}:${VERSION}" \
          "${IMAGE}:${VERSION}-amd64" \
          "${IMAGE}:${VERSION}-arm64"
|
||||
|
||||
# Multi-arch manifest for the DocTR GPU image.
# Runs after build_doctr_gpu and combines the per-arch tags into
# ":latest" and ":<Version>".
manifest_doctr_gpu:
  runs-on: ubuntu-latest
  needs:
    - essential
    - build_doctr_gpu
  steps:
    - name: Login to Gitea Registry
      uses: docker/login-action@v3
      with:
        registry: ${{ needs.essential.outputs.repo }}
        username: username
        password: ${{ secrets.CI_READWRITE }}

    - name: Create multi-arch manifest (DocTR GPU)
      env:
        IMAGE: ${{ needs.essential.outputs.image_doctr_gpu }}
        VERSION: ${{ needs.essential.outputs.Version }}
      run: |
        # Floating tag: latest <- amd64 + arm64
        docker buildx imagetools create -t "${IMAGE}:latest" \
          "${IMAGE}:amd64" \
          "${IMAGE}:arm64"
        # Versioned tag: <Version> <- <Version>-amd64 + <Version>-arm64
        docker buildx imagetools create -t "${IMAGE}:${VERSION}" \
          "${IMAGE}:${VERSION}-amd64" \
          "${IMAGE}:${VERSION}-arm64"
|
||||
|
||||
68
src/doctr_service/Dockerfile.gpu
Normal file
68
src/doctr_service/Dockerfile.gpu
Normal file
@@ -0,0 +1,68 @@
|
||||
# Dockerfile.gpu - DocTR GPU Dockerfile for amd64/arm64
#
# Build:
#   docker build -t doctr-gpu:latest -f Dockerfile.gpu .
#
# Run:
#   docker run --gpus all -p 8003:8000 -v ./dataset:/app/dataset doctr-gpu:latest

# CUDA 13.0 for Blackwell (sm_121) and GH200/GB200 support
# NOTE(review): the PyTorch wheels installed below come from the cu128 index,
# while this base image is CUDA 13.0 - confirm the pairing is intended.
FROM nvidia/cuda:13.0.2-cudnn-runtime-ubuntu24.04

LABEL maintainer="Sergio Jimenez"
LABEL description="DocTR Tuning REST API - GPU/CUDA version"

WORKDIR /app

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV CUDA_VISIBLE_DEVICES=0
ENV DOCTR_DET_ARCH=db_resnet50
ENV DOCTR_RECO_ARCH=crnn_vgg16_bn
ENV VIRTUAL_ENV=/opt/venv

# Install Python 3.12 and system dependencies, then create a virtual
# environment. Ubuntu 24.04 marks the system interpreter as externally
# managed (PEP 668), so packages are installed into the venv instead of
# system-wide.
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3.12 \
        python3.12-venv \
        python3-pip \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender1 \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.12 /usr/bin/python \
    && /usr/bin/python3.12 -m venv "$VIRTUAL_ENV"

# Put the venv first on PATH so `python`, `pip` and `uvicorn` resolve to it
# in later RUN steps, in the HEALTHCHECK, and in CMD.
ENV PATH="/opt/venv/bin:${PATH}"

# Install PyTorch with CUDA support
# cu128 index has both amd64 and arm64 wheels
RUN python -m pip install --no-cache-dir \
        torch torchvision --index-url https://download.pytorch.org/whl/cu128

# Install DocTR and other dependencies.
# Every specifier is quoted: in shell-form RUN an unquoted `pkg>=1.0` is
# parsed by /bin/sh as `pkg` plus a redirection to a file named `=1.0`,
# which silently drops the version constraint.
RUN python -m pip install --no-cache-dir \
        "python-doctr[torch]>=0.8.0" \
        "fastapi>=0.104.0" \
        "uvicorn[standard]" \
        "pydantic>=2.0.0" \
        "jiwer>=3.0.0" \
        "numpy>=1.24.0" \
        "pillow>=10.0.0"

# Copy application code
COPY doctr_tuning_rest.py .
COPY dataset_manager.py .

# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.cache/doctr"]

# Expose API port
EXPOSE 8000

# Health check (longer start period for model download)
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "doctr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
212
src/doctr_service/README.md
Normal file
212
src/doctr_service/README.md
Normal file
@@ -0,0 +1,212 @@
|
||||
# DocTR Tuning REST API
|
||||
|
||||
REST API service for DocTR (Document Text Recognition) hyperparameter evaluation. Keeps the model loaded in memory for fast repeated evaluations during hyperparameter search.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### CPU Version
|
||||
|
||||
```bash
|
||||
cd src/doctr_service
|
||||
|
||||
# Build
|
||||
docker build -t doctr-api:cpu .
|
||||
|
||||
# Run
|
||||
docker run -d -p 8003:8000 \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v doctr-cache:/root/.cache/doctr \
|
||||
doctr-api:cpu
|
||||
|
||||
# Test
|
||||
curl http://localhost:8003/health
|
||||
```
|
||||
|
||||
### GPU Version
|
||||
|
||||
```bash
|
||||
# Build GPU image (run from src/doctr_service)
|
||||
docker build -f Dockerfile.gpu -t doctr-api:gpu .
|
||||
|
||||
# Run with GPU
|
||||
docker run -d -p 8003:8000 --gpus all \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v doctr-cache:/root/.cache/doctr \
|
||||
doctr-api:gpu
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `doctr_tuning_rest.py` | FastAPI REST service with 9 tunable hyperparameters |
|
||||
| `dataset_manager.py` | Dataset loader (shared with other services) |
|
||||
| `Dockerfile` | CPU-only image (amd64 + arm64) |
|
||||
| `Dockerfile.gpu` | GPU/CUDA image (amd64 + arm64) |
|
||||
| `requirements.txt` | Python dependencies |
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### `GET /health`
|
||||
|
||||
Check if service is ready.
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"model_loaded": true,
|
||||
"dataset_loaded": true,
|
||||
"dataset_size": 24,
|
||||
"det_arch": "db_resnet50",
|
||||
"reco_arch": "crnn_vgg16_bn",
|
||||
"cuda_available": true,
|
||||
"device": "cuda",
|
||||
"gpu_name": "NVIDIA GB10"
|
||||
}
|
||||
```
|
||||
|
||||
### `POST /evaluate`
|
||||
|
||||
Run OCR evaluation with given hyperparameters.
|
||||
|
||||
**Request (9 tunable parameters):**
|
||||
```json
|
||||
{
|
||||
"pdf_folder": "/app/dataset",
|
||||
"assume_straight_pages": true,
|
||||
"straighten_pages": false,
|
||||
"preserve_aspect_ratio": true,
|
||||
"symmetric_pad": true,
|
||||
"disable_page_orientation": false,
|
||||
"disable_crop_orientation": false,
|
||||
"resolve_lines": true,
|
||||
"resolve_blocks": false,
|
||||
"paragraph_break": 0.035,
|
||||
"start_page": 5,
|
||||
"end_page": 10
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"CER": 0.0189,
|
||||
"WER": 0.1023,
|
||||
"TIME": 52.3,
|
||||
"PAGES": 5,
|
||||
"TIME_PER_PAGE": 10.46,
|
||||
"model_reinitialized": false
|
||||
}
|
||||
```
|
||||
|
||||
**Note:** `model_reinitialized` indicates whether the model was reloaded because the processing flags changed (adds ~2-5s overhead).
|
||||
|
||||
## Hyperparameters
|
||||
|
||||
### Processing Flags (Require Model Reinitialization)
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `assume_straight_pages` | true | Skip rotation handling for straight documents |
|
||||
| `straighten_pages` | false | Pre-straighten pages before detection |
|
||||
| `preserve_aspect_ratio` | true | Maintain document proportions during resize |
|
||||
| `symmetric_pad` | true | Use symmetric padding when preserving aspect ratio |
|
||||
|
||||
**Note:** Changing these flags requires model reinitialization (~2-5s).
|
||||
|
||||
### Orientation Flags
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `disable_page_orientation` | false | Skip page orientation classification |
|
||||
| `disable_crop_orientation` | false | Skip crop orientation detection |
|
||||
|
||||
### Output Grouping
|
||||
|
||||
| Parameter | Default | Range | Description |
|
||||
|-----------|---------|-------|-------------|
|
||||
| `resolve_lines` | true | bool | Group words into lines |
|
||||
| `resolve_blocks` | false | bool | Group lines into blocks |
|
||||
| `paragraph_break` | 0.035 | 0.0-1.0 | Minimum space ratio separating paragraphs |
|
||||
|
||||
## Model Architecture
|
||||
|
||||
DocTR uses a two-stage pipeline:
|
||||
|
||||
1. **Detection** (`det_arch`): Localizes text regions
|
||||
- Default: `db_resnet50` (DBNet with ResNet-50 backbone)
|
||||
- Alternatives: `linknet_resnet18`, `db_mobilenet_v3_large`
|
||||
|
||||
2. **Recognition** (`reco_arch`): Recognizes characters
|
||||
- Default: `crnn_vgg16_bn` (CRNN with VGG-16 backbone)
|
||||
- Alternatives: `sar_resnet31`, `master`, `vitstr_small`
|
||||
|
||||
Architecture is set via environment variables (fixed at startup).
|
||||
|
||||
## GPU Support
|
||||
|
||||
### Platform Support
|
||||
|
||||
| Platform | CPU | GPU |
|
||||
|----------|-----|-----|
|
||||
| Linux x86_64 (amd64) | ✅ | ✅ PyTorch CUDA |
|
||||
| Linux ARM64 (GH200/GB200/DGX Spark) | ✅ | ✅ PyTorch CUDA (cu128 index) |
|
||||
| macOS ARM64 (M1/M2) | ✅ | ❌ |
|
||||
|
||||
### PyTorch CUDA on ARM64
|
||||
|
||||
Unlike PaddlePaddle, PyTorch provides **official ARM64 CUDA wheels** on the cu128 index:
|
||||
|
||||
```bash
|
||||
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
|
||||
```
|
||||
|
||||
This works on both amd64 and arm64 platforms with CUDA support.
|
||||
|
||||
### GPU Detection
|
||||
|
||||
DocTR automatically uses GPU when available:
|
||||
|
||||
```python
|
||||
import torch
from doctr.models import ocr_predictor
|
||||
print(torch.cuda.is_available()) # True if GPU available
|
||||
|
||||
# DocTR model moves to GPU
|
||||
model = ocr_predictor(pretrained=True)
|
||||
if torch.cuda.is_available():
|
||||
model = model.cuda()
|
||||
```
|
||||
|
||||
The `/health` endpoint shows GPU status:
|
||||
```json
|
||||
{
|
||||
"cuda_available": true,
|
||||
"device": "cuda",
|
||||
"gpu_name": "NVIDIA GB10",
|
||||
"gpu_memory_total": "128.00 GB"
|
||||
}
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `DOCTR_DET_ARCH` | `db_resnet50` | Detection architecture |
|
||||
| `DOCTR_RECO_ARCH` | `crnn_vgg16_bn` | Recognition architecture |
|
||||
| `CUDA_VISIBLE_DEVICES` | `0` | GPU device selection |
|
||||
|
||||
## CI/CD
|
||||
|
||||
Built images available from registry:
|
||||
|
||||
| Image | Architecture |
|
||||
|-------|--------------|
|
||||
| `seryus.ddns.net/unir/doctr-cpu:latest` | amd64, arm64 |
|
||||
| `seryus.ddns.net/unir/doctr-gpu:latest` | amd64, arm64 |
|
||||
|
||||
## Sources
|
||||
|
||||
- [DocTR Documentation](https://mindee.github.io/doctr/)
|
||||
- [DocTR GitHub](https://github.com/mindee/doctr)
|
||||
- [DocTR Model Usage](https://mindee.github.io/doctr/latest/using_doctr/using_models.html)
|
||||
- [PyTorch ARM64 CUDA Wheels](https://github.com/pytorch/pytorch/issues/160162)
|
||||
67
src/easyocr_service/Dockerfile.gpu
Normal file
67
src/easyocr_service/Dockerfile.gpu
Normal file
@@ -0,0 +1,67 @@
|
||||
# Dockerfile.gpu - EasyOCR GPU Dockerfile for amd64/arm64
#
# Build:
#   docker build -t easyocr-gpu:latest -f Dockerfile.gpu .
#
# Run:
#   docker run --gpus all -p 8002:8000 -v ./dataset:/app/dataset easyocr-gpu:latest

# CUDA 13.0 for Blackwell (sm_121) and GH200/GB200 support
# NOTE(review): the PyTorch wheels installed below come from the cu128 index,
# while this base image is CUDA 13.0 - confirm the pairing is intended.
FROM nvidia/cuda:13.0.2-cudnn-runtime-ubuntu24.04

LABEL maintainer="Sergio Jimenez"
LABEL description="EasyOCR Tuning REST API - GPU/CUDA version"

WORKDIR /app

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV CUDA_VISIBLE_DEVICES=0
ENV EASYOCR_LANGUAGES=es,en
ENV VIRTUAL_ENV=/opt/venv

# Install Python 3.12 and system dependencies, then create a virtual
# environment. Ubuntu 24.04 marks the system interpreter as externally
# managed (PEP 668), so packages are installed into the venv instead of
# system-wide.
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3.12 \
        python3.12-venv \
        python3-pip \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender1 \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.12 /usr/bin/python \
    && /usr/bin/python3.12 -m venv "$VIRTUAL_ENV"

# Put the venv first on PATH so `python`, `pip` and `uvicorn` resolve to it
# in later RUN steps, in the HEALTHCHECK, and in CMD.
ENV PATH="/opt/venv/bin:${PATH}"

# Install PyTorch with CUDA support
# cu128 index has both amd64 and arm64 wheels
RUN python -m pip install --no-cache-dir \
        torch torchvision --index-url https://download.pytorch.org/whl/cu128

# Install EasyOCR and other dependencies.
# Every specifier is quoted: in shell-form RUN an unquoted `pkg>=1.0` is
# parsed by /bin/sh as `pkg` plus a redirection to a file named `=1.0`,
# which silently drops the version constraint.
RUN python -m pip install --no-cache-dir \
        "easyocr>=1.7.0" \
        "fastapi>=0.104.0" \
        "uvicorn[standard]" \
        "pydantic>=2.0.0" \
        "jiwer>=3.0.0" \
        "numpy>=1.24.0" \
        "pillow>=10.0.0"

# Copy application code
COPY easyocr_tuning_rest.py .
COPY dataset_manager.py .

# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.EasyOCR"]

# Expose API port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "easyocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
199
src/easyocr_service/README.md
Normal file
199
src/easyocr_service/README.md
Normal file
@@ -0,0 +1,199 @@
|
||||
# EasyOCR Tuning REST API
|
||||
|
||||
REST API service for EasyOCR hyperparameter evaluation. Keeps the model loaded in memory for fast repeated evaluations during hyperparameter search.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### CPU Version
|
||||
|
||||
```bash
|
||||
cd src/easyocr_service
|
||||
|
||||
# Build
|
||||
docker build -t easyocr-api:cpu .
|
||||
|
||||
# Run
|
||||
docker run -d -p 8002:8000 \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v easyocr-cache:/root/.EasyOCR \
|
||||
easyocr-api:cpu
|
||||
|
||||
# Test
|
||||
curl http://localhost:8002/health
|
||||
```
|
||||
|
||||
### GPU Version
|
||||
|
||||
```bash
|
||||
# Build GPU image (run from src/easyocr_service)
|
||||
docker build -f Dockerfile.gpu -t easyocr-api:gpu .
|
||||
|
||||
# Run with GPU
|
||||
docker run -d -p 8002:8000 --gpus all \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v easyocr-cache:/root/.EasyOCR \
|
||||
easyocr-api:gpu
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `easyocr_tuning_rest.py` | FastAPI REST service with 14 tunable hyperparameters |
|
||||
| `dataset_manager.py` | Dataset loader (shared with other services) |
|
||||
| `Dockerfile` | CPU-only image (amd64 + arm64) |
|
||||
| `Dockerfile.gpu` | GPU/CUDA image (amd64 + arm64) |
|
||||
| `requirements.txt` | Python dependencies |
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### `GET /health`
|
||||
|
||||
Check if service is ready.
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"model_loaded": true,
|
||||
"dataset_loaded": true,
|
||||
"dataset_size": 24,
|
||||
"languages": ["es", "en"],
|
||||
"cuda_available": true,
|
||||
"device": "cuda",
|
||||
"gpu_name": "NVIDIA GB10"
|
||||
}
|
||||
```
|
||||
|
||||
### `POST /evaluate`
|
||||
|
||||
Run OCR evaluation with given hyperparameters.
|
||||
|
||||
**Request (14 tunable parameters):**
|
||||
```json
|
||||
{
|
||||
"pdf_folder": "/app/dataset",
|
||||
"text_threshold": 0.7,
|
||||
"low_text": 0.4,
|
||||
"link_threshold": 0.4,
|
||||
"slope_ths": 0.1,
|
||||
"ycenter_ths": 0.5,
|
||||
"height_ths": 0.5,
|
||||
"width_ths": 0.5,
|
||||
"add_margin": 0.1,
|
||||
"contrast_ths": 0.1,
|
||||
"adjust_contrast": 0.5,
|
||||
"decoder": "greedy",
|
||||
"beamWidth": 5,
|
||||
"min_size": 10,
|
||||
"rotation_info": null,
|
||||
"start_page": 5,
|
||||
"end_page": 10
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{"CER": 0.0234, "WER": 0.1156, "TIME": 45.2, "PAGES": 5, "TIME_PER_PAGE": 9.04}
|
||||
```
|
||||
|
||||
## Hyperparameters
|
||||
|
||||
### Detection (CRAFT Algorithm)
|
||||
|
||||
| Parameter | Default | Range | Description |
|
||||
|-----------|---------|-------|-------------|
|
||||
| `text_threshold` | 0.7 | 0.0-1.0 | Text confidence threshold |
|
||||
| `low_text` | 0.4 | 0.0-1.0 | Text lower-bound score |
|
||||
| `link_threshold` | 0.4 | 0.0-1.0 | Link confidence threshold |
|
||||
|
||||
### Bounding Box Merging
|
||||
|
||||
| Parameter | Default | Range | Description |
|
||||
|-----------|---------|-------|-------------|
|
||||
| `slope_ths` | 0.1 | 0.0-1.0 | Max slope for merging |
|
||||
| `ycenter_ths` | 0.5 | 0.0-2.0 | Max vertical shift |
|
||||
| `height_ths` | 0.5 | 0.0-2.0 | Max height variance |
|
||||
| `width_ths` | 0.5 | 0.0-2.0 | Max horizontal distance |
|
||||
| `add_margin` | 0.1 | 0.0-1.0 | Bounding box extension |
|
||||
|
||||
### Contrast
|
||||
|
||||
| Parameter | Default | Range | Description |
|
||||
|-----------|---------|-------|-------------|
|
||||
| `contrast_ths` | 0.1 | 0.0-1.0 | Contrast threshold for dual-pass |
|
||||
| `adjust_contrast` | 0.5 | 0.0-1.0 | Target contrast level |
|
||||
|
||||
### Decoder
|
||||
|
||||
| Parameter | Default | Options | Description |
|
||||
|-----------|---------|---------|-------------|
|
||||
| `decoder` | "greedy" | greedy, beamsearch, wordbeamsearch | Decoding method |
|
||||
| `beamWidth` | 5 | 1-20 | Beam width (for beam search) |
|
||||
|
||||
### Other
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `min_size` | 10 | Minimum text box size in pixels; smaller boxes are filtered out |
|
||||
| `rotation_info` | null | Rotation angles to try: [90, 180, 270] |
|
||||
|
||||
## GPU Support
|
||||
|
||||
### Platform Support
|
||||
|
||||
| Platform | CPU | GPU |
|
||||
|----------|-----|-----|
|
||||
| Linux x86_64 (amd64) | ✅ | ✅ PyTorch CUDA |
|
||||
| Linux ARM64 (GH200/GB200/DGX Spark) | ✅ | ✅ PyTorch CUDA (cu128 index) |
|
||||
| macOS ARM64 (M1/M2) | ✅ | ❌ |
|
||||
|
||||
### PyTorch CUDA on ARM64
|
||||
|
||||
Unlike PaddlePaddle, PyTorch provides **official ARM64 CUDA wheels** on the cu128 index:
|
||||
|
||||
```bash
|
||||
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
|
||||
```
|
||||
|
||||
This works on both amd64 and arm64 platforms with CUDA support.
|
||||
|
||||
### GPU Detection
|
||||
|
||||
EasyOCR automatically uses GPU when PyTorch CUDA is available:
|
||||
|
||||
```python
|
||||
import torch
|
||||
print(torch.cuda.is_available()) # True if GPU available
|
||||
```
|
||||
|
||||
The `/health` endpoint shows GPU status:
|
||||
```json
|
||||
{
|
||||
"cuda_available": true,
|
||||
"device": "cuda",
|
||||
"gpu_name": "NVIDIA GB10",
|
||||
"gpu_memory_total": "128.00 GB"
|
||||
}
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `EASYOCR_LANGUAGES` | `es,en` | Comma-separated language codes |
|
||||
| `CUDA_VISIBLE_DEVICES` | `0` | GPU device selection |
|
||||
|
||||
## CI/CD
|
||||
|
||||
Built images available from registry:
|
||||
|
||||
| Image | Architecture |
|
||||
|-------|--------------|
|
||||
| `seryus.ddns.net/unir/easyocr-cpu:latest` | amd64, arm64 |
|
||||
| `seryus.ddns.net/unir/easyocr-gpu:latest` | amd64, arm64 |
|
||||
|
||||
## Sources
|
||||
|
||||
- [EasyOCR Documentation](https://www.jaided.ai/easyocr/documentation/)
|
||||
- [EasyOCR GitHub](https://github.com/JaidedAI/EasyOCR)
|
||||
- [PyTorch ARM64 CUDA Wheels](https://github.com/pytorch/pytorch/issues/160162)
|
||||
Reference in New Issue
Block a user