PaddleOCR, EasyOCR and DocTR GPU support. (#4)
All checks were successful
build_docker / essential (push) Successful in 0s
build_docker / build_cpu (push) Successful in 5m0s
build_docker / build_gpu (push) Successful in 22m55s
build_docker / build_easyocr (push) Successful in 18m47s
build_docker / build_easyocr_gpu (push) Successful in 19m0s
build_docker / build_raytune (push) Successful in 3m27s
build_docker / build_doctr (push) Successful in 19m42s
build_docker / build_doctr_gpu (push) Successful in 14m49s
All checks were successful
build_docker / essential (push) Successful in 0s
build_docker / build_cpu (push) Successful in 5m0s
build_docker / build_gpu (push) Successful in 22m55s
build_docker / build_easyocr (push) Successful in 18m47s
build_docker / build_easyocr_gpu (push) Successful in 19m0s
build_docker / build_raytune (push) Successful in 3m27s
build_docker / build_doctr (push) Successful in 19m42s
build_docker / build_doctr_gpu (push) Successful in 14m49s
This commit was merged in pull request #4.
This commit is contained in:
49
src/doctr_service/Dockerfile
Normal file
49
src/doctr_service/Dockerfile
Normal file
@@ -0,0 +1,49 @@
|
||||
# Dockerfile - DocTR Tuning REST API (CPU-only image)
#
# Build:
#   docker build -t doctr-api:latest .
#
# Run:
#   docker run -p 8003:8000 -v ./dataset:/app/dataset doctr-api:latest

FROM python:3.11-slim

LABEL maintainer="Sergio Jimenez"
LABEL description="DocTR Tuning REST API"

WORKDIR /app

# Set environment variables
ENV PYTHONUNBUFFERED=1
# Default model architectures; read by doctr_tuning_rest.py at startup
ENV DOCTR_DET_ARCH=db_resnet50
ENV DOCTR_RECO_ARCH=crnn_vgg16_bn

# Install system dependencies for OpenCV and image processing
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    && rm -rf /var/lib/apt/lists/*

# Copy and install Python dependencies (separate layer for build caching)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY doctr_tuning_rest.py .
COPY dataset_manager.py .

# Volume for dataset and model cache (weights survive container recreation)
VOLUME ["/app/dataset", "/root/.cache/doctr"]

# Expose API port
EXPOSE 8000

# Health check (longer start period for model download)
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "doctr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
63
src/doctr_service/Dockerfile.gpu
Normal file
63
src/doctr_service/Dockerfile.gpu
Normal file
@@ -0,0 +1,63 @@
|
||||
# Dockerfile.gpu - DocTR GPU Dockerfile for amd64/arm64
#
# Build:
#   docker build -t doctr-gpu:latest -f Dockerfile.gpu .
#
# Run:
#   docker run --gpus all -p 8003:8000 -v ./dataset:/app/dataset doctr-gpu:latest

# CUDA 13.0 for Blackwell (sm_121) and GH200/GB200 support
FROM nvidia/cuda:13.0.2-cudnn-runtime-ubuntu24.04

LABEL maintainer="Sergio Jimenez"
LABEL description="DocTR Tuning REST API - GPU/CUDA version"

WORKDIR /app

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV CUDA_VISIBLE_DEVICES=0
# Default model architectures; read by doctr_tuning_rest.py at startup
ENV DOCTR_DET_ARCH=db_resnet50
ENV DOCTR_RECO_ARCH=crnn_vgg16_bn

# Install Python 3.12 and system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3.12 \
    python3.12-venv \
    python3-pip \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.12 /usr/bin/python

# Copy requirements first for better caching
COPY requirements.txt .

# Install PyTorch with CUDA support first (cu128 index has amd64 + arm64 wheels)
RUN python -m pip install --no-cache-dir --break-system-packages \
    torch torchvision --index-url https://download.pytorch.org/whl/cu128

# Install remaining dependencies from requirements.txt (skip torch, already installed)
RUN grep -v "^torch" requirements.txt | python -m pip install --no-cache-dir --break-system-packages -r /dev/stdin

# Copy application code
COPY doctr_tuning_rest.py .
COPY dataset_manager.py .

# Volume for dataset and model cache (weights survive container recreation)
VOLUME ["/app/dataset", "/root/.cache/doctr"]

# Expose API port
EXPOSE 8000

# Health check (longer start period for model download)
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "doctr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
261
src/doctr_service/README.md
Normal file
261
src/doctr_service/README.md
Normal file
@@ -0,0 +1,261 @@
|
||||
# DocTR Tuning REST API
|
||||
|
||||
REST API service for DocTR (Document Text Recognition) hyperparameter evaluation. Keeps the model loaded in memory for fast repeated evaluations during hyperparameter search.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### CPU Version
|
||||
|
||||
```bash
|
||||
cd src/doctr_service
|
||||
|
||||
# Build
|
||||
docker build -t doctr-api:cpu .
|
||||
|
||||
# Run
|
||||
docker run -d -p 8003:8000 \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v doctr-cache:/root/.cache/doctr \
|
||||
doctr-api:cpu
|
||||
|
||||
# Test
|
||||
curl http://localhost:8003/health
|
||||
```
|
||||
|
||||
### GPU Version
|
||||
|
||||
```bash
|
||||
# Build GPU image
|
||||
docker build -f Dockerfile.gpu -t doctr-api:gpu .
|
||||
|
||||
# Run with GPU
|
||||
docker run -d -p 8003:8000 --gpus all \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v doctr-cache:/root/.cache/doctr \
|
||||
doctr-api:gpu
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `doctr_tuning_rest.py` | FastAPI REST service with 9 tunable hyperparameters |
|
||||
| `dataset_manager.py` | Dataset loader (shared with other services) |
|
||||
| `Dockerfile` | CPU-only image (amd64 + arm64) |
|
||||
| `Dockerfile.gpu` | GPU/CUDA image (amd64 + arm64) |
|
||||
| `requirements.txt` | Python dependencies |
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### `GET /health`
|
||||
|
||||
Check if service is ready.
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"model_loaded": true,
|
||||
"dataset_loaded": true,
|
||||
"dataset_size": 24,
|
||||
"det_arch": "db_resnet50",
|
||||
"reco_arch": "crnn_vgg16_bn",
|
||||
"cuda_available": true,
|
||||
"device": "cuda",
|
||||
"gpu_name": "NVIDIA GB10"
|
||||
}
|
||||
```
|
||||
|
||||
### `POST /evaluate`
|
||||
|
||||
Run OCR evaluation with given hyperparameters.
|
||||
|
||||
**Request (9 tunable parameters):**
|
||||
```json
|
||||
{
|
||||
"pdf_folder": "/app/dataset",
|
||||
"assume_straight_pages": true,
|
||||
"straighten_pages": false,
|
||||
"preserve_aspect_ratio": true,
|
||||
"symmetric_pad": true,
|
||||
"disable_page_orientation": false,
|
||||
"disable_crop_orientation": false,
|
||||
"resolve_lines": true,
|
||||
"resolve_blocks": false,
|
||||
"paragraph_break": 0.035,
|
||||
"start_page": 5,
|
||||
"end_page": 10
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"CER": 0.0189,
|
||||
"WER": 0.1023,
|
||||
"TIME": 52.3,
|
||||
"PAGES": 5,
|
||||
"TIME_PER_PAGE": 10.46,
|
||||
"model_reinitialized": false
|
||||
}
|
||||
```
|
||||
|
||||
**Note:** `model_reinitialized` indicates if the model was reloaded due to changed processing flags (adds ~2-5s overhead).
|
||||
|
||||
## Debug Output (debugset)
|
||||
|
||||
The `debugset` folder allows saving OCR predictions for debugging and analysis. When `save_output=True` is passed to `/evaluate`, predictions are written to `/app/debugset`.
|
||||
|
||||
### Enable Debug Output
|
||||
|
||||
```json
|
||||
{
|
||||
"pdf_folder": "/app/dataset",
|
||||
"save_output": true,
|
||||
"start_page": 5,
|
||||
"end_page": 10
|
||||
}
|
||||
```
|
||||
|
||||
### Output Structure
|
||||
|
||||
```
|
||||
debugset/
|
||||
├── doc1/
|
||||
│ └── doctr/
|
||||
│ ├── page_0005.txt
|
||||
│ ├── page_0006.txt
|
||||
│ └── ...
|
||||
├── doc2/
|
||||
│ └── doctr/
|
||||
│ └── ...
|
||||
```
|
||||
|
||||
Each `.txt` file contains the OCR-extracted text for that page.
|
||||
|
||||
### Docker Mount
|
||||
|
||||
Add the debugset volume to your docker run command:
|
||||
|
||||
```bash
|
||||
docker run -d -p 8003:8000 \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v $(pwd)/../debugset:/app/debugset:rw \
|
||||
-v doctr-cache:/root/.cache/doctr \
|
||||
doctr-api:cpu
|
||||
```
|
||||
|
||||
### Use Cases
|
||||
|
||||
- **Compare OCR engines**: Run same pages through PaddleOCR, DocTR, EasyOCR with `save_output=True`, then diff results
|
||||
- **Debug hyperparameters**: See how different settings affect text extraction
|
||||
- **Ground truth comparison**: Compare predictions against expected output
|
||||
|
||||
## Hyperparameters
|
||||
|
||||
### Processing Flags (Require Model Reinitialization)
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `assume_straight_pages` | true | Skip rotation handling for straight documents |
|
||||
| `straighten_pages` | false | Pre-straighten pages before detection |
|
||||
| `preserve_aspect_ratio` | true | Maintain document proportions during resize |
|
||||
| `symmetric_pad` | true | Use symmetric padding when preserving aspect ratio |
|
||||
|
||||
**Note:** Changing these flags requires model reinitialization (~2-5s).
|
||||
|
||||
### Orientation Flags
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `disable_page_orientation` | false | Skip page orientation classification |
|
||||
| `disable_crop_orientation` | false | Skip crop orientation detection |
|
||||
|
||||
### Output Grouping
|
||||
|
||||
| Parameter | Default | Range | Description |
|
||||
|-----------|---------|-------|-------------|
|
||||
| `resolve_lines` | true | bool | Group words into lines |
|
||||
| `resolve_blocks` | false | bool | Group lines into blocks |
|
||||
| `paragraph_break` | 0.035 | 0.0-1.0 | Minimum space ratio separating paragraphs |
|
||||
|
||||
## Model Architecture
|
||||
|
||||
DocTR uses a two-stage pipeline:
|
||||
|
||||
1. **Detection** (`det_arch`): Localizes text regions
|
||||
- Default: `db_resnet50` (DBNet with ResNet-50 backbone)
|
||||
- Alternatives: `linknet_resnet18`, `db_mobilenet_v3_large`
|
||||
|
||||
2. **Recognition** (`reco_arch`): Recognizes characters
|
||||
- Default: `crnn_vgg16_bn` (CRNN with VGG-16 backbone)
|
||||
- Alternatives: `sar_resnet31`, `master`, `vitstr_small`
|
||||
|
||||
Architecture is set via environment variables (fixed at startup).
|
||||
|
||||
## GPU Support
|
||||
|
||||
### Platform Support
|
||||
|
||||
| Platform | CPU | GPU |
|
||||
|----------|-----|-----|
|
||||
| Linux x86_64 (amd64) | ✅ | ✅ PyTorch CUDA |
|
||||
| Linux ARM64 (GH200/GB200/DGX Spark) | ✅ | ✅ PyTorch CUDA (cu128 index) |
|
||||
| macOS ARM64 (M1/M2) | ✅ | ❌ |
|
||||
|
||||
### PyTorch CUDA on ARM64
|
||||
|
||||
Unlike PaddlePaddle, PyTorch provides **official ARM64 CUDA wheels** on the cu128 index:
|
||||
|
||||
```bash
|
||||
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
|
||||
```
|
||||
|
||||
This works on both amd64 and arm64 platforms with CUDA support.
|
||||
|
||||
### GPU Detection
|
||||
|
||||
DocTR automatically uses GPU when available:
|
||||
|
||||
```python
|
||||
import torch
|
||||
print(torch.cuda.is_available()) # True if GPU available
|
||||
|
||||
# DocTR model moves to GPU
|
||||
model = ocr_predictor(pretrained=True)
|
||||
if torch.cuda.is_available():
|
||||
model = model.cuda()
|
||||
```
|
||||
|
||||
The `/health` endpoint shows GPU status:
|
||||
```json
|
||||
{
|
||||
"cuda_available": true,
|
||||
"device": "cuda",
|
||||
"gpu_name": "NVIDIA GB10",
|
||||
"gpu_memory_total": "128.00 GB"
|
||||
}
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `DOCTR_DET_ARCH` | `db_resnet50` | Detection architecture |
|
||||
| `DOCTR_RECO_ARCH` | `crnn_vgg16_bn` | Recognition architecture |
|
||||
| `CUDA_VISIBLE_DEVICES` | `0` | GPU device selection |
|
||||
|
||||
## CI/CD
|
||||
|
||||
Built images available from registry:
|
||||
|
||||
| Image | Architecture |
|
||||
|-------|--------------|
|
||||
| `seryus.ddns.net/unir/doctr-cpu:latest` | amd64, arm64 |
|
||||
| `seryus.ddns.net/unir/doctr-gpu:latest` | amd64, arm64 |
|
||||
|
||||
## Sources
|
||||
|
||||
- [DocTR Documentation](https://mindee.github.io/doctr/)
|
||||
- [DocTR GitHub](https://github.com/mindee/doctr)
|
||||
- [DocTR Model Usage](https://mindee.github.io/doctr/latest/using_doctr/using_models.html)
|
||||
- [PyTorch ARM64 CUDA Wheels](https://github.com/pytorch/pytorch/issues/160162)
|
||||
74
src/doctr_service/dataset_manager.py
Normal file
74
src/doctr_service/dataset_manager.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# Imports
|
||||
import os
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class ImageTextDataset:
    """Paired image / ground-truth-text dataset.

    Expects a layout of ``root/<doc>/img/*.png`` (or .jpg/.jpeg) with a
    matching ``root/<doc>/txt/<same-basename>.txt`` ground-truth file.
    Images without a matching text file are silently skipped.
    """

    def __init__(self, root):
        # (img_path, txt_path) pairs, sorted by folder then filename
        self.samples = []

        for folder in sorted(os.listdir(root)):
            sub = os.path.join(root, folder)
            img_dir = os.path.join(sub, "img")
            txt_dir = os.path.join(sub, "txt")

            # Skip entries that are not document folders with img/ and txt/
            if not (os.path.isdir(img_dir) and os.path.isdir(txt_dir)):
                continue

            for fname in sorted(os.listdir(img_dir)):
                if not fname.lower().endswith((".png", ".jpg", ".jpeg")):
                    continue

                img_path = os.path.join(img_dir, fname)

                # text file must have same name but .txt
                txt_name = os.path.splitext(fname)[0] + ".txt"
                txt_path = os.path.join(txt_dir, txt_name)

                if not os.path.exists(txt_path):
                    continue

                self.samples.append((img_path, txt_path))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(PIL.Image in RGB, ground-truth text)`` for sample *idx*."""
        img_path, txt_path = self.samples[idx]

        # Load image
        image = Image.open(img_path).convert("RGB")

        # Load text
        with open(txt_path, "r", encoding="utf-8") as f:
            text = f.read()

        return image, text

    def get_output_path(self, idx, output_subdir, debugset_root="/app/debugset"):
        """Get output path for saving OCR result to debugset folder.

        Args:
            idx: Sample index
            output_subdir: Subdirectory name (e.g., 'paddle_text', 'doctr_text')
            debugset_root: Root folder for debug output (default: /app/debugset)

        Returns:
            Path like /app/debugset/doc1/{output_subdir}/page_001.txt.
            The output directory is created if missing.
        """
        img_path, _ = self.samples[idx]
        # Normalize separators so the '/dataset/' and '/img/' splits below
        # behave the same on every platform.
        norm = img_path.replace(os.sep, "/")

        # img_path: /app/dataset/doc1/img/page_001.png
        # Extract relative path: doc1/img/page_001.png
        parts = norm.split("/dataset/", 1)
        rel_path = parts[1] if len(parts) == 2 else os.path.basename(img_path)

        # Replace /img/ with /{output_subdir}/.
        # Fix: the previous code indexed rel_parts[1] unconditionally, which
        # raised IndexError whenever the dataset root did not contain
        # '/dataset/' (the basename fallback has no '/img/' to split on).
        rel_parts = rel_path.rsplit("/img/", 1)
        if len(rel_parts) == 2:
            doc_folder, page_name = rel_parts  # e.g. 'doc1', 'page_001.png'
        else:
            # No '/img/' component: write directly under debugset_root.
            doc_folder, page_name = "", rel_parts[0]

        fname = os.path.splitext(page_name)[0] + ".txt"  # page_001.txt

        out_dir = os.path.join(debugset_root, doc_folder, output_subdir)
        os.makedirs(out_dir, exist_ok=True)
        return os.path.join(out_dir, fname)
|
||||
63
src/doctr_service/docker-compose.yml
Normal file
63
src/doctr_service/docker-compose.yml
Normal file
@@ -0,0 +1,63 @@
|
||||
# docker-compose.yml - DocTR REST API
# Usage:
#   CPU: docker compose up ocr-cpu
#   GPU: docker compose up ocr-gpu
#
# Port: 8003 (host) -> 8000 (container)

services:
  # CPU-only service
  ocr-cpu:
    image: seryus.ddns.net/unir/doctr-cpu:latest
    container_name: doctr-cpu
    ports:
      - "8003:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - doctr-cache:/root/.cache/doctr
    environment:
      - PYTHONUNBUFFERED=1
      - DOCTR_DET_ARCH=db_resnet50
      - DOCTR_RECO_ARCH=crnn_vgg16_bn
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      # generous start period: first boot downloads model weights
      start_period: 180s

  # GPU service (requires NVIDIA Container Toolkit)
  ocr-gpu:
    image: seryus.ddns.net/unir/doctr-gpu:latest
    container_name: doctr-gpu
    ports:
      - "8003:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - doctr-cache:/root/.cache/doctr
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - DOCTR_DET_ARCH=db_resnet50
      - DOCTR_RECO_ARCH=crnn_vgg16_bn
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 180s

# Named volume so downloaded model weights survive container recreation
volumes:
  doctr-cache:
    name: doctr-model-cache
|
||||
336
src/doctr_service/doctr_tuning_rest.py
Normal file
336
src/doctr_service/doctr_tuning_rest.py
Normal file
@@ -0,0 +1,336 @@
|
||||
# doctr_tuning_rest.py
|
||||
# FastAPI REST service for DocTR hyperparameter evaluation
|
||||
# Usage: uvicorn doctr_tuning_rest:app --host 0.0.0.0 --port 8000
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import threading
|
||||
from typing import Optional
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from doctr.models import ocr_predictor
|
||||
from jiwer import wer, cer
|
||||
from dataset_manager import ImageTextDataset
|
||||
|
||||
|
||||
def get_gpu_info() -> dict:
    """Return a snapshot of PyTorch's CUDA/GPU status as a plain dict.

    Always contains the keys ``cuda_available``, ``device``, ``gpu_count``,
    ``gpu_name``, ``gpu_memory_total`` and ``gpu_memory_used``; a
    ``gpu_error`` key is added only if probing the device fails.
    """
    cuda_ok = torch.cuda.is_available()
    info = {
        "cuda_available": cuda_ok,
        "device": "cuda" if cuda_ok else "cpu",
        "gpu_count": 0,
        "gpu_name": None,
        "gpu_memory_total": None,
        "gpu_memory_used": None,
    }

    if not cuda_ok:
        return info

    try:
        count = torch.cuda.device_count()
        info["gpu_count"] = count
        if count > 0:
            props = torch.cuda.get_device_properties(0)
            info["gpu_name"] = torch.cuda.get_device_name(0)
            info["gpu_memory_total"] = f"{props.total_memory / (1024 ** 3):.2f} GB"
            info["gpu_memory_used"] = f"{torch.cuda.memory_allocated(0) / (1024 ** 3):.2f} GB"
    except Exception as e:
        # Report probe failures instead of crashing the health endpoint.
        info["gpu_error"] = str(e)

    return info
|
||||
|
||||
|
||||
# Model configuration via environment variables (fixed for the process
# lifetime; only processing flags can change per request)
DEFAULT_DET_ARCH = os.environ.get("DOCTR_DET_ARCH", "db_resnet50")
DEFAULT_RECO_ARCH = os.environ.get("DOCTR_RECO_ARCH", "crnn_vgg16_bn")


# Global state for model and dataset
class AppState:
    # Loaded DocTR predictor (None until lifespan startup completes)
    model: Optional[object] = None
    # Cached dataset and the folder it was loaded from (reloaded on path change)
    dataset: Optional[ImageTextDataset] = None
    dataset_path: Optional[str] = None
    det_arch: str = DEFAULT_DET_ARCH
    reco_arch: str = DEFAULT_RECO_ARCH
    # Track current model config for cache invalidation
    current_config: Optional[dict] = None
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    lock: threading.Lock = None  # Protects OCR model from concurrent access

    def __init__(self):
        # Each instance gets its own lock (class attribute is just a placeholder)
        self.lock = threading.Lock()


# Single shared instance used by all request handlers
state = AppState()
|
||||
|
||||
|
||||
def create_model(
    assume_straight_pages: bool = True,
    straighten_pages: bool = False,
    preserve_aspect_ratio: bool = True,
    symmetric_pad: bool = True,
    disable_page_orientation: bool = False,
    disable_crop_orientation: bool = False,
) -> object:
    """Create DocTR model with given configuration.

    Architectures come from the global ``state`` (environment-driven, fixed
    at startup); only the processing flags vary per call. The returned
    predictor is moved to GPU when ``state.device == "cuda"``.
    """
    model = ocr_predictor(
        det_arch=state.det_arch,
        reco_arch=state.reco_arch,
        pretrained=True,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
    )

    # Apply orientation settings if supported
    # (hasattr guard -- presumably for python-doctr version compatibility;
    # TODO confirm minimum version that exposes these attributes)
    if hasattr(model, 'disable_page_orientation'):
        model.disable_page_orientation = disable_page_orientation
    if hasattr(model, 'disable_crop_orientation'):
        model.disable_crop_orientation = disable_crop_orientation

    # Move to GPU if available
    if state.device == "cuda":
        model = model.cuda()

    return model
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load DocTR model at startup with default configuration.

    Runs before the server accepts requests; the teardown after ``yield``
    drops the model/dataset references so they can be garbage-collected.
    """
    gpu_info = get_gpu_info()
    print("=" * 50)
    print("GPU STATUS")
    print("=" * 50)
    print(f" CUDA available: {gpu_info['cuda_available']}")
    print(f" Device: {gpu_info['device']}")
    if gpu_info['cuda_available']:
        print(f" GPU count: {gpu_info['gpu_count']}")
        print(f" GPU name: {gpu_info['gpu_name']}")
        print(f" GPU memory total: {gpu_info['gpu_memory_total']}")
    print("=" * 50)

    print(f"Loading DocTR models...")
    print(f" Detection: {state.det_arch}")
    print(f" Recognition: {state.reco_arch}")

    # Load with default config
    state.model = create_model()
    # Must mirror create_model()'s defaults exactly, otherwise the first
    # /evaluate call would needlessly reinitialize the model.
    state.current_config = {
        "assume_straight_pages": True,
        "straighten_pages": False,
        "preserve_aspect_ratio": True,
        "symmetric_pad": True,
        "disable_page_orientation": False,
        "disable_crop_orientation": False,
    }

    if gpu_info['cuda_available']:
        # Re-probe to show how much GPU memory the loaded model occupies
        gpu_after = get_gpu_info()
        print(f" GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")

    print("Model loaded successfully!")
    yield
    # Shutdown: release references
    state.model = None
    state.dataset = None
|
||||
|
||||
|
||||
# ASGI application; `lifespan` loads the model before any request is served
app = FastAPI(
    title="DocTR Tuning API",
    description="REST API for DocTR hyperparameter evaluation",
    version="1.0.0",
    lifespan=lifespan,
)
|
||||
|
||||
|
||||
class EvaluateRequest(BaseModel):
    """Request schema with all tunable DocTR hyperparameters."""
    pdf_folder: str = Field("/app/dataset", description="Path to dataset folder")

    # Processing flags (require model reinit)
    assume_straight_pages: bool = Field(True, description="Skip rotation handling for straight documents")
    straighten_pages: bool = Field(False, description="Pre-straighten pages before detection")
    preserve_aspect_ratio: bool = Field(True, description="Maintain document proportions during resize")
    symmetric_pad: bool = Field(True, description="Use symmetric padding when preserving aspect ratio")

    # Orientation flags
    disable_page_orientation: bool = Field(False, description="Skip page orientation classification")
    disable_crop_orientation: bool = Field(False, description="Skip crop orientation detection")

    # Output grouping
    resolve_lines: bool = Field(True, description="Group words into lines")
    resolve_blocks: bool = Field(False, description="Group lines into blocks")
    # NOTE(review): paragraph_break is validated here but never referenced by
    # the /evaluate handler in this file -- confirm whether it should be
    # forwarded to the predictor.
    paragraph_break: float = Field(0.035, ge=0.0, le=1.0, description="Minimum space ratio separating paragraphs")

    # Page range
    start_page: int = Field(5, ge=0, description="Start page index (inclusive)")
    end_page: int = Field(10, ge=1, description="End page index (exclusive)")
    save_output: bool = Field(False, description="Save OCR predictions to debugset folder")
|
||||
|
||||
|
||||
class EvaluateResponse(BaseModel):
    """Response schema matching CLI output."""
    CER: float            # mean character error rate over evaluated pages
    WER: float            # mean word error rate over evaluated pages
    TIME: float           # total wall-clock seconds for the request
    PAGES: int            # number of pages actually evaluated
    TIME_PER_PAGE: float  # mean OCR seconds per page
    model_reinitialized: bool = False  # True if changed flags forced a model reload
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
    """Health/readiness payload; GPU fields are None on CPU-only hosts."""
    status: str  # "ok" once the model is loaded, else "initializing"
    model_loaded: bool
    dataset_loaded: bool
    dataset_size: Optional[int] = None
    det_arch: Optional[str] = None
    reco_arch: Optional[str] = None
    cuda_available: Optional[bool] = None
    device: Optional[str] = None
    gpu_name: Optional[str] = None
    gpu_memory_used: Optional[str] = None
    gpu_memory_total: Optional[str] = None
|
||||
|
||||
|
||||
def doctr_result_to_text(result, resolve_lines: bool = True, resolve_blocks: bool = False) -> str:
    """
    Convert a DocTR result to whitespace-normalized plain text.

    Structure: Document -> pages -> blocks -> lines -> words.

    Args:
        result: DocTR Document result object.
        resolve_lines: Kept for interface compatibility; the trailing
            whitespace normalization makes it a no-op in this function
            (line grouping only matters upstream in the predictor).
        resolve_blocks: Kept for interface compatibility; the previous
            implementation appended an empty "" paragraph separator that
            the join immediately filtered out again, so it never affected
            the output -- that dead branch has been removed.

    Returns:
        All recognized words joined by single spaces.
    """
    lines = []
    for page in result.pages:
        for block in page.blocks:
            for line in block.lines:
                lines.append(" ".join(w.value for w in line.words))

    # Collapse every whitespace run to a single space.
    text = " ".join(l for l in lines if l)
    return re.sub(r"\s+", " ", text).strip()
|
||||
|
||||
|
||||
def evaluate_text(reference: str, prediction: str) -> dict:
    """Calculate WER and CER metrics (via jiwer) for one page's text."""
    return {"WER": wer(reference, prediction), "CER": cer(reference, prediction)}
|
||||
|
||||
|
||||
@app.get("/health", response_model=HealthResponse)
def health_check():
    """Check if the service is ready and report model/dataset/GPU status."""
    gpu_info = get_gpu_info()
    return HealthResponse(
        status="ok" if state.model is not None else "initializing",
        model_loaded=state.model is not None,
        dataset_loaded=state.dataset is not None,
        # dataset is None until the first /evaluate call loads it
        dataset_size=len(state.dataset) if state.dataset else None,
        det_arch=state.det_arch,
        reco_arch=state.reco_arch,
        cuda_available=gpu_info.get("cuda_available"),
        device=gpu_info.get("device"),
        gpu_name=gpu_info.get("gpu_name"),
        gpu_memory_used=gpu_info.get("gpu_memory_used"),
        gpu_memory_total=gpu_info.get("gpu_memory_total"),
    )
|
||||
|
||||
|
||||
@app.post("/evaluate", response_model=EvaluateRequest if False else EvaluateResponse)  # noqa: keep original response model
def evaluate(request: EvaluateRequest):
    """
    Evaluate OCR with given hyperparameters.
    Returns CER, WER, and timing metrics.
    Note: Model will be reinitialized if processing flags change.
    """
    if state.model is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    # Load or reload dataset if path changed
    if state.dataset is None or state.dataset_path != request.pdf_folder:
        if not os.path.isdir(request.pdf_folder):
            raise HTTPException(status_code=400, detail=f"Dataset folder not found: {request.pdf_folder}")
        state.dataset = ImageTextDataset(request.pdf_folder)
        state.dataset_path = request.pdf_folder

    if len(state.dataset) == 0:
        raise HTTPException(status_code=400, detail="Dataset is empty")

    # Validate page range (end is clamped to the dataset size)
    start = request.start_page
    end = min(request.end_page, len(state.dataset))
    if start >= end:
        raise HTTPException(status_code=400, detail=f"Invalid page range: {start}-{end}")

    cer_list, wer_list = [], []
    time_per_page_list = []
    t0 = time.time()

    # Lock to prevent concurrent OCR access (model is not thread-safe)
    with state.lock:
        # Check if model needs to be reinitialized
        new_config = {
            "assume_straight_pages": request.assume_straight_pages,
            "straighten_pages": request.straighten_pages,
            "preserve_aspect_ratio": request.preserve_aspect_ratio,
            "symmetric_pad": request.symmetric_pad,
            "disable_page_orientation": request.disable_page_orientation,
            "disable_crop_orientation": request.disable_crop_orientation,
        }

        model_reinitialized = False
        if state.current_config != new_config:
            print(f"Model config changed, reinitializing...")
            state.model = create_model(**new_config)
            state.current_config = new_config
            model_reinitialized = True

        for idx in range(start, end):
            img, ref = state.dataset[idx]
            arr = np.array(img)

            tp0 = time.time()
            # DocTR expects a list of images
            result = state.model([arr])

            # NOTE(review): request.paragraph_break is not forwarded here --
            # confirm whether it should influence text extraction.
            pred = doctr_result_to_text(
                result,
                resolve_lines=request.resolve_lines,
                resolve_blocks=request.resolve_blocks,
            )
            time_per_page_list.append(float(time.time() - tp0))

            # Save prediction to debugset if requested
            if request.save_output:
                out_path = state.dataset.get_output_path(idx, "doctr_text")
                with open(out_path, "w", encoding="utf-8") as f:
                    f.write(pred)

            m = evaluate_text(ref, pred)
            cer_list.append(m["CER"])
            wer_list.append(m["WER"])

    # Empty lists cannot occur here (start < end guaranteed above), but the
    # 1.0 fallbacks keep the response well-defined regardless.
    return EvaluateResponse(
        CER=float(np.mean(cer_list)) if cer_list else 1.0,
        WER=float(np.mean(wer_list)) if wer_list else 1.0,
        TIME=float(time.time() - t0),
        PAGES=len(cer_list),
        TIME_PER_PAGE=float(np.mean(time_per_page_list)) if time_per_page_list else 0.0,
        model_reinitialized=model_reinitialized,
    )
|
||||
|
||||
|
||||
@app.post("/evaluate_full", response_model=EvaluateResponse)
def evaluate_full(request: EvaluateRequest):
    """Evaluate on ALL pages (ignores start_page/end_page).

    Mutates the incoming request in place before delegating; harmless over
    HTTP since each request gets a fresh model instance.
    """
    request.start_page = 0
    request.end_page = 9999  # evaluate() clamps this to len(dataset)
    return evaluate(request)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running directly (python doctr_tuning_rest.py) without the uvicorn CLI
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
8
src/doctr_service/requirements.txt
Normal file
8
src/doctr_service/requirements.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
python-doctr[torch]>=0.8.0
|
||||
fastapi>=0.104.0
|
||||
uvicorn>=0.24.0
|
||||
pydantic>=2.0.0
|
||||
jiwer>=3.0.0
|
||||
numpy>=1.24.0
|
||||
pillow>=10.0.0
|
||||
torch>=2.0.0
|
||||
Reference in New Issue
Block a user