PaddleOCR, EasyOCR and DocTR GPU support (#4)
All checks were successful
build_docker / essential (push) Successful in 0s
build_docker / build_cpu (push) Successful in 5m0s
build_docker / build_gpu (push) Successful in 22m55s
build_docker / build_easyocr (push) Successful in 18m47s
build_docker / build_easyocr_gpu (push) Successful in 19m0s
build_docker / build_raytune (push) Successful in 3m27s
build_docker / build_doctr (push) Successful in 19m42s
build_docker / build_doctr_gpu (push) Successful in 14m49s
This commit was merged in pull request #4.
src/README.md (new file, 153 lines)
@@ -0,0 +1,153 @@
# OCR Hyperparameter Tuning with Ray Tune

This directory contains the Docker setup for running automated hyperparameter optimization on OCR services using Ray Tune with Optuna.

## Prerequisites

- Docker with NVIDIA GPU support (`nvidia-container-toolkit`)
- NVIDIA GPU with CUDA support

## Quick Start

```bash
cd src

# Start the PaddleOCR service and run tuning (images are pulled from the registry)
docker compose -f docker-compose.tuning.paddle.yml up -d paddle-ocr-gpu
docker compose -f docker-compose.tuning.paddle.yml run raytune --service paddle --samples 64
```

## Available Services

| Service | Port | Compose File |
|---------|------|--------------|
| PaddleOCR | 8002 | `docker-compose.tuning.paddle.yml` |
| DocTR | 8003 | `docker-compose.tuning.doctr.yml` |
| EasyOCR | 8002 | `docker-compose.tuning.easyocr.yml` |

**Note:** PaddleOCR and EasyOCR both use port 8002, so they must be run separately.

## Usage Examples

### PaddleOCR Tuning

```bash
# Start the service
docker compose -f docker-compose.tuning.paddle.yml up -d paddle-ocr-gpu

# Wait for the health check to pass (verify with)
curl http://localhost:8002/health

# Run tuning (64 samples)
docker compose -f docker-compose.tuning.paddle.yml run raytune --service paddle --samples 64

# Stop the service
docker compose -f docker-compose.tuning.paddle.yml down
```

### DocTR Tuning

```bash
docker compose -f docker-compose.tuning.doctr.yml up -d doctr-gpu
curl http://localhost:8003/health
docker compose -f docker-compose.tuning.doctr.yml run raytune --service doctr --samples 64
docker compose -f docker-compose.tuning.doctr.yml down
```

### EasyOCR Tuning

```bash
docker compose -f docker-compose.tuning.easyocr.yml up -d easyocr-gpu
curl http://localhost:8002/health
docker compose -f docker-compose.tuning.easyocr.yml run raytune --service easyocr --samples 64
docker compose -f docker-compose.tuning.easyocr.yml down
```

### Run Multiple Services (PaddleOCR + DocTR)

```bash
# Start both services
docker compose -f docker-compose.tuning.yml up -d paddle-ocr-gpu doctr-gpu

# Run tuning for each service
docker compose -f docker-compose.tuning.yml run raytune --service paddle --samples 64
docker compose -f docker-compose.tuning.yml run raytune --service doctr --samples 64

# Stop everything
docker compose -f docker-compose.tuning.yml down
```

## Command Line Options

```bash
docker compose -f <compose-file> run raytune --service <service> --samples <n>
```

| Option | Description | Default |
|--------|-------------|---------|
| `--service` | OCR service: `paddle`, `doctr`, `easyocr` | Required |
| `--samples` | Number of hyperparameter trials | 64 |

## Output

Results are saved to `src/results/` as CSV files:
- `raytune_paddle_results_<timestamp>.csv`
- `raytune_doctr_results_<timestamp>.csv`
- `raytune_easyocr_results_<timestamp>.csv`
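For a quick look at a results file, the rows can be ranked with the standard library. This is a minimal sketch: the `CER`/`WER` column names are assumptions based on the metrics the services report, so check the actual header of your CSV.

```python
import csv
import io

# Hypothetical contents of a raytune_*_results_<timestamp>.csv file;
# the column names here are assumptions -- inspect your real header.
csv_text = """trial_id,CER,WER
0,0.031,0.14
1,0.019,0.10
2,0.024,0.12
"""

rows = list(csv.DictReader(io.StringIO(csv_text)))
best = min(rows, key=lambda r: float(r["CER"]))
print(best["trial_id"])  # trial with the lowest character error rate -> 1
```

In practice, replace `io.StringIO(csv_text)` with `open("src/results/raytune_paddle_results_<timestamp>.csv")`.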
## Directory Structure

```
src/
├── docker-compose.tuning.yml           # All services (PaddleOCR + DocTR)
├── docker-compose.tuning.paddle.yml    # PaddleOCR only
├── docker-compose.tuning.doctr.yml     # DocTR only
├── docker-compose.tuning.easyocr.yml   # EasyOCR only
├── raytune/
│   ├── Dockerfile
│   ├── requirements.txt
│   ├── raytune_ocr.py
│   └── run_tuning.py
├── dataset/                            # Input images and ground truth
├── results/                            # Output CSV files
└── debugset/                           # Debug output
```

## Docker Images

All images are pre-built and pulled from the registry:
- `seryus.ddns.net/unir/raytune:latest` - Ray Tune tuning service
- `seryus.ddns.net/unir/paddle-ocr-gpu:latest` - PaddleOCR GPU
- `seryus.ddns.net/unir/doctr-gpu:latest` - DocTR GPU
- `seryus.ddns.net/unir/easyocr-gpu:latest` - EasyOCR GPU

### Build locally (development)

```bash
# Build raytune image locally
docker build -t seryus.ddns.net/unir/raytune:latest ./raytune
```

## Troubleshooting

### Service not ready

Wait for the health check to pass before running tuning:

```bash
# Check service health
curl http://localhost:8002/health
# Expected: {"status": "ok", "model_loaded": true, ...}
```
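When scripting a tuning run, the manual wait can be automated by polling the health endpoint until it reports `"status": "ok"`. A minimal sketch (the timeout and poll intervals are arbitrary choices, not project defaults):

```python
import json
import time
import urllib.request


def wait_for_health(url: str, timeout: float = 300.0, poll: float = 5.0) -> dict:
    """Poll a /health endpoint until the service reports status "ok"."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                payload = json.load(resp)
            if payload.get("status") == "ok":
                return payload  # service is ready
        except OSError:
            pass  # service still starting up or not reachable yet
        time.sleep(poll)
    raise TimeoutError(f"{url} not healthy after {timeout}s")


# Example: wait_for_health("http://localhost:8002/health")
```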
### GPU not detected

Ensure `nvidia-container-toolkit` is installed:

```bash
nvidia-smi  # Should show your GPU
docker run --rm --gpus all nvidia/cuda:12.4.1-base nvidia-smi
```

### Port already in use

Stop any running OCR services:

```bash
docker compose -f docker-compose.tuning.paddle.yml down
docker compose -f docker-compose.tuning.easyocr.yml down
```
src/dataset_formatting/convert_to_hf_dataset.py (new executable file, 138 lines)
@@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""Convert custom OCR dataset to Hugging Face format."""

import json
import shutil
from pathlib import Path


def convert_dataset(source_dir: str, output_dir: str):
    """Convert folder-based dataset to HF ImageFolder format."""

    source = Path(source_dir)
    output = Path(output_dir)
    data_dir = output / "data"
    data_dir.mkdir(parents=True, exist_ok=True)

    metadata = []

    for doc_folder in sorted(source.iterdir()):
        if not doc_folder.is_dir():
            continue

        doc_id = doc_folder.name
        img_dir = doc_folder / "img"
        txt_dir = doc_folder / "txt"

        if not img_dir.exists() or not txt_dir.exists():
            continue

        for img_file in sorted(img_dir.glob("*.png")):
            txt_file = txt_dir / f"{img_file.stem}.txt"
            if not txt_file.exists():
                continue

            # Extract page number
            page_num = int(img_file.stem.split("_")[-1])

            # New filename: page_{doc_id}_{page_num:04d}.png
            new_name = f"page_{doc_id}_{page_num:04d}.png"

            # Copy image
            shutil.copy(img_file, data_dir / new_name)

            # Read text
            text = txt_file.read_text(encoding="utf-8").strip()

            # Add metadata entry
            metadata.append({
                "file_name": f"data/{new_name}",
                "text": text,
                "document_id": doc_id,
                "page_number": page_num
            })

    # Write metadata.jsonl
    with open(output / "metadata.jsonl", "w", encoding="utf-8") as f:
        for entry in metadata:
            f.write(json.dumps(entry, ensure_ascii=False) + "\n")

    # Write dataset card
    write_dataset_card(output, len(metadata))

    print(f"Converted {len(metadata)} samples to {output}")


def write_dataset_card(output_dir: Path, num_samples: int):
    """Write HF dataset card."""
    card = f'''---
dataset_info:
  features:
  - name: image
    dtype: image
  - name: text
    dtype: string
  - name: document_id
    dtype: string
  - name: page_number
    dtype: int32
  splits:
  - name: train
    num_examples: {num_samples}
license: cc-by-4.0
language:
- es
task_categories:
- image-to-text
tags:
- ocr
- spanish
- academic-documents
- unir
---

# UNIR OCR Dataset

Spanish-language academic document dataset for evaluating OCR systems.

## Description

- **Language**: Spanish
- **Domain**: Academic documents (UNIR TFE instructions)
- **Format**: PNG images (300 DPI) + ground-truth text
- **Total**: {num_samples} image-text pairs

## Usage

```python
from datasets import load_dataset

dataset = load_dataset("path/to/dataset")

for sample in dataset["train"]:
    image = sample["image"]
    text = sample["text"]
```

## Structure

Each sample contains:
- `image`: Page image (PIL.Image)
- `text`: Ground-truth text extracted from the PDF
- `document_id`: Source document ID
- `page_number`: Page number

## Citation

Part of the Master's thesis (TFM) "Optimización de Hiperparámetros OCR con Ray Tune" - UNIR 2025
'''
    (output_dir / "README.md").write_text(card, encoding="utf-8")


if __name__ == "__main__":
    import sys

    source = sys.argv[1] if len(sys.argv) > 1 else "src/dataset"
    output = sys.argv[2] if len(sys.argv) > 2 else "src/dataset_hf"

    convert_dataset(source, output)
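The renaming and metadata scheme used by the script can be checked in isolation. This sketch reproduces the filename and `metadata.jsonl` logic for one hypothetical page ("doc1"/"page_7.png") without touching any files:

```python
import json

# Hypothetical sample: document folder "doc1", image "page_7.png"
doc_id, stem = "doc1", "page_7"

# Same logic as the conversion script
page_num = int(stem.split("_")[-1])
new_name = f"page_{doc_id}_{page_num:04d}.png"

entry = {
    "file_name": f"data/{new_name}",
    "text": "texto de ejemplo",
    "document_id": doc_id,
    "page_number": page_num,
}
line = json.dumps(entry, ensure_ascii=False)  # one metadata.jsonl line
print(new_name)  # -> page_doc1_0007.png
```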
src/dataset_formatting/upload-dataset.sh (new executable file, 63 lines)
@@ -0,0 +1,63 @@
#!/bin/bash
# Upload the OCR dataset to Gitea generic packages
#
# Usage:
#   ./src/dataset_formatting/upload-dataset.sh [token]
#
# Environment variables:
#   GITEA_TOKEN - Gitea API token

set -e

GITEA_URL="https://seryus.ddns.net"
GITEA_ORG="unir"
PACKAGE_NAME="ocr-dataset-spanish"
VERSION="1.0.0"
DATASET_DIR="src/dataset_hf"
TARBALL="dataset-${VERSION}.tar.gz"

# Get the token
TOKEN="${1:-${GITEA_TOKEN}}"
if [ -z "$TOKEN" ]; then
    echo "Error: No token provided"
    echo "Usage: $0 [token]"
    echo "  or set the GITEA_TOKEN environment variable"
    exit 1
fi

# Check that the dataset exists
if [ ! -d "$DATASET_DIR" ]; then
    echo "Error: Dataset not found at $DATASET_DIR"
    echo "Run: python src/dataset_formatting/convert_to_hf_dataset.py first"
    exit 1
fi

# Create the tarball
echo "Creating tarball..."
tar -czvf "$TARBALL" -C "$DATASET_DIR" .
echo "Created: $TARBALL ($(du -h "$TARBALL" | cut -f1))"

# Upload
echo "Uploading to Gitea packages..."
echo "  URL: $GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$TARBALL"

HTTP_CODE=$(curl -sS -w "%{http_code}" -o /tmp/upload_response.txt \
    -X PUT \
    -H "Authorization: token $TOKEN" \
    -H "Content-Type: application/octet-stream" \
    --data-binary "@$TARBALL" \
    "$GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$TARBALL")

if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
    echo "Success! Dataset uploaded."
    echo "Download URL: $GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$TARBALL"
    rm "$TARBALL"
elif [ "$HTTP_CODE" = "409" ]; then
    echo "Package version already exists (HTTP 409)"
    rm "$TARBALL"
else
    echo "Error: Upload failed with HTTP $HTTP_CODE"
    cat /tmp/upload_response.txt
    rm "$TARBALL"
    exit 1
fi
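For reference, the generic-package URL the script PUTs to (and from which the tarball can later be downloaded) is assembled from the same variables; a quick Python sketch of the layout, using the values from the script:

```python
# Gitea generic-package URL layout, built from the script's variables.
GITEA_URL = "https://seryus.ddns.net"
GITEA_ORG = "unir"
PACKAGE_NAME = "ocr-dataset-spanish"
VERSION = "1.0.0"
TARBALL = f"dataset-{VERSION}.tar.gz"

url = f"{GITEA_URL}/api/packages/{GITEA_ORG}/generic/{PACKAGE_NAME}/{VERSION}/{TARBALL}"
print(url)
```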
@@ -42,4 +42,33 @@ class ImageTextDataset:
        with open(txt_path, "r", encoding="utf-8") as f:
            text = f.read()

        return image, text

    def get_output_path(self, idx, output_subdir, debugset_root="/app/debugset"):
        """Get output path for saving OCR result to debugset folder.

        Args:
            idx: Sample index
            output_subdir: Subdirectory name (e.g., 'paddle_text', 'doctr_text')
            debugset_root: Root folder for debug output (default: /app/debugset)

        Returns:
            Path like /app/debugset/doc1/{output_subdir}/page_001.txt
        """
        img_path, _ = self.samples[idx]
        # img_path: /app/dataset/doc1/img/page_001.png
        # Extract relative path: doc1/img/page_001.png
        parts = img_path.split("/dataset/", 1)
        if len(parts) == 2:
            rel_path = parts[1]  # doc1/img/page_001.png
        else:
            rel_path = os.path.basename(img_path)

        # Replace /img/ with /{output_subdir}/
        rel_parts = rel_path.rsplit("/img/", 1)
        doc_folder = rel_parts[0]  # doc1
        fname = os.path.splitext(rel_parts[1])[0] + ".txt"  # page_001.txt

        out_dir = os.path.join(debugset_root, doc_folder, output_subdir)
        os.makedirs(out_dir, exist_ok=True)
        return os.path.join(out_dir, fname)
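The path mapping performed by `get_output_path` can be exercised on its own. This standalone sketch reproduces the same string logic outside the class (the `map_to_debugset` name is hypothetical, and the `os.makedirs` side effect is deliberately omitted):

```python
import os


def map_to_debugset(img_path: str, output_subdir: str, debugset_root: str) -> str:
    """Standalone version of the path mapping in get_output_path (no mkdir)."""
    # /app/dataset/doc1/img/page_001.png -> doc1/img/page_001.png
    parts = img_path.split("/dataset/", 1)
    rel_path = parts[1] if len(parts) == 2 else os.path.basename(img_path)
    # doc1/img/page_001.png -> ("doc1", "page_001.png")
    doc_folder, page = rel_path.rsplit("/img/", 1)
    fname = os.path.splitext(page)[0] + ".txt"
    return os.path.join(debugset_root, doc_folder, output_subdir, fname)


print(map_to_debugset("/app/dataset/doc1/img/page_001.png", "doctr_text", "/app/debugset"))
# -> /app/debugset/doc1/doctr_text/page_001.txt
```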
src/docker-compose.tuning.doctr.yml (new file, 50 lines)
@@ -0,0 +1,50 @@
# docker-compose.tuning.doctr.yml - Ray Tune with DocTR GPU
# Usage:
#   docker compose -f docker-compose.tuning.doctr.yml up -d doctr-gpu
#   docker compose -f docker-compose.tuning.doctr.yml run raytune --service doctr --samples 64
#   docker compose -f docker-compose.tuning.doctr.yml down

services:
  raytune:
    image: seryus.ddns.net/unir/raytune:latest
    command: ["--service", "doctr", "--host", "doctr-gpu", "--port", "8000", "--samples", "64"]
    volumes:
      - ./results:/app/results:rw
    environment:
      - PYTHONUNBUFFERED=1
    depends_on:
      doctr-gpu:
        condition: service_healthy

  doctr-gpu:
    image: seryus.ddns.net/unir/doctr-gpu:latest
    container_name: doctr-gpu-tuning
    ports:
      - "8003:8000"
    volumes:
      - ./dataset:/app/dataset:ro
      - ./debugset:/app/debugset:rw
      - doctr-cache:/root/.cache/doctr
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - DOCTR_DET_ARCH=db_resnet50
      - DOCTR_RECO_ARCH=crnn_vgg16_bn
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 180s

volumes:
  doctr-cache:
    name: doctr-model-cache
src/docker-compose.tuning.easyocr.yml (new file, 51 lines)
@@ -0,0 +1,51 @@
# docker-compose.tuning.easyocr.yml - Ray Tune with EasyOCR GPU
# Usage:
#   docker compose -f docker-compose.tuning.easyocr.yml up -d easyocr-gpu
#   docker compose -f docker-compose.tuning.easyocr.yml run raytune --service easyocr --samples 64
#   docker compose -f docker-compose.tuning.easyocr.yml down
#
# Note: EasyOCR uses port 8002 (same as PaddleOCR). They cannot run simultaneously.

services:
  raytune:
    image: seryus.ddns.net/unir/raytune:latest
    command: ["--service", "easyocr", "--host", "easyocr-gpu", "--port", "8000", "--samples", "64"]
    volumes:
      - ./results:/app/results:rw
    environment:
      - PYTHONUNBUFFERED=1
    depends_on:
      easyocr-gpu:
        condition: service_healthy

  easyocr-gpu:
    image: seryus.ddns.net/unir/easyocr-gpu:latest
    container_name: easyocr-gpu-tuning
    ports:
      - "8002:8000"
    volumes:
      - ./dataset:/app/dataset:ro
      - ./debugset:/app/debugset:rw
      - easyocr-cache:/root/.EasyOCR
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - EASYOCR_LANGUAGES=es,en
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 120s

volumes:
  easyocr-cache:
    name: easyocr-model-cache
src/docker-compose.tuning.paddle.yml (new file, 50 lines)
@@ -0,0 +1,50 @@
# docker-compose.tuning.paddle.yml - Ray Tune with PaddleOCR GPU
# Usage:
#   docker compose -f docker-compose.tuning.paddle.yml up -d paddle-ocr-gpu
#   docker compose -f docker-compose.tuning.paddle.yml run raytune --service paddle --samples 64
#   docker compose -f docker-compose.tuning.paddle.yml down

services:
  raytune:
    image: seryus.ddns.net/unir/raytune:latest
    command: ["--service", "paddle", "--host", "paddle-ocr-gpu", "--port", "8000", "--samples", "64"]
    volumes:
      - ./results:/app/results:rw
    environment:
      - PYTHONUNBUFFERED=1
    depends_on:
      paddle-ocr-gpu:
        condition: service_healthy

  paddle-ocr-gpu:
    image: seryus.ddns.net/unir/paddle-ocr-gpu:latest
    container_name: paddle-ocr-gpu-tuning
    ports:
      - "8002:8000"
    volumes:
      - ./dataset:/app/dataset:ro
      - ./debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

volumes:
  paddlex-cache:
    name: paddlex-model-cache
src/docker-compose.tuning.yml (new file, 82 lines)
@@ -0,0 +1,82 @@
# docker-compose.tuning.yml - Ray Tune with all OCR services (PaddleOCR + DocTR)
# Usage:
#   docker compose -f docker-compose.tuning.yml up -d paddle-ocr-gpu doctr-gpu
#   docker compose -f docker-compose.tuning.yml run raytune --service paddle --samples 64
#   docker compose -f docker-compose.tuning.yml run raytune --service doctr --samples 64
#   docker compose -f docker-compose.tuning.yml down
#
# Note: EasyOCR uses port 8002 (same as PaddleOCR). Use docker-compose.tuning.easyocr.yml separately.

services:
  raytune:
    image: seryus.ddns.net/unir/raytune:latest
    network_mode: host
    shm_size: '5gb'
    volumes:
      - ./results:/app/results:rw
    environment:
      - PYTHONUNBUFFERED=1

  paddle-ocr-gpu:
    image: seryus.ddns.net/unir/paddle-ocr-gpu:latest
    container_name: paddle-ocr-gpu-tuning
    ports:
      - "8002:8000"
    volumes:
      - ./dataset:/app/dataset:ro
      - ./debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  doctr-gpu:
    image: seryus.ddns.net/unir/doctr-gpu:latest
    container_name: doctr-gpu-tuning
    ports:
      - "8003:8000"
    volumes:
      - ./dataset:/app/dataset:ro
      - ./debugset:/app/debugset:rw
      - doctr-cache:/root/.cache/doctr
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - DOCTR_DET_ARCH=db_resnet50
      - DOCTR_RECO_ARCH=crnn_vgg16_bn
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 180s

volumes:
  paddlex-cache:
    name: paddlex-model-cache
  doctr-cache:
    name: doctr-model-cache
src/doctr_raytune_rest.ipynb (new file, 109 lines)
@@ -0,0 +1,109 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "header",
   "metadata": {},
   "source": [
    "# DocTR Hyperparameter Optimization via REST API\n",
    "\n",
    "Uses Ray Tune + Optuna to find optimal DocTR parameters.\n",
    "\n",
    "## Prerequisites\n",
    "\n",
    "```bash\n",
    "cd src/doctr_service\n",
    "docker compose up ocr-cpu  # or ocr-gpu\n",
    "```\n",
    "\n",
    "Service runs on port 8003."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "deps",
   "metadata": {},
   "outputs": [],
   "source": "# Pin Ray version for API stability (tune.report takes dict, not kwargs in 2.x)\n%pip install -q \"ray[tune]==2.53.0\" optuna requests pandas"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "setup",
   "metadata": {},
   "outputs": [],
   "source": [
    "from raytune_ocr import (\n",
    "    check_workers, create_trainable, run_tuner, analyze_results, correlation_analysis,\n",
    "    doctr_payload, DOCTR_SEARCH_SPACE, DOCTR_CONFIG_KEYS,\n",
    ")\n",
    "\n",
    "# Worker ports\n",
    "PORTS = [8003]\n",
    "\n",
    "# Check workers are running\n",
    "healthy = check_workers(PORTS, \"DocTR\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "tune",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create trainable and run tuning\n",
    "trainable = create_trainable(PORTS, doctr_payload)\n",
    "\n",
    "results = run_tuner(\n",
    "    trainable=trainable,\n",
    "    search_space=DOCTR_SEARCH_SPACE,\n",
    "    num_samples=64,\n",
    "    num_workers=len(healthy),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "analysis",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analyze results\n",
    "df = analyze_results(\n",
    "    results,\n",
    "    prefix=\"raytune_doctr\",\n",
    "    config_keys=DOCTR_CONFIG_KEYS,\n",
    ")\n",
    "\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "correlation",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Correlation analysis\n",
    "correlation_analysis(df, DOCTR_CONFIG_KEYS)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
src/doctr_service/Dockerfile (new file, 49 lines)
@@ -0,0 +1,49 @@
# Dockerfile - DocTR Tuning REST API
#
# Build:
#   docker build -t doctr-api:latest .
#
# Run:
#   docker run -p 8003:8000 -v ./dataset:/app/dataset doctr-api:latest

FROM python:3.11-slim

LABEL maintainer="Sergio Jimenez"
LABEL description="DocTR Tuning REST API"

WORKDIR /app

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV DOCTR_DET_ARCH=db_resnet50
ENV DOCTR_RECO_ARCH=crnn_vgg16_bn

# Install system dependencies for OpenCV and image processing
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    && rm -rf /var/lib/apt/lists/*

# Copy and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY doctr_tuning_rest.py .
COPY dataset_manager.py .

# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.cache/doctr"]

# Expose API port
EXPOSE 8000

# Health check (longer start period for model download)
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "doctr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
src/doctr_service/Dockerfile.gpu (new file, 63 lines)
@@ -0,0 +1,63 @@
# Dockerfile.gpu - DocTR GPU Dockerfile for amd64/arm64
#
# Build:
#   docker build -t doctr-gpu:latest -f Dockerfile.gpu .
#
# Run:
#   docker run --gpus all -p 8003:8000 -v ./dataset:/app/dataset doctr-gpu:latest

# CUDA 13.0 for Blackwell (sm_121) and GH200/GB200 support
FROM nvidia/cuda:13.0.2-cudnn-runtime-ubuntu24.04

LABEL maintainer="Sergio Jimenez"
LABEL description="DocTR Tuning REST API - GPU/CUDA version"

WORKDIR /app

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV CUDA_VISIBLE_DEVICES=0
ENV DOCTR_DET_ARCH=db_resnet50
ENV DOCTR_RECO_ARCH=crnn_vgg16_bn

# Install Python 3.12 and system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3.12 \
    python3.12-venv \
    python3-pip \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.12 /usr/bin/python

# Copy requirements first for better caching
COPY requirements.txt .

# Install PyTorch with CUDA support first (the cu128 index has amd64 + arm64 wheels)
RUN python -m pip install --no-cache-dir --break-system-packages \
    torch torchvision --index-url https://download.pytorch.org/whl/cu128

# Install the remaining dependencies from requirements.txt (skip torch, already installed)
RUN grep -v "^torch" requirements.txt | python -m pip install --no-cache-dir --break-system-packages -r /dev/stdin

# Copy application code
COPY doctr_tuning_rest.py .
COPY dataset_manager.py .

# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.cache/doctr"]

# Expose API port
EXPOSE 8000

# Health check (longer start period for model download)
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "doctr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
src/doctr_service/README.md (new file, 261 lines)
@@ -0,0 +1,261 @@
# DocTR Tuning REST API

REST API service for DocTR (Document Text Recognition) hyperparameter evaluation. It keeps the model loaded in memory so that repeated evaluations during a hyperparameter search stay fast.

## Quick Start

### CPU Version

```bash
cd src/doctr_service

# Build
docker build -t doctr-api:cpu .

# Run
docker run -d -p 8003:8000 \
  -v $(pwd)/../dataset:/app/dataset:ro \
  -v doctr-cache:/root/.cache/doctr \
  doctr-api:cpu

# Test
curl http://localhost:8003/health
```

### GPU Version

```bash
# Build the GPU image
docker build -f Dockerfile.gpu -t doctr-api:gpu .

# Run with GPU
docker run -d -p 8003:8000 --gpus all \
  -v $(pwd)/../dataset:/app/dataset:ro \
  -v doctr-cache:/root/.cache/doctr \
  doctr-api:gpu
```

## Files

| File | Description |
|------|-------------|
| `doctr_tuning_rest.py` | FastAPI REST service with 9 tunable hyperparameters |
| `dataset_manager.py` | Dataset loader (shared with other services) |
| `Dockerfile` | CPU-only image (amd64 + arm64) |
| `Dockerfile.gpu` | GPU/CUDA image (amd64 + arm64) |
| `requirements.txt` | Python dependencies |

## API Endpoints

### `GET /health`

Check whether the service is ready.

```json
{
  "status": "ok",
  "model_loaded": true,
  "dataset_loaded": true,
  "dataset_size": 24,
  "det_arch": "db_resnet50",
  "reco_arch": "crnn_vgg16_bn",
  "cuda_available": true,
  "device": "cuda",
  "gpu_name": "NVIDIA GB10"
}
```

### `POST /evaluate`

Run an OCR evaluation with the given hyperparameters.

**Request (9 tunable parameters):**
```json
{
  "pdf_folder": "/app/dataset",
  "assume_straight_pages": true,
  "straighten_pages": false,
  "preserve_aspect_ratio": true,
  "symmetric_pad": true,
  "disable_page_orientation": false,
  "disable_crop_orientation": false,
  "resolve_lines": true,
  "resolve_blocks": false,
  "paragraph_break": 0.035,
  "start_page": 5,
  "end_page": 10
}
```

**Response:**
```json
{
  "CER": 0.0189,
  "WER": 0.1023,
  "TIME": 52.3,
  "PAGES": 5,
  "TIME_PER_PAGE": 10.46,
  "model_reinitialized": false
}
```

**Note:** `model_reinitialized` indicates whether the model was reloaded because a processing flag changed (this adds ~2-5s of overhead).
|
||||
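
The `CER` and `WER` fields are character and word error rates: edit distance between prediction and reference, divided by reference length. The service computes them with `jiwer`; the sketch below is only a minimal, dependency-free illustration of the metric definitions, not the exact normalization `jiwer` applies.

```python
def edit_distance(ref, hyp):
    # Classic Levenshtein DP over two sequences (strings or token lists).
    prev = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, 1):
        cur = [i]
        for j, h in enumerate(hyp, 1):
            cur.append(min(prev[j] + 1,               # deletion
                           cur[j - 1] + 1,            # insertion
                           prev[j - 1] + (r != h)))   # substitution
        prev = cur
    return prev[-1]

def cer(reference, prediction):
    # Character error rate: edits / reference length in characters.
    return edit_distance(reference, prediction) / max(len(reference), 1)

def wer(reference, prediction):
    # Word error rate: same idea over whitespace-split tokens.
    ref, hyp = reference.split(), prediction.split()
    return edit_distance(ref, hyp) / max(len(ref), 1)

print(cer("hello world", "hallo world"))  # 1 substitution over 11 characters
print(wer("hello world", "hallo world"))  # 1 wrong word out of 2
```

A CER of 0.0189, as in the response above, therefore means roughly 2 character edits per 100 reference characters.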

## Debug Output (debugset)

The `debugset` folder holds saved OCR predictions for debugging and analysis. When `save_output=true` is passed to `/evaluate`, predictions are written to `/app/debugset`.

### Enable Debug Output

```json
{
  "pdf_folder": "/app/dataset",
  "save_output": true,
  "start_page": 5,
  "end_page": 10
}
```

### Output Structure

```
debugset/
├── doc1/
│   └── doctr/
│       ├── page_0005.txt
│       ├── page_0006.txt
│       └── ...
├── doc2/
│   └── doctr/
│       └── ...
```

Each `.txt` file contains the OCR-extracted text for that page.

### Docker Mount

Add the `debugset` volume to your `docker run` command:

```bash
docker run -d -p 8003:8000 \
  -v $(pwd)/../dataset:/app/dataset:ro \
  -v $(pwd)/../debugset:/app/debugset:rw \
  -v doctr-cache:/root/.cache/doctr \
  doctr-api:cpu
```

### Use Cases

- **Compare OCR engines**: Run the same pages through PaddleOCR, DocTR, and EasyOCR with `save_output=true`, then diff the results
- **Debug hyperparameters**: See how different settings affect text extraction
- **Ground-truth comparison**: Compare predictions against the expected output
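
The engine-comparison workflow can be sketched with the standard library's `difflib`. The two prediction strings here are illustrative; in practice they would be read from the per-engine files under `debugset/` shown above.

```python
import difflib

# Hypothetical predictions for the same page (normally read from
# debugset/doc1/doctr/page_0005.txt and debugset/doc1/easyocr/page_0005.txt).
doctr_pred = "Total amount due: 1,234.56 EUR"
easyocr_pred = "Total arnount due: 1.234.56 EUR"

# Line-level diff of the two engine outputs
for line in difflib.ndiff([doctr_pred], [easyocr_pred]):
    print(line)

# Similarity ratio as a quick scalar comparison (1.0 = identical)
ratio = difflib.SequenceMatcher(None, doctr_pred, easyocr_pred).ratio()
print(f"similarity: {ratio:.3f}")
```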

## Hyperparameters

### Processing Flags (Require Model Reinitialization)

| Parameter | Default | Description |
|-----------|---------|-------------|
| `assume_straight_pages` | true | Skip rotation handling for straight documents |
| `straighten_pages` | false | Pre-straighten pages before detection |
| `preserve_aspect_ratio` | true | Maintain document proportions during resize |
| `symmetric_pad` | true | Use symmetric padding when preserving aspect ratio |

**Note:** Changing these flags requires model reinitialization (~2-5 s).

### Orientation Flags

| Parameter | Default | Description |
|-----------|---------|-------------|
| `disable_page_orientation` | false | Skip page orientation classification |
| `disable_crop_orientation` | false | Skip crop orientation detection |

### Output Grouping

| Parameter | Default | Range | Description |
|-----------|---------|-------|-------------|
| `resolve_lines` | true | bool | Group words into lines |
| `resolve_blocks` | false | bool | Group lines into blocks |
| `paragraph_break` | 0.035 | 0.0-1.0 | Minimum space ratio separating paragraphs |
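
The grouping flags shape how the nested DocTR result (page → blocks → lines → words) is flattened into the evaluated text. A simplified, self-contained sketch of that walk (function and variable names here are illustrative, not the service's API):

```python
import re

def assemble_text(blocks, resolve_blocks=False):
    """Flatten a page given as blocks -> lines -> words into plain text.

    With resolve_blocks=True, an empty line is appended after each block
    as a paragraph separator before the final whitespace normalization.
    """
    lines = []
    for block in blocks:
        for words in block:
            lines.append(" ".join(words))
        if resolve_blocks:
            lines.append("")  # paragraph separator

    # Join non-empty lines and collapse repeated whitespace
    text = " ".join(l for l in lines if l).strip()
    return re.sub(r"\s+", " ", text)

page = [[["Invoice", "No."], ["2024-001"]], [["Total:", "99.00"]]]
print(assemble_text(page))  # "Invoice No. 2024-001 Total: 99.00"
```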

## Model Architecture

DocTR uses a two-stage pipeline:

1. **Detection** (`det_arch`): Localizes text regions
   - Default: `db_resnet50` (DBNet with a ResNet-50 backbone)
   - Alternatives: `linknet_resnet18`, `db_mobilenet_v3_large`

2. **Recognition** (`reco_arch`): Recognizes characters
   - Default: `crnn_vgg16_bn` (CRNN with a VGG-16 backbone)
   - Alternatives: `sar_resnet31`, `master`, `vitstr_small`

The architecture is set via environment variables and fixed at startup.
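
To try one of the alternative architectures, override the environment variables when starting the container — for example with a compose fragment (a sketch; the service and image names follow this repository's `docker-compose.yml`, and the variable names match the Environment Variables table below):

```yaml
services:
  ocr-gpu:
    image: seryus.ddns.net/unir/doctr-gpu:latest
    environment:
      - DOCTR_DET_ARCH=db_mobilenet_v3_large   # lighter detection backbone
      - DOCTR_RECO_ARCH=vitstr_small           # transformer-based recognition
```

Because the architecture is fixed at startup, the container must be restarted (and the models re-downloaded into the cache volume) after changing these values.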

## GPU Support

### Platform Support

| Platform | CPU | GPU |
|----------|-----|-----|
| Linux x86_64 (amd64) | ✅ | ✅ PyTorch CUDA |
| Linux ARM64 (GH200/GB200/DGX Spark) | ✅ | ✅ PyTorch CUDA (cu128 index) |
| macOS ARM64 (M1/M2) | ✅ | ❌ |

### PyTorch CUDA on ARM64

Unlike PaddlePaddle, PyTorch provides **official ARM64 CUDA wheels** on the cu128 index:

```bash
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
```

This works on both amd64 and arm64 platforms with CUDA support.

### GPU Detection

DocTR automatically uses the GPU when one is available:

```python
import torch
from doctr.models import ocr_predictor

print(torch.cuda.is_available())  # True if a GPU is available

# Move the DocTR model to the GPU
model = ocr_predictor(pretrained=True)
if torch.cuda.is_available():
    model = model.cuda()
```

The `/health` endpoint shows GPU status:

```json
{
  "cuda_available": true,
  "device": "cuda",
  "gpu_name": "NVIDIA GB10",
  "gpu_memory_total": "128.00 GB"
}
```

## Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `DOCTR_DET_ARCH` | `db_resnet50` | Detection architecture |
| `DOCTR_RECO_ARCH` | `crnn_vgg16_bn` | Recognition architecture |
| `CUDA_VISIBLE_DEVICES` | `0` | GPU device selection |

## CI/CD

Prebuilt images are available from the registry:

| Image | Architecture |
|-------|--------------|
| `seryus.ddns.net/unir/doctr-cpu:latest` | amd64, arm64 |
| `seryus.ddns.net/unir/doctr-gpu:latest` | amd64, arm64 |

## Sources

- [DocTR Documentation](https://mindee.github.io/doctr/)
- [DocTR GitHub](https://github.com/mindee/doctr)
- [DocTR Model Usage](https://mindee.github.io/doctr/latest/using_doctr/using_models.html)
- [PyTorch ARM64 CUDA Wheels](https://github.com/pytorch/pytorch/issues/160162)
74
src/doctr_service/dataset_manager.py
Normal file
@@ -0,0 +1,74 @@
# Imports
import os
from PIL import Image


class ImageTextDataset:
    def __init__(self, root):
        self.samples = []

        for folder in sorted(os.listdir(root)):
            sub = os.path.join(root, folder)
            img_dir = os.path.join(sub, "img")
            txt_dir = os.path.join(sub, "txt")

            if not (os.path.isdir(img_dir) and os.path.isdir(txt_dir)):
                continue

            for fname in sorted(os.listdir(img_dir)):
                if not fname.lower().endswith((".png", ".jpg", ".jpeg")):
                    continue

                img_path = os.path.join(img_dir, fname)

                # Text file must have the same name but a .txt extension
                txt_name = os.path.splitext(fname)[0] + ".txt"
                txt_path = os.path.join(txt_dir, txt_name)

                if not os.path.exists(txt_path):
                    continue

                self.samples.append((img_path, txt_path))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, txt_path = self.samples[idx]

        # Load image
        image = Image.open(img_path).convert("RGB")

        # Load text
        with open(txt_path, "r", encoding="utf-8") as f:
            text = f.read()

        return image, text

    def get_output_path(self, idx, output_subdir, debugset_root="/app/debugset"):
        """Get output path for saving an OCR result to the debugset folder.

        Args:
            idx: Sample index
            output_subdir: Subdirectory name (e.g., 'paddle_text', 'doctr_text')
            debugset_root: Root folder for debug output (default: /app/debugset)

        Returns:
            Path like /app/debugset/doc1/{output_subdir}/page_001.txt
        """
        img_path, _ = self.samples[idx]
        # img_path: /app/dataset/doc1/img/page_001.png
        # Extract relative path: doc1/img/page_001.png
        parts = img_path.split("/dataset/", 1)
        if len(parts) == 2:
            rel_path = parts[1]  # doc1/img/page_001.png
        else:
            rel_path = os.path.basename(img_path)

        # Replace /img/ with /{output_subdir}/
        rel_parts = rel_path.rsplit("/img/", 1)
        if len(rel_parts) == 2:
            doc_folder, page_name = rel_parts  # doc1, page_001.png
        else:
            doc_folder, page_name = "", rel_parts[0]  # no /img/ segment in path
        fname = os.path.splitext(page_name)[0] + ".txt"  # page_001.txt

        out_dir = os.path.join(debugset_root, doc_folder, output_subdir)
        os.makedirs(out_dir, exist_ok=True)
        return os.path.join(out_dir, fname)
63
src/doctr_service/docker-compose.yml
Normal file
@@ -0,0 +1,63 @@
# docker-compose.yml - DocTR REST API
# Usage:
#   CPU: docker compose up ocr-cpu
#   GPU: docker compose up ocr-gpu
#
# Port: 8003

services:
  # CPU-only service
  ocr-cpu:
    image: seryus.ddns.net/unir/doctr-cpu:latest
    container_name: doctr-cpu
    ports:
      - "8003:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - doctr-cache:/root/.cache/doctr
    environment:
      - PYTHONUNBUFFERED=1
      - DOCTR_DET_ARCH=db_resnet50
      - DOCTR_RECO_ARCH=crnn_vgg16_bn
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 180s

  # GPU service (requires NVIDIA Container Toolkit)
  ocr-gpu:
    image: seryus.ddns.net/unir/doctr-gpu:latest
    container_name: doctr-gpu
    ports:
      - "8003:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - doctr-cache:/root/.cache/doctr
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - DOCTR_DET_ARCH=db_resnet50
      - DOCTR_RECO_ARCH=crnn_vgg16_bn
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 180s

volumes:
  doctr-cache:
    name: doctr-model-cache
336
src/doctr_service/doctr_tuning_rest.py
Normal file
@@ -0,0 +1,336 @@
# doctr_tuning_rest.py
# FastAPI REST service for DocTR hyperparameter evaluation
# Usage: uvicorn doctr_tuning_rest:app --host 0.0.0.0 --port 8000

import os
import re
import time
import threading
from typing import Optional
from contextlib import asynccontextmanager

import numpy as np
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field

from doctr.models import ocr_predictor
from jiwer import wer, cer
from dataset_manager import ImageTextDataset


def get_gpu_info() -> dict:
    """Get GPU status information from PyTorch."""
    info = {
        "cuda_available": torch.cuda.is_available(),
        "device": "cuda" if torch.cuda.is_available() else "cpu",
        "gpu_count": 0,
        "gpu_name": None,
        "gpu_memory_total": None,
        "gpu_memory_used": None,
    }

    if info["cuda_available"]:
        try:
            info["gpu_count"] = torch.cuda.device_count()
            if info["gpu_count"] > 0:
                info["gpu_name"] = torch.cuda.get_device_name(0)
                info["gpu_memory_total"] = f"{torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB"
                info["gpu_memory_used"] = f"{torch.cuda.memory_allocated(0) / (1024**3):.2f} GB"
        except Exception as e:
            info["gpu_error"] = str(e)

    return info


# Model configuration via environment variables
DEFAULT_DET_ARCH = os.environ.get("DOCTR_DET_ARCH", "db_resnet50")
DEFAULT_RECO_ARCH = os.environ.get("DOCTR_RECO_ARCH", "crnn_vgg16_bn")


# Global state for model and dataset
class AppState:
    model: Optional[object] = None
    dataset: Optional[ImageTextDataset] = None
    dataset_path: Optional[str] = None
    det_arch: str = DEFAULT_DET_ARCH
    reco_arch: str = DEFAULT_RECO_ARCH
    # Track current model config for cache invalidation
    current_config: Optional[dict] = None
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    lock: Optional[threading.Lock] = None  # Protects the OCR model from concurrent access

    def __init__(self):
        self.lock = threading.Lock()


state = AppState()


def create_model(
    assume_straight_pages: bool = True,
    straighten_pages: bool = False,
    preserve_aspect_ratio: bool = True,
    symmetric_pad: bool = True,
    disable_page_orientation: bool = False,
    disable_crop_orientation: bool = False,
) -> object:
    """Create a DocTR model with the given configuration."""
    model = ocr_predictor(
        det_arch=state.det_arch,
        reco_arch=state.reco_arch,
        pretrained=True,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
    )

    # Apply orientation settings if supported
    if hasattr(model, 'disable_page_orientation'):
        model.disable_page_orientation = disable_page_orientation
    if hasattr(model, 'disable_crop_orientation'):
        model.disable_crop_orientation = disable_crop_orientation

    # Move to GPU if available
    if state.device == "cuda":
        model = model.cuda()

    return model


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the DocTR model at startup with the default configuration."""
    gpu_info = get_gpu_info()
    print("=" * 50)
    print("GPU STATUS")
    print("=" * 50)
    print(f"  CUDA available: {gpu_info['cuda_available']}")
    print(f"  Device: {gpu_info['device']}")
    if gpu_info['cuda_available']:
        print(f"  GPU count: {gpu_info['gpu_count']}")
        print(f"  GPU name: {gpu_info['gpu_name']}")
        print(f"  GPU memory total: {gpu_info['gpu_memory_total']}")
    print("=" * 50)

    print("Loading DocTR models...")
    print(f"  Detection: {state.det_arch}")
    print(f"  Recognition: {state.reco_arch}")

    # Load with default config
    state.model = create_model()
    state.current_config = {
        "assume_straight_pages": True,
        "straighten_pages": False,
        "preserve_aspect_ratio": True,
        "symmetric_pad": True,
        "disable_page_orientation": False,
        "disable_crop_orientation": False,
    }

    if gpu_info['cuda_available']:
        gpu_after = get_gpu_info()
        print(f"  GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")

    print("Model loaded successfully!")
    yield
    state.model = None
    state.dataset = None


app = FastAPI(
    title="DocTR Tuning API",
    description="REST API for DocTR hyperparameter evaluation",
    version="1.0.0",
    lifespan=lifespan,
)


class EvaluateRequest(BaseModel):
    """Request schema with all tunable DocTR hyperparameters."""
    pdf_folder: str = Field("/app/dataset", description="Path to dataset folder")

    # Processing flags (require model reinit)
    assume_straight_pages: bool = Field(True, description="Skip rotation handling for straight documents")
    straighten_pages: bool = Field(False, description="Pre-straighten pages before detection")
    preserve_aspect_ratio: bool = Field(True, description="Maintain document proportions during resize")
    symmetric_pad: bool = Field(True, description="Use symmetric padding when preserving aspect ratio")

    # Orientation flags
    disable_page_orientation: bool = Field(False, description="Skip page orientation classification")
    disable_crop_orientation: bool = Field(False, description="Skip crop orientation detection")

    # Output grouping
    resolve_lines: bool = Field(True, description="Group words into lines")
    resolve_blocks: bool = Field(False, description="Group lines into blocks")
    paragraph_break: float = Field(0.035, ge=0.0, le=1.0, description="Minimum space ratio separating paragraphs")

    # Page range
    start_page: int = Field(5, ge=0, description="Start page index (inclusive)")
    end_page: int = Field(10, ge=1, description="End page index (exclusive)")
    save_output: bool = Field(False, description="Save OCR predictions to debugset folder")


class EvaluateResponse(BaseModel):
    """Response schema matching CLI output."""
    CER: float
    WER: float
    TIME: float
    PAGES: int
    TIME_PER_PAGE: float
    model_reinitialized: bool = False


class HealthResponse(BaseModel):
    status: str
    model_loaded: bool
    dataset_loaded: bool
    dataset_size: Optional[int] = None
    det_arch: Optional[str] = None
    reco_arch: Optional[str] = None
    cuda_available: Optional[bool] = None
    device: Optional[str] = None
    gpu_name: Optional[str] = None
    gpu_memory_used: Optional[str] = None
    gpu_memory_total: Optional[str] = None


def doctr_result_to_text(result, resolve_lines: bool = True, resolve_blocks: bool = False) -> str:
    """
    Convert a DocTR result to plain text.
    Structure: Document -> pages -> blocks -> lines -> words
    """
    lines = []
    for page in result.pages:
        for block in page.blocks:
            for line in block.lines:
                line_text = " ".join([w.value for w in line.words])
                lines.append(line_text)
            if resolve_blocks:
                lines.append("")  # paragraph separator

    text = " ".join([l for l in lines if l]).strip()
    text = re.sub(r"\s+", " ", text).strip()
    return text


def evaluate_text(reference: str, prediction: str) -> dict:
    """Calculate WER and CER metrics."""
    return {"WER": wer(reference, prediction), "CER": cer(reference, prediction)}


@app.get("/health", response_model=HealthResponse)
def health_check():
    """Check whether the service is ready."""
    gpu_info = get_gpu_info()
    return HealthResponse(
        status="ok" if state.model is not None else "initializing",
        model_loaded=state.model is not None,
        dataset_loaded=state.dataset is not None,
        dataset_size=len(state.dataset) if state.dataset else None,
        det_arch=state.det_arch,
        reco_arch=state.reco_arch,
        cuda_available=gpu_info.get("cuda_available"),
        device=gpu_info.get("device"),
        gpu_name=gpu_info.get("gpu_name"),
        gpu_memory_used=gpu_info.get("gpu_memory_used"),
        gpu_memory_total=gpu_info.get("gpu_memory_total"),
    )


@app.post("/evaluate", response_model=EvaluateResponse)
def evaluate(request: EvaluateRequest):
    """
    Evaluate OCR with the given hyperparameters.
    Returns CER, WER, and timing metrics.
    Note: the model is reinitialized if processing flags change.
    """
    if state.model is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    # Load or reload the dataset if the path changed
    if state.dataset is None or state.dataset_path != request.pdf_folder:
        if not os.path.isdir(request.pdf_folder):
            raise HTTPException(status_code=400, detail=f"Dataset folder not found: {request.pdf_folder}")
        state.dataset = ImageTextDataset(request.pdf_folder)
        state.dataset_path = request.pdf_folder

    if len(state.dataset) == 0:
        raise HTTPException(status_code=400, detail="Dataset is empty")

    # Validate page range
    start = request.start_page
    end = min(request.end_page, len(state.dataset))
    if start >= end:
        raise HTTPException(status_code=400, detail=f"Invalid page range: {start}-{end}")

    cer_list, wer_list = [], []
    time_per_page_list = []
    t0 = time.time()

    # Lock to prevent concurrent OCR access (the model is not thread-safe)
    with state.lock:
        # Check if the model needs to be reinitialized
        new_config = {
            "assume_straight_pages": request.assume_straight_pages,
            "straighten_pages": request.straighten_pages,
            "preserve_aspect_ratio": request.preserve_aspect_ratio,
            "symmetric_pad": request.symmetric_pad,
            "disable_page_orientation": request.disable_page_orientation,
            "disable_crop_orientation": request.disable_crop_orientation,
        }

        model_reinitialized = False
        if state.current_config != new_config:
            print("Model config changed, reinitializing...")
            state.model = create_model(**new_config)
            state.current_config = new_config
            model_reinitialized = True

        for idx in range(start, end):
            img, ref = state.dataset[idx]
            arr = np.array(img)

            tp0 = time.time()
            # DocTR expects a list of images
            result = state.model([arr])

            pred = doctr_result_to_text(
                result,
                resolve_lines=request.resolve_lines,
                resolve_blocks=request.resolve_blocks,
            )
            time_per_page_list.append(float(time.time() - tp0))

            # Save prediction to debugset if requested
            if request.save_output:
                out_path = state.dataset.get_output_path(idx, "doctr_text")
                with open(out_path, "w", encoding="utf-8") as f:
                    f.write(pred)

            m = evaluate_text(ref, pred)
            cer_list.append(m["CER"])
            wer_list.append(m["WER"])

    return EvaluateResponse(
        CER=float(np.mean(cer_list)) if cer_list else 1.0,
        WER=float(np.mean(wer_list)) if wer_list else 1.0,
        TIME=float(time.time() - t0),
        PAGES=len(cer_list),
        TIME_PER_PAGE=float(np.mean(time_per_page_list)) if time_per_page_list else 0.0,
        model_reinitialized=model_reinitialized,
    )


@app.post("/evaluate_full", response_model=EvaluateResponse)
def evaluate_full(request: EvaluateRequest):
    """Evaluate on ALL pages (ignores start_page/end_page)."""
    request.start_page = 0
    request.end_page = 9999
    return evaluate(request)


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
8
src/doctr_service/requirements.txt
Normal file
@@ -0,0 +1,8 @@
python-doctr[torch]>=0.8.0
fastapi>=0.104.0
uvicorn>=0.24.0
pydantic>=2.0.0
jiwer>=3.0.0
numpy>=1.24.0
pillow>=10.0.0
torch>=2.0.0
109
src/easyocr_raytune_rest.ipynb
Normal file
@@ -0,0 +1,109 @@
{
  "cells": [
    {
      "cell_type": "markdown",
      "id": "header",
      "metadata": {},
      "source": [
        "# EasyOCR Hyperparameter Optimization via REST API\n",
        "\n",
        "Uses Ray Tune + Optuna to find optimal EasyOCR parameters.\n",
        "\n",
        "## Prerequisites\n",
        "\n",
        "```bash\n",
        "cd src/easyocr_service\n",
        "docker compose up ocr-cpu  # or ocr-gpu\n",
        "```\n",
        "\n",
        "Service runs on port 8002."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "deps",
      "metadata": {},
      "outputs": [],
      "source": "# Pin Ray version for API stability (tune.report takes dict, not kwargs in 2.x)\n%pip install -q \"ray[tune]==2.53.0\" optuna requests pandas"
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "setup",
      "metadata": {},
      "outputs": [],
      "source": [
        "from raytune_ocr import (\n",
        "    check_workers, create_trainable, run_tuner, analyze_results, correlation_analysis,\n",
        "    easyocr_payload, EASYOCR_SEARCH_SPACE, EASYOCR_CONFIG_KEYS,\n",
        ")\n",
        "\n",
        "# Worker ports\n",
        "PORTS = [8002]\n",
        "\n",
        "# Check workers are running\n",
        "healthy = check_workers(PORTS, \"EasyOCR\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "tune",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Create trainable and run tuning\n",
        "trainable = create_trainable(PORTS, easyocr_payload)\n",
        "\n",
        "results = run_tuner(\n",
        "    trainable=trainable,\n",
        "    search_space=EASYOCR_SEARCH_SPACE,\n",
        "    num_samples=64,\n",
        "    num_workers=len(healthy),\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "analysis",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Analyze results\n",
        "df = analyze_results(\n",
        "    results,\n",
        "    prefix=\"raytune_easyocr\",\n",
        "    config_keys=EASYOCR_CONFIG_KEYS,\n",
        ")\n",
        "\n",
        "df.describe()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "correlation",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Correlation analysis\n",
        "correlation_analysis(df, EASYOCR_CONFIG_KEYS)"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.10.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}
48
src/easyocr_service/Dockerfile
Normal file
@@ -0,0 +1,48 @@
# Dockerfile - EasyOCR Tuning REST API
#
# Build:
#   docker build -t easyocr-api:latest .
#
# Run:
#   docker run -p 8002:8000 -v ./dataset:/app/dataset easyocr-api:latest

FROM python:3.11-slim

LABEL maintainer="Sergio Jimenez"
LABEL description="EasyOCR Tuning REST API"

WORKDIR /app

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV EASYOCR_LANGUAGES=es,en

# Install system dependencies for OpenCV and image processing
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    && rm -rf /var/lib/apt/lists/*

# Copy and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY easyocr_tuning_rest.py .
COPY dataset_manager.py .

# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.EasyOCR"]

# Expose API port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "easyocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
62
src/easyocr_service/Dockerfile.gpu
Normal file
@@ -0,0 +1,62 @@
# Dockerfile.gpu - EasyOCR GPU Dockerfile for amd64/arm64
#
# Build:
#   docker build -t easyocr-gpu:latest -f Dockerfile.gpu .
#
# Run:
#   docker run --gpus all -p 8002:8000 -v ./dataset:/app/dataset easyocr-gpu:latest

# CUDA 13.0 for Blackwell (sm_121) and GH200/GB200 support
FROM nvidia/cuda:13.0.2-cudnn-runtime-ubuntu24.04

LABEL maintainer="Sergio Jimenez"
LABEL description="EasyOCR Tuning REST API - GPU/CUDA version"

WORKDIR /app

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV CUDA_VISIBLE_DEVICES=0
ENV EASYOCR_LANGUAGES=es,en

# Install Python 3.12 and system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3.12 \
    python3.12-venv \
    python3-pip \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.12 /usr/bin/python

# Copy requirements first for better caching
COPY requirements.txt .

# Install PyTorch with CUDA support first (cu128 index has amd64 + arm64 wheels)
RUN python -m pip install --no-cache-dir --break-system-packages \
    torch torchvision --index-url https://download.pytorch.org/whl/cu128

# Install remaining dependencies from requirements.txt (skip torch, already installed)
RUN grep -v "^torch" requirements.txt | python -m pip install --no-cache-dir --break-system-packages -r /dev/stdin

# Copy application code
COPY easyocr_tuning_rest.py .
COPY dataset_manager.py .

# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.EasyOCR"]

# Expose API port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "easyocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
248
src/easyocr_service/README.md
Normal file
@@ -0,0 +1,248 @@
# EasyOCR Tuning REST API

REST API service for EasyOCR hyperparameter evaluation. It keeps the model loaded in memory for fast repeated evaluations during hyperparameter search.

## Quick Start

### CPU Version

```bash
cd src/easyocr_service

# Build
docker build -t easyocr-api:cpu .

# Run
docker run -d -p 8002:8000 \
  -v $(pwd)/../dataset:/app/dataset:ro \
  -v easyocr-cache:/root/.EasyOCR \
  easyocr-api:cpu

# Test
curl http://localhost:8002/health
```

### GPU Version

```bash
# Build GPU image
docker build -f Dockerfile.gpu -t easyocr-api:gpu .

# Run with GPU
docker run -d -p 8002:8000 --gpus all \
  -v $(pwd)/../dataset:/app/dataset:ro \
  -v easyocr-cache:/root/.EasyOCR \
  easyocr-api:gpu
```

## Files

| File | Description |
|------|-------------|
| `easyocr_tuning_rest.py` | FastAPI REST service with 14 tunable hyperparameters |
| `dataset_manager.py` | Dataset loader (shared with other services) |
| `Dockerfile` | CPU-only image (amd64 + arm64) |
| `Dockerfile.gpu` | GPU/CUDA image (amd64 + arm64) |
| `requirements.txt` | Python dependencies |

## API Endpoints

### `GET /health`

Check whether the service is ready.

```json
{
  "status": "ok",
  "model_loaded": true,
  "dataset_loaded": true,
  "dataset_size": 24,
  "languages": ["es", "en"],
  "cuda_available": true,
  "device": "cuda",
  "gpu_name": "NVIDIA GB10"
}
```
### `POST /evaluate`
|
||||
|
||||
Run OCR evaluation with given hyperparameters.
|
||||
|
||||
**Request (14 tunable parameters):**
|
||||
```json
|
||||
{
|
||||
"pdf_folder": "/app/dataset",
|
||||
"text_threshold": 0.7,
|
||||
"low_text": 0.4,
|
||||
"link_threshold": 0.4,
|
||||
"slope_ths": 0.1,
|
||||
"ycenter_ths": 0.5,
|
||||
"height_ths": 0.5,
|
||||
"width_ths": 0.5,
|
||||
"add_margin": 0.1,
|
||||
"contrast_ths": 0.1,
|
||||
"adjust_contrast": 0.5,
|
||||
"decoder": "greedy",
|
||||
"beamWidth": 5,
|
||||
"min_size": 10,
|
||||
"rotation_info": null,
|
||||
"start_page": 5,
|
||||
"end_page": 10
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{"CER": 0.0234, "WER": 0.1156, "TIME": 45.2, "PAGES": 5, "TIME_PER_PAGE": 9.04}
|
||||
```
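A minimal Python client sketch (the service is assumed to be listening on `localhost:8002`; the actual HTTP call is left commented out so the snippet runs without a container):

```python
import json

# Only the parameters being tuned need to be sent; omitted fields fall
# back to the defaults documented above.
payload = {
    "pdf_folder": "/app/dataset",
    "text_threshold": 0.8,   # tightened detection threshold
    "decoder": "beamsearch",
    "beamWidth": 10,
    "start_page": 5,
    "end_page": 10,
}
body = json.dumps(payload).encode("utf-8")

# To evaluate against a running service:
# import urllib.request
# req = urllib.request.Request(
#     "http://localhost:8002/evaluate",
#     data=body,
#     headers={"Content-Type": "application/json"},
# )
# metrics = json.loads(urllib.request.urlopen(req).read())
```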

## Debug Output (debugset)

The `debugset` folder allows saving OCR predictions for debugging and analysis. When `save_output: true` is passed to `/evaluate`, predictions are written to `/app/debugset`.

### Enable Debug Output

```json
{
  "pdf_folder": "/app/dataset",
  "save_output": true,
  "start_page": 5,
  "end_page": 10
}
```

### Output Structure

```
debugset/
├── doc1/
│   └── easyocr/
│       ├── page_0005.txt
│       ├── page_0006.txt
│       └── ...
├── doc2/
│   └── easyocr/
│       └── ...
```

Each `.txt` file contains the OCR-extracted text for that page.

### Docker Mount

Add the debugset volume to your `docker run` command:

```bash
docker run -d -p 8002:8000 \
  -v $(pwd)/../dataset:/app/dataset:ro \
  -v $(pwd)/../debugset:/app/debugset:rw \
  -v easyocr-cache:/root/.EasyOCR \
  easyocr-api:cpu
```

### Use Cases

- **Compare OCR engines**: Run the same pages through PaddleOCR, DocTR, and EasyOCR with `save_output: true`, then diff the results
- **Debug hyperparameters**: See how different settings affect text extraction
- **Ground truth comparison**: Compare predictions against the expected output
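For the engine-comparison case, a quick diff of two debugset files can be done with the standard library (the page texts are inlined here for illustration; in practice you would read them from `debugset/<doc>/<engine>/page_XXXX.txt`):

```python
import difflib

# Inlined stand-ins for two engines' output for the same page
easyocr_text = "Total amount due: 1,234.56 EUR"
paddle_text = "Total arnount due: 1.234,56 EUR"

diff = list(difflib.unified_diff(
    easyocr_text.splitlines(),
    paddle_text.splitlines(),
    fromfile="easyocr",
    tofile="paddle",
    lineterm="",
))
print("\n".join(diff))
```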

## Hyperparameters

### Detection (CRAFT Algorithm)

| Parameter | Default | Range | Description |
|-----------|---------|-------|-------------|
| `text_threshold` | 0.7 | 0.0-1.0 | Text confidence threshold |
| `low_text` | 0.4 | 0.0-1.0 | Text lower-bound score |
| `link_threshold` | 0.4 | 0.0-1.0 | Link confidence threshold |

### Bounding Box Merging

| Parameter | Default | Range | Description |
|-----------|---------|-------|-------------|
| `slope_ths` | 0.1 | 0.0-1.0 | Maximum slope for merging |
| `ycenter_ths` | 0.5 | 0.0-2.0 | Maximum vertical shift |
| `height_ths` | 0.5 | 0.0-2.0 | Maximum height variance |
| `width_ths` | 0.5 | 0.0-2.0 | Maximum horizontal distance |
| `add_margin` | 0.1 | 0.0-1.0 | Bounding box extension |

### Contrast

| Parameter | Default | Range | Description |
|-----------|---------|-------|-------------|
| `contrast_ths` | 0.1 | 0.0-1.0 | Contrast threshold for dual-pass |
| `adjust_contrast` | 0.5 | 0.0-1.0 | Target contrast level |

### Decoder

| Parameter | Default | Options | Description |
|-----------|---------|---------|-------------|
| `decoder` | "greedy" | greedy, beamsearch, wordbeamsearch | Decoding method |
| `beamWidth` | 5 | 1-20 | Beam width (for beam search) |

### Other

| Parameter | Default | Description |
|-----------|---------|-------------|
| `min_size` | 10 | Minimum text box size in pixels |
| `rotation_info` | null | Rotation angles to try, e.g. `[90, 180, 270]` |
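These tables translate directly into a search space. A minimal random-search sketch (Ray Tune or Optuna would supply a smarter sampler; the ranges are copied from the tables above):

```python
import random

# Continuous (low, high) ranges from the tables above
SEARCH_SPACE = {
    "text_threshold": (0.0, 1.0),
    "low_text": (0.0, 1.0),
    "link_threshold": (0.0, 1.0),
    "slope_ths": (0.0, 1.0),
    "ycenter_ths": (0.0, 2.0),
    "height_ths": (0.0, 2.0),
    "width_ths": (0.0, 2.0),
    "add_margin": (0.0, 1.0),
    "contrast_ths": (0.0, 1.0),
    "adjust_contrast": (0.0, 1.0),
}

def sample_config(rng):
    """Draw one candidate configuration, categoricals included."""
    cfg = {name: rng.uniform(lo, hi) for name, (lo, hi) in SEARCH_SPACE.items()}
    cfg["decoder"] = rng.choice(["greedy", "beamsearch", "wordbeamsearch"])
    cfg["beamWidth"] = rng.randint(1, 20)
    return cfg

cfg = sample_config(random.Random(0))
```

Each sampled `cfg` can be posted to `/evaluate` as-is; the service validates it against the same bounds.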

## GPU Support

### Platform Support

| Platform | CPU | GPU |
|----------|-----|-----|
| Linux x86_64 (amd64) | ✅ | ✅ PyTorch CUDA |
| Linux ARM64 (GH200/GB200/DGX Spark) | ✅ | ✅ PyTorch CUDA (cu128 index) |
| macOS ARM64 (M1/M2) | ✅ | ❌ |

### PyTorch CUDA on ARM64

Unlike PaddlePaddle, PyTorch provides **official ARM64 CUDA wheels** on the cu128 index:

```bash
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
```

This works on both amd64 and arm64 platforms with CUDA support.

### GPU Detection

EasyOCR automatically uses the GPU when PyTorch CUDA is available:

```python
import torch
print(torch.cuda.is_available())  # True if a GPU is available
```

The `/health` endpoint reports GPU status:

```json
{
  "cuda_available": true,
  "device": "cuda",
  "gpu_name": "NVIDIA GB10",
  "gpu_memory_total": "128.00 GB"
}
```

## Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `EASYOCR_LANGUAGES` | `es,en` | Comma-separated language codes |
| `CUDA_VISIBLE_DEVICES` | `0` | GPU device selection |

## CI/CD

Built images are available from the registry:

| Image | Architecture |
|-------|--------------|
| `seryus.ddns.net/unir/easyocr-cpu:latest` | amd64, arm64 |
| `seryus.ddns.net/unir/easyocr-gpu:latest` | amd64, arm64 |

## Sources

- [EasyOCR Documentation](https://www.jaided.ai/easyocr/documentation/)
- [EasyOCR GitHub](https://github.com/JaidedAI/EasyOCR)
- [PyTorch ARM64 CUDA Wheels](https://github.com/pytorch/pytorch/issues/160162)
74
src/easyocr_service/dataset_manager.py
Normal file
@@ -0,0 +1,74 @@
# Imports
import os

from PIL import Image


class ImageTextDataset:
    def __init__(self, root):
        self.samples = []

        for folder in sorted(os.listdir(root)):
            sub = os.path.join(root, folder)
            img_dir = os.path.join(sub, "img")
            txt_dir = os.path.join(sub, "txt")

            if not (os.path.isdir(img_dir) and os.path.isdir(txt_dir)):
                continue

            for fname in sorted(os.listdir(img_dir)):
                if not fname.lower().endswith((".png", ".jpg", ".jpeg")):
                    continue

                img_path = os.path.join(img_dir, fname)

                # The text file must have the same name but .txt
                txt_name = os.path.splitext(fname)[0] + ".txt"
                txt_path = os.path.join(txt_dir, txt_name)

                if not os.path.exists(txt_path):
                    continue

                self.samples.append((img_path, txt_path))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, txt_path = self.samples[idx]

        # Load image
        image = Image.open(img_path).convert("RGB")

        # Load text
        with open(txt_path, "r", encoding="utf-8") as f:
            text = f.read()

        return image, text

    def get_output_path(self, idx, output_subdir, debugset_root="/app/debugset"):
        """Get output path for saving OCR result to debugset folder.

        Args:
            idx: Sample index
            output_subdir: Subdirectory name (e.g., 'paddle_text', 'doctr_text')
            debugset_root: Root folder for debug output (default: /app/debugset)

        Returns:
            Path like /app/debugset/doc1/{output_subdir}/page_001.txt
        """
        img_path, _ = self.samples[idx]
        # img_path: /app/dataset/doc1/img/page_001.png
        # Extract relative path: doc1/img/page_001.png
        parts = img_path.split("/dataset/", 1)
        if len(parts) == 2:
            rel_path = parts[1]  # doc1/img/page_001.png
        else:
            rel_path = os.path.basename(img_path)

        # Replace /img/ with /{output_subdir}/
        rel_parts = rel_path.rsplit("/img/", 1)
        doc_folder = rel_parts[0]  # doc1
        fname = os.path.splitext(rel_parts[1])[0] + ".txt"  # page_001.txt

        out_dir = os.path.join(debugset_root, doc_folder, output_subdir)
        os.makedirs(out_dir, exist_ok=True)
        return os.path.join(out_dir, fname)
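

# Illustrative helper (not used by the service): a pure version of the path
# mapping done in get_output_path, with the os.makedirs side effect omitted
# so the traced example below holds without touching the filesystem.
def _map_output_path(img_path, output_subdir, debugset_root="/app/debugset"):
    parts = img_path.split("/dataset/", 1)
    rel_path = parts[1] if len(parts) == 2 else os.path.basename(img_path)
    doc_folder, page = rel_path.rsplit("/img/", 1)
    return os.path.join(debugset_root, doc_folder, output_subdir,
                        os.path.splitext(page)[0] + ".txt")

# _map_output_path("/app/dataset/doc1/img/page_001.png", "easyocr_text")
# -> "/app/debugset/doc1/easyocr_text/page_001.txt"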
61
src/easyocr_service/docker-compose.yml
Normal file
@@ -0,0 +1,61 @@
# docker-compose.yml - EasyOCR REST API
# Usage:
#   CPU: docker compose up ocr-cpu
#   GPU: docker compose up ocr-gpu
#
# Port: 8002

services:
  # CPU-only service
  ocr-cpu:
    image: seryus.ddns.net/unir/easyocr-cpu:latest
    container_name: easyocr-cpu
    ports:
      - "8002:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - easyocr-cache:/root/.EasyOCR
    environment:
      - PYTHONUNBUFFERED=1
      - EASYOCR_LANGUAGES=es,en
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 120s

  # GPU service (requires NVIDIA Container Toolkit)
  ocr-gpu:
    image: seryus.ddns.net/unir/easyocr-gpu:latest
    container_name: easyocr-gpu
    ports:
      - "8002:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - easyocr-cache:/root/.EasyOCR
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - EASYOCR_LANGUAGES=es,en
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 120s

volumes:
  easyocr-cache:
    name: easyocr-model-cache
334
src/easyocr_service/easyocr_tuning_rest.py
Normal file
@@ -0,0 +1,334 @@
# easyocr_tuning_rest.py
# FastAPI REST service for EasyOCR hyperparameter evaluation
# Usage: uvicorn easyocr_tuning_rest:app --host 0.0.0.0 --port 8000

import os
import re
import time
import threading
from typing import Optional, List
from contextlib import asynccontextmanager

import numpy as np
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field

import easyocr
from jiwer import wer, cer
from dataset_manager import ImageTextDataset


def get_gpu_info() -> dict:
    """Get GPU status information from PyTorch."""
    info = {
        "cuda_available": torch.cuda.is_available(),
        "device": "cuda" if torch.cuda.is_available() else "cpu",
        "gpu_count": 0,
        "gpu_name": None,
        "gpu_memory_total": None,
        "gpu_memory_used": None,
    }

    if info["cuda_available"]:
        try:
            info["gpu_count"] = torch.cuda.device_count()
            if info["gpu_count"] > 0:
                info["gpu_name"] = torch.cuda.get_device_name(0)
                info["gpu_memory_total"] = f"{torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB"
                info["gpu_memory_used"] = f"{torch.cuda.memory_allocated(0) / (1024**3):.2f} GB"
        except Exception as e:
            info["gpu_error"] = str(e)

    return info


# Model configuration via environment variables
DEFAULT_LANGUAGES = os.environ.get("EASYOCR_LANGUAGES", "es,en").split(",")


# Global state for model and dataset
class AppState:
    reader: Optional[easyocr.Reader] = None
    dataset: Optional[ImageTextDataset] = None
    dataset_path: Optional[str] = None
    languages: List[str] = DEFAULT_LANGUAGES
    lock: threading.Lock = None  # Protects OCR model from concurrent access

    def __init__(self):
        self.lock = threading.Lock()


state = AppState()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load EasyOCR model at startup."""
    gpu_info = get_gpu_info()
    print("=" * 50)
    print("GPU STATUS")
    print("=" * 50)
    print(f"  CUDA available: {gpu_info['cuda_available']}")
    print(f"  Device: {gpu_info['device']}")
    if gpu_info['cuda_available']:
        print(f"  GPU count: {gpu_info['gpu_count']}")
        print(f"  GPU name: {gpu_info['gpu_name']}")
        print(f"  GPU memory total: {gpu_info['gpu_memory_total']}")
    print("=" * 50)

    print("Loading EasyOCR models...")
    print(f"  Languages: {state.languages}")
    state.reader = easyocr.Reader(
        state.languages,
        gpu=gpu_info['cuda_available'],
    )

    if gpu_info['cuda_available']:
        gpu_after = get_gpu_info()
        print(f"  GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")

    print("Model loaded successfully!")
    yield
    state.reader = None
    state.dataset = None


app = FastAPI(
    title="EasyOCR Tuning API",
    description="REST API for EasyOCR hyperparameter evaluation",
    version="1.0.0",
    lifespan=lifespan,
)


class EvaluateRequest(BaseModel):
    """Request schema with all tunable EasyOCR hyperparameters."""
    pdf_folder: str = Field("/app/dataset", description="Path to dataset folder")

    # Detection thresholds (CRAFT algorithm)
    text_threshold: float = Field(0.7, ge=0.0, le=1.0, description="Text confidence threshold")
    low_text: float = Field(0.4, ge=0.0, le=1.0, description="Text lower-bound score")
    link_threshold: float = Field(0.4, ge=0.0, le=1.0, description="Link confidence threshold")

    # Bounding box merging
    slope_ths: float = Field(0.1, ge=0.0, le=1.0, description="Maximum slope for box merging")
    ycenter_ths: float = Field(0.5, ge=0.0, le=2.0, description="Maximum vertical shift for merging")
    height_ths: float = Field(0.5, ge=0.0, le=2.0, description="Maximum height variance for merging")
    width_ths: float = Field(0.5, ge=0.0, le=2.0, description="Maximum horizontal distance for merging")
    add_margin: float = Field(0.1, ge=0.0, le=1.0, description="Bounding box extension margin")

    # Contrast handling
    contrast_ths: float = Field(0.1, ge=0.0, le=1.0, description="Contrast threshold for dual-pass")
    adjust_contrast: float = Field(0.5, ge=0.0, le=1.0, description="Target contrast adjustment level")

    # Decoder options
    decoder: str = Field("greedy", description="Decoder type: greedy, beamsearch, wordbeamsearch")
    beamWidth: int = Field(5, ge=1, le=20, description="Beam width for beam search decoders")

    # Other
    min_size: int = Field(10, ge=1, description="Minimum text box size in pixels")
    rotation_info: Optional[List[int]] = Field(None, description="Rotation angles to try: [90, 180, 270]")

    # Page range
    start_page: int = Field(5, ge=0, description="Start page index (inclusive)")
    end_page: int = Field(10, ge=1, description="End page index (exclusive)")
    save_output: bool = Field(False, description="Save OCR predictions to debugset folder")


class EvaluateResponse(BaseModel):
    """Response schema matching CLI output."""
    CER: float
    WER: float
    TIME: float
    PAGES: int
    TIME_PER_PAGE: float


class HealthResponse(BaseModel):
    status: str
    model_loaded: bool
    dataset_loaded: bool
    dataset_size: Optional[int] = None
    languages: Optional[List[str]] = None
    cuda_available: Optional[bool] = None
    device: Optional[str] = None
    gpu_name: Optional[str] = None
    gpu_memory_used: Optional[str] = None
    gpu_memory_total: Optional[str] = None


def assemble_easyocr_result(result: list) -> str:
    """
    Assemble EasyOCR result into text.
    EasyOCR returns: [(bbox, text, confidence), ...]
    """
    if not result:
        return ""

    # Sort by vertical position (y), then horizontal (x)
    # bbox format: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
    def get_y_center(item):
        bbox = item[0]
        return (bbox[0][1] + bbox[2][1]) / 2

    def get_x(item):
        return item[0][0][0]

    # Group by lines based on y-center
    sorted_items = sorted(result, key=lambda x: (get_y_center(x), get_x(x)))

    if not sorted_items:
        return ""

    # Adaptive line tolerance
    heights = []
    for item in sorted_items:
        bbox = item[0]
        h = abs(bbox[2][1] - bbox[0][1])
        heights.append(h)

    median_h = float(np.median(heights)) if heights else 20.0
    line_tol = max(8.0, 0.6 * median_h)

    lines, cur_line, last_y = [], [], None
    for item in sorted_items:
        y_center = get_y_center(item)
        text = item[1]

        if last_y is None or abs(y_center - last_y) <= line_tol:
            cur_line.append((get_x(item), text))
        else:
            cur_line.sort(key=lambda t: t[0])
            lines.append(" ".join(t[1] for t in cur_line))
            cur_line = [(get_x(item), text)]
        last_y = y_center

    if cur_line:
        cur_line.sort(key=lambda t: t[0])
        lines.append(" ".join(t[1] for t in cur_line))

    text = " ".join(lines)
    text = re.sub(r"\s+", " ", text).strip()
    return text
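

# Illustrative, stdlib-only sketch (not used by the service) of the grouping
# above on synthetic (bbox, text, confidence) triples: "world" (y-center 20)
# and "Hello" (y-center 22) fall within the line tolerance and join one line,
# ordered left to right; "again" (y-center 70) starts the next line.
def _demo_group_lines(result):
    from statistics import median

    y_center = lambda it: (it[0][0][1] + it[0][2][1]) / 2
    x_left = lambda it: it[0][0][0]
    items = sorted(result, key=lambda it: (y_center(it), x_left(it)))
    tol = max(8.0, 0.6 * median(abs(it[0][2][1] - it[0][0][1]) for it in items))

    lines, cur, last_y = [], [], None
    for it in items:
        if last_y is not None and abs(y_center(it) - last_y) > tol:
            lines.append(" ".join(t for _, t in sorted(cur)))
            cur = []
        cur.append((x_left(it), it[1]))
        last_y = y_center(it)
    if cur:
        lines.append(" ".join(t for _, t in sorted(cur)))
    return " ".join(lines)


_demo_boxes = [
    [[[120, 10], [180, 10], [180, 30], [120, 30]], "world", 0.90],
    [[[10, 12], [100, 12], [100, 32], [10, 32]], "Hello", 0.95],
    [[[10, 60], [80, 60], [80, 80], [10, 80]], "again", 0.80],
]
# _demo_group_lines(_demo_boxes) -> "Hello world again"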


def evaluate_text(reference: str, prediction: str) -> dict:
    """Calculate WER and CER metrics."""
    return {"WER": wer(reference, prediction), "CER": cer(reference, prediction)}
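

# Illustrative, dependency-free sketch (not jiwer's implementation): CER is
# character-level edit distance divided by reference length; jiwer computes
# WER the same way over word tokens.
def _demo_cer(reference, prediction):
    prev = list(range(len(prediction) + 1))
    for i, ref_ch in enumerate(reference, 1):
        cur = [i]
        for j, pred_ch in enumerate(prediction, 1):
            cur.append(min(prev[j] + 1,                       # deletion
                           cur[j - 1] + 1,                    # insertion
                           prev[j - 1] + (ref_ch != pred_ch)))  # substitution
        prev = cur
    return prev[-1] / max(len(reference), 1)

# _demo_cer("kitten", "sitting") -> 0.5 (edit distance 3 over 6 characters)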


@app.get("/health", response_model=HealthResponse)
def health_check():
    """Check whether the service is ready."""
    gpu_info = get_gpu_info()
    return HealthResponse(
        status="ok" if state.reader is not None else "initializing",
        model_loaded=state.reader is not None,
        dataset_loaded=state.dataset is not None,
        dataset_size=len(state.dataset) if state.dataset else None,
        languages=state.languages,
        cuda_available=gpu_info.get("cuda_available"),
        device=gpu_info.get("device"),
        gpu_name=gpu_info.get("gpu_name"),
        gpu_memory_used=gpu_info.get("gpu_memory_used"),
        gpu_memory_total=gpu_info.get("gpu_memory_total"),
    )


@app.post("/evaluate", response_model=EvaluateResponse)
def evaluate(request: EvaluateRequest):
    """
    Evaluate OCR with the given hyperparameters.
    Returns CER, WER, and timing metrics.
    """
    if state.reader is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    # Validate decoder
    if request.decoder not in ["greedy", "beamsearch", "wordbeamsearch"]:
        raise HTTPException(status_code=400, detail=f"Invalid decoder: {request.decoder}")

    # Load or reload the dataset if the path changed
    if state.dataset is None or state.dataset_path != request.pdf_folder:
        if not os.path.isdir(request.pdf_folder):
            raise HTTPException(status_code=400, detail=f"Dataset folder not found: {request.pdf_folder}")
        state.dataset = ImageTextDataset(request.pdf_folder)
        state.dataset_path = request.pdf_folder

    if len(state.dataset) == 0:
        raise HTTPException(status_code=400, detail="Dataset is empty")

    # Validate page range
    start = request.start_page
    end = min(request.end_page, len(state.dataset))
    if start >= end:
        raise HTTPException(status_code=400, detail=f"Invalid page range: {start}-{end}")

    cer_list, wer_list = [], []
    time_per_page_list = []
    t0 = time.time()

    # Lock to prevent concurrent OCR access (the model is not thread-safe)
    with state.lock:
        for idx in range(start, end):
            img, ref = state.dataset[idx]
            arr = np.array(img)

            tp0 = time.time()
            result = state.reader.readtext(
                arr,
                # Detection thresholds
                text_threshold=request.text_threshold,
                low_text=request.low_text,
                link_threshold=request.link_threshold,
                # Bounding box merging
                slope_ths=request.slope_ths,
                ycenter_ths=request.ycenter_ths,
                height_ths=request.height_ths,
                width_ths=request.width_ths,
                add_margin=request.add_margin,
                # Contrast
                contrast_ths=request.contrast_ths,
                adjust_contrast=request.adjust_contrast,
                # Decoder
                decoder=request.decoder,
                beamWidth=request.beamWidth,
                # Other
                min_size=request.min_size,
                rotation_info=request.rotation_info,
            )

            pred = assemble_easyocr_result(result)
            time_per_page_list.append(float(time.time() - tp0))

            # Save the prediction to debugset if requested
            if request.save_output:
                out_path = state.dataset.get_output_path(idx, "easyocr_text")
                with open(out_path, "w", encoding="utf-8") as f:
                    f.write(pred)

            m = evaluate_text(ref, pred)
            cer_list.append(m["CER"])
            wer_list.append(m["WER"])

    return EvaluateResponse(
        CER=float(np.mean(cer_list)) if cer_list else 1.0,
        WER=float(np.mean(wer_list)) if wer_list else 1.0,
        TIME=float(time.time() - t0),
        PAGES=len(cer_list),
        TIME_PER_PAGE=float(np.mean(time_per_page_list)) if time_per_page_list else 0.0,
    )


@app.post("/evaluate_full", response_model=EvaluateResponse)
def evaluate_full(request: EvaluateRequest):
    """Evaluate on ALL pages (ignores start_page/end_page)."""
    request.start_page = 0
    request.end_page = 9999
    return evaluate(request)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
8
src/easyocr_service/requirements.txt
Normal file
@@ -0,0 +1,8 @@
easyocr>=1.7.0
fastapi>=0.104.0
uvicorn>=0.24.0
pydantic>=2.0.0
jiwer>=3.0.0
numpy>=1.24.0
pillow>=10.0.0
torch>=2.0.0
213
src/paddle_ocr/Dockerfile.build-paddle
Normal file
@@ -0,0 +1,213 @@
# syntax=docker/dockerfile:1.4
# Dockerfile.build-paddle - Build PaddlePaddle GPU wheel for ARM64
#
# This Dockerfile compiles PaddlePaddle from source with CUDA support for ARM64.
# The resulting wheel can be used in Dockerfile.gpu for ARM64 GPU acceleration.
#
# Build time: ~1-2 hours with caching, 2-4 hours for the first build
# Output: /output/paddlepaddle_gpu-*.whl
#
# Usage:
#   CUDA_ARCH=90 docker compose --profile build run --rm build-paddle
#
# Features:
# - ccache for compiler caching (survives rebuilds)
# - Split build stages for better layer caching
# - ARM64 -m64 patch applied automatically

FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

LABEL maintainer="Sergio Jimenez"
LABEL description="PaddlePaddle GPU wheel builder for ARM64"

# Build arguments
ARG PADDLE_VERSION=v3.0.0
ARG PYTHON_VERSION=3.11
ARG CUDA_ARCH=90

# Environment setup
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV CCACHE_DIR=/ccache
ENV PATH="/usr/lib/ccache:${PATH}"

# Install build dependencies + ccache
RUN apt-get update && apt-get install -y --no-install-recommends \
    python${PYTHON_VERSION} \
    python${PYTHON_VERSION}-dev \
    python${PYTHON_VERSION}-venv \
    python3-pip \
    build-essential \
    cmake \
    ninja-build \
    git \
    wget \
    curl \
    pkg-config \
    ccache \
    libssl-dev \
    libffi-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    liblzma-dev \
    libncurses5-dev \
    libncursesw5-dev \
    libgflags-dev \
    libgoogle-glog-dev \
    libprotobuf-dev \
    protobuf-compiler \
    patchelf \
    libopenblas-dev \
    liblapack-dev \
    swig \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3

# Set up ccache symlinks for CUDA and the host compilers
RUN mkdir -p /usr/lib/ccache && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/nvcc && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/gcc && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/g++ && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/cc && \
    ln -sf /usr/bin/ccache /usr/lib/ccache/c++

# Upgrade pip and install Python build dependencies
RUN python -m pip install --upgrade pip setuptools wheel && \
    python -m pip install numpy protobuf pyyaml requests packaging astor decorator paddle-bfloat opt-einsum

WORKDIR /build

# Clone the PaddlePaddle repository
RUN git clone --depth 1 --branch ${PADDLE_VERSION} https://github.com/PaddlePaddle/Paddle.git

WORKDIR /build/Paddle

# Patch for ARM64: remove the -m64 flag (x86_64-specific, causes a build failure on aarch64)
RUN sed -i 's/-m64//g' cmake/flags.cmake && \
    sed -i 's/-m64//g' CMakeLists.txt 2>/dev/null || true && \
    find . -name "*.cmake" -exec sed -i 's/-m64//g' {} \; 2>/dev/null || true && \
    echo "Patched -m64 flag for ARM64 compatibility"

# Patch for ARM64: install sse2neon to translate x86 SSE intrinsics to ARM NEON
# sse2neon provides drop-in replacements for x86 SIMD headers
RUN git clone --depth 1 https://github.com/DLTcollab/sse2neon.git /tmp/sse2neon && \
    mkdir -p /usr/local/include/sse2neon && \
    cp /tmp/sse2neon/sse2neon.h /usr/local/include/sse2neon/ && \
    rm -rf /tmp/sse2neon && \
    echo "Installed sse2neon for x86->ARM NEON translation"

# Create wrapper headers that fall back to sse2neon on non-x86 targets
RUN mkdir -p /usr/local/include/x86_stubs && \
    for hdr in immintrin xmmintrin emmintrin pmmintrin smmintrin; do \
        { \
            echo "#ifndef __x86_64__"; \
            echo "#include <sse2neon/sse2neon.h>"; \
            echo "#else"; \
            echo "#include_next <${hdr}.h>"; \
            echo "#endif"; \
        } > /usr/local/include/x86_stubs/${hdr}.h; \
    done && \
    echo "Created x86 intrinsic wrapper headers for ARM64 using sse2neon"

# Install additional Python requirements for building
RUN pip install -r python/requirements.txt || true

# Create the build directory
RUN mkdir -p build
WORKDIR /build/Paddle/build

# Configure CMake for the ARM64 + CUDA build
# Note: -Wno-class-memaccess silences an Eigen NEON warning on ARM64
RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}" && \
    cmake .. \
        -GNinja \
        -DCMAKE_BUILD_TYPE=Release \
        -DPY_VERSION=${PYTHON_VERSION} \
        -DWITH_GPU=ON \
        -DWITH_TESTING=OFF \
        -DWITH_DISTRIBUTE=OFF \
        -DWITH_NCCL=OFF \
        -DWITH_MKL=OFF \
        -DWITH_MKLDNN=OFF \
        -DON_INFER=OFF \
        -DWITH_PYTHON=ON \
        -DWITH_AVX=OFF \
        -DCUDA_ARCH_NAME=Manual \
        -DCUDA_ARCH_BIN="${CUDA_ARCH}" \
        -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
        -DCMAKE_CXX_FLAGS="-Wno-class-memaccess -Wno-error=class-memaccess -I/usr/local/include/x86_stubs" \
        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON

# Build external dependencies first (cacheable layer)
RUN --mount=type=cache,target=/ccache \
    ninja extern_gflags extern_glog extern_protobuf extern_zlib extern_eigen3

# Build flashattn (heaviest dependency, separate layer for caching)
RUN --mount=type=cache,target=/ccache \
    ninja extern_flashattn

# Build the remaining external dependencies
RUN --mount=type=cache,target=/ccache \
    ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo extern_xbyak

# Build the main PaddlePaddle targets (with ccache; fall back to fewer jobs on OOM)
RUN --mount=type=cache,target=/ccache \
    ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4

# Build the Python wheel target
RUN ninja paddle_python || true

# Create the output directory
RUN mkdir -p /output

# Build the wheel package - try multiple methods since the PaddlePaddle build layout varies
WORKDIR /build/Paddle
RUN echo "=== Looking for wheel build method ===" && \
    ls -la python/ 2>/dev/null && \
    ls -la build/python/ 2>/dev/null && \
    if [ -f build/python/setup.py ]; then \
        echo "Using build/python/setup.py" && \
        cd build/python && python setup.py bdist_wheel; \
    elif [ -f python/setup.py ]; then \
        echo "Using python/setup.py" && \
        cd python && python setup.py bdist_wheel; \
    else \
        echo "Looking for existing wheel..." && \
        find /build -name "paddlepaddle*.whl" -type f 2>/dev/null; \
    fi

# Copy the wheel to the output directory, failing the build if none was produced
RUN find /build -name "paddlepaddle*.whl" -type f -exec cp {} /output/ \; && \
    ls -la /output/ && \
    if [ ! "$(ls -A /output/*.whl 2>/dev/null)" ]; then \
        echo "ERROR: No wheel found!" && exit 1; \
    fi

# List what was built
RUN ls -la /output/ && \
    echo "=== Build complete ===" && \
    find /build -name "*.whl" -type f 2>/dev/null

# Default command: copy the wheel to the mounted volume
CMD ["sh", "-c", "cp /output/*.whl /wheels/ 2>/dev/null && echo 'Wheel copied to /wheels/' && ls -la /wheels/ || echo 'No wheel found in /output, checking other locations...' && find /build -name '*.whl' -exec cp {} /wheels/ \\; && ls -la /wheels/"]
149
src/paddle_ocr/Dockerfile.build-paddle-cpu
Normal file
@@ -0,0 +1,149 @@
|
||||
# syntax=docker/dockerfile:1.4
# Dockerfile.build-paddle-cpu - Build PaddlePaddle CPU wheel for ARM64
|
||||
#
|
||||
# Required because PyPI ships no ARM64 wheels for this version and the source assumes x86 SSE intrinsics.
|
||||
#
|
||||
# Build time: ~1-2 hours
|
||||
# Output: /output/paddlepaddle-*.whl
|
||||
#
|
||||
# Usage:
|
||||
# docker build -t paddle-builder:cpu-arm64 -f Dockerfile.build-paddle-cpu .
|
||||
#   docker run --rm -v "$(pwd)/wheels":/wheels paddle-builder:cpu-arm64
|
||||
|
||||
# syntax=docker/dockerfile:1.4
|
||||
FROM ubuntu:22.04
|
||||
|
||||
LABEL maintainer="Sergio Jimenez"
|
||||
LABEL description="PaddlePaddle CPU wheel builder for ARM64"
|
||||
|
||||
ARG PADDLE_VERSION=v3.0.0
|
||||
ARG PYTHON_VERSION=3.11
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV CCACHE_DIR=/ccache
|
||||
ENV PATH="/usr/lib/ccache:${PATH}"
|
||||
|
||||
# Install build dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python${PYTHON_VERSION} \
|
||||
python${PYTHON_VERSION}-dev \
|
||||
python${PYTHON_VERSION}-venv \
|
||||
python3-pip \
|
||||
build-essential \
|
||||
cmake \
|
||||
ninja-build \
|
||||
git \
|
||||
wget \
|
||||
curl \
|
||||
pkg-config \
|
||||
ccache \
|
||||
libssl-dev \
|
||||
libffi-dev \
|
||||
zlib1g-dev \
|
||||
libbz2-dev \
|
||||
libreadline-dev \
|
||||
libsqlite3-dev \
|
||||
liblzma-dev \
|
||||
libncurses5-dev \
|
||||
libncursesw5-dev \
|
||||
libgflags-dev \
|
||||
libgoogle-glog-dev \
|
||||
libprotobuf-dev \
|
||||
protobuf-compiler \
|
||||
patchelf \
|
||||
libopenblas-dev \
|
||||
liblapack-dev \
|
||||
swig \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
|
||||
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
|
||||
|
||||
# Setup ccache
|
||||
RUN mkdir -p /usr/lib/ccache && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/gcc && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/g++ && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/cc && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/c++
|
||||
|
||||
RUN python -m pip install --upgrade pip setuptools wheel && \
|
||||
python -m pip install numpy protobuf pyyaml requests packaging astor decorator paddle-bfloat opt-einsum
|
||||
|
||||
WORKDIR /build
|
||||
RUN git clone --depth 1 --branch ${PADDLE_VERSION} https://github.com/PaddlePaddle/Paddle.git
|
||||
|
||||
WORKDIR /build/Paddle
|
||||
|
||||
# Patch -m64 flag (x86_64 specific)
|
||||
RUN sed -i 's/-m64//g' cmake/flags.cmake && \
|
||||
sed -i 's/-m64//g' CMakeLists.txt 2>/dev/null || true && \
|
||||
find . -name "*.cmake" -exec sed -i 's/-m64//g' {} \; 2>/dev/null || true
|
||||
|
||||
# Install sse2neon for x86 SSE -> ARM NEON translation
|
||||
RUN git clone --depth 1 https://github.com/DLTcollab/sse2neon.git /tmp/sse2neon && \
|
||||
mkdir -p /usr/local/include/sse2neon && \
|
||||
cp /tmp/sse2neon/sse2neon.h /usr/local/include/sse2neon/ && \
|
||||
rm -rf /tmp/sse2neon
|
||||
|
||||
# Create x86 intrinsic wrapper headers
|
||||
RUN mkdir -p /usr/local/include/x86_stubs && \
|
||||
for h in immintrin xmmintrin emmintrin pmmintrin smmintrin; do \
|
||||
echo "#ifndef __x86_64__" > /usr/local/include/x86_stubs/${h}.h && \
|
||||
echo "#include <sse2neon/sse2neon.h>" >> /usr/local/include/x86_stubs/${h}.h && \
|
||||
echo "#else" >> /usr/local/include/x86_stubs/${h}.h && \
|
||||
echo "#include_next <${h}.h>" >> /usr/local/include/x86_stubs/${h}.h && \
|
||||
echo "#endif" >> /usr/local/include/x86_stubs/${h}.h; \
|
||||
done
|
||||
|
||||
RUN pip install -r python/requirements.txt || true
|
||||
|
||||
RUN mkdir -p build
|
||||
WORKDIR /build/Paddle/build
|
||||
|
||||
# Configure for CPU-only ARM64 build
|
||||
# WITH_ARM=ON enables ARM NEON optimizations and disables x86-specific code (XBYAK, MKL)
|
||||
RUN cmake .. \
|
||||
-GNinja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DPY_VERSION=${PYTHON_VERSION} \
|
||||
-DWITH_GPU=OFF \
|
||||
-DWITH_ARM=ON \
|
||||
-DWITH_TESTING=OFF \
|
||||
-DWITH_DISTRIBUTE=OFF \
|
||||
-DWITH_NCCL=OFF \
|
||||
-DWITH_MKL=OFF \
|
||||
-DWITH_MKLDNN=OFF \
|
||||
-DWITH_XBYAK=OFF \
|
||||
-DON_INFER=OFF \
|
||||
-DWITH_PYTHON=ON \
|
||||
-DWITH_AVX=OFF \
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_CXX_FLAGS="-Wno-class-memaccess -Wno-error=class-memaccess -I/usr/local/include/x86_stubs"
|
||||
|
||||
# Build external dependencies
|
||||
RUN --mount=type=cache,target=/ccache \
|
||||
ninja extern_gflags extern_glog extern_protobuf extern_zlib extern_eigen3
|
||||
|
||||
# Note: extern_xbyak excluded - it's x86-only and disabled with WITH_ARM=ON
|
||||
RUN --mount=type=cache,target=/ccache \
|
||||
ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo
|
||||
|
||||
# Build PaddlePaddle
|
||||
RUN --mount=type=cache,target=/ccache \
|
||||
ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4
|
||||
|
||||
RUN ninja paddle_python || true
|
||||
|
||||
RUN mkdir -p /output
|
||||
|
||||
WORKDIR /build/Paddle
|
||||
RUN if [ -f build/python/setup.py ]; then \
|
||||
cd build/python && python setup.py bdist_wheel; \
|
||||
elif [ -f python/setup.py ]; then \
|
||||
cd python && python setup.py bdist_wheel; \
|
||||
fi
|
||||
|
||||
RUN find /build -name "paddlepaddle*.whl" -type f -exec cp {} /output/ \; && \
|
||||
ls -la /output/
|
||||
|
||||
CMD ["sh", "-c", "cp /output/*.whl /wheels/ && ls -la /wheels/"]
|
||||
81
src/paddle_ocr/Dockerfile.cpu
Normal file
@@ -0,0 +1,81 @@
|
||||
# Dockerfile.cpu - Multi-stage CPU Dockerfile
|
||||
#
|
||||
# Build base only (push to registry, rarely changes):
|
||||
# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
|
||||
#
|
||||
# Build deploy (uses base, fast - code only):
|
||||
# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .
|
||||
#
|
||||
# Or build all at once:
|
||||
# docker build -t paddle-ocr-api:cpu -f Dockerfile.cpu .
|
||||
|
||||
# =============================================================================
|
||||
# STAGE 1: BASE - All dependencies (rarely changes)
|
||||
# =============================================================================
|
||||
FROM python:3.11-slim AS base
|
||||
|
||||
LABEL maintainer="Sergio Jimenez"
|
||||
LABEL description="PaddleOCR Base Image - CPU dependencies"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies for OpenCV and PaddleOCR
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libgl1 \
|
||||
libglib2.0-0 \
|
||||
libsm6 \
|
||||
libxext6 \
|
||||
libxrender1 \
|
||||
libgomp1 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy local wheels directory (may contain ARM64 wheel from build-paddle-cpu)
|
||||
COPY wheels/ /tmp/wheels/
|
||||
|
||||
# Install paddlepaddle: prefer local wheel (ARM64), fallback to PyPI (x86_64)
|
||||
RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
|
||||
echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
|
||||
pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
|
||||
else \
|
||||
echo "=== Installing PaddlePaddle from PyPI (x86_64) ===" && \
|
||||
pip install --no-cache-dir paddlepaddle==3.0.0; \
|
||||
fi && \
|
||||
rm -rf /tmp/wheels
|
||||
|
||||
# Install remaining Python dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# =============================================================================
|
||||
# STAGE 2: DEPLOY - Application code (changes frequently)
|
||||
# =============================================================================
|
||||
FROM base AS deploy
|
||||
|
||||
LABEL description="PaddleOCR Tuning REST API - CPU version"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy application code (this is the only layer that changes frequently)
|
||||
COPY paddle_ocr_tuning_rest.py .
|
||||
COPY dataset_manager.py .
|
||||
|
||||
# Build arguments for models
|
||||
ARG DET_MODEL=PP-OCRv5_server_det
|
||||
ARG REC_MODEL=PP-OCRv5_server_rec
|
||||
|
||||
# Set as environment variables (can be overridden at runtime)
|
||||
ENV PADDLE_DET_MODEL=${DET_MODEL}
|
||||
ENV PADDLE_REC_MODEL=${REC_MODEL}
|
||||
|
||||
# Volume for dataset and model cache
|
||||
VOLUME ["/app/dataset", "/root/.paddlex"]
|
||||
|
||||
# Expose API port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
|
||||
|
||||
# Run the API server
|
||||
CMD ["uvicorn", "paddle_ocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
105
src/paddle_ocr/Dockerfile.gpu
Normal file
@@ -0,0 +1,105 @@
|
||||
# Dockerfile.gpu - Multi-stage GPU Dockerfile
|
||||
#
|
||||
# Build base only (push to registry, rarely changes):
|
||||
# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-gpu-base:latest -f Dockerfile.gpu .
|
||||
#
|
||||
# Build deploy (uses base, fast - code only):
|
||||
# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-gpu:latest -f Dockerfile.gpu .
|
||||
#
|
||||
# Or build all at once:
|
||||
# docker build -t paddle-ocr-api:gpu -f Dockerfile.gpu .
|
||||
|
||||
# =============================================================================
|
||||
# STAGE 1: BASE - All dependencies (rarely changes)
|
||||
# =============================================================================
|
||||
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
|
||||
|
||||
LABEL maintainer="Sergio Jimenez"
|
||||
LABEL description="PaddleOCR Base Image - GPU/CUDA dependencies"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Set environment variables
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
# Install Python 3.11 and system dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3.11 \
|
||||
python3.11-venv \
|
||||
python3-pip \
|
||||
libgl1 \
|
||||
libglib2.0-0 \
|
||||
libsm6 \
|
||||
libxext6 \
|
||||
libxrender1 \
|
||||
libgomp1 \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& ln -sf /usr/bin/python3.11 /usr/bin/python
|
||||
|
||||
# Fix cuDNN library path for ARM64 only (PaddlePaddle looks in /usr/local/cuda/lib64)
|
||||
# x86_64 doesn't need this - PyPI wheel handles paths correctly
|
||||
RUN if [ "$(uname -m)" = "aarch64" ]; then \
|
||||
mkdir -p /usr/local/cuda/lib64 && \
|
||||
ln -sf /usr/lib/aarch64-linux-gnu/libcudnn*.so* /usr/local/cuda/lib64/ && \
|
||||
ln -sf /usr/lib/aarch64-linux-gnu/libcudnn.so.9 /usr/local/cuda/lib64/libcudnn.so && \
|
||||
ldconfig; \
|
||||
fi
|
||||
|
||||
# Copy local wheels directory (may contain ARM64 wheel from build-paddle)
|
||||
COPY wheels/ /tmp/wheels/
|
||||
|
||||
# Install paddlepaddle: prefer local wheel (ARM64), fallback to CUDA index (x86_64)
|
||||
RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
|
||||
echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
|
||||
python -m pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
|
||||
else \
|
||||
echo "=== Installing PaddlePaddle from CUDA index (x86_64) ===" && \
|
||||
python -m pip install --no-cache-dir paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/; \
|
||||
fi && \
|
||||
rm -rf /tmp/wheels
|
||||
|
||||
# Install remaining dependencies
|
||||
RUN python -m pip install --no-cache-dir \
|
||||
paddleocr==3.3.2 \
|
||||
jiwer \
|
||||
numpy \
|
||||
fastapi \
|
||||
"uvicorn[standard]" \
|
||||
pydantic \
|
||||
Pillow
|
||||
|
||||
# =============================================================================
|
||||
# STAGE 2: DEPLOY - Application code (changes frequently)
|
||||
# =============================================================================
|
||||
FROM base AS deploy
|
||||
|
||||
LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy application code (this is the only layer that changes frequently)
|
||||
COPY paddle_ocr_tuning_rest.py .
|
||||
COPY dataset_manager.py .
|
||||
|
||||
# Build arguments for models
|
||||
ARG DET_MODEL=PP-OCRv5_server_det
|
||||
ARG REC_MODEL=PP-OCRv5_server_rec
|
||||
|
||||
# Set as environment variables (can be overridden at runtime)
|
||||
ENV PADDLE_DET_MODEL=${DET_MODEL}
|
||||
ENV PADDLE_REC_MODEL=${REC_MODEL}
|
||||
|
||||
# Volume for dataset and model cache
|
||||
VOLUME ["/app/dataset", "/root/.paddlex"]
|
||||
|
||||
# Expose API port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
|
||||
|
||||
# Run the API server
|
||||
CMD ["uvicorn", "paddle_ocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
824
src/paddle_ocr/README.md
Normal file
@@ -0,0 +1,824 @@
|
||||
# PaddleOCR Tuning REST API
|
||||
|
||||
REST API service for PaddleOCR hyperparameter evaluation. Keeps the model loaded in memory for fast repeated evaluations during hyperparameter search.
|
||||
|
||||
## Quick Start with Docker Compose
|
||||
|
||||
Docker Compose manages building and running containers. The `docker-compose.yml` defines two services:
|
||||
- `ocr-cpu` - CPU-only version (works everywhere)
|
||||
- `ocr-gpu` - GPU version (requires NVIDIA GPU + Container Toolkit)
|
||||
|
||||
### Run CPU Version
|
||||
|
||||
```bash
|
||||
cd src/paddle_ocr
|
||||
|
||||
# Build and start (first time takes ~2-3 min to build, ~30s to load model)
|
||||
docker compose up ocr-cpu
|
||||
|
||||
# Or run in background (detached)
|
||||
docker compose up -d ocr-cpu
|
||||
|
||||
# View logs
|
||||
docker compose logs -f ocr-cpu
|
||||
|
||||
# Stop
|
||||
docker compose down
|
||||
```
|
||||
|
||||
### Run GPU Version
|
||||
|
||||
```bash
|
||||
# Requires: NVIDIA GPU + nvidia-container-toolkit installed
|
||||
docker compose up ocr-gpu
|
||||
```
|
||||
|
||||
### Test the API
|
||||
|
||||
Once running, test with:
|
||||
```bash
|
||||
# Check health
|
||||
curl http://localhost:8000/health
|
||||
|
||||
# Or use the test script
|
||||
pip install requests
|
||||
python test.py --url http://localhost:8000
|
||||
```
|
||||
|
||||
### What Docker Compose Does
|
||||
|
||||
```
|
||||
docker compose up ocr-cpu
|
||||
│
|
||||
├─► Builds image from Dockerfile.cpu (if not exists)
|
||||
├─► Creates container "paddle-ocr-cpu"
|
||||
├─► Mounts ../dataset → /app/dataset (your PDF images)
|
||||
├─► Mounts paddlex-cache volume (persists downloaded models)
|
||||
├─► Exposes port 8000
|
||||
└─► Runs: uvicorn paddle_ocr_tuning_rest:app --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `paddle_ocr_tuning_rest.py` | FastAPI REST service |
|
||||
| `dataset_manager.py` | Dataset loader |
|
||||
| `test.py` | API test client |
|
||||
| `Dockerfile.cpu` | CPU-only image (x86_64 + ARM64 with local wheel) |
|
||||
| `Dockerfile.gpu` | GPU/CUDA image (x86_64 + ARM64 with local wheel) |
|
||||
| `Dockerfile.build-paddle` | PaddlePaddle GPU wheel builder for ARM64 |
|
||||
| `Dockerfile.build-paddle-cpu` | PaddlePaddle CPU wheel builder for ARM64 |
|
||||
| `docker-compose.yml` | Service orchestration |
|
||||
| `docker-compose.cpu-registry.yml` | Pull CPU image from registry |
|
||||
| `docker-compose.gpu-registry.yml` | Pull GPU image from registry |
|
||||
| `wheels/` | Local PaddlePaddle wheels (created by build-paddle) |
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### `GET /health`
|
||||
Check if service is ready.
|
||||
|
||||
```json
|
||||
{"status": "ok", "model_loaded": true, "dataset_loaded": true, "dataset_size": 24}
|
||||
```
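
When scripting against the service it is convenient to block until the model has finished loading. A minimal stdlib-only sketch (the field names are the ones in the response above; the URL is the default compose port, so adjust as needed):

```python
import json
import time
import urllib.request


def is_ready(health: dict) -> bool:
    """Interpret the /health response shown above."""
    return (
        health.get("status") == "ok"
        and health.get("model_loaded", False)
        and health.get("dataset_loaded", False)
    )


def wait_until_ready(url: str = "http://localhost:8000/health",
                     timeout: float = 120.0) -> dict:
    """Poll /health until the service reports ready or the timeout expires."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                health = json.load(resp)
            if is_ready(health):
                return health
        except OSError:
            pass  # service not up yet, keep polling
        time.sleep(2)
    raise TimeoutError(f"service at {url} not ready after {timeout}s")
```

This is useful in CI, where the first start also downloads models and can take well over the usual ~30 s.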
|
||||
|
||||
### `POST /evaluate`
|
||||
Run OCR evaluation with given hyperparameters.
|
||||
|
||||
**Request:**
|
||||
```json
|
||||
{
|
||||
"pdf_folder": "/app/dataset",
|
||||
"textline_orientation": true,
|
||||
"use_doc_orientation_classify": false,
|
||||
"use_doc_unwarping": false,
|
||||
"text_det_thresh": 0.469,
|
||||
"text_det_box_thresh": 0.5412,
|
||||
"text_det_unclip_ratio": 0.0,
|
||||
"text_rec_score_thresh": 0.635,
|
||||
"start_page": 5,
|
||||
"end_page": 10
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{"CER": 0.0115, "WER": 0.0989, "TIME": 330.5, "PAGES": 5, "TIME_PER_PAGE": 66.1}
|
||||
```
|
||||
|
||||
### `POST /evaluate_full`
|
||||
Same as `/evaluate` but runs on ALL pages (ignores start_page/end_page).
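
For intuition, `CER` and `WER` in the responses above are edit distances normalized by reference length, computed at the character and word level respectively. The service uses `jiwer` for this; a stdlib-only sketch of the same idea:

```python
def levenshtein(a, b):
    """Plain dynamic-programming edit distance over two sequences."""
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,          # deletion
                           cur[j - 1] + 1,       # insertion
                           prev[j - 1] + (ca != cb)))  # substitution
        prev = cur
    return prev[-1]


def cer(reference: str, hypothesis: str) -> float:
    """Character error rate: character edits / reference length."""
    return levenshtein(reference, hypothesis) / max(len(reference), 1)


def wer(reference: str, hypothesis: str) -> float:
    """Word error rate: word edits / reference word count."""
    ref, hyp = reference.split(), hypothesis.split()
    return levenshtein(ref, hyp) / max(len(ref), 1)
```

`TIME_PER_PAGE` is simply `TIME / PAGES`, so the tuner can optimize for accuracy and throughput jointly.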
|
||||
|
||||
## Debug Output (debugset)
|
||||
|
||||
The `debugset` folder allows saving OCR predictions for debugging and analysis. When `save_output=True` is passed to `/evaluate`, predictions are written to `/app/debugset`.
|
||||
|
||||
### Enable Debug Output
|
||||
|
||||
```json
|
||||
{
|
||||
"pdf_folder": "/app/dataset",
|
||||
"save_output": true,
|
||||
"start_page": 5,
|
||||
"end_page": 10
|
||||
}
|
||||
```
|
||||
|
||||
### Output Structure
|
||||
|
||||
```
|
||||
debugset/
|
||||
├── doc1/
|
||||
│ └── paddle_ocr/
|
||||
│ ├── page_0005.txt
|
||||
│ ├── page_0006.txt
|
||||
│ └── ...
|
||||
├── doc2/
|
||||
│ └── paddle_ocr/
|
||||
│ └── ...
|
||||
```
|
||||
|
||||
Each `.txt` file contains the OCR-extracted text for that page.
|
||||
|
||||
### Docker Mount
|
||||
|
||||
The `debugset` folder is mounted read-write in docker-compose:
|
||||
|
||||
```yaml
|
||||
volumes:
|
||||
- ../debugset:/app/debugset:rw
|
||||
```
|
||||
|
||||
### Use Cases
|
||||
|
||||
- **Compare OCR engines**: Run same pages through PaddleOCR, DocTR, EasyOCR with `save_output=True`, then diff results
|
||||
- **Debug hyperparameters**: See how different settings affect text extraction
|
||||
- **Ground truth comparison**: Compare predictions against expected output
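
The engine-comparison workflow can be scripted directly against the saved layout. A sketch (assuming the `debugset/<doc>/<engine>/page_XXXX.txt` structure shown above; engine folder names are whatever each service writes):

```python
import difflib
from pathlib import Path


def compare_engines(debugset: Path, doc: str,
                    engine_a: str, engine_b: str) -> dict:
    """Per-page similarity (0..1) between two engines' saved outputs."""
    scores = {}
    pages_a = debugset / doc / engine_a
    pages_b = debugset / doc / engine_b
    for page in sorted(pages_a.glob("page_*.txt")):
        other = pages_b / page.name
        if not other.exists():
            continue  # page missing for the second engine
        a, b = page.read_text(), other.read_text()
        scores[page.name] = difflib.SequenceMatcher(None, a, b).ratio()
    return scores
```

Pages scoring well below 1.0 are the ones worth inspecting by hand.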
|
||||
|
||||
## Building Images
|
||||
|
||||
### CPU Image (Multi-Architecture)
|
||||
|
||||
```bash
|
||||
# Local build (current architecture)
|
||||
docker build -f Dockerfile.cpu -t paddle-ocr-api:cpu .
|
||||
|
||||
# Multi-arch build with buildx (amd64 + arm64)
|
||||
docker buildx create --name multiarch --use
|
||||
docker buildx build -f Dockerfile.cpu \
|
||||
--platform linux/amd64,linux/arm64 \
|
||||
-t paddle-ocr-api:cpu \
|
||||
--push .
|
||||
```
|
||||
|
||||
### GPU Image (x86_64 + ARM64 with local wheel)
|
||||
|
||||
```bash
|
||||
docker build -f Dockerfile.gpu -t paddle-ocr-api:gpu .
|
||||
```
|
||||
|
||||
> **Note:** PaddlePaddle GPU 3.x packages are **not on PyPI**. The Dockerfile installs from PaddlePaddle's official CUDA index (`paddlepaddle.org.cn/packages/stable/cu126/`). This is handled automatically during build.
|
||||
|
||||
## Running
|
||||
|
||||
### CPU (Any machine)
|
||||
|
||||
```bash
|
||||
docker run -d -p 8000:8000 \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v paddlex-cache:/root/.paddlex \
|
||||
paddle-ocr-api:cpu
|
||||
```
|
||||
|
||||
### GPU (NVIDIA)
|
||||
|
||||
```bash
|
||||
docker run -d -p 8000:8000 --gpus all \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v paddlex-cache:/root/.paddlex \
|
||||
paddle-ocr-api:gpu
|
||||
```
|
||||
|
||||
## GPU Support Analysis
|
||||
|
||||
### Host System Reference (DGX Spark)
|
||||
|
||||
This section documents GPU support findings based on testing on an NVIDIA DGX Spark:
|
||||
|
||||
| Component | Value |
|
||||
|-----------|-------|
|
||||
| Architecture | ARM64 (aarch64) |
|
||||
| CPU | NVIDIA Grace (ARM) |
|
||||
| GPU | NVIDIA GB10 |
|
||||
| CUDA Version | 13.0 |
|
||||
| Driver | 580.95.05 |
|
||||
| OS | Ubuntu 24.04 LTS |
|
||||
| Container Toolkit | nvidia-container-toolkit 1.18.1 |
|
||||
| Docker | 28.5.1 |
|
||||
| Docker Compose | v2.40.0 |
|
||||
|
||||
### PaddlePaddle GPU Platform Support
|
||||
|
||||
**Note:** PaddlePaddle-GPU does NOT have prebuilt ARM64 wheels on PyPI, but ARM64 support is available via custom-built wheels.
|
||||
|
||||
| Platform | CPU | GPU |
|
||||
|----------|-----|-----|
|
||||
| Linux x86_64 | ✅ | ✅ CUDA 10.2/11.x/12.x |
|
||||
| Windows x64 | ✅ | ✅ CUDA 10.2/11.x/12.x |
|
||||
| macOS x64 | ✅ | ❌ |
|
||||
| macOS ARM64 (M1/M2) | ✅ | ❌ |
|
||||
| Linux ARM64 (Jetson/DGX) | ✅ | ⚠️ Limited - see Blackwell note |
|
||||
|
||||
**Source:** [PaddlePaddle-GPU PyPI](https://pypi.org/project/paddlepaddle-gpu/) - only `manylinux_x86_64` and `win_amd64` wheels available on PyPI. ARM64 wheels must be built from source or downloaded from Gitea packages.
|
||||
|
||||
### ARM64 GPU Support
|
||||
|
||||
ARM64 GPU support is available but requires custom-built wheels:
|
||||
|
||||
1. **No prebuilt PyPI wheels**: `pip install paddlepaddle-gpu` fails on ARM64 - no compatible wheels exist on PyPI
|
||||
2. **Custom wheels work**: This project provides Dockerfiles to build ARM64 GPU wheels from source
|
||||
3. **CI/CD builds ARM64 GPU images**: Pre-built wheels are available from Gitea packages
|
||||
|
||||
**To use GPU on ARM64:**
|
||||
- Use the pre-built images from the container registry, or
|
||||
- Build the wheel locally using `Dockerfile.build-paddle` (see Option 2 below), or
|
||||
- Download the wheel from Gitea packages: `wheels/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl`
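
Before baking a downloaded wheel into an image it is worth checking that its platform tag matches the target machine. A rough sketch based only on the standard wheel filename convention (`name-version-python-abi-platform.whl`); compare the tag against `uname -m` / `platform.machine()`:

```python
def wheel_platform_tag(wheel_name: str) -> str:
    """Trailing platform tag of a wheel filename
    (name-version-python-abi-platform.whl)."""
    return wheel_name.rsplit("-", 1)[-1].removesuffix(".whl")


def wheel_matches(wheel_name: str, machine: str) -> bool:
    """True if the wheel's platform tag ends with the given machine string,
    e.g. 'linux_aarch64' matches 'aarch64'."""
    return wheel_platform_tag(wheel_name).endswith(machine)
```

Installing a mismatched wheel fails at `pip install` time anyway, but catching it before a 20-minute image build saves a cycle.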
|
||||
|
||||
### ⚠️ Known Limitation: Blackwell GPU (sm_121 / GB10)
|
||||
|
||||
**Status: GPU inference does NOT work on NVIDIA Blackwell GPUs (DGX Spark, GB200, etc.)**
|
||||
|
||||
#### Symptoms
|
||||
|
||||
When running PaddleOCR on Blackwell GPUs:
|
||||
- CUDA loads successfully ✅
|
||||
- Basic tensor operations work ✅
|
||||
- **Detection model outputs constant values** ❌
|
||||
- 0 text regions detected
|
||||
- CER/WER = 100% (nothing recognized)
|
||||
|
||||
#### Root Cause
|
||||
|
||||
**Confirmed:** PaddlePaddle's entire CUDA backend does NOT support Blackwell (sm_121). This is NOT just an inference model problem - even basic operations fail.
|
||||
|
||||
**Test Results (January 2026):**
|
||||
|
||||
1. **PTX JIT Test** (`CUDA_FORCE_PTX_JIT=1`):
|
||||
```
|
||||
OSError: CUDA error(209), no kernel image is available for execution on the device.
|
||||
[Hint: 'cudaErrorNoKernelImageForDevice']
|
||||
```
|
||||
→ Confirmed: No PTX code exists in PaddlePaddle binaries
|
||||
|
||||
2. **Dynamic Graph Mode Test** (bypassing inference models):
|
||||
```
|
||||
Conv2D + BatchNorm output:
|
||||
Output min: 0.0000
|
||||
Output max: 0.0000
|
||||
Output mean: 0.0000
|
||||
Dynamic graph mode: BROKEN (constant output)
|
||||
```
|
||||
→ Confirmed: Even simple nn.Conv2D produces zeros on Blackwell
|
||||
|
||||
**Conclusion:** The issue is PaddlePaddle's compiled CUDA kernels (cubins), not just the inference models. The entire framework was compiled without sm_121 support and without PTX for JIT compilation.
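
A startup script can triage this automatically: `nvidia-smi --query-gpu=compute_cap --format=csv,noheader` prints the compute capability (e.g. `12.1` for GB10), and anything in the sm_12x range should fall back to CPU. A sketch of the check (wiring it into the service entrypoint is left as an assumption):

```python
def blackwell_affected(compute_cap: str) -> bool:
    """True for sm_12x GPUs, where PaddlePaddle's CUDA kernels are known
    to misbehave. `compute_cap` is a string like '12.1', as printed by
    `nvidia-smi --query-gpu=compute_cap --format=csv,noheader`."""
    major = int(compute_cap.strip().split(".")[0])
    return major >= 12
```

On affected hardware the service can then force `use_gpu=False` instead of silently returning 100% CER.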
|
||||
|
||||
**Why building PaddlePaddle from source doesn't fix it:**
|
||||
|
||||
1. ⚠️ Building with `CUDA_ARCH=121` requires CUDA 13.0+ (PaddlePaddle only supports up to CUDA 12.6)
|
||||
2. ❌ Even if you could build it, PaddleOCR models contain pre-compiled CUDA ops
|
||||
3. ❌ These model files were exported/compiled targeting sm_80/sm_90 architectures
|
||||
4. ❌ The model kernels execute on GPU but produce garbage output on sm_121
|
||||
|
||||
**To truly fix this**, the PaddlePaddle team would need to:
|
||||
1. Add sm_121 to their model export pipeline
|
||||
2. Re-export all PaddleOCR models (PP-OCRv4, PP-OCRv5, etc.) with Blackwell support
|
||||
3. Release new model versions
|
||||
|
||||
This is tracked in [GitHub Issue #17327](https://github.com/PaddlePaddle/PaddleOCR/issues/17327).
|
||||
|
||||
#### Debug Script
|
||||
|
||||
Use the included debug script to verify this issue:
|
||||
|
||||
```bash
|
||||
docker exec paddle-ocr-gpu python /app/scripts/debug_gpu_detection.py /app/dataset/0/img/page_0001.png
|
||||
```
|
||||
|
||||
Expected output showing the problem:
|
||||
```
|
||||
OUTPUT ANALYSIS:
|
||||
Shape: (1, 1, 640, 640)
|
||||
Min: 0.000010
|
||||
Max: 0.000010 # <-- Same as min = constant output
|
||||
Mean: 0.000010
|
||||
|
||||
DIAGNOSIS:
|
||||
PROBLEM: Output is constant - model inference is broken!
|
||||
This typically indicates GPU compute capability mismatch.
|
||||
```
|
||||
|
||||
#### Workarounds
|
||||
|
||||
1. **Use CPU mode** (recommended):
|
||||
```bash
|
||||
docker compose up ocr-cpu
|
||||
```
|
||||
The ARM Grace CPU is fast (~2-5 sec/page). This is the reliable option.
|
||||
|
||||
2. **Use EasyOCR or DocTR with GPU**:
|
||||
These use PyTorch which has official ARM64 CUDA wheels (cu128 index):
|
||||
```bash
|
||||
# EasyOCR with GPU on DGX Spark
|
||||
docker build -f ../easyocr_service/Dockerfile.gpu -t easyocr-gpu ../easyocr_service
|
||||
docker run --gpus all -p 8002:8000 easyocr-gpu
|
||||
```
|
||||
|
||||
3. **Wait for PaddlePaddle Blackwell support**:
|
||||
Track [GitHub Issue #17327](https://github.com/PaddlePaddle/PaddleOCR/issues/17327) for updates.
|
||||
|
||||
#### GPU Support Matrix (Updated)
|
||||
|
||||
| GPU Architecture | Compute | CPU | GPU |
|
||||
|------------------|---------|-----|-----|
|
||||
| Ampere (A100, A10) | sm_80 | ✅ | ✅ |
|
||||
| Hopper (H100, H200) | sm_90 | ✅ | ✅ |
|
||||
| **Blackwell (GB10, GB200)** | sm_121 | ✅ | ❌ Not supported |
|
||||
|
||||
#### FAQ: Why Doesn't CUDA Backward Compatibility Work?
|
||||
|
||||
**Q: CUDA normally runs older kernels on newer GPUs. Why doesn't this work for Blackwell?**
|
||||
|
||||
Per [NVIDIA Blackwell Compatibility Guide](https://docs.nvidia.com/cuda/blackwell-compatibility-guide/):
|
||||
|
||||
CUDA **can** run older code on newer GPUs via **PTX JIT compilation**:
|
||||
1. PTX (Parallel Thread Execution) is NVIDIA's intermediate representation
|
||||
2. If an app includes PTX code, the driver JIT-compiles it for the target GPU
|
||||
3. This allows sm_80 code to run on sm_121
|
||||
|
||||
**The problem**: PaddleOCR inference models contain only pre-compiled **cubins** (SASS binary), not PTX. Without PTX, there's nothing to JIT-compile.
|
||||
|
||||
We tested PTX JIT (January 2026):
|
||||
```bash
|
||||
# Force PTX JIT compilation
|
||||
docker run --gpus all -e CUDA_FORCE_PTX_JIT=1 paddle-ocr-gpu \
|
||||
python /app/scripts/debug_gpu_detection.py /app/dataset/0/img/page_0001.png
|
||||
|
||||
# Result:
|
||||
# OSError: CUDA error(209), no kernel image is available for execution on the device.
|
||||
```
|
||||
**Confirmed: No PTX exists** in PaddlePaddle binaries. The CUDA kernels are cubins-only (SASS binary), compiled for sm_80/sm_90 without PTX fallback.
|
||||
|
||||
**Note on sm_121**: Per NVIDIA docs, "sm_121 is the same as sm_120 since the only difference is physically integrated CPU+GPU memory of Spark." The issue is general Blackwell (sm_12x) support, not Spark-specific.
|
||||
|
||||
#### FAQ: Does Dynamic Graph Mode Work on Blackwell?
|
||||
|
||||
**Q: Can I bypass inference models and use PaddlePaddle's dynamic graph mode?**
|
||||
|
||||
**No.** We tested dynamic graph mode (January 2026):
|
||||
```bash
|
||||
# Test script runs: paddle.nn.Conv2D + paddle.nn.BatchNorm2D
|
||||
python /app/scripts/test_dynamic_mode.py
|
||||
|
||||
# Result:
|
||||
# Input shape: [1, 3, 224, 224]
|
||||
# Output shape: [1, 64, 112, 112]
|
||||
# Output min: 0.0000
|
||||
# Output max: 0.0000 # <-- All zeros!
|
||||
# Output mean: 0.0000
|
||||
# Dynamic graph mode: BROKEN (constant output)
|
||||
```
|
||||
|
||||
**Conclusion:** The problem isn't limited to inference models. PaddlePaddle's core CUDA kernels (Conv2D, BatchNorm, etc.) produce garbage on sm_121. The entire framework lacks Blackwell support.
|
||||
|
||||
#### FAQ: Can I Run AMD64 Containers on ARM64 DGX Spark?
|
||||
|
||||
**Q: Can I just run the working x86_64 GPU image via emulation?**
|
||||
|
||||
**Short answer: Yes for CPU, No for GPU.**
|
||||
|
||||
You can run amd64 containers via QEMU emulation:
|
||||
```bash
|
||||
# Install QEMU
|
||||
sudo apt-get install qemu-user-static binfmt-support
|
||||
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
|
||||
|
||||
# Run amd64 container
|
||||
docker run --platform linux/amd64 paddle-ocr-gpu:amd64 ...
|
||||
```
|
||||
|
||||
**But GPU doesn't work:**
|
||||
- QEMU emulates CPU instructions (x86 → ARM)
|
||||
- **QEMU user-mode does NOT support GPU passthrough**
|
||||
- GPU calls from emulated x86 code cannot reach the ARM64 GPU
|
||||
|
||||
So even if the amd64 image works on x86_64:
|
||||
- ❌ No GPU access through QEMU
|
||||
- ❌ CPU emulation is 10-100x slower than native ARM64
|
||||
- ❌ Defeats the purpose entirely
|
||||
|
||||
| Approach | CPU | GPU | Speed |
|
||||
|----------|-----|-----|-------|
|
||||
| ARM64 native (CPU) | ✅ | N/A | Fast (~2-5s/page) |
|
||||
| ARM64 native (GPU) | ✅ | ❌ Blackwell issue | - |
|
||||
| AMD64 via QEMU | ⚠️ Works | ❌ No passthrough | 10-100x slower |
|
||||
|
||||
### Options for ARM64 Systems
|
||||
|
||||
#### Option 1: CPU-Only (Recommended)
|
||||
|
||||
Use `Dockerfile.cpu` which works on ARM64:
|
||||
|
||||
```bash
|
||||
# On DGX Spark
|
||||
docker compose up ocr-cpu
|
# Or build directly
docker build -f Dockerfile.cpu -t paddle-ocr-api:cpu .
```

**Performance:** CPU inference on ARM64 Grace is surprisingly fast due to the high core count. Expect ~2-5 seconds per page.

#### Option 2: Build PaddlePaddle from Source (Docker-based)

Use the included Docker builder to compile PaddlePaddle GPU for ARM64:

```bash
cd src/paddle_ocr

# Step 1: Build the PaddlePaddle GPU wheel (one-time, 2-4 hours)
docker compose --profile build run --rm build-paddle

# Verify the wheel was created
ls -la wheels/paddlepaddle*.whl

# Step 2: Build the GPU image (uses local wheel)
docker compose build ocr-gpu

# Step 3: Run with GPU
docker compose up ocr-gpu

# Verify GPU is working
docker compose exec ocr-gpu python -c "import paddle; print(paddle.device.is_compiled_with_cuda())"
```

**What this does:**

1. `build-paddle` compiles PaddlePaddle from source inside a CUDA container
2. The wheel is saved to the `./wheels/` directory
3. `Dockerfile.gpu` detects the local wheel and uses it instead of PyPI

**Caveats:**

- Build takes 2-4 hours on first run
- Requires ~20GB disk space during build
- Not officially supported by the PaddlePaddle team
- May need adjustments for future PaddlePaddle versions

See: [GitHub Issue #17327](https://github.com/PaddlePaddle/PaddleOCR/issues/17327)

#### Option 3: Alternative OCR Engines

For ARM64 GPU acceleration, consider alternatives:

| Engine | ARM64 GPU | Notes |
|--------|-----------|-------|
| **Tesseract** | ❌ CPU-only | Good fallback, widely available |
| **EasyOCR** | ⚠️ Via PyTorch | PyTorch has ARM64 GPU support |
| **TrOCR** | ⚠️ Via Transformers | Hugging Face Transformers + PyTorch |
| **docTR** | ⚠️ Via TensorFlow/PyTorch | Both backends have ARM64 support |

EasyOCR with PyTorch is a viable alternative:

```bash
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install easyocr
```

### x86_64 GPU Setup (Working)

For x86_64 systems with an NVIDIA GPU, the GPU Docker image works out of the box:

```bash
# Verify GPU is accessible
nvidia-smi

# Verify Docker GPU access
docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi

# Build and run GPU version
docker compose up ocr-gpu
```

### GPU Docker Compose Configuration

The `docker-compose.yml` configures GPU access via:

```yaml
deploy:
  resources:
    reservations:
      devices:
        - driver: nvidia
          count: 1
          capabilities: [gpu]
```

This requires Docker Compose v2 and the NVIDIA Container Toolkit (`nvidia-container-toolkit`).

## DGX Spark / ARM64 Quick Start

For ARM64 systems (DGX Spark, Jetson, Graviton), use the CPU-only image:

```bash
cd src/paddle_ocr

# Build ARM64-native CPU image
docker build -f Dockerfile.cpu -t paddle-ocr-api:arm64 .

# Run
docker run -d -p 8000:8000 \
  -v $(pwd)/../dataset:/app/dataset:ro \
  paddle-ocr-api:arm64

# Test
curl http://localhost:8000/health
```
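
The health check above can be scripted into a readiness poll before kicking off tuning. A minimal sketch (the `model_loaded` field comes from the service's `/health` response schema; the helper name `wait_until_ready` is ours, not part of the project):

```python
# Poll the API's /health endpoint until the model is loaded.
import json
import time
import urllib.request


def wait_until_ready(url="http://localhost:8000/health", timeout=120):
    """Block until /health reports model_loaded, or raise TimeoutError."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                body = json.load(resp)
            if body.get("model_loaded"):
                return body
        except OSError:
            pass  # service not up yet; keep polling
        time.sleep(2)
    raise TimeoutError(f"service at {url} not ready after {timeout}s")
```

Model loading takes ~10-30 s at container start, so polling avoids failing the first trials.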
|
||||
|
||||
### Cross-Compile from x86_64
|
||||
|
||||
Build ARM64 images from an x86_64 machine:
|
||||
|
||||
```bash
|
||||
# Setup buildx for multi-arch
|
||||
docker buildx create --name mybuilder --use
|
||||
|
||||
# Build ARM64 image from x86_64 machine
|
||||
docker buildx build -f Dockerfile.cpu \
|
||||
--platform linux/arm64 \
|
||||
-t paddle-ocr-api:arm64 \
|
||||
--load .
|
||||
|
||||
# Save and transfer to DGX Spark
|
||||
docker save paddle-ocr-api:arm64 | gzip > paddle-ocr-arm64.tar.gz
|
||||
scp paddle-ocr-arm64.tar.gz dgx-spark:~/
|
||||
|
||||
# On DGX Spark:
|
||||
docker load < paddle-ocr-arm64.tar.gz
|
||||
```
|
||||
|
||||
## Using with Ray Tune
|
||||
|
||||
### Multi-Worker Setup for Parallel Trials
|
||||
|
||||
Run multiple workers for parallel hyperparameter tuning:
|
||||
|
||||
```bash
|
||||
cd src/paddle_ocr
|
||||
|
||||
# Start 2 CPU workers (ports 8001-8002)
|
||||
sudo docker compose -f docker-compose.workers.yml --profile cpu up -d
|
||||
|
||||
# Or for GPU workers (if supported)
|
||||
sudo docker compose -f docker-compose.workers.yml --profile gpu up -d
|
||||
|
||||
# Check workers are healthy
|
||||
curl http://localhost:8001/health
|
||||
curl http://localhost:8002/health
|
||||
```
|
||||
|
||||
Then run the notebook with `max_concurrent_trials=2` to use both workers in parallel.
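
With several workers up, the notebook needs to spread trials across them. One simple approach (worker ports as started above; the helper is illustrative, not part of the repo) is round-robin assignment of worker URLs:

```python
# Round-robin assignment of evaluate-endpoint URLs across workers.
# Ports 8001-8002 match the two CPU workers started above.
import itertools

WORKER_URLS = [f"http://localhost:{port}/evaluate" for port in (8001, 8002)]
_worker_cycle = itertools.cycle(WORKER_URLS)


def next_worker_url():
    """Return the next worker's /evaluate URL, cycling forever."""
    return next(_worker_cycle)
```

Each trial would then POST its payload to `next_worker_url()` instead of a fixed `API_URL`.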

### Single Worker Setup

Update your notebook's `trainable_paddle_ocr` function:

```python
import requests
from ray import tune

API_URL = "http://localhost:8000/evaluate"


def trainable_paddle_ocr(config):
    """Call the OCR API instead of spawning a subprocess."""
    payload = {
        "pdf_folder": "/app/dataset",
        "use_doc_orientation_classify": config.get("use_doc_orientation_classify", False),
        "use_doc_unwarping": config.get("use_doc_unwarping", False),
        "textline_orientation": config.get("textline_orientation", True),
        "text_det_thresh": config.get("text_det_thresh", 0.0),
        "text_det_box_thresh": config.get("text_det_box_thresh", 0.0),
        "text_det_unclip_ratio": config.get("text_det_unclip_ratio", 1.5),
        "text_rec_score_thresh": config.get("text_rec_score_thresh", 0.0),
    }

    try:
        response = requests.post(API_URL, json=payload, timeout=600)
        response.raise_for_status()
        metrics = response.json()
        tune.report(metrics)
    except Exception as e:
        # Report worst-case scores so a failed trial is penalized
        # instead of crashing the whole run
        tune.report({"CER": 1.0, "WER": 1.0, "ERROR": str(e)[:500]})
```
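
The service validates its inputs (the request schema bounds the three threshold fields to [0, 1] and requires `text_det_unclip_ratio` to be non-negative), so out-of-range samples come back as HTTP 422 errors and get scored as failures. A small, illustrative guard that clamps a sampled config before posting (`clamp_config` is our name, not a repo function):

```python
def clamp_config(config):
    """Clamp sampled hyperparameters into the ranges the /evaluate
    request schema accepts: thresholds in [0, 1], unclip ratio >= 0."""
    clamped = dict(config)
    for key in ("text_det_thresh", "text_det_box_thresh", "text_rec_score_thresh"):
        if key in clamped:
            clamped[key] = min(1.0, max(0.0, float(clamped[key])))
    if "text_det_unclip_ratio" in clamped:
        clamped["text_det_unclip_ratio"] = max(0.0, float(clamped["text_det_unclip_ratio"]))
    return clamped
```

Calling this on `config` before building the payload keeps a wide search space from wasting trials on rejected requests.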

## Architecture: Model Lifecycle

The model is loaded **once** at container startup and stays in memory for all requests:

```mermaid
flowchart TB
    subgraph Container["Docker Container Lifecycle"]
        Start([Container Start]) --> Load[Load PaddleOCR Models<br/>~10-30s one-time cost]
        Load --> Ready[API Ready<br/>Models in RAM ~500MB]

        subgraph Requests["Incoming Requests - Models Stay Loaded"]
            Ready --> R1[Request 1] --> Ready
            Ready --> R2[Request 2] --> Ready
            Ready --> RN[Request N...] --> Ready
        end

        Ready --> Stop([Container Stop])
        Stop --> Free[Models Freed]
    end

    style Load fill:#f9f,stroke:#333
    style Ready fill:#9f9,stroke:#333
    style Requests fill:#e8f4ea,stroke:#090
```

**Subprocess vs REST API comparison:**

```mermaid
flowchart LR
    subgraph Subprocess["❌ Subprocess Approach"]
        direction TB
        S1[Trial 1] --> L1[Load Model ~10s]
        L1 --> E1[Evaluate ~60s]
        E1 --> U1[Unload]
        U1 --> S2[Trial 2]
        S2 --> L2[Load Model ~10s]
        L2 --> E2[Evaluate ~60s]
    end

    subgraph REST["✅ REST API Approach"]
        direction TB
        Start2[Start Container] --> Load2[Load Model ~10s]
        Load2 --> Ready2[Model in Memory]
        Ready2 --> T1[Trial 1 ~60s]
        T1 --> Ready2
        Ready2 --> T2[Trial 2 ~60s]
        T2 --> Ready2
        Ready2 --> TN[Trial N ~60s]
    end

    style L1 fill:#faa
    style L2 fill:#faa
    style Load2 fill:#afa
    style Ready2 fill:#afa
```

## Performance Comparison

| Approach | Model Load | Per-Trial Overhead | 64 Trials |
|----------|------------|-------------------|-----------|
| Subprocess (original) | Every trial (~10s) | ~10s | ~7 hours |
| Docker per trial | Every trial (~10s) | ~12-15s | ~7.5 hours |
| **REST API** | **Once** | **~0.1s** | **~5.8 hours** |

The REST API saves over an hour by loading the model only once.

## Troubleshooting

### Model download slow on first run

The first run downloads ~500MB of models. Use the `paddlex-cache` volume to persist them.

### Out of memory

Reduce `max_concurrent_trials` in Ray Tune, or increase the container memory limit:

```bash
docker run --memory=8g ...
```

### GPU not detected

Ensure the NVIDIA Container Toolkit is installed:

```bash
nvidia-smi  # Should work
docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi  # Should work
```

### PaddlePaddle GPU installation fails

PaddlePaddle 3.x GPU packages are **not available on PyPI**. They must be installed from PaddlePaddle's official index:

```bash
# For CUDA 12.x
pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/

# For CUDA 11.8
pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
```

`Dockerfile.gpu` handles this automatically.

## CI/CD Pipeline

The project includes a Gitea Actions workflow (`.gitea/workflows/ci.yaml`) for automated builds.

### What CI Builds

| Image | Architecture | Source |
|-------|--------------|--------|
| `paddle-ocr-cpu:amd64` | amd64 | PyPI paddlepaddle |
| `paddle-ocr-cpu:arm64` | arm64 | Pre-built wheel from Gitea packages |
| `paddle-ocr-gpu:amd64` | amd64 | PyPI paddlepaddle-gpu |
| `paddle-ocr-gpu:arm64` | arm64 | Pre-built wheel from Gitea packages |

### ARM64 Wheel Workflow

PyPI wheels don't work on ARM64 (they contain x86 SSE instructions), so wheels must be built from source using sse2neon. They are:

1. Built manually on an ARM64 machine (one-time)
2. Uploaded to Gitea generic packages
3. Downloaded by CI when building ARM64 images

#### Step 1: Build ARM64 Wheels (One-time, on ARM64 machine)

```bash
cd src/paddle_ocr

# Build GPU wheel (requires NVIDIA GPU, takes 1-2 hours)
sudo docker build -t paddle-builder:gpu-arm64 -f Dockerfile.build-paddle .
sudo docker run --rm -v ./wheels:/wheels paddle-builder:gpu-arm64

# Build CPU wheel (no GPU required, takes 1-2 hours)
sudo docker build -t paddle-builder:cpu-arm64 -f Dockerfile.build-paddle-cpu .
sudo docker run --rm -v ./wheels:/wheels paddle-builder:cpu-arm64

# Verify wheels were created
ls -la wheels/paddlepaddle*.whl
# paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl (GPU)
# paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl (CPU)
```

#### Step 2: Upload Wheels to Gitea Packages

```bash
export GITEA_TOKEN="your-token-here"

# Upload GPU wheel
curl -X PUT \
  -H "Authorization: token $GITEA_TOKEN" \
  --upload-file wheels/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl \
  "https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-gpu-arm64/3.0.0/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl"

# Upload CPU wheel
curl -X PUT \
  -H "Authorization: token $GITEA_TOKEN" \
  --upload-file wheels/paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl \
  "https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-cpu-arm64/3.0.0/paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl"
```

The wheels are then available at:

```
https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-gpu-arm64/3.0.0/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl
https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-cpu-arm64/3.0.0/paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl
```

#### Step 3: CI Builds Images

CI automatically:

1. Downloads ARM64 wheels from Gitea packages (for arm64 builds only)
2. Builds both CPU and GPU images for amd64 and arm64
3. Pushes to the registry with arch-specific tags

### Required CI Secrets

Configure these in the Gitea repository settings:

| Secret | Description |
|--------|-------------|
| `CI_READWRITE` | Gitea token with registry read/write access |

### Manual Image Push

```bash
# Log in to the registry
docker login seryus.ddns.net

# Build and push CPU (multi-arch)
docker buildx build -f Dockerfile.cpu \
  --platform linux/amd64,linux/arm64 \
  -t seryus.ddns.net/unir/paddle-ocr-api:cpu \
  --push .

# Build and push GPU (x86_64)
docker build -f Dockerfile.gpu -t seryus.ddns.net/unir/paddle-ocr-api:gpu-amd64 .
docker push seryus.ddns.net/unir/paddle-ocr-api:gpu-amd64

# Build and push GPU (ARM64) - requires the wheel in wheels/
docker buildx build -f Dockerfile.gpu \
  --platform linux/arm64 \
  -t seryus.ddns.net/unir/paddle-ocr-api:gpu-arm64 \
  --push .
```

### Updating the ARM64 Wheels

When PaddlePaddle releases a new version:

1. Update `PADDLE_VERSION` in `Dockerfile.build-paddle` and `Dockerfile.build-paddle-cpu`
2. Rebuild both wheels on an ARM64 machine
3. Upload to Gitea packages with the new version
4. Update `PADDLE_VERSION` in `.gitea/workflows/ci.yaml`
74
src/paddle_ocr/dataset_manager.py
Normal file
@@ -0,0 +1,74 @@
# Imports
import os
from PIL import Image


class ImageTextDataset:
    def __init__(self, root):
        self.samples = []

        for folder in sorted(os.listdir(root)):
            sub = os.path.join(root, folder)
            img_dir = os.path.join(sub, "img")
            txt_dir = os.path.join(sub, "txt")

            if not (os.path.isdir(img_dir) and os.path.isdir(txt_dir)):
                continue

            for fname in sorted(os.listdir(img_dir)):
                if not fname.lower().endswith((".png", ".jpg", ".jpeg")):
                    continue

                img_path = os.path.join(img_dir, fname)

                # The text file must have the same name, but with a .txt extension
                txt_name = os.path.splitext(fname)[0] + ".txt"
                txt_path = os.path.join(txt_dir, txt_name)

                if not os.path.exists(txt_path):
                    continue

                self.samples.append((img_path, txt_path))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, txt_path = self.samples[idx]

        # Load image
        image = Image.open(img_path).convert("RGB")

        # Load text
        with open(txt_path, "r", encoding="utf-8") as f:
            text = f.read()

        return image, text

    def get_output_path(self, idx, output_subdir, debugset_root="/app/debugset"):
        """Get output path for saving an OCR result to the debugset folder.

        Args:
            idx: Sample index
            output_subdir: Subdirectory name (e.g., 'paddle_text', 'doctr_text')
            debugset_root: Root folder for debug output (default: /app/debugset)

        Returns:
            Path like /app/debugset/doc1/{output_subdir}/page_001.txt
        """
        img_path, _ = self.samples[idx]
        # img_path: /app/dataset/doc1/img/page_001.png
        # Extract relative path: doc1/img/page_001.png
        parts = img_path.split("/dataset/", 1)
        if len(parts) == 2:
            rel_path = parts[1]  # doc1/img/page_001.png
        else:
            rel_path = os.path.basename(img_path)

        # Replace /img/ with /{output_subdir}/
        rel_parts = rel_path.rsplit("/img/", 1)
        if len(rel_parts) == 2:
            doc_folder, page_name = rel_parts  # doc1, page_001.png
        else:
            # No /img/ segment (bare-filename fallback): save at the debugset root
            doc_folder, page_name = "", rel_parts[0]
        fname = os.path.splitext(page_name)[0] + ".txt"  # page_001.txt

        out_dir = os.path.join(debugset_root, doc_folder, output_subdir)
        os.makedirs(out_dir, exist_ok=True)
        return os.path.join(out_dir, fname)
26
src/paddle_ocr/docker-compose.cpu-registry.yml
Normal file
@@ -0,0 +1,26 @@
# docker-compose.cpu-registry.yml - Pull the CPU image from the registry
# Usage: docker compose -f docker-compose.cpu-registry.yml up

services:
  ocr-cpu:
    image: seryus.ddns.net/unir/paddle-ocr-cpu:latest
    container_name: paddle-ocr-cpu-registry
    ports:
      - "8001:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

volumes:
  paddlex-cache:
    name: paddlex-model-cache
39
src/paddle_ocr/docker-compose.gpu-registry.yml
Normal file
@@ -0,0 +1,39 @@
# docker-compose.gpu-registry.yml - Pull the GPU image from the registry
# Usage: docker compose -f docker-compose.gpu-registry.yml up
#
# Requires: NVIDIA GPU + nvidia-container-toolkit installed

services:
  ocr-gpu:
    image: seryus.ddns.net/unir/paddle-ocr-gpu:latest
    container_name: paddle-ocr-gpu-registry
    ports:
      - "8002:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
      - ./scripts:/app/scripts:ro
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

volumes:
  paddlex-cache:
    name: paddlex-model-cache
140
src/paddle_ocr/docker-compose.workers.yml
Normal file
@@ -0,0 +1,140 @@
# docker-compose.workers.yml - Multiple PaddleOCR workers for parallel Ray Tune
#
# Usage:
#   GPU (5 workers sharing one GPU):
#     docker compose -f docker-compose.workers.yml --profile gpu up
#
#   CPU (5 workers):
#     docker compose -f docker-compose.workers.yml --profile cpu up
#
# Workers are defined statically below; to scale further, add more
# worker entries (each maps a new host port).
#
# Each worker runs on a separate port: 8001, 8002, 8003, 8004, ...

x-ocr-gpu-common: &ocr-gpu-common
  image: seryus.ddns.net/unir/paddle-ocr-gpu:latest
  volumes:
    - ../dataset:/app/dataset:ro
    - ../debugset:/app/debugset:rw
    - paddlex-cache:/root/.paddlex
  environment:
    - PYTHONUNBUFFERED=1
    - CUDA_VISIBLE_DEVICES=0
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: 1
            capabilities: [gpu]
  restart: unless-stopped
  healthcheck:
    test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 120s

x-ocr-cpu-common: &ocr-cpu-common
  image: seryus.ddns.net/unir/paddle-ocr-cpu:latest
  volumes:
    - ../dataset:/app/dataset:ro
    - ../debugset:/app/debugset:rw
    - paddlex-cache:/root/.paddlex
  environment:
    - PYTHONUNBUFFERED=1
  restart: unless-stopped
  healthcheck:
    test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 120s

services:
  # GPU workers (gpu profile) - share a single GPU
  ocr-worker-1:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-1
    ports:
      - "8001:8000"
    profiles:
      - gpu

  ocr-worker-2:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-2
    ports:
      - "8002:8000"
    profiles:
      - gpu

  ocr-worker-3:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-3
    ports:
      - "8003:8000"
    profiles:
      - gpu

  ocr-worker-4:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-4
    ports:
      - "8004:8000"
    profiles:
      - gpu

  ocr-worker-5:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-5
    ports:
      - "8005:8000"
    profiles:
      - gpu

  # CPU workers (cpu profile) - for systems without a GPU
  ocr-cpu-worker-1:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-1
    ports:
      - "8001:8000"
    profiles:
      - cpu

  ocr-cpu-worker-2:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-2
    ports:
      - "8002:8000"
    profiles:
      - cpu

  ocr-cpu-worker-3:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-3
    ports:
      - "8003:8000"
    profiles:
      - cpu

  ocr-cpu-worker-4:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-4
    ports:
      - "8004:8000"
    profiles:
      - cpu

  ocr-cpu-worker-5:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-5
    ports:
      - "8005:8000"
    profiles:
      - cpu

volumes:
  paddlex-cache:
    name: paddlex-model-cache
111
src/paddle_ocr/docker-compose.yml
Normal file
@@ -0,0 +1,111 @@
# docker-compose.yml - PaddleOCR REST API
# Usage:
#   CPU:   docker compose up ocr-cpu
#   GPU:   docker compose up ocr-gpu
#   Test:  docker compose run --rm test
#   Build: CUDA_ARCH=120 docker compose --profile build run --rm build-paddle
#
# Auto-detect the CUDA arch before building:
#   export CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -1 | tr -d '.')
#   docker compose --profile build run --rm build-paddle

services:
  # PaddlePaddle GPU wheel builder (ARM64 only, one-time build)
  # Creates ./wheels/paddlepaddle_gpu-*.whl for ARM64 GPU support
  # The CUDA_ARCH env var controls the target GPU architecture (default: 120 for Blackwell)
  build-paddle:
    build:
      context: .
      dockerfile: Dockerfile.build-paddle
      args:
        CUDA_ARCH: ${CUDA_ARCH:-120}
    volumes:
      - ./wheels:/wheels
    profiles:
      - build
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # CPU-only service (works on any architecture)
  ocr-cpu:
    build:
      context: .
      dockerfile: Dockerfile.cpu
      args:
        # Models to bake into the image (change before building):
        DET_MODEL: PP-OCRv5_server_det
        REC_MODEL: PP-OCRv5_server_rec
    image: paddle-ocr-api:cpu
    container_name: paddle-ocr-cpu
    ports:
      - "8000:8000"
    volumes:
      - ../dataset:/app/dataset:ro      # Your dataset
      - ../debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex    # For additional models at runtime
    environment:
      - PYTHONUNBUFFERED=1
      # Override models at runtime (uncomment to use different models):
      # - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      # - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # GPU service (requires NVIDIA Container Toolkit)
  ocr-gpu:
    build:
      context: .
      dockerfile: Dockerfile.gpu
      args:
        DET_MODEL: PP-OCRv5_server_det
        REC_MODEL: PP-OCRv5_server_rec
    image: paddle-ocr-api:gpu
    container_name: paddle-ocr-gpu
    ports:
      - "8000:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      # Override models at runtime:
      # - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      # - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped

  # Test client (runs once and exits)
  test:
    image: python:3.11-slim
    container_name: paddle-ocr-test
    depends_on:
      ocr-cpu:
        condition: service_healthy
    volumes:
      - ./test.py:/app/test.py:ro
    working_dir: /app
    command: >
      sh -c "pip install -q requests && python test.py --url http://ocr-cpu:8000 --dataset /app/dataset"
    network_mode: "service:ocr-cpu"

volumes:
  paddlex-cache:
    name: paddlex-model-cache
340
src/paddle_ocr/paddle_ocr_tuning_rest.py
Normal file
340
src/paddle_ocr/paddle_ocr_tuning_rest.py
Normal file
@@ -0,0 +1,340 @@
|
||||
# paddle_ocr_tuning_rest.py
|
||||
# FastAPI REST service for PaddleOCR hyperparameter evaluation
|
||||
# Usage: uvicorn paddle_ocr_tuning_rest:app --host 0.0.0.0 --port 8000
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import threading
|
||||
from typing import Optional
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from paddleocr import PaddleOCR
|
||||
from jiwer import wer, cer
|
||||
from dataset_manager import ImageTextDataset
|
||||
|
||||
|
||||
def get_gpu_info() -> dict:
|
||||
"""Get GPU status information from PaddlePaddle."""
|
||||
info = {
|
||||
"cuda_available": paddle.device.is_compiled_with_cuda(),
|
||||
"device": str(paddle.device.get_device()),
|
||||
"gpu_count": 0,
|
||||
"gpu_name": None,
|
||||
"gpu_memory_total": None,
|
||||
"gpu_memory_used": None,
|
||||
}
|
||||
|
||||
if info["cuda_available"]:
|
||||
try:
|
||||
info["gpu_count"] = paddle.device.cuda.device_count()
|
||||
if info["gpu_count"] > 0:
|
||||
# Get GPU properties
|
||||
props = paddle.device.cuda.get_device_properties(0)
|
||||
info["gpu_name"] = props.name
|
||||
info["gpu_memory_total"] = f"{props.total_memory / (1024**3):.2f} GB"
|
||||
|
||||
# Get current memory usage
|
||||
mem_reserved = paddle.device.cuda.memory_reserved(0)
|
||||
mem_allocated = paddle.device.cuda.memory_allocated(0)
|
||||
info["gpu_memory_used"] = f"{mem_allocated / (1024**3):.2f} GB"
|
||||
info["gpu_memory_reserved"] = f"{mem_reserved / (1024**3):.2f} GB"
|
||||
except Exception as e:
|
||||
info["gpu_error"] = str(e)
|
||||
|
||||
return info
|
||||
|
||||
|
||||
# Model configuration via environment variables (with defaults)
|
||||
DEFAULT_DET_MODEL = os.environ.get("PADDLE_DET_MODEL", "PP-OCRv5_server_det")
|
||||
DEFAULT_REC_MODEL = os.environ.get("PADDLE_REC_MODEL", "PP-OCRv5_server_rec")
|
||||
|
||||
|
||||
# Global state for model and dataset
|
||||
class AppState:
|
||||
ocr: Optional[PaddleOCR] = None
|
||||
dataset: Optional[ImageTextDataset] = None
|
||||
dataset_path: Optional[str] = None
|
||||
det_model: str = DEFAULT_DET_MODEL
|
||||
rec_model: str = DEFAULT_REC_MODEL
|
||||
lock: threading.Lock = None # Protects OCR model from concurrent access
|
||||
|
||||
def __init__(self):
|
||||
self.lock = threading.Lock()
|
||||
|
||||
|
||||
state = AppState()
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Load OCR model at startup."""
|
||||
# Log GPU status
|
||||
gpu_info = get_gpu_info()
|
||||
print("=" * 50)
|
||||
print("GPU STATUS")
|
||||
print("=" * 50)
|
||||
print(f" CUDA available: {gpu_info['cuda_available']}")
|
||||
print(f" Device: {gpu_info['device']}")
|
||||
if gpu_info['cuda_available']:
|
||||
print(f" GPU count: {gpu_info['gpu_count']}")
|
||||
print(f" GPU name: {gpu_info['gpu_name']}")
|
||||
print(f" GPU memory total: {gpu_info['gpu_memory_total']}")
|
||||
print("=" * 50)
|
||||
|
||||
print(f"Loading PaddleOCR models...")
|
||||
print(f" Detection: {state.det_model}")
|
||||
print(f" Recognition: {state.rec_model}")
|
||||
state.ocr = PaddleOCR(
|
||||
text_detection_model_name=state.det_model,
|
||||
text_recognition_model_name=state.rec_model,
|
||||
)
|
||||
|
||||
# Log GPU memory after model load
|
||||
if gpu_info['cuda_available']:
|
||||
gpu_after = get_gpu_info()
|
||||
print(f" GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")
|
||||
|
||||
print("Model loaded successfully!")
|
||||
yield
|
||||
# Cleanup on shutdown
|
||||
state.ocr = None
|
||||
state.dataset = None
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="PaddleOCR Tuning API",
|
||||
description="REST API for OCR hyperparameter evaluation",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
|
||||
class EvaluateRequest(BaseModel):
|
||||
"""Request schema matching CLI arguments."""
|
||||
pdf_folder: str = Field("/app/dataset", description="Path to dataset folder")
|
||||
use_doc_orientation_classify: bool = Field(False, description="Use document orientation classification")
|
||||
use_doc_unwarping: bool = Field(False, description="Use document unwarping")
|
||||
textline_orientation: bool = Field(True, description="Use textline orientation classification")
|
||||
text_det_thresh: float = Field(0.0, ge=0.0, le=1.0, description="Detection pixel threshold")
|
||||
text_det_box_thresh: float = Field(0.0, ge=0.0, le=1.0, description="Detection box threshold")
|
||||
text_det_unclip_ratio: float = Field(1.5, ge=0.0, description="Text detection expansion coefficient")
|
||||
text_rec_score_thresh: float = Field(0.0, ge=0.0, le=1.0, description="Recognition score threshold")
|
||||
start_page: int = Field(5, ge=0, description="Start page index (inclusive)")
|
||||
end_page: int = Field(10, ge=1, description="End page index (exclusive)")
|
||||
save_output: bool = Field(False, description="Save OCR predictions to debugset folder")
|
||||
|
||||
|
||||
class EvaluateResponse(BaseModel):
|
||||
"""Response schema matching CLI output."""
|
||||
CER: float
|
||||
WER: float
|
||||
TIME: float
|
||||
PAGES: int
|
||||
TIME_PER_PAGE: float
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
|
||||
status: str
|
||||
model_loaded: bool
|
||||
dataset_loaded: bool
|
||||
dataset_size: Optional[int] = None
|
||||
det_model: Optional[str] = None
|
||||
rec_model: Optional[str] = None
|
||||
# GPU info
|
||||
cuda_available: Optional[bool] = None
|
||||
device: Optional[str] = None
|
||||
gpu_name: Optional[str] = None
|
||||
gpu_memory_used: Optional[str] = None
|
||||
gpu_memory_total: Optional[str] = None
|
||||
|
||||
|
||||
def _normalize_box_xyxy(box):
|
||||
"""Normalize bounding box to (x0, y0, x1, y1) format."""
|
||||
if isinstance(box, (list, tuple)) and box and isinstance(box[0], (list, tuple)):
|
||||
xs = [p[0] for p in box]
|
||||
ys = [p[1] for p in box]
|
||||
return min(xs), min(ys), max(xs), max(ys)
|
||||
|
||||
if isinstance(box, (list, tuple)):
|
||||
if len(box) == 4:
|
||||
x0, y0, x1, y1 = box
|
||||
return min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1)
|
||||
if len(box) == 8:
|
||||
xs = box[0::2]
|
||||
ys = box[1::2]
|
||||
return min(xs), min(ys), max(xs), max(ys)
|
||||
|
||||
raise ValueError(f"Unrecognized box format: {box!r}")
|
||||
|
||||
|
||||
def assemble_from_paddle_result(paddleocr_predict, min_score=0.0, line_tol_factor=0.6):
    """
    Robust line grouping for PaddleOCR outputs.

    Normalizes boxes, groups them into lines, and returns the assembled text.
    """
    boxes_all = []
    for item in paddleocr_predict:
        res = item.json.get("res", {})
        boxes = res.get("rec_boxes", []) or []
        texts = res.get("rec_texts", []) or []
        scores = res.get("rec_scores", None)

        for i, (box, text) in enumerate(zip(boxes, texts)):
            try:
                x0, y0, x1, y1 = _normalize_box_xyxy(box)
            except Exception:
                continue

            y_mid = 0.5 * (y0 + y1)
            score = float(scores[i]) if (scores is not None and i < len(scores)) else 1.0

            t = re.sub(r"\s+", " ", str(text)).strip()
            if not t:
                continue

            boxes_all.append((x0, y0, x1, y1, y_mid, t, score))

    if min_score > 0:
        boxes_all = [b for b in boxes_all if b[6] >= min_score]

    if not boxes_all:
        return ""

    # Adaptive line tolerance: scale with the median box height
    heights = [b[3] - b[1] for b in boxes_all]
    median_h = float(np.median(heights)) if heights else 20.0
    line_tol = max(8.0, line_tol_factor * median_h)

    # Sort by vertical midpoint, then by x0
    boxes_all.sort(key=lambda b: (b[4], b[0]))

    # Group boxes into lines
    lines, cur, last_y = [], [], None
    for x0, y0, x1, y1, y_mid, text, score in boxes_all:
        if last_y is None or abs(y_mid - last_y) <= line_tol:
            cur.append((x0, text))
        else:
            cur.sort(key=lambda t: t[0])
            lines.append(" ".join(t[1] for t in cur))
            cur = [(x0, text)]
        last_y = y_mid

    if cur:
        cur.sort(key=lambda t: t[0])
        lines.append(" ".join(t[1] for t in cur))

    res = "\n".join(lines)
    res = re.sub(r"\s+\n", "\n", res).strip()
    return res


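The grouping logic can be exercised without PaddleOCR at all. A minimal standalone sketch of the same adaptive-tolerance line assembly, fed hypothetical (x0, y0, x1, y1, text) word boxes:

```python
import numpy as np

def group_lines(word_boxes, line_tol_factor=0.6):
    """Group (x0, y0, x1, y1, text) word boxes into text lines, as above."""
    heights = [b[3] - b[1] for b in word_boxes]
    # Tolerance scales with the median box height, with a floor of 8 px
    line_tol = max(8.0, line_tol_factor * float(np.median(heights)))
    boxes = sorted(word_boxes, key=lambda b: (0.5 * (b[1] + b[3]), b[0]))
    lines, cur, last_y = [], [], None
    for x0, y0, x1, y1, text in boxes:
        y_mid = 0.5 * (y0 + y1)
        if last_y is None or abs(y_mid - last_y) <= line_tol:
            cur.append((x0, text))
        else:
            lines.append(" ".join(t for _, t in sorted(cur)))
            cur = [(x0, text)]
        last_y = y_mid
    if cur:
        lines.append(" ".join(t for _, t in sorted(cur)))
    return "\n".join(lines)

boxes = [
    (120, 10, 180, 30, "world"),   # first line, second word
    (10, 12, 100, 32, "hello"),    # first line, first word (slightly lower y_mid)
    (10, 60, 90, 80, "second"),
    (100, 62, 160, 82, "line"),
]
print(group_lines(boxes))
# hello world
# second line
```

Note how "world" arrives before "hello" in reading order only because each line is re-sorted by x0 after grouping.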
def evaluate_text(reference: str, prediction: str) -> dict:
    """Calculate WER and CER metrics."""
    return {"WER": wer(reference, prediction), "CER": cer(reference, prediction)}


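The service delegates WER/CER to jiwer. For intuition, a minimal pure-Python sketch of both metrics via edit distance (not the jiwer implementation):

```python
def levenshtein(a, b):
    """Edit distance via the standard dynamic-programming recurrence."""
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,                 # deletion
                           cur[j - 1] + 1,              # insertion
                           prev[j - 1] + (ca != cb)))   # substitution
        prev = cur
    return prev[-1]

def cer(reference, prediction):
    # Character-level edits, normalized by reference length
    return levenshtein(reference, prediction) / max(1, len(reference))

def wer(reference, prediction):
    # Same recurrence over word tokens instead of characters
    ref_words = reference.split()
    return levenshtein(ref_words, prediction.split()) / max(1, len(ref_words))

print(cer("abcd", "abxd"))            # 0.25
print(wer("the cat sat", "the cat"))  # one deletion out of three words
```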
@app.get("/health", response_model=HealthResponse)
|
||||
def health_check():
|
||||
"""Check if the service is ready."""
|
||||
gpu_info = get_gpu_info()
|
||||
return HealthResponse(
|
||||
status="ok" if state.ocr is not None else "initializing",
|
||||
model_loaded=state.ocr is not None,
|
||||
dataset_loaded=state.dataset is not None,
|
||||
dataset_size=len(state.dataset) if state.dataset else None,
|
||||
det_model=state.det_model,
|
||||
rec_model=state.rec_model,
|
||||
cuda_available=gpu_info.get("cuda_available"),
|
||||
device=gpu_info.get("device"),
|
||||
gpu_name=gpu_info.get("gpu_name"),
|
||||
gpu_memory_used=gpu_info.get("gpu_memory_used"),
|
||||
gpu_memory_total=gpu_info.get("gpu_memory_total"),
|
||||
)
|
||||
|
||||
|
||||
@app.post("/evaluate", response_model=EvaluateResponse)
|
||||
def evaluate(request: EvaluateRequest):
|
||||
"""
|
||||
Evaluate OCR with given hyperparameters.
|
||||
Returns CER, WER, and timing metrics.
|
||||
"""
|
||||
if state.ocr is None:
|
||||
raise HTTPException(status_code=503, detail="Model not loaded yet")
|
||||
|
||||
# Load or reload dataset if path changed
|
||||
if state.dataset is None or state.dataset_path != request.pdf_folder:
|
||||
if not os.path.isdir(request.pdf_folder):
|
||||
raise HTTPException(status_code=400, detail=f"Dataset folder not found: {request.pdf_folder}")
|
||||
state.dataset = ImageTextDataset(request.pdf_folder)
|
||||
state.dataset_path = request.pdf_folder
|
||||
|
||||
if len(state.dataset) == 0:
|
||||
raise HTTPException(status_code=400, detail="Dataset is empty")
|
||||
|
||||
# Validate page range
|
||||
start = request.start_page
|
||||
end = min(request.end_page, len(state.dataset))
|
||||
if start >= end:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid page range: {start}-{end}")
|
||||
|
||||
cer_list, wer_list = [], []
|
||||
time_per_page_list = []
|
||||
t0 = time.time()
|
||||
|
||||
# Lock to prevent concurrent OCR access (model is not thread-safe)
|
||||
with state.lock:
|
||||
for idx in range(start, end):
|
||||
img, ref = state.dataset[idx]
|
||||
arr = np.array(img)
|
||||
|
||||
tp0 = time.time()
|
||||
out = state.ocr.predict(
|
||||
arr,
|
||||
use_doc_orientation_classify=request.use_doc_orientation_classify,
|
||||
use_doc_unwarping=request.use_doc_unwarping,
|
||||
use_textline_orientation=request.textline_orientation,
|
||||
text_det_thresh=request.text_det_thresh,
|
||||
text_det_box_thresh=request.text_det_box_thresh,
|
||||
text_det_unclip_ratio=request.text_det_unclip_ratio,
|
||||
text_rec_score_thresh=request.text_rec_score_thresh,
|
||||
)
|
||||
|
||||
pred = assemble_from_paddle_result(out)
|
||||
time_per_page_list.append(float(time.time() - tp0))
|
||||
|
||||
# Save prediction to debugset if requested
|
||||
if request.save_output:
|
||||
out_path = state.dataset.get_output_path(idx, "paddle_text")
|
||||
with open(out_path, "w", encoding="utf-8") as f:
|
||||
f.write(pred)
|
||||
|
||||
m = evaluate_text(ref, pred)
|
||||
cer_list.append(m["CER"])
|
||||
wer_list.append(m["WER"])
|
||||
|
||||
return EvaluateResponse(
|
||||
CER=float(np.mean(cer_list)) if cer_list else 1.0,
|
||||
WER=float(np.mean(wer_list)) if wer_list else 1.0,
|
||||
TIME=float(time.time() - t0),
|
||||
PAGES=len(cer_list),
|
||||
TIME_PER_PAGE=float(np.mean(time_per_page_list)) if time_per_page_list else 0.0,
|
||||
)
|
||||
|
||||
|
||||
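The page-range handling in the endpoint is what lets an oversized `end_page` pass through safely: the end is clamped to the dataset size, and empty ranges are rejected. A small sketch of that logic, with a hypothetical helper name:

```python
def clamp_page_range(start_page, end_page, dataset_len):
    """Mirror of the endpoint's range handling: clamp end, reject empty ranges."""
    end = min(end_page, dataset_len)
    if start_page >= end:
        # The endpoint raises HTTPException(400) here; ValueError stands in for it
        raise ValueError(f"Invalid page range: {start_page}-{end}")
    return start_page, end

print(clamp_page_range(0, 9999, 42))  # (0, 42)
```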
@app.post("/evaluate_full", response_model=EvaluateResponse)
|
||||
def evaluate_full(request: EvaluateRequest):
|
||||
"""Evaluate on ALL pages (ignores start_page/end_page)."""
|
||||
request.start_page = 0
|
||||
request.end_page = 9999 # Will be clamped to dataset size
|
||||
return evaluate(request)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
22
src/paddle_ocr/requirements-gpu.txt
Normal file
@@ -0,0 +1,22 @@
# PaddleOCR REST API - GPU Requirements
# Install: pip install -r requirements-gpu.txt

# PaddlePaddle (GPU version with CUDA)
paddlepaddle-gpu==3.2.0

# PaddleOCR
paddleocr==3.3.2

# OCR evaluation metrics
jiwer

# Numerical computing
numpy

# REST API framework
fastapi
uvicorn[standard]
pydantic

# Image processing
Pillow
22
src/paddle_ocr/requirements.txt
Normal file
@@ -0,0 +1,22 @@
# PaddleOCR REST API - CPU Requirements
# Install: pip install -r requirements.txt

# PaddlePaddle (CPU version)
paddlepaddle==3.2.2

# PaddleOCR
paddleocr==3.3.2

# OCR evaluation metrics
jiwer

# Numerical computing
numpy

# REST API framework
fastapi
uvicorn[standard]
pydantic

# Image processing (pulled in by paddleocr, but listed explicitly)
Pillow
199
src/paddle_ocr/scripts/debug_gpu_detection.py
Normal file
@@ -0,0 +1,199 @@
#!/usr/bin/env python3
"""
Debug script for GPU OCR detection issues.

This script inspects the raw inference output of PaddlePaddle detection models
to diagnose why detection might fail on certain GPU architectures (e.g., Blackwell/sm_121).

Usage:
    docker exec paddle-ocr-gpu python /app/debug_gpu_detection.py [image_path]

Expected behavior:
- Working GPU: output stats show min close to 0, max close to 1, mean ~0.1-0.5
- Broken GPU: output stats show constant values (e.g., min=max=mean=0.00001)
"""

import os
import sys

os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'

import numpy as np
import paddle
from PIL import Image


def check_gpu_status():
    """Check GPU availability and properties."""
    print("=" * 60)
    print("GPU STATUS")
    print("=" * 60)
    print(f"Device: {paddle.device.get_device()}")
    print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")

    if paddle.device.is_compiled_with_cuda():
        print(f"GPU count: {paddle.device.cuda.device_count()}")
        if paddle.device.cuda.device_count() > 0:
            props = paddle.device.cuda.get_device_properties(0)
            print(f"GPU name: {props.name}")
            print(f"Compute capability: {props.major}.{props.minor}")
            print(f"Total memory: {props.total_memory / (1024**3):.2f} GB")
    print()


def test_basic_ops():
    """Test basic GPU tensor operations."""
    print("=" * 60)
    print("BASIC GPU OPERATIONS")
    print("=" * 60)

    # Test tensor creation
    x = paddle.randn([2, 3])
    print(f"Tensor place: {x.place}")

    # Test conv2d
    x = paddle.randn([1, 3, 64, 64])
    conv = paddle.nn.Conv2D(3, 16, 3, padding=1)
    y = conv(x)
    print(f"Conv2d output shape: {y.shape}, place: {y.place}")

    # Test softmax
    s = paddle.nn.functional.softmax(y, axis=1)
    print(f"Softmax output shape: {s.shape}")
    print("Basic operations: OK")
    print()


def test_detection_model(image_path: str):
    """Test the detection model's raw output."""
    print("=" * 60)
    print("DETECTION MODEL TEST")
    print("=" * 60)

    from paddle.inference import Config, create_predictor

    model_dir = '/root/.paddlex/official_models/PP-OCRv4_mobile_det'
    inference_file = f'{model_dir}/inference.json'
    params_file = f'{model_dir}/inference.pdiparams'

    if not os.path.exists(inference_file):
        print(f"Model not found at {model_dir}")
        print("Run PaddleOCR once to download the models first.")
        return

    # Create config
    config = Config()
    config.set_prog_file(inference_file)
    config.set_params_file(params_file)
    config.enable_use_gpu(1024, 0)

    print("Creating predictor...")
    predictor = create_predictor(config)

    # Get input/output names
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    print(f"Input names: {input_names}")
    print(f"Output names: {output_names}")

    # Load and preprocess the image
    img = Image.open(image_path)
    img = img.resize((640, 640))
    arr = np.array(img).astype('float32')
    arr = arr / 255.0
    arr = arr.transpose(2, 0, 1)[np.newaxis, ...]  # HWC -> NCHW
    print(f"Input tensor shape: {arr.shape}")

    # Set input
    input_handle = predictor.get_input_handle(input_names[0])
    input_handle.reshape(arr.shape)
    input_handle.copy_from_cpu(arr)

    # Run prediction
    print("Running inference...")
    predictor.run()

    # Get output
    output_handle = predictor.get_output_handle(output_names[0])
    output = output_handle.copy_to_cpu()

    print()
    print("OUTPUT ANALYSIS:")
    print(f"  Shape: {output.shape}")
    print(f"  Min: {output.min():.6f}")
    print(f"  Max: {output.max():.6f}")
    print(f"  Mean: {output.mean():.6f}")
    print(f"  Std: {output.std():.6f}")
    print(f"  Has NaN: {np.isnan(output).any()}")
    print(f"  Has Inf: {np.isinf(output).any()}")

    # Diagnosis
    print()
    print("DIAGNOSIS:")
    if output.min() == output.max():
        print("  PROBLEM: Output is constant - model inference is broken!")
        print("  This typically indicates a GPU compute capability mismatch.")
        print("  GB10 (sm_121) may need CUDA 13.0+ for native support.")
    elif output.max() < 0.01:
        print("  PROBLEM: Output values too low - detection will find nothing.")
    elif np.isnan(output).any() or np.isinf(output).any():
        print("  PROBLEM: Output contains NaN/Inf - numerical instability.")
    else:
        print("  OK: Output values look reasonable.")
        print(f"  Detection threshold is typically 0.3-0.6; max output is {output.max():.3f}")


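Both the HWC-to-NCHW preprocessing and the diagnosis branches can be checked without a GPU. A numpy-only sketch (the image and the probability maps below are synthetic):

```python
import numpy as np

# Hypothetical 640x640 RGB image as a uint8 array
img = np.zeros((640, 640, 3), dtype=np.uint8)
arr = img.astype('float32') / 255.0
arr = arr.transpose(2, 0, 1)[np.newaxis, ...]  # HWC -> NCHW, with batch dim
print(arr.shape)  # (1, 3, 640, 640)

def diagnose(output):
    """Classify a detection probability map the same way the script does."""
    if output.min() == output.max():
        return "constant"      # broken kernels: every pixel identical
    if np.isnan(output).any() or np.isinf(output).any():
        return "nan_or_inf"    # numerical instability
    if output.max() < 0.01:
        return "too_low"       # below any usable detection threshold
    return "ok"

# A constant map is the signature of the sm_121 kernel failure described above
print(diagnose(np.full((1, 1, 640, 640), 1e-5, dtype=np.float32)))  # constant
```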
def test_paddleocr_output(image_path: str):
    """Test the full PaddleOCR pipeline."""
    print()
    print("=" * 60)
    print("PADDLEOCR PIPELINE TEST")
    print("=" * 60)

    from paddleocr import PaddleOCR

    ocr = PaddleOCR(
        text_detection_model_name='PP-OCRv4_mobile_det',
        text_recognition_model_name='PP-OCRv4_mobile_rec',
    )

    img = Image.open(image_path)
    arr = np.array(img)

    out = ocr.predict(arr)
    res = out[0].json['res']

    dt_polys = res.get('dt_polys', [])
    rec_texts = res.get('rec_texts', [])

    print(f"Detection polygons: {len(dt_polys)}")
    print(f"Recognition texts: {len(rec_texts)}")

    if rec_texts:
        print(f"Sample texts: {rec_texts[:5]}")
    else:
        print("No text detected!")


def main():
    # Default test image
    image_path = '/app/dataset/0/img/page_0001.png'
    if len(sys.argv) > 1:
        image_path = sys.argv[1]

    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        print("Usage: python debug_gpu_detection.py [image_path]")
        sys.exit(1)

    print(f"Testing with image: {image_path}")
    print()

    check_gpu_status()
    test_basic_ops()
    test_detection_model(image_path)
    test_paddleocr_output(image_path)


if __name__ == '__main__':
    main()
207
src/paddle_ocr/scripts/test_dynamic_mode.py
Normal file
@@ -0,0 +1,207 @@
#!/usr/bin/env python3
"""
Test PaddleOCR in dynamic graph mode (not inference mode).

Dynamic mode compiles kernels at runtime, which may work on Blackwell.
Inference mode uses pre-compiled kernels, which fail on sm_121.

Usage:
    python test_dynamic_mode.py [image_path]
"""

import os
import sys

os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
# Force dynamic graph mode
os.environ['FLAGS_enable_pir_api'] = '0'

import numpy as np
import paddle
from PIL import Image


def check_gpu():
    """Check GPU status."""
    print("=" * 60)
    print("GPU STATUS")
    print("=" * 60)
    print(f"Device: {paddle.device.get_device()}")
    print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")

    if paddle.device.is_compiled_with_cuda() and paddle.device.cuda.device_count() > 0:
        props = paddle.device.cuda.get_device_properties(0)
        print(f"GPU: {props.name} (sm_{props.major}{props.minor})")
        print(f"Memory: {props.total_memory / (1024**3):.1f} GB")
    print()


def test_paddleocr_dynamic(image_path: str):
    """Test PaddleOCR with dynamic execution."""
    print("=" * 60)
    print("PADDLEOCR DYNAMIC MODE TEST")
    print("=" * 60)

    from paddleocr import PaddleOCR

    # Try to force dynamic mode by setting use_static=False if available,
    # or by using the model in eval mode directly.

    print("Creating PaddleOCR instance...")
    print("(This may download models on first run)")

    try:
        # Create the OCR instance - this might still use inference internally
        ocr = PaddleOCR(
            text_detection_model_name='PP-OCRv4_mobile_det',
            text_recognition_model_name='PP-OCRv4_mobile_rec',
            use_angle_cls=False,  # Simplify the pipeline
            lang='es',
        )

        # Load the image
        img = Image.open(image_path)
        arr = np.array(img)
        print(f"Image shape: {arr.shape}")

        # Run prediction
        print("Running OCR prediction...")
        result = ocr.predict(arr)

        # Parse results
        res = result[0].json['res']
        dt_polys = res.get('dt_polys', [])
        rec_texts = res.get('rec_texts', [])

        print()
        print("RESULTS:")
        print(f"  Detected boxes: {len(dt_polys)}")
        print(f"  Recognized texts: {len(rec_texts)}")

        if rec_texts:
            print(f"  First 5 texts: {rec_texts[:5]}")
            return True
        else:
            print("  WARNING: No text recognized!")
            return False

    except Exception as e:
        print(f"ERROR: {e}")
        return False


def test_paddle_dynamic_model():
    """Test loading a paddle model in dynamic graph mode."""
    print()
    print("=" * 60)
    print("PADDLE DYNAMIC GRAPH TEST")
    print("=" * 60)

    # Ensure we're in dynamic mode
    paddle.disable_static()

    # Test a simple model forward pass
    print("Testing dynamic graph execution...")

    # Create a simple ResNet-like block
    x = paddle.randn([1, 3, 224, 224])

    # Conv -> BN -> ReLU
    conv = paddle.nn.Conv2D(3, 64, 7, stride=2, padding=3)
    bn = paddle.nn.BatchNorm2D(64)

    # Forward pass (dynamic mode compiles kernels at runtime)
    y = conv(x)
    y = bn(y)
    y = paddle.nn.functional.relu(y)

    print(f"Input shape: {x.shape}")
    print(f"Output shape: {y.shape}")
    print(f"Output min: {y.min().item():.4f}")
    print(f"Output max: {y.max().item():.4f}")
    print(f"Output mean: {y.mean().item():.4f}")

    if y.min() != y.max():
        print("Dynamic graph mode: WORKING")
        return True
    else:
        print("Dynamic graph mode: BROKEN (constant output)")
        return False


def test_ppocr_model_direct():
    """Try loading the PP-OCRv4 model directly in dynamic mode."""
    print()
    print("=" * 60)
    print("PPOCR MODEL DIRECT LOAD TEST")
    print("=" * 60)

    try:
        # Try to import ppocr modules directly;
        # this bypasses the inference predictor.
        from paddleocr.ppocr.modeling.architectures import build_model
        from paddleocr.ppocr.postprocess import build_post_process
        from paddleocr.ppocr.utils.save_load import load_model

        print("Direct model import available")

        # Note: this approach requires model config files,
        # which may or may not be bundled with paddleocr.

    except ImportError as e:
        print(f"Direct model import not available: {e}")
        print("PaddleOCR may only support inference mode")

    return False


def main():
    # Default test image
    image_path = '/app/dataset/0/img/page_0001.png'
    if len(sys.argv) > 1:
        image_path = sys.argv[1]

    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        sys.exit(1)

    print(f"Testing with image: {image_path}")
    print()

    check_gpu()

    # Test 1: Basic dynamic graph
    dynamic_works = test_paddle_dynamic_model()

    if not dynamic_works:
        print("\nDynamic graph mode is broken - GPU likely unsupported")
        sys.exit(1)

    # Test 2: Direct model load
    test_ppocr_model_direct()

    # Test 3: PaddleOCR pipeline
    ocr_works = test_paddleocr_dynamic(image_path)

    print()
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"Dynamic graph mode: {'WORKS' if dynamic_works else 'BROKEN'}")
    print(f"PaddleOCR pipeline: {'WORKS' if ocr_works else 'BROKEN'}")

    if dynamic_works and not ocr_works:
        print()
        print("DIAGNOSIS: Dynamic mode works but PaddleOCR fails.")
        print("This means PaddleOCR internally uses the inference predictor,")
        print("which ships pre-compiled kernels without Blackwell support.")
        print()
        print("Potential solutions:")
        print("1. Modify PaddleOCR to use dynamic mode")
        print("2. Use ONNX export + ONNX Runtime")
        print("3. Wait for PaddlePaddle Blackwell support")


if __name__ == '__main__':
    main()
69
src/paddle_ocr/scripts/upload-wheel.sh
Executable file
@@ -0,0 +1,69 @@
#!/bin/bash
# Upload the PaddlePaddle ARM64 wheel to Gitea generic packages
#
# Usage:
#   ./scripts/upload-wheel.sh [wheel_file] [token]
#
# Environment variables (alternative to arguments):
#   GITEA_TOKEN - Gitea API token
#   WHEEL_FILE  - Path to wheel file (default: auto-detect in wheels/)

set -e

GITEA_URL="https://seryus.ddns.net"
GITEA_ORG="unir"
PACKAGE_NAME="paddlepaddle-gpu-arm64"

# Get wheel file
WHEEL_FILE="${1:-${WHEEL_FILE:-$(ls wheels/paddlepaddle*.whl 2>/dev/null | head -1)}}"
if [ -z "$WHEEL_FILE" ] || [ ! -f "$WHEEL_FILE" ]; then
    echo "Error: No wheel file found"
    echo "Usage: $0 [wheel_file] [token]"
    echo "  or set the WHEEL_FILE environment variable"
    exit 1
fi

# Get token
TOKEN="${2:-${GITEA_TOKEN}}"
if [ -z "$TOKEN" ]; then
    echo "Error: No token provided"
    echo "Usage: $0 [wheel_file] [token]"
    echo "  or set the GITEA_TOKEN environment variable"
    exit 1
fi

# Extract the version from the wheel filename
# Format: paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl
FILENAME=$(basename "$WHEEL_FILE")
VERSION=$(echo "$FILENAME" | sed -E 's/paddlepaddle[_-]gpu-([0-9.]+)-.*/\1/')

if [ -z "$VERSION" ]; then
    echo "Error: Could not extract version from filename: $FILENAME"
    exit 1
fi

echo "Uploading wheel to Gitea packages..."
echo "  File: $WHEEL_FILE"
echo "  Package: $PACKAGE_NAME"
echo "  Version: $VERSION"
echo "  URL: $GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$FILENAME"

# Upload using a PUT request
HTTP_CODE=$(curl -sS -w "%{http_code}" -o /tmp/upload_response.txt \
    -X PUT \
    -H "Authorization: token $TOKEN" \
    -H "Content-Type: application/octet-stream" \
    --data-binary "@$WHEEL_FILE" \
    "$GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$FILENAME")

if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
    echo "Success! Wheel uploaded."
    echo "Download URL: $GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$FILENAME"
elif [ "$HTTP_CODE" = "409" ]; then
    echo "Package version already exists (HTTP 409)"
    echo "To update, delete the existing version first in the Gitea UI"
else
    echo "Error: Upload failed with HTTP $HTTP_CODE"
    cat /tmp/upload_response.txt
    exit 1
fi
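The sed pattern in the script accepts both `-` and `_` after `paddlepaddle`. A Python rehearsal of the same extraction (the helper name is hypothetical; the regex mirrors the one in the script):

```python
import re

def wheel_version(filename):
    """Extract the version from a paddlepaddle wheel filename, as upload-wheel.sh does."""
    m = re.match(r"paddlepaddle[_-]gpu-([0-9.]+)-.*", filename)
    return m.group(1) if m else None

print(wheel_version("paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl"))  # 3.0.0
print(wheel_version("paddlepaddle-gpu-3.2.0-cp312-cp312-linux_x86_64.whl"))   # 3.2.0
```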
114
src/paddle_ocr/test.py
Normal file
@@ -0,0 +1,114 @@
# test.py - Simple client to test the PaddleOCR REST API
# Usage: python test.py [--url URL] [--dataset PATH]

import argparse
import sys
import time

import requests


def wait_for_health(url: str, timeout: int = 120) -> bool:
    """Wait for the API to be ready."""
    health_url = f"{url}/health"
    start = time.time()

    print(f"Waiting for API at {health_url}...")
    while time.time() - start < timeout:
        try:
            resp = requests.get(health_url, timeout=5)
            if resp.status_code == 200:
                data = resp.json()
                if data.get("model_loaded"):
                    print(f"API ready! Model loaded in {time.time() - start:.1f}s")
                    return True
                print(f"  Model loading... ({time.time() - start:.0f}s)")
        except requests.exceptions.ConnectionError:
            print(f"  Connecting... ({time.time() - start:.0f}s)")
        except Exception as e:
            print(f"  Error: {e}")
        time.sleep(2)

    print("Timeout waiting for API")
    return False


def test_evaluate(url: str, config: dict) -> dict:
    """Run an evaluation with the given config."""
    eval_url = f"{url}/evaluate"

    print(f"\nTesting config: {config}")
    start = time.time()

    resp = requests.post(eval_url, json=config, timeout=600)
    resp.raise_for_status()

    result = resp.json()
    elapsed = time.time() - start

    print(f"Results (took {elapsed:.1f}s):")
    print(f"  CER: {result['CER']:.4f} ({result['CER']*100:.2f}%)")
    print(f"  WER: {result['WER']:.4f} ({result['WER']*100:.2f}%)")
    print(f"  Pages: {result['PAGES']}")
    print(f"  Time/page: {result['TIME_PER_PAGE']:.2f}s")

    return result


def main():
    parser = argparse.ArgumentParser(description="Test PaddleOCR REST API")
    parser.add_argument("--url", default="http://localhost:8001", help="API base URL")
    parser.add_argument("--dataset", default="/app/dataset", help="Dataset path (inside the container)")
    parser.add_argument("--skip-health", action="store_true", help="Skip the health check wait")
    args = parser.parse_args()

    # Wait for the API to be ready
    if not args.skip_health:
        if not wait_for_health(args.url):
            sys.exit(1)

    # Test 1: Baseline config (default PaddleOCR)
    print("\n" + "=" * 50)
    print("TEST 1: Baseline Configuration")
    print("=" * 50)
    baseline = test_evaluate(args.url, {
        "pdf_folder": args.dataset,
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "textline_orientation": False,  # Baseline: disabled
        "text_det_thresh": 0.0,
        "text_det_box_thresh": 0.0,
        "text_det_unclip_ratio": 1.5,
        "text_rec_score_thresh": 0.0,
        "start_page": 5,
        "end_page": 10,
    })

    # Test 2: Optimized config (from Ray Tune results)
    print("\n" + "=" * 50)
    print("TEST 2: Optimized Configuration")
    print("=" * 50)
    optimized = test_evaluate(args.url, {
        "pdf_folder": args.dataset,
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "textline_orientation": True,  # KEY: enabled
        "text_det_thresh": 0.4690,
        "text_det_box_thresh": 0.5412,
        "text_det_unclip_ratio": 0.0,
        "text_rec_score_thresh": 0.6350,
        "start_page": 5,
        "end_page": 10,
    })

    # Summary
    print("\n" + "=" * 50)
    print("SUMMARY")
    print("=" * 50)
    cer_reduction = (1 - optimized["CER"] / baseline["CER"]) * 100 if baseline["CER"] > 0 else 0
    print(f"Baseline CER:  {baseline['CER']*100:.2f}%")
    print(f"Optimized CER: {optimized['CER']*100:.2f}%")
    print(f"Improvement:   {cer_reduction:.1f}% reduction in errors")


if __name__ == "__main__":
    main()
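The improvement figure in the summary is a relative error reduction, not a difference of percentages. As a standalone sketch (the helper name is hypothetical):

```python
def cer_reduction_pct(baseline_cer, optimized_cer):
    """Relative error reduction, as reported in the client's summary."""
    if baseline_cer <= 0:
        return 0.0
    return (1 - optimized_cer / baseline_cer) * 100

# Going from 50% CER to 12.5% CER removes three quarters of the character errors
print(cer_reduction_pct(0.5, 0.125))  # 75.0
```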
0
src/paddle_ocr/wheels/.gitkeep
Normal file
87
src/paddle_ocr_raytune_rest.ipynb
Normal file
@@ -0,0 +1,87 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "header",
   "metadata": {},
   "source": [
    "# PaddleOCR Hyperparameter Optimization via REST API\n",
    "\n",
    "Uses Ray Tune + Optuna to find optimal PaddleOCR parameters.\n",
    "\n",
    "## Prerequisites\n",
    "\n",
    "```bash\n",
    "cd src/paddle_ocr\n",
    "docker compose -f docker-compose.workers.yml up  # GPU workers on 8001-8002\n",
    "# or: docker compose -f docker-compose.workers.yml --profile cpu up\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "deps",
   "metadata": {},
   "outputs": [],
   "source": "# Pin the Ray version for API stability (tune.report takes a dict, not kwargs, in 2.x)\n%pip install -q \"ray[tune]==2.53.0\" optuna requests pandas"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "setup",
   "metadata": {},
   "outputs": [],
   "source": "from raytune_ocr import (\n    check_workers, create_trainable, run_tuner, analyze_results, correlation_analysis,\n    paddle_ocr_payload, PADDLE_OCR_SEARCH_SPACE, PADDLE_OCR_CONFIG_KEYS,\n)\n\n# Worker ports (3 workers to avoid OOM)\nPORTS = [8001, 8002, 8003]\n\n# Check that the workers are running\nhealthy = check_workers(PORTS, \"PaddleOCR\")"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "tune",
   "metadata": {},
   "outputs": [],
   "source": "# Create the trainable and run tuning\ntrainable = create_trainable(PORTS, paddle_ocr_payload)\n\nresults = run_tuner(\n    trainable=trainable,\n    search_space=PADDLE_OCR_SEARCH_SPACE,\n    num_samples=128,\n    num_workers=len(healthy),\n)"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "analysis",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analyze the results\n",
    "df = analyze_results(\n",
    "    results,\n",
    "    prefix=\"raytune_paddle\",\n",
    "    config_keys=PADDLE_OCR_CONFIG_KEYS,\n",
    ")\n",
    "\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "correlation",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Correlation analysis\n",
    "correlation_analysis(df, PADDLE_OCR_CONFIG_KEYS)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
18
src/raytune/Dockerfile
Normal file
@@ -0,0 +1,18 @@
FROM python:3.12-slim

WORKDIR /app

# Install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY raytune_ocr.py .
COPY run_tuning.py .

# Create the results directory
RUN mkdir -p /app/results

ENV PYTHONUNBUFFERED=1

ENTRYPOINT ["python", "run_tuning.py"]
131
src/raytune/README.md
Normal file
131
src/raytune/README.md
Normal file
@@ -0,0 +1,131 @@
|
||||
# Ray Tune OCR Hyperparameter Optimization
|
||||
|
||||
Docker-based hyperparameter tuning for OCR services using Ray Tune with Optuna search.
|
||||
|
||||
## Structure
|
||||
|
||||
```
|
||||
raytune/
|
||||
├── Dockerfile # Python 3.12-slim with Ray Tune + Optuna
|
||||
├── requirements.txt # Dependencies
|
||||
├── raytune_ocr.py # Shared utilities and search spaces
|
||||
├── run_tuning.py # CLI entry point
|
||||
└── README.md
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
cd src
|
||||
|
||||
# Build the raytune image
|
||||
docker compose -f docker-compose.tuning.paddle.yml build raytune
|
||||
|
||||
# Or pull from registry
|
||||
docker pull seryus.ddns.net/unir/raytune:latest
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### PaddleOCR Tuning
|
||||
|
||||
```bash
|
||||
# Start PaddleOCR service
|
||||
docker compose -f docker-compose.tuning.paddle.yml up -d paddle-ocr-gpu
|
||||
|
||||
# Wait for health check, then run tuning
|
||||
docker compose -f docker-compose.tuning.paddle.yml run raytune --service paddle --samples 64
|
||||
|
||||
# Stop when done
|
||||
docker compose -f docker-compose.tuning.paddle.yml down
|
||||
```
|
||||
|
||||
### DocTR Tuning
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.tuning.doctr.yml up -d doctr-gpu
|
||||
docker compose -f docker-compose.tuning.doctr.yml run raytune --service doctr --samples 64
|
||||
docker compose -f docker-compose.tuning.doctr.yml down
|
||||
```
|
||||
|
||||
### EasyOCR Tuning
|
||||
|
||||
```bash
|
||||
# Note: EasyOCR uses port 8002 (same as PaddleOCR). Cannot run simultaneously.
|
||||
docker compose -f docker-compose.tuning.easyocr.yml up -d easyocr-gpu
|
||||
docker compose -f docker-compose.tuning.easyocr.yml run raytune --service easyocr --samples 64
|
||||
docker compose -f docker-compose.tuning.easyocr.yml down
|
||||
```
|
||||
|
||||
## CLI Options
|
||||
|
||||
```
|
||||
python run_tuning.py --service {paddle,doctr,easyocr} --samples N
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
|------------|--------------------------------------|---------|
|
||||
| --service | OCR service to tune (required) | - |
|
||||
| --samples | Number of hyperparameter trials | 64 |
|
||||
|
||||
## Search Spaces

### PaddleOCR
- `use_doc_orientation_classify`: [True, False]
- `use_doc_unwarping`: [True, False]
- `textline_orientation`: [True, False]
- `text_det_thresh`: uniform(0.0, 0.7)
- `text_det_box_thresh`: uniform(0.0, 0.7)
- `text_rec_score_thresh`: uniform(0.0, 0.7)

### DocTR
- `assume_straight_pages`: [True, False]
- `straighten_pages`: [True, False]
- `preserve_aspect_ratio`: [True, False]
- `symmetric_pad`: [True, False]
- `disable_page_orientation`: [True, False]
- `disable_crop_orientation`: [True, False]
- `resolve_lines`: [True, False]
- `resolve_blocks`: [True, False]
- `paragraph_break`: uniform(0.01, 0.1)

### EasyOCR
- `text_threshold`: uniform(0.3, 0.9)
- `low_text`: uniform(0.2, 0.6)
- `link_threshold`: uniform(0.2, 0.6)
- `slope_ths`: uniform(0.0, 0.3)
- `ycenter_ths`: uniform(0.3, 1.0)
- `height_ths`: uniform(0.3, 1.0)
- `width_ths`: uniform(0.3, 1.0)
- `add_margin`: uniform(0.0, 0.3)
- `contrast_ths`: uniform(0.05, 0.3)
- `adjust_contrast`: uniform(0.3, 0.8)
- `decoder`: ["greedy", "beamsearch"]
- `beamWidth`: [3, 5, 7, 10]
- `min_size`: [5, 10, 15, 20]

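Each entry above becomes a `tune.uniform(...)` or `tune.choice(...)` distribution, and one trial draws one value from each dimension. A stdlib-only sketch of that sampling, using `random` as a stand-in for Ray Tune's samplers (not the actual Tune API):

```python
# Conceptual sketch: one trial = one draw from each search dimension.
# random is a stand-in for Ray Tune's samplers, not the actual API.
import random

random.seed(42)  # deterministic for illustration
trial = {
    "use_doc_unwarping": random.choice([True, False]),  # categorical choice
    "text_det_thresh": random.uniform(0.0, 0.7),        # continuous range
}
assert trial["use_doc_unwarping"] in (True, False)
assert 0.0 <= trial["text_det_thresh"] <= 0.7
```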
## Output

Results are saved to `src/results/` as CSV files:
- `raytune_paddle_results_YYYYMMDD_HHMMSS.csv`
- `raytune_doctr_results_YYYYMMDD_HHMMSS.csv`
- `raytune_easyocr_results_YYYYMMDD_HHMMSS.csv`

Each row contains:
- Configuration parameters (prefixed with `config/`)
- Metrics: CER, WER, TIME, PAGES, TIME_PER_PAGE
- The worker URL used for the trial

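Picking the best trial out of a saved CSV only requires filtering on the `CER` column. A sketch with inline stand-in rows (the values are illustrative, not from a real run):

```python
# Sketch: select the lowest-CER row from a results CSV.
# The inline rows are illustrative stand-ins for a real results file.
import csv
import io

sample = io.StringIO(
    "CER,WER,config/text_det_thresh\n"
    "0.0795,0.3650,0.10\n"
    "0.7921,1.0546,0.50\n"
    "0.0745,0.3516,0.20\n"
)
rows = list(csv.DictReader(sample))
best = min(rows, key=lambda r: float(r["CER"]))
print(best["CER"])  # -> 0.0745
```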
## Network Mode

The raytune container uses `network_mode: host` to reach the OCR services on localhost ports:
- PaddleOCR: port 8002
- DocTR: port 8003
- EasyOCR: port 8002 (conflicts with PaddleOCR)

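Before trials start, the tuner polls each service's `/health` endpoint until it reports a loaded model. That gate reduces to a small predicate; in this sketch, dict literals stand in for real HTTP responses:

```python
# Sketch of the readiness gate check_workers applies to each /health response.
# The dicts below stand in for real HTTP responses.
def is_ready(health: dict) -> bool:
    return health.get("status") == "ok" and health.get("model_loaded", False)

assert not is_ready({"status": "ok"})                             # model not loaded yet
assert not is_ready({"status": "starting", "model_loaded": True}) # service still starting
assert is_ready({"status": "ok", "model_loaded": True})
print("gate ok")
```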
## Dependencies

- `ray[tune]==2.52.1`
- `optuna==4.7.0`
- `requests>=2.28.0`
- `pandas>=2.0.0`
371
src/raytune/raytune_ocr.py
Normal file
@@ -0,0 +1,371 @@
# raytune_ocr.py
# Shared Ray Tune utilities for OCR hyperparameter optimization
#
# Usage:
#   from raytune_ocr import check_workers, create_trainable, run_tuner, analyze_results
#
# Environment variables:
#   OCR_HOST: Host for OCR services (default: localhost)

import os
from datetime import datetime
from typing import List, Dict, Any, Callable

import requests
import pandas as pd

import ray
from ray import tune
from ray.tune.search.optuna import OptunaSearch


def check_workers(
    ports: List[int],
    service_name: str = "OCR",
    timeout: int = 180,
    interval: int = 5,
) -> List[str]:
    """
    Wait for workers to be fully ready (model + dataset loaded) and return healthy URLs.

    Args:
        ports: List of port numbers to check
        service_name: Name for error messages
        timeout: Max seconds to wait for each worker
        interval: Seconds between retries

    Returns:
        List of healthy worker URLs

    Raises:
        RuntimeError: If no healthy workers are found after the timeout
    """
    import time

    host = os.environ.get("OCR_HOST", "localhost")
    worker_urls = [f"http://{host}:{port}" for port in ports]
    healthy_workers = []

    for url in worker_urls:
        print(f"Waiting for {url}...")
        start = time.time()

        while time.time() - start < timeout:
            try:
                health = requests.get(f"{url}/health", timeout=10).json()
                model_ok = health.get('model_loaded', False)
                dataset_ok = health.get('dataset_loaded', False)

                if health.get('status') == 'ok' and model_ok:
                    gpu = health.get('gpu_name', 'CPU')
                    print(f"✓ {url}: ready ({gpu})")
                    healthy_workers.append(url)
                    break

                elapsed = int(time.time() - start)
                print(f"  [{elapsed}s] model={model_ok} dataset={dataset_ok}")
            except requests.exceptions.RequestException:
                elapsed = int(time.time() - start)
                print(f"  [{elapsed}s] not reachable")

            time.sleep(interval)
        else:
            print(f"✗ {url}: timeout after {timeout}s")

    if not healthy_workers:
        raise RuntimeError(
            f"No healthy {service_name} workers found.\n"
            f"Checked ports: {ports}"
        )

    print(f"\n{len(healthy_workers)}/{len(worker_urls)} workers ready\n")
    return healthy_workers


def create_trainable(ports: List[int], payload_fn: Callable[[Dict], Dict]) -> Callable:
    """
    Factory to create a trainable function for Ray Tune.

    Args:
        ports: List of worker ports for load balancing
        payload_fn: Function that takes a config dict and returns an API payload dict

    Returns:
        Trainable function for Ray Tune

    Note:
        Ray Tune 2.x API: tune.report(metrics_dict) - pass a dict directly, NOT kwargs.
        See: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.report.html
    """
    def trainable(config):
        import os
        import random
        import requests
        from ray.tune import report  # Ray 2.x: report(dict), not report(**kwargs)

        host = os.environ.get("OCR_HOST", "localhost")
        api_url = f"http://{host}:{random.choice(ports)}"
        payload = payload_fn(config)

        try:
            response = requests.post(f"{api_url}/evaluate", json=payload, timeout=None)
            response.raise_for_status()
            metrics = response.json()
            metrics["worker"] = api_url
            report(metrics)  # Ray 2.x API: pass dict directly
        except Exception as e:
            report({  # Report worst-case metrics so the failed trial still registers
                "CER": 1.0,
                "WER": 1.0,
                "TIME": 0.0,
                "PAGES": 0,
                "TIME_PER_PAGE": 0,
                "worker": api_url,
                "ERROR": str(e)[:500]
            })

    return trainable


def run_tuner(
    trainable: Callable,
    search_space: Dict[str, Any],
    num_samples: int = 64,
    num_workers: int = 1,
    metric: str = "CER",
    mode: str = "min",
) -> tune.ResultGrid:
    """
    Initialize Ray and run hyperparameter tuning.

    Args:
        trainable: Trainable function from create_trainable()
        search_space: Dict of parameter names to tune.* search spaces
        num_samples: Number of trials to run
        num_workers: Max concurrent trials
        metric: Metric to optimize
        mode: "min" or "max"

    Returns:
        Ray Tune ResultGrid
    """
    ray.init(
        ignore_reinit_error=True,
        include_dashboard=False,
        configure_logging=False,
        _metrics_export_port=0,  # Disable metrics export to avoid connection warnings
    )
    print(f"Ray Tune ready (version: {ray.__version__})")

    tuner = tune.Tuner(
        trainable,
        tune_config=tune.TuneConfig(
            metric=metric,
            mode=mode,
            search_alg=OptunaSearch(),
            num_samples=num_samples,
            max_concurrent_trials=num_workers,
        ),
        param_space=search_space,
    )

    return tuner.fit()


def analyze_results(
    results: tune.ResultGrid,
    output_folder: str = "results",
    prefix: str = "raytune",
    config_keys: List[str] = None,
) -> pd.DataFrame:
    """
    Analyze and save tuning results.

    Args:
        results: Ray Tune ResultGrid
        output_folder: Directory to save CSV
        prefix: Filename prefix
        config_keys: List of config keys to show in best result (without 'config/' prefix)

    Returns:
        Results DataFrame
    """
    os.makedirs(output_folder, exist_ok=True)
    df = results.get_dataframe()

    # Save to CSV
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{prefix}_results_{timestamp}.csv"
    filepath = os.path.join(output_folder, filename)
    df.to_csv(filepath, index=False)
    print(f"Results saved: {filepath}")

    # Best configuration
    best = df.loc[df["CER"].idxmin()]
    print(f"\nBest CER: {best['CER']:.6f}")
    print(f"Best WER: {best['WER']:.6f}")

    if config_keys:
        print("\nOptimal Configuration:")
        for key in config_keys:
            col = f"config/{key}"
            if col in best:
                val = best[col]
                if isinstance(val, float):
                    print(f"  {key}: {val:.4f}")
                else:
                    print(f"  {key}: {val}")

    return df


def correlation_analysis(df: pd.DataFrame, param_keys: List[str]) -> None:
    """
    Print correlation of numeric parameters with CER/WER.

    Args:
        df: Results DataFrame
        param_keys: List of config keys (without 'config/' prefix)
    """
    param_cols = [f"config/{k}" for k in param_keys if f"config/{k}" in df.columns]
    numeric_cols = [c for c in param_cols if df[c].dtype in ['float64', 'int64']]

    if not numeric_cols:
        print("No numeric parameters for correlation analysis")
        return

    corr_cer = df[numeric_cols + ["CER"]].corr()["CER"].sort_values(ascending=False)
    corr_wer = df[numeric_cols + ["WER"]].corr()["WER"].sort_values(ascending=False)

    print("Correlation with CER:")
    print(corr_cer)
    print("\nCorrelation with WER:")
    print(corr_wer)


# =============================================================================
# OCR-specific payload functions
# =============================================================================

def paddle_ocr_payload(config: Dict) -> Dict:
    """Create payload for the PaddleOCR API. Uses pages 5-10 (first doc) for tuning."""
    return {
        "pdf_folder": "/app/dataset",
        "use_doc_orientation_classify": config.get("use_doc_orientation_classify", False),
        "use_doc_unwarping": config.get("use_doc_unwarping", False),
        "textline_orientation": config.get("textline_orientation", True),
        "text_det_thresh": config.get("text_det_thresh", 0.0),
        "text_det_box_thresh": config.get("text_det_box_thresh", 0.0),
        "text_det_unclip_ratio": config.get("text_det_unclip_ratio", 1.5),
        "text_rec_score_thresh": config.get("text_rec_score_thresh", 0.0),
        "start_page": 5,
        "end_page": 10,
        "save_output": False,
    }


def doctr_payload(config: Dict) -> Dict:
    """Create payload for the DocTR API. Uses pages 5-10 (first doc) for tuning."""
    return {
        "pdf_folder": "/app/dataset",
        "assume_straight_pages": config.get("assume_straight_pages", True),
        "straighten_pages": config.get("straighten_pages", False),
        "preserve_aspect_ratio": config.get("preserve_aspect_ratio", True),
        "symmetric_pad": config.get("symmetric_pad", True),
        "disable_page_orientation": config.get("disable_page_orientation", False),
        "disable_crop_orientation": config.get("disable_crop_orientation", False),
        "resolve_lines": config.get("resolve_lines", True),
        "resolve_blocks": config.get("resolve_blocks", False),
        "paragraph_break": config.get("paragraph_break", 0.035),
        "start_page": 5,
        "end_page": 10,
        "save_output": False,
    }


def easyocr_payload(config: Dict) -> Dict:
    """Create payload for the EasyOCR API. Uses pages 5-10 (first doc) for tuning."""
    return {
        "pdf_folder": "/app/dataset",
        "text_threshold": config.get("text_threshold", 0.7),
        "low_text": config.get("low_text", 0.4),
        "link_threshold": config.get("link_threshold", 0.4),
        "slope_ths": config.get("slope_ths", 0.1),
        "ycenter_ths": config.get("ycenter_ths", 0.5),
        "height_ths": config.get("height_ths", 0.5),
        "width_ths": config.get("width_ths", 0.5),
        "add_margin": config.get("add_margin", 0.1),
        "contrast_ths": config.get("contrast_ths", 0.1),
        "adjust_contrast": config.get("adjust_contrast", 0.5),
        "decoder": config.get("decoder", "greedy"),
        "beamWidth": config.get("beamWidth", 5),
        "min_size": config.get("min_size", 10),
        "start_page": 5,
        "end_page": 10,
        "save_output": False,
    }


# =============================================================================
# Search spaces
# =============================================================================

PADDLE_OCR_SEARCH_SPACE = {
    "use_doc_orientation_classify": tune.choice([True, False]),
    "use_doc_unwarping": tune.choice([True, False]),
    "textline_orientation": tune.choice([True, False]),
    "text_det_thresh": tune.uniform(0.0, 0.7),
    "text_det_box_thresh": tune.uniform(0.0, 0.7),
    "text_det_unclip_ratio": tune.choice([0.0]),  # single choice: held fixed, not tuned
    "text_rec_score_thresh": tune.uniform(0.0, 0.7),
}

DOCTR_SEARCH_SPACE = {
    "assume_straight_pages": tune.choice([True, False]),
    "straighten_pages": tune.choice([True, False]),
    "preserve_aspect_ratio": tune.choice([True, False]),
    "symmetric_pad": tune.choice([True, False]),
    "disable_page_orientation": tune.choice([True, False]),
    "disable_crop_orientation": tune.choice([True, False]),
    "resolve_lines": tune.choice([True, False]),
    "resolve_blocks": tune.choice([True, False]),
    "paragraph_break": tune.uniform(0.01, 0.1),
}

EASYOCR_SEARCH_SPACE = {
    "text_threshold": tune.uniform(0.3, 0.9),
    "low_text": tune.uniform(0.2, 0.6),
    "link_threshold": tune.uniform(0.2, 0.6),
    "slope_ths": tune.uniform(0.0, 0.3),
    "ycenter_ths": tune.uniform(0.3, 1.0),
    "height_ths": tune.uniform(0.3, 1.0),
    "width_ths": tune.uniform(0.3, 1.0),
    "add_margin": tune.uniform(0.0, 0.3),
    "contrast_ths": tune.uniform(0.05, 0.3),
    "adjust_contrast": tune.uniform(0.3, 0.8),
    "decoder": tune.choice(["greedy", "beamsearch"]),
    "beamWidth": tune.choice([3, 5, 7, 10]),
    "min_size": tune.choice([5, 10, 15, 20]),
}


# =============================================================================
# Config keys for results display
# =============================================================================

PADDLE_OCR_CONFIG_KEYS = [
    "use_doc_orientation_classify", "use_doc_unwarping", "textline_orientation",
    "text_det_thresh", "text_det_box_thresh", "text_det_unclip_ratio", "text_rec_score_thresh",
]

DOCTR_CONFIG_KEYS = [
    "assume_straight_pages", "straighten_pages", "preserve_aspect_ratio", "symmetric_pad",
    "disable_page_orientation", "disable_crop_orientation", "resolve_lines", "resolve_blocks",
    "paragraph_break",
]

EASYOCR_CONFIG_KEYS = [
    "text_threshold", "low_text", "link_threshold", "slope_ths", "ycenter_ths",
    "height_ths", "width_ths", "add_margin", "contrast_ths", "adjust_contrast",
    "decoder", "beamWidth", "min_size",
]
4
src/raytune/requirements.txt
Normal file
@@ -0,0 +1,4 @@
ray[tune]==2.52.1
optuna==4.7.0
requests>=2.28.0
pandas>=2.0.0
80
src/raytune/run_tuning.py
Normal file
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""Run hyperparameter tuning for OCR services."""

import os
import argparse

from raytune_ocr import (
    check_workers, create_trainable, run_tuner, analyze_results,
    paddle_ocr_payload, doctr_payload, easyocr_payload,
    PADDLE_OCR_SEARCH_SPACE, DOCTR_SEARCH_SPACE, EASYOCR_SEARCH_SPACE,
    PADDLE_OCR_CONFIG_KEYS, DOCTR_CONFIG_KEYS, EASYOCR_CONFIG_KEYS,
)

SERVICES = {
    "paddle": {
        "payload_fn": paddle_ocr_payload,
        "search_space": PADDLE_OCR_SEARCH_SPACE,
        "config_keys": PADDLE_OCR_CONFIG_KEYS,
        "name": "PaddleOCR",
    },
    "doctr": {
        "payload_fn": doctr_payload,
        "search_space": DOCTR_SEARCH_SPACE,
        "config_keys": DOCTR_CONFIG_KEYS,
        "name": "DocTR",
    },
    "easyocr": {
        "payload_fn": easyocr_payload,
        "search_space": EASYOCR_SEARCH_SPACE,
        "config_keys": EASYOCR_CONFIG_KEYS,
        "name": "EasyOCR",
    },
}


def main():
    parser = argparse.ArgumentParser(description="Run OCR hyperparameter tuning")
    parser.add_argument("--service", choices=["paddle", "doctr", "easyocr"], required=True)
    parser.add_argument("--host", type=str, default="localhost", help="OCR service host")
    parser.add_argument("--port", type=int, default=8000, help="OCR service port")
    parser.add_argument("--samples", type=int, default=64, help="Number of samples")
    args = parser.parse_args()

    # Set environment variable for the raytune_ocr module
    os.environ["OCR_HOST"] = args.host

    cfg = SERVICES[args.service]
    ports = [args.port]

    print(f"\n{'='*50}")
    print(f"Hyperparameter Tuning: {cfg['name']}")
    print(f"Host: {args.host}:{args.port}")
    print(f"Samples: {args.samples}")
    print(f"{'='*50}\n")

    # Check workers
    healthy = check_workers(ports, cfg["name"])

    # Create trainable and run tuning
    trainable = create_trainable(ports, cfg["payload_fn"])
    results = run_tuner(
        trainable=trainable,
        search_space=cfg["search_space"],
        num_samples=args.samples,
        num_workers=len(healthy),
    )

    # Analyze results
    df = analyze_results(
        results,
        output_folder="results",
        prefix=f"raytune_{args.service}",
        config_keys=cfg["config_keys"],
    )

    print(f"\n{'='*50}")
    print("Tuning complete!")
    print(f"{'='*50}")


if __name__ == "__main__":
    main()
365
src/raytune_ocr.py
Normal file
@@ -0,0 +1,365 @@
# raytune_ocr.py
# Shared Ray Tune utilities for OCR hyperparameter optimization
#
# Usage:
#   from raytune_ocr import check_workers, create_trainable, run_tuner, analyze_results

import os
from datetime import datetime
from typing import List, Dict, Any, Callable

import requests
import pandas as pd

import ray
from ray import tune
from ray.tune.search.optuna import OptunaSearch


def check_workers(
    ports: List[int],
    service_name: str = "OCR",
    timeout: int = 180,
    interval: int = 5,
) -> List[str]:
    """
    Wait for workers to be fully ready (model + dataset loaded) and return healthy URLs.

    Args:
        ports: List of port numbers to check
        service_name: Name for error messages
        timeout: Max seconds to wait for each worker
        interval: Seconds between retries

    Returns:
        List of healthy worker URLs

    Raises:
        RuntimeError: If no healthy workers are found after the timeout
    """
    import time

    worker_urls = [f"http://localhost:{port}" for port in ports]
    healthy_workers = []

    for url in worker_urls:
        print(f"Waiting for {url}...")
        start = time.time()

        while time.time() - start < timeout:
            try:
                health = requests.get(f"{url}/health", timeout=10).json()
                model_ok = health.get('model_loaded', False)
                dataset_ok = health.get('dataset_loaded', False)

                if health.get('status') == 'ok' and model_ok:
                    gpu = health.get('gpu_name', 'CPU')
                    print(f"✓ {url}: ready ({gpu})")
                    healthy_workers.append(url)
                    break

                elapsed = int(time.time() - start)
                print(f"  [{elapsed}s] model={model_ok} dataset={dataset_ok}")
            except requests.exceptions.RequestException:
                elapsed = int(time.time() - start)
                print(f"  [{elapsed}s] not reachable")

            time.sleep(interval)
        else:
            print(f"✗ {url}: timeout after {timeout}s")

    if not healthy_workers:
        raise RuntimeError(
            f"No healthy {service_name} workers found.\n"
            f"Checked ports: {ports}"
        )

    print(f"\n{len(healthy_workers)}/{len(worker_urls)} workers ready\n")
    return healthy_workers


def create_trainable(ports: List[int], payload_fn: Callable[[Dict], Dict]) -> Callable:
    """
    Factory to create a trainable function for Ray Tune.

    Args:
        ports: List of worker ports for load balancing
        payload_fn: Function that takes a config dict and returns an API payload dict

    Returns:
        Trainable function for Ray Tune

    Note:
        Ray Tune 2.x API: tune.report(metrics_dict) - pass a dict directly, NOT kwargs.
        See: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.report.html
    """
    def trainable(config):
        import random
        import requests
        from ray.tune import report  # Ray 2.x: report(dict), not report(**kwargs)

        api_url = f"http://localhost:{random.choice(ports)}"
        payload = payload_fn(config)

        try:
            response = requests.post(f"{api_url}/evaluate", json=payload, timeout=None)
            response.raise_for_status()
            metrics = response.json()
            metrics["worker"] = api_url
            report(metrics)  # Ray 2.x API: pass dict directly
        except Exception as e:
            report({  # Report worst-case metrics so the failed trial still registers
                "CER": 1.0,
                "WER": 1.0,
                "TIME": 0.0,
                "PAGES": 0,
                "TIME_PER_PAGE": 0,
                "worker": api_url,
                "ERROR": str(e)[:500]
            })

    return trainable


def run_tuner(
    trainable: Callable,
    search_space: Dict[str, Any],
    num_samples: int = 64,
    num_workers: int = 1,
    metric: str = "CER",
    mode: str = "min",
) -> tune.ResultGrid:
    """
    Initialize Ray and run hyperparameter tuning.

    Args:
        trainable: Trainable function from create_trainable()
        search_space: Dict of parameter names to tune.* search spaces
        num_samples: Number of trials to run
        num_workers: Max concurrent trials
        metric: Metric to optimize
        mode: "min" or "max"

    Returns:
        Ray Tune ResultGrid
    """
    ray.init(
        ignore_reinit_error=True,
        include_dashboard=False,
        configure_logging=False,
        _metrics_export_port=0,  # Disable metrics export to avoid connection warnings
    )
    print(f"Ray Tune ready (version: {ray.__version__})")

    tuner = tune.Tuner(
        trainable,
        tune_config=tune.TuneConfig(
            metric=metric,
            mode=mode,
            search_alg=OptunaSearch(),
            num_samples=num_samples,
            max_concurrent_trials=num_workers,
        ),
        param_space=search_space,
    )

    return tuner.fit()


def analyze_results(
    results: tune.ResultGrid,
    output_folder: str = "results",
    prefix: str = "raytune",
    config_keys: List[str] = None,
) -> pd.DataFrame:
    """
    Analyze and save tuning results.

    Args:
        results: Ray Tune ResultGrid
        output_folder: Directory to save CSV
        prefix: Filename prefix
        config_keys: List of config keys to show in best result (without 'config/' prefix)

    Returns:
        Results DataFrame
    """
    os.makedirs(output_folder, exist_ok=True)
    df = results.get_dataframe()

    # Save to CSV
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{prefix}_results_{timestamp}.csv"
    filepath = os.path.join(output_folder, filename)
    df.to_csv(filepath, index=False)
    print(f"Results saved: {filepath}")

    # Best configuration
    best = df.loc[df["CER"].idxmin()]
    print(f"\nBest CER: {best['CER']:.6f}")
    print(f"Best WER: {best['WER']:.6f}")

    if config_keys:
        print("\nOptimal Configuration:")
        for key in config_keys:
            col = f"config/{key}"
            if col in best:
                val = best[col]
                if isinstance(val, float):
                    print(f"  {key}: {val:.4f}")
                else:
                    print(f"  {key}: {val}")

    return df


def correlation_analysis(df: pd.DataFrame, param_keys: List[str]) -> None:
    """
    Print correlation of numeric parameters with CER/WER.

    Args:
        df: Results DataFrame
        param_keys: List of config keys (without 'config/' prefix)
    """
    param_cols = [f"config/{k}" for k in param_keys if f"config/{k}" in df.columns]
    numeric_cols = [c for c in param_cols if df[c].dtype in ['float64', 'int64']]

    if not numeric_cols:
        print("No numeric parameters for correlation analysis")
        return

    corr_cer = df[numeric_cols + ["CER"]].corr()["CER"].sort_values(ascending=False)
    corr_wer = df[numeric_cols + ["WER"]].corr()["WER"].sort_values(ascending=False)

    print("Correlation with CER:")
    print(corr_cer)
    print("\nCorrelation with WER:")
    print(corr_wer)


# =============================================================================
# OCR-specific payload functions
# =============================================================================

def paddle_ocr_payload(config: Dict) -> Dict:
    """Create payload for the PaddleOCR API. Uses pages 5-10 (first doc) for tuning."""
    return {
        "pdf_folder": "/app/dataset",
        "use_doc_orientation_classify": config.get("use_doc_orientation_classify", False),
        "use_doc_unwarping": config.get("use_doc_unwarping", False),
        "textline_orientation": config.get("textline_orientation", True),
        "text_det_thresh": config.get("text_det_thresh", 0.0),
        "text_det_box_thresh": config.get("text_det_box_thresh", 0.0),
        "text_det_unclip_ratio": config.get("text_det_unclip_ratio", 1.5),
        "text_rec_score_thresh": config.get("text_rec_score_thresh", 0.0),
        "start_page": 5,
        "end_page": 10,
        "save_output": False,
    }


def doctr_payload(config: Dict) -> Dict:
    """Create payload for the DocTR API. Uses pages 5-10 (first doc) for tuning."""
    return {
        "pdf_folder": "/app/dataset",
        "assume_straight_pages": config.get("assume_straight_pages", True),
        "straighten_pages": config.get("straighten_pages", False),
        "preserve_aspect_ratio": config.get("preserve_aspect_ratio", True),
        "symmetric_pad": config.get("symmetric_pad", True),
        "disable_page_orientation": config.get("disable_page_orientation", False),
        "disable_crop_orientation": config.get("disable_crop_orientation", False),
        "resolve_lines": config.get("resolve_lines", True),
        "resolve_blocks": config.get("resolve_blocks", False),
        "paragraph_break": config.get("paragraph_break", 0.035),
        "start_page": 5,
        "end_page": 10,
        "save_output": False,
    }


def easyocr_payload(config: Dict) -> Dict:
    """Create payload for the EasyOCR API. Uses pages 5-10 (first doc) for tuning."""
    return {
        "pdf_folder": "/app/dataset",
        "text_threshold": config.get("text_threshold", 0.7),
        "low_text": config.get("low_text", 0.4),
        "link_threshold": config.get("link_threshold", 0.4),
        "slope_ths": config.get("slope_ths", 0.1),
        "ycenter_ths": config.get("ycenter_ths", 0.5),
        "height_ths": config.get("height_ths", 0.5),
        "width_ths": config.get("width_ths", 0.5),
        "add_margin": config.get("add_margin", 0.1),
        "contrast_ths": config.get("contrast_ths", 0.1),
        "adjust_contrast": config.get("adjust_contrast", 0.5),
        "decoder": config.get("decoder", "greedy"),
        "beamWidth": config.get("beamWidth", 5),
        "min_size": config.get("min_size", 10),
        "start_page": 5,
        "end_page": 10,
        "save_output": False,
    }


# =============================================================================
# Search spaces
# =============================================================================

PADDLE_OCR_SEARCH_SPACE = {
    "use_doc_orientation_classify": tune.choice([True, False]),
    "use_doc_unwarping": tune.choice([True, False]),
    "textline_orientation": tune.choice([True, False]),
    "text_det_thresh": tune.uniform(0.0, 0.7),
    "text_det_box_thresh": tune.uniform(0.0, 0.7),
    "text_det_unclip_ratio": tune.choice([0.0]),  # single choice: held fixed, not tuned
    "text_rec_score_thresh": tune.uniform(0.0, 0.7),
}

DOCTR_SEARCH_SPACE = {
    "assume_straight_pages": tune.choice([True, False]),
    "straighten_pages": tune.choice([True, False]),
    "preserve_aspect_ratio": tune.choice([True, False]),
    "symmetric_pad": tune.choice([True, False]),
    "disable_page_orientation": tune.choice([True, False]),
    "disable_crop_orientation": tune.choice([True, False]),
    "resolve_lines": tune.choice([True, False]),
    "resolve_blocks": tune.choice([True, False]),
    "paragraph_break": tune.uniform(0.01, 0.1),
}

EASYOCR_SEARCH_SPACE = {
    "text_threshold": tune.uniform(0.3, 0.9),
    "low_text": tune.uniform(0.2, 0.6),
    "link_threshold": tune.uniform(0.2, 0.6),
    "slope_ths": tune.uniform(0.0, 0.3),
    "ycenter_ths": tune.uniform(0.3, 1.0),
    "height_ths": tune.uniform(0.3, 1.0),
    "width_ths": tune.uniform(0.3, 1.0),
    "add_margin": tune.uniform(0.0, 0.3),
    "contrast_ths": tune.uniform(0.05, 0.3),
    "adjust_contrast": tune.uniform(0.3, 0.8),
    "decoder": tune.choice(["greedy", "beamsearch"]),
    "beamWidth": tune.choice([3, 5, 7, 10]),
    "min_size": tune.choice([5, 10, 15, 20]),
}


# =============================================================================
# Config keys for results display
# =============================================================================

PADDLE_OCR_CONFIG_KEYS = [
    "use_doc_orientation_classify", "use_doc_unwarping", "textline_orientation",
    "text_det_thresh", "text_det_box_thresh", "text_det_unclip_ratio", "text_rec_score_thresh",
]

DOCTR_CONFIG_KEYS = [
    "assume_straight_pages", "straighten_pages", "preserve_aspect_ratio", "symmetric_pad",
    "disable_page_orientation", "disable_crop_orientation", "resolve_lines", "resolve_blocks",
    "paragraph_break",
]

EASYOCR_CONFIG_KEYS = [
    "text_threshold", "low_text", "link_threshold", "slope_ths", "ycenter_ths",
    "height_ths", "width_ths", "add_margin", "contrast_ths", "adjust_contrast",
    "decoder", "beamWidth", "min_size",
]
65
src/results/raytune_doctr_results_20260119_121445.csv
Normal file
@@ -0,0 +1,65 @@
CER,WER,TIME,PAGES,TIME_PER_PAGE,model_reinitialized,worker,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore,config/assume_straight_pages,config/straighten_pages,config/preserve_aspect_ratio,config/symmetric_pad,config/disable_page_orientation,config/disable_crop_orientation,config/resolve_lines,config/resolve_blocks,config/paragraph_break,logdir
0.07954940376147028,0.3649854752864963,20.5652813911438,5,3.1847062587738035,True,http://localhost:8003,1768820798,,False,1,c4fcad75,2026-01-19_12-06-38,20.57081699371338,20.57081699371338,154230,sergio-XPS-15-9500,192.168.1.5,20.57081699371338,1,False,False,False,True,True,True,True,False,0.09422103316797548,c4fcad75
0.7921346697142901,1.0545568452820837,10.610254287719727,5,0.7967870712280274,True,http://localhost:8003,1768820812,,False,1,1c83f678,2026-01-19_12-06-52,10.619899988174438,10.619899988174438,156506,sergio-XPS-15-9500,192.168.1.5,10.619899988174438,1,True,True,False,True,False,True,True,False,0.05901661817569934,1c83f678
0.7923201620478004,1.0636912186759604,5.9415740966796875,5,0.7310843944549561,True,http://localhost:8003,1768820822,,False,1,9c50442a,2026-01-19_12-07-02,5.945918560028076,5.945918560028076,158305,sergio-XPS-15-9500,192.168.1.5,5.945918560028076,1,True,True,True,True,False,False,True,False,0.011733102672610147,9c50442a
0.7666925478783123,1.0440873928467642,11.626950025558472,5,1.8842015743255616,True,http://localhost:8003,1768820837,,False,1,68ca7089,2026-01-19_12-07-17,11.63136100769043,11.63136100769043,160066,sergio-XPS-15-9500,192.168.1.5,11.63136100769043,1,False,True,True,True,True,False,True,False,0.05057045374185024,68ca7089
0.07451994486755961,0.3515575293610934,3.9680063724517822,5,0.3743919849395752,True,http://localhost:8003,1768820844,,False,1,8a8806b7,2026-01-19_12-07-24,3.97230863571167,3.97230863571167,162728,sergio-XPS-15-9500,192.168.1.5,3.97230863571167,1,True,False,True,False,True,False,False,False,0.023697921154561794,8a8806b7
0.7657432112619441,1.0344358563738436,11.93731951713562,5,1.942223310470581,True,http://localhost:8003,1768820859,,False,1,f96be72a,2026-01-19_12-07-39,11.941577672958374,11.941577672958374,163962,sergio-XPS-15-9500,192.168.1.5,11.941577672958374,1,False,True,False,True,True,False,False,False,0.08588425427021348,f96be72a
0.7918824188958541,1.0538014427522018,5.69057035446167,5,0.7088402271270752,True,http://localhost:8003,1768820868,,False,1,a832050e,2026-01-19_12-07-48,5.69484543800354,5.69484543800354,166633,sergio-XPS-15-9500,192.168.1.5,5.69484543800354,1,True,True,True,False,False,True,False,True,0.048746351477152096,a832050e
0.08002835367212643,0.35831740099305937,8.193880081176758,5,1.19890398979187,True,http://localhost:8003,1768820880,,False,1,9719423a,2026-01-19_12-08-00,8.198804140090942,8.198804140090942,168390,sergio-XPS-15-9500,192.168.1.5,8.198804140090942,1,False,False,True,True,True,False,True,True,0.05352825040305834,9719423a
0.7921346697142901,1.0545568452820837,5.68590521812439,5,0.7114005088806152,True,http://localhost:8003,1768820889,,False,1,fddb15d7,2026-01-19_12-08-09,5.691095352172852,5.691095352172852,170482,sergio-XPS-15-9500,192.168.1.5,5.691095352172852,1,True,True,False,False,False,True,True,True,0.07145403499389562,fddb15d7
0.0743152533045929,0.3522593474791794,3.8518898487091064,5,0.36159796714782716,True,http://localhost:8003,1768820896,,False,1,9a805553,2026-01-19_12-08-16,3.8564491271972656,3.8564491271972656,172258,sergio-XPS-15-9500,192.168.1.5,3.8564491271972656,1,True,False,False,False,True,False,True,False,0.09773705213878954,9a805553
0.0743152533045929,0.3522593474791794,2.2390947341918945,5,0.361072301864624,False,http://localhost:8003,1768820902,,False,1,791b8e38,2026-01-19_12-08-22,2.2431814670562744,2.2431814670562744,173474,sergio-XPS-15-9500,192.168.1.5,2.2431814670562744,1,True,False,False,False,True,False,False,True,0.09837572708177385,791b8e38
0.0743152533045929,0.3522593474791794,2.245297431945801,5,0.36272416114807127,False,http://localhost:8003,1768820907,,False,1,7c60350c,2026-01-19_12-08-27,2.2497620582580566,2.2497620582580566,174686,sergio-XPS-15-9500,192.168.1.5,2.2497620582580566,1,True,False,False,False,True,False,False,True,0.09836846418124921,7c60350c
0.0743152533045929,0.3522593474791794,2.276707172393799,5,0.3691234111785889,False,http://localhost:8003,1768820913,,False,1,aa5f6e40,2026-01-19_12-08-33,2.2811028957366943,2.2811028957366943,175886,sergio-XPS-15-9500,192.168.1.5,2.2811028957366943,1,True,False,False,False,True,False,False,True,0.0782267000165494,aa5f6e40
0.0743152533045929,0.3522593474791794,2.436836004257202,5,0.3974581241607666,False,http://localhost:8003,1768820919,,False,1,be96a2fd,2026-01-19_12-08-39,2.4409751892089844,2.4409751892089844,177093,sergio-XPS-15-9500,192.168.1.5,2.4409751892089844,1,True,False,False,False,True,False,False,True,0.0988530443174727,be96a2fd
0.0743152533045929,0.3522593474791794,6.658028841018677,5,1.2390151023864746,False,http://localhost:8003,1768820930,,False,1,8fd4d954,2026-01-19_12-08-50,7.324350118637085,7.324350118637085,178357,sergio-XPS-15-9500,192.168.1.5,7.324350118637085,1,True,False,False,False,True,False,True,False,0.07133875696594016,8fd4d954
0.0743152533045929,0.3522593474791794,2.495633840560913,5,0.38510971069335936,False,http://localhost:8003,1768820936,,False,1,8684a874,2026-01-19_12-08-56,2.9056968688964844,2.9056968688964844,179613,sergio-XPS-15-9500,192.168.1.5,2.9056968688964844,1,True,False,False,False,True,False,False,True,0.08646785623262251,8684a874
0.0743152533045929,0.3522593474791794,2.3146378993988037,5,0.37464404106140137,False,http://localhost:8003,1768820942,,False,1,d70fd3a2,2026-01-19_12-09-02,2.3187525272369385,2.3187525272369385,180827,sergio-XPS-15-9500,192.168.1.5,2.3187525272369385,1,True,False,False,False,True,False,False,True,0.03514348927961332,d70fd3a2
0.0743152533045929,0.3522593474791794,2.498570680618286,5,0.37261042594909666,False,http://localhost:8003,1768820947,,False,1,abaaebf8,2026-01-19_12-09-07,2.5029470920562744,2.5029470920562744,182032,sergio-XPS-15-9500,192.168.1.5,2.5029470920562744,1,True,False,False,False,True,False,True,False,0.08582540859307525,abaaebf8
0.07954940376147028,0.3649854752864963,9.604491949081421,5,1.1755133152008057,True,http://localhost:8003,1768820961,,False,1,5d48a7dd,2026-01-19_12-09-21,9.60886025428772,9.60886025428772,183260,sergio-XPS-15-9500,192.168.1.5,9.60886025428772,1,False,False,False,False,False,True,True,False,0.06547108266017204,5d48a7dd
0.0743152533045929,0.3522593474791794,3.8762130737304688,5,0.3647763729095459,True,http://localhost:8003,1768820968,,False,1,d9253804,2026-01-19_12-09-28,3.880464792251587,3.880464792251587,185371,sergio-XPS-15-9500,192.168.1.5,3.880464792251587,1,True,False,False,False,True,False,False,True,0.038626059833236914,d9253804
0.0743152533045929,0.3522593474791794,2.2411227226257324,5,0.36162853240966797,False,http://localhost:8003,1768820974,,False,1,fdb1a71c,2026-01-19_12-09-34,2.245361566543579,2.245361566543579,186592,sergio-XPS-15-9500,192.168.1.5,2.245361566543579,1,True,False,False,False,True,False,True,True,0.07944221200021404,fdb1a71c
0.0743152533045929,0.3522593474791794,2.248905897140503,5,0.36235270500183103,False,http://localhost:8003,1768820979,,False,1,52a8f206,2026-01-19_12-09-39,2.2539358139038086,2.2539358139038086,187800,sergio-XPS-15-9500,192.168.1.5,2.2539358139038086,1,True,False,False,False,True,False,False,True,0.09978431631107665,52a8f206
0.0743152533045929,0.3522593474791794,2.3572463989257812,5,0.3830925941467285,False,http://localhost:8003,1768820985,,False,1,02249971,2026-01-19_12-09-45,2.3617091178894043,2.3617091178894043,189026,sergio-XPS-15-9500,192.168.1.5,2.3617091178894043,1,True,False,False,False,True,False,False,True,0.09205180265684457,02249971
0.0743152533045929,0.3522593474791794,2.3035998344421387,5,0.3717160701751709,False,http://localhost:8003,1768820991,,False,1,c3e1ed25,2026-01-19_12-09-51,2.3079335689544678,2.3079335689544678,190242,sergio-XPS-15-9500,192.168.1.5,2.3079335689544678,1,True,False,False,False,True,False,False,True,0.09144355766189398,c3e1ed25
0.0743152533045929,0.3522593474791794,2.239521026611328,5,0.3606713771820068,False,http://localhost:8003,1768820996,,False,1,e77efbfe,2026-01-19_12-09-56,2.243769407272339,2.243769407272339,191450,sergio-XPS-15-9500,192.168.1.5,2.243769407272339,1,True,False,False,False,True,False,False,True,0.07895438944798339,e77efbfe
0.0743152533045929,0.3522593474791794,2.289245843887329,5,0.36969733238220215,False,http://localhost:8003,1768821002,,False,1,b63d705d,2026-01-19_12-10-02,2.293459177017212,2.293459177017212,192658,sergio-XPS-15-9500,192.168.1.5,2.293459177017212,1,True,False,False,False,True,False,False,True,0.0979702817184504,b63d705d
0.0813627928214657,0.373642333504444,8.17723536491394,5,1.1997929096221924,True,http://localhost:8003,1768821014,,False,1,c2b49d5f,2026-01-19_12-10-14,8.18145489692688,8.18145489692688,193870,sergio-XPS-15-9500,192.168.1.5,8.18145489692688,1,False,False,True,False,False,True,False,False,0.08848271842661322,c2b49d5f
0.0743152533045929,0.3522593474791794,4.016183137893677,5,0.3882882595062256,True,http://localhost:8003,1768821021,,False,1,751e8805,2026-01-19_12-10-21,4.020460605621338,4.020460605621338,195986,sergio-XPS-15-9500,192.168.1.5,4.020460605621338,1,True,False,False,False,True,False,False,True,0.07226323202056684,751e8805
0.0743152533045929,0.3522593474791794,2.4951744079589844,5,0.4111031532287598,False,http://localhost:8003,1768821027,,False,1,55997272,2026-01-19_12-10-27,2.4997806549072266,2.4997806549072266,197225,sergio-XPS-15-9500,192.168.1.5,2.4997806549072266,1,True,False,False,False,True,False,True,False,0.0821608621907378,55997272
0.07954940376147028,0.3649854752864963,7.8914878368377686,5,1.1408625602722169,True,http://localhost:8003,1768821039,,False,1,c72c5c81,2026-01-19_12-10-39,7.895885229110718,7.895885229110718,198438,sergio-XPS-15-9500,192.168.1.5,7.895885229110718,1,False,False,False,True,True,True,True,False,0.09498694151430796,c72c5c81
0.0743152533045929,0.3522593474791794,3.8655266761779785,5,0.362445068359375,True,http://localhost:8003,1768821046,,False,1,4a75d77c,2026-01-19_12-10-46,3.869797706604004,3.869797706604004,200555,sergio-XPS-15-9500,192.168.1.5,3.869797706604004,1,True,False,False,False,True,False,False,True,0.09294736151174086,4a75d77c
0.0743152533045929,0.3522593474791794,2.3493363857269287,5,0.38149294853210447,False,http://localhost:8003,1768821051,,False,1,c2308a71,2026-01-19_12-10-51,2.353856325149536,2.353856325149536,201775,sergio-XPS-15-9500,192.168.1.5,2.353856325149536,1,True,False,False,False,True,False,False,True,0.07646901408730243,c2308a71
0.0743152533045929,0.3522593474791794,2.2967300415039062,5,0.37166876792907716,False,http://localhost:8003,1768821057,,False,1,b39b4bbc,2026-01-19_12-10-57,2.300992012023926,2.300992012023926,202985,sergio-XPS-15-9500,192.168.1.5,2.300992012023926,1,True,False,False,False,True,False,False,True,0.06310895025982477,b39b4bbc
0.0743152533045929,0.3522593474791794,2.3306691646575928,5,0.37825717926025393,False,http://localhost:8003,1768821063,,False,1,5c179d0f,2026-01-19_12-11-03,2.3352127075195312,2.3352127075195312,204198,sergio-XPS-15-9500,192.168.1.5,2.3352127075195312,1,True,False,False,False,True,False,False,True,0.09214745705658531,5c179d0f
0.7921346697142901,1.0545568452820837,5.84848165512085,5,0.728736686706543,True,http://localhost:8003,1768821072,,False,1,54b75cc8,2026-01-19_12-11-12,5.8533689975738525,5.8533689975738525,205410,sergio-XPS-15-9500,192.168.1.5,5.8533689975738525,1,True,True,False,True,False,False,False,True,0.09992602021030114,54b75cc8
0.07451994486755961,0.3515575293610934,3.878021717071533,5,0.3628075122833252,True,http://localhost:8003,1768821080,,False,1,bb5ac038,2026-01-19_12-11-20,3.8829312324523926,3.8829312324523926,207185,sergio-XPS-15-9500,192.168.1.5,3.8829312324523926,1,True,False,True,False,True,False,True,False,0.08279050013235793,bb5ac038
0.7921346697142901,1.0545568452820837,5.78171968460083,5,0.7050829410552979,True,http://localhost:8003,1768821089,,False,1,f1c7000c,2026-01-19_12-11-29,5.786619186401367,5.786619186401367,208408,sergio-XPS-15-9500,192.168.1.5,5.786619186401367,1,True,True,False,True,True,True,False,False,0.0882484211859766,f1c7000c
0.0813627928214657,0.373642333504444,7.883875608444214,5,1.1401109218597412,True,http://localhost:8003,1768821100,,False,1,5f64114a,2026-01-19_12-11-40,7.887973070144653,7.887973070144653,210166,sergio-XPS-15-9500,192.168.1.5,7.887973070144653,1,False,False,True,False,False,False,True,True,0.09581281484761522,5f64114a
0.7921346697142901,1.0545568452820837,5.837187051773071,5,0.7390849590301514,True,http://localhost:8003,1768821109,,False,1,deb231ab,2026-01-19_12-11-49,5.842226028442383,5.842226028442383,212276,sergio-XPS-15-9500,192.168.1.5,5.842226028442383,1,True,True,False,True,True,False,False,False,0.014903696838843121,deb231ab
0.07451994486755961,0.3515575293610934,3.8521182537078857,5,0.357759428024292,True,http://localhost:8003,1768821116,,False,1,8e1ad60c,2026-01-19_12-11-56,3.856376886367798,3.856376886367798,214039,sergio-XPS-15-9500,192.168.1.5,3.856376886367798,1,True,False,True,False,True,False,False,True,0.07474982974728585,8e1ad60c
0.7657432112619441,1.0344358563738436,11.567106246948242,5,1.8771627426147461,True,http://localhost:8003,1768821131,,False,1,5c7a850a,2026-01-19_12-12-11,11.572225332260132,11.572225332260132,215255,sergio-XPS-15-9500,192.168.1.5,11.572225332260132,1,False,True,False,False,False,True,True,True,0.06667565158056586,5c7a850a
0.0743152533045929,0.3522593474791794,3.854253053665161,5,0.36142959594726565,True,http://localhost:8003,1768821139,,False,1,41600dca,2026-01-19_12-12-19,3.858793020248413,3.858793020248413,217924,sergio-XPS-15-9500,192.168.1.5,3.858793020248413,1,True,False,False,False,True,False,False,True,0.09516963566481865,41600dca
0.0743152533045929,0.3522593474791794,2.2381088733673096,5,0.3609159469604492,False,http://localhost:8003,1768821144,,False,1,55291f18,2026-01-19_12-12-24,2.242400646209717,2.242400646209717,219141,sergio-XPS-15-9500,192.168.1.5,2.242400646209717,1,True,False,False,False,True,False,False,True,0.09955056101622099,55291f18
0.0743152533045929,0.3522593474791794,2.247992515563965,5,0.3638484477996826,False,http://localhost:8003,1768821150,,False,1,e05da7a3,2026-01-19_12-12-30,2.2522785663604736,2.2522785663604736,220353,sergio-XPS-15-9500,192.168.1.5,2.2522785663604736,1,True,False,False,False,True,False,False,True,0.08881643587450277,e05da7a3
0.0743152533045929,0.3522593474791794,2.240065336227417,5,0.3607933521270752,False,http://localhost:8003,1768821155,,False,1,6773b6ef,2026-01-19_12-12-35,2.244333267211914,2.244333267211914,221554,sergio-XPS-15-9500,192.168.1.5,2.244333267211914,1,True,False,False,False,True,False,False,True,0.08162246894892994,6773b6ef
0.0743152533045929,0.3522593474791794,2.228623628616333,5,0.3605500221252441,False,http://localhost:8003,1768821161,,False,1,88f82273,2026-01-19_12-12-41,2.233116626739502,2.233116626739502,222761,sergio-XPS-15-9500,192.168.1.5,2.233116626739502,1,True,False,False,False,True,False,False,False,0.0576590087821367,88f82273
0.0743152533045929,0.3522593474791794,3.8948147296905518,5,0.36910762786865237,True,http://localhost:8003,1768821168,,False,1,122e7c9a,2026-01-19_12-12-48,3.898893356323242,3.898893356323242,223988,sergio-XPS-15-9500,192.168.1.5,3.898893356323242,1,True,False,False,True,True,False,True,True,0.046132117836141115,122e7c9a
0.07451994486755961,0.3515575293610934,3.8418056964874268,5,0.3551186084747314,True,http://localhost:8003,1768821175,,False,1,6944e329,2026-01-19_12-12-55,3.846059799194336,3.846059799194336,225216,sergio-XPS-15-9500,192.168.1.5,3.846059799194336,1,True,False,True,False,True,False,False,True,0.08553768973696241,6944e329
0.7921346697142901,1.0545568452820837,5.819804906845093,5,0.7136962413787842,True,http://localhost:8003,1768821185,,False,1,65fe9972,2026-01-19_12-13-05,5.825164794921875,5.825164794921875,226432,sergio-XPS-15-9500,192.168.1.5,5.825164794921875,1,True,True,False,False,True,False,False,False,0.09616135139330068,65fe9972
0.07954940376147028,0.3649854752864963,7.82697319984436,5,1.1294140815734863,True,http://localhost:8003,1768821196,,False,1,e0bb2fe1,2026-01-19_12-13-16,7.831338882446289,7.831338882446289,228191,sergio-XPS-15-9500,192.168.1.5,7.831338882446289,1,False,False,False,False,True,True,True,True,0.09002271724335277,e0bb2fe1
0.0743152533045929,0.3522593474791794,3.8710319995880127,5,0.36251654624938967,True,http://localhost:8003,1768821203,,False,1,13b36f19,2026-01-19_12-13-23,3.875239849090576,3.875239849090576,230300,sergio-XPS-15-9500,192.168.1.5,3.875239849090576,1,True,False,False,False,False,False,False,True,0.0857237854212837,13b36f19
0.0743152533045929,0.3522593474791794,3.875215768814087,5,0.36308274269104,True,http://localhost:8003,1768821210,,False,1,9c6b5628,2026-01-19_12-13-30,3.8797342777252197,3.8797342777252197,231521,sergio-XPS-15-9500,192.168.1.5,3.8797342777252197,1,True,False,False,False,True,False,True,False,0.06880465434613751,9c6b5628
0.0743152533045929,0.3522593474791794,2.2376744747161865,5,0.36208286285400393,False,http://localhost:8003,1768821216,,False,1,4b6d70bb,2026-01-19_12-13-36,2.242083787918091,2.242083787918091,232738,sergio-XPS-15-9500,192.168.1.5,2.242083787918091,1,True,False,False,False,True,False,True,False,0.06024639917014255,4b6d70bb
0.0743152533045929,0.3522593474791794,2.2306642532348633,5,0.359661865234375,False,http://localhost:8003,1768821221,,False,1,ca9acee8,2026-01-19_12-13-41,2.234971046447754,2.234971046447754,233946,sergio-XPS-15-9500,192.168.1.5,2.234971046447754,1,True,False,False,False,True,False,True,False,0.0935005319022256,ca9acee8
0.0743152533045929,0.3522593474791794,2.229747772216797,5,0.3594185829162598,False,http://localhost:8003,1768821227,,False,1,75b5c78b,2026-01-19_12-13-47,2.2341864109039307,2.2341864109039307,235172,sergio-XPS-15-9500,192.168.1.5,2.2341864109039307,1,True,False,False,False,True,False,True,False,0.05253849882367517,75b5c78b
0.0743152533045929,0.3522593474791794,2.2397162914276123,5,0.3618612289428711,False,http://localhost:8003,1768821233,,False,1,44bf33c9,2026-01-19_12-13-53,2.243781566619873,2.243781566619873,236376,sergio-XPS-15-9500,192.168.1.5,2.243781566619873,1,True,False,False,False,True,False,True,False,0.07878420224854064,44bf33c9
0.0743152533045929,0.3522593474791794,2.2368643283843994,5,0.36055378913879393,False,http://localhost:8003,1768821238,,False,1,f435b3b2,2026-01-19_12-13-58,2.240933895111084,2.240933895111084,237583,sergio-XPS-15-9500,192.168.1.5,2.240933895111084,1,True,False,False,False,True,False,False,True,0.07116860558400767,f435b3b2
0.0743152533045929,0.3522593474791794,2.265198230743408,5,0.36513686180114746,False,http://localhost:8003,1768821244,,False,1,8217f139,2026-01-19_12-14-04,2.2695438861846924,2.2695438861846924,238784,sergio-XPS-15-9500,192.168.1.5,2.2695438861846924,1,True,False,False,False,True,False,True,False,0.09707599413052871,8217f139
0.0743152533045929,0.3522593474791794,2.2422447204589844,5,0.3608452320098877,False,http://localhost:8003,1768821249,,False,1,efe10aca,2026-01-19_12-14-09,2.246490240097046,2.246490240097046,239994,sergio-XPS-15-9500,192.168.1.5,2.246490240097046,1,True,False,False,False,True,False,False,True,0.0391565433402237,efe10aca
0.08002835367212643,0.35831740099305937,8.202797412872314,5,1.1076955318450927,True,http://localhost:8003,1768821261,,False,1,3f085082,2026-01-19_12-14-21,8.2071533203125,8.2071533203125,241216,sergio-XPS-15-9500,192.168.1.5,8.2071533203125,1,False,False,True,True,True,True,True,False,0.0835804142411709,3f085082
0.0743152533045929,0.3522593474791794,3.885773181915283,5,0.3617554664611816,True,http://localhost:8003,1768821268,,False,1,ca26375b,2026-01-19_12-14-28,3.890075922012329,3.890075922012329,243329,sergio-XPS-15-9500,192.168.1.5,3.890075922012329,1,True,False,False,False,True,False,False,True,0.09060074015212932,ca26375b
0.0743152533045929,0.3522593474791794,2.2462470531463623,5,0.3624699592590332,False,http://localhost:8003,1768821274,,False,1,69643aea,2026-01-19_12-14-34,2.2505128383636475,2.2505128383636475,244551,sergio-XPS-15-9500,192.168.1.5,2.2505128383636475,1,True,False,False,False,True,False,False,True,0.07530859871726936,69643aea
0.0743152533045929,0.3522593474791794,2.263847827911377,5,0.3658243179321289,False,http://localhost:8003,1768821279,,False,1,4cae77fc,2026-01-19_12-14-39,2.267988681793213,2.267988681793213,245765,sergio-XPS-15-9500,192.168.1.5,2.267988681793213,1,True,False,False,False,True,False,False,True,0.08801626009397175,4cae77fc
0.0743152533045929,0.3522593474791794,2.2468783855438232,5,0.3630548000335693,False,http://localhost:8003,1768821285,,False,1,6b987e08,2026-01-19_12-14-45,2.2512388229370117,2.2512388229370117,246985,sergio-XPS-15-9500,192.168.1.5,2.2512388229370117,1,True,False,False,False,True,False,False,True,0.09792932706586027,6b987e08
65
src/results/raytune_easyocr_results_20260119_120204.csv
Normal file
@@ -0,0 +1,65 @@
CER,WER,TIME,PAGES,TIME_PER_PAGE,worker,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore,config/text_threshold,config/low_text,config/link_threshold,config/slope_ths,config/ycenter_ths,config/height_ths,config/width_ths,config/add_margin,config/contrast_ths,config/adjust_contrast,config/decoder,config/beamWidth,config/min_size,logdir
0.3871430382852802,0.5182750384528632,19.13978934288025,5,3.7033697605133056,http://localhost:8002,1768819587,,False,1,0ba51edc,2026-01-19_11-46-27,19.150158882141113,19.150158882141113,137518,sergio-XPS-15-9500,192.168.1.5,19.150158882141113,1,0.5066472683346976,0.3124874041155775,0.2640094851170725,0.11369463817649797,0.47012928436448354,0.7140749654136573,0.643133477191141,0.11910600147231132,0.2722183833676177,0.7684200450221536,beamsearch,3,20,0ba51edc
0.062223201197885825,0.26399044299206303,11.377342224121094,5,2.1752953052520754,http://localhost:8002,1768819602,,False,1,c2ddb294,2026-01-19_11-46-42,11.383038759231567,11.383038759231567,137840,sergio-XPS-15-9500,192.168.1.5,11.383038759231567,1,0.4175797290692802,0.5963231402122613,0.36874666681089985,0.223680908941245,0.459921344533471,0.9160307499007694,0.9279619232072562,0.12298366234684793,0.11516147921112997,0.6668263919581685,greedy,10,20,c2ddb294
0.39700544361882206,0.5264527267179566,12.552152156829834,5,2.4208834171295166,http://localhost:8002,1768819617,,False,1,e82ff347,2026-01-19_11-46-57,12.557852029800415,12.557852029800415,138037,sergio-XPS-15-9500,192.168.1.5,12.557852029800415,1,0.8540537965666715,0.294588934626999,0.5092574060096554,0.2836712766196415,0.6190202697962148,0.810073297090729,0.955177616687997,0.10497968516826324,0.20957208332268756,0.7475085753710696,beamsearch,7,10,e82ff347
0.07781775834482615,0.3051087241758982,12.261723279953003,5,2.362420177459717,http://localhost:8002,1768819633,,False,1,532bade0,2026-01-19_11-47-13,12.265946626663208,12.265946626663208,138237,sergio-XPS-15-9500,192.168.1.5,12.265946626663208,1,0.8141250315590343,0.479912630164245,0.2027669826029772,0.11444262905063128,0.7404783620983263,0.301871563170945,0.35514852924629375,0.27832075427107744,0.2643837228077205,0.7950403527209229,greedy,3,10,532bade0
0.3487860557598165,0.5005453336802469,12.705831289291382,5,2.4508751392364503,http://localhost:8002,1768819649,,False,1,7d15d320,2026-01-19_11-47-29,12.712336301803589,12.712336301803589,138464,sergio-XPS-15-9500,192.168.1.5,12.712336301803589,1,0.3225669850847642,0.2716721665537871,0.26115345621898345,0.2438651926519595,0.6194544051054931,0.5792394844360738,0.4710319694788726,0.13213212713543926,0.1990327712555196,0.3304729155445536,beamsearch,7,10,7d15d320
0.3356719522269469,0.47356787280835055,13.11896562576294,5,2.5329070568084715,http://localhost:8002,1768819666,,False,1,9d244107,2026-01-19_11-47-46,13.124910593032837,13.124910593032837,138659,sergio-XPS-15-9500,192.168.1.5,13.124910593032837,1,0.34889752108886873,0.39007345640142954,0.22641510809759163,0.17907271838822,0.8644844159597871,0.6275871303293161,0.9722853596788665,0.25555008849029126,0.20043175984558798,0.7707927516030697,beamsearch,7,10,9d244107
0.2742112621871928,0.43639473613327356,13.743902206420898,5,2.6569590091705324,http://localhost:8002,1768819683,,False,1,f160d61d,2026-01-19_11-48-03,13.750498533248901,13.750498533248901,138904,sergio-XPS-15-9500,192.168.1.5,13.750498533248901,1,0.8392454366146391,0.3155621572041812,0.4873405945675176,0.08582733675720434,0.9790121644393985,0.4062417762545848,0.6466326123022476,0.19715070089301498,0.23503015353761492,0.41517636715917056,beamsearch,10,15,f160d61d
0.09848790101332737,0.32483251468294605,10.20632028579712,5,1.9527865886688232,http://localhost:8002,1768819697,,False,1,1be1e7a5,2026-01-19_11-48-17,10.210542917251587,10.210542917251587,139116,sergio-XPS-15-9500,192.168.1.5,10.210542917251587,1,0.833246186868533,0.22457589994570235,0.32254503276757784,0.23399561843072308,0.30165921403980517,0.8658122652407174,0.47250440785836867,0.2238860017234068,0.1886386486304371,0.4576817046304348,greedy,3,10,1be1e7a5
0.4136569417819424,0.5311620590036745,14.170307874679565,5,2.746191930770874,http://localhost:8002,1768819714,,False,1,0746a065,2026-01-19_11-48-34,14.175411701202393,14.175411701202393,139318,sergio-XPS-15-9500,192.168.1.5,14.175411701202393,1,0.6782050871534447,0.22844595642210797,0.2858119663327552,0.0823237135063647,0.9612792593924089,0.665348992884313,0.8626670975336155,0.04300909760808497,0.270098820639789,0.45556228770798246,beamsearch,10,5,0746a065
0.4517379831360281,0.5799232118269153,15.184179544448853,5,2.9479862689971923,http://localhost:8002,1768819733,,False,1,ef6faf9d,2026-01-19_11-48-53,15.188986778259277,15.188986778259277,139517,sergio-XPS-15-9500,192.168.1.5,15.188986778259277,1,0.39558213831954714,0.5599422938176799,0.3313024647230755,0.11634655299660798,0.8823955834187702,0.6660518255567262,0.796016060076042,0.1299041367034449,0.2152856765400713,0.6606446175138574,beamsearch,10,10,ef6faf9d
0.0795526147054266,0.34016478642481734,11.83824896812439,5,2.2789079189300536,http://localhost:8002,1768819748,,False,1,e584ad1a,2026-01-19_11-49-08,11.842672109603882,11.842672109603882,139771,sergio-XPS-15-9500,192.168.1.5,11.842672109603882,1,0.521503445317256,0.5967505351644852,0.4313761698948889,0.18235873322120522,0.425714368894258,0.9959973340677325,0.7683261374584024,0.018826411104235885,0.09775666402707693,0.628476421820741,greedy,5,20,e584ad1a
0.09113684668662517,0.3330104965172591,13.415843725204468,5,2.595126819610596,http://localhost:8002,1768819765,,False,1,933eaf3b,2026-01-19_11-49-25,13.420702457427979,13.420702457427979,139980,sergio-XPS-15-9500,192.168.1.5,13.420702457427979,1,0.6841928895220837,0.4987357892894665,0.3892687916541862,0.013496416992424515,0.7313608327277628,0.30075189594812957,0.32892055287409155,0.2910230441279402,0.12231738001404545,0.6542796585827699,greedy,3,20,933eaf3b
0.07683542859531813,0.29422679092874626,12.476734638214111,5,2.407120943069458,http://localhost:8002,1768819781,,False,1,5cc050c0,2026-01-19_11-49-41,12.481242179870605,12.481242179870605,140188,sergio-XPS-15-9500,192.168.1.5,12.481242179870605,1,0.7076826224292139,0.4751142111109723,0.5719253650216765,0.20726075894486198,0.7574616804022614,0.48759940016947356,0.34266143931551063,0.18447732850058915,0.05055007965981624,0.5684478612561757,greedy,5,15,5cc050c0
0.06306661910327489,0.2898453031979762,11.470694541931152,5,2.2064542293548586,http://localhost:8002,1768819796,,False,1,d3c4733b,2026-01-19_11-49-56,11.4755117893219,11.4755117893219,140395,sergio-XPS-15-9500,192.168.1.5,11.4755117893219,1,0.6620107715544297,0.46192225302253637,0.5999869164872036,0.22619461913686095,0.5081500315391371,0.475339433636797,0.5106649520736647,0.18343269541739415,0.05344530818183559,0.5503520865389809,greedy,5,15,d3c4733b
0.062270483694448396,0.28136185456156826,11.204349517822266,5,2.1529050350189207,http://localhost:8002,1768819810,,False,1,b45ad82b,2026-01-19_11-50-10,11.209157705307007,11.209157705307007,140574,sergio-XPS-15-9500,192.168.1.5,11.209157705307007,1,0.5677170679516823,0.39248586783769635,0.5772785270028471,0.27599118000336537,0.5077328211777172,0.9804901966926808,0.4977991183990612,0.07239471385409058,0.1374763382905679,0.553666724679821,greedy,5,15,b45ad82b
0.05996048766984661,0.26719903989315885,10.76261305809021,5,2.0644459247589113,http://localhost:8002,1768819824,,False,1,8acf6ec9,2026-01-19_11-50-24,10.767472267150879,10.767472267150879,140781,sergio-XPS-15-9500,192.168.1.5,10.767472267150879,1,0.45865724369035377,0.3694009035940602,0.4151173065881186,0.2983365466960818,0.3631913446659816,0.9980863757691772,0.5845159135795941,0.0721946556655992,0.1459278780476781,0.694791501629087,greedy,5,5,8acf6ec9
0.06099161461125324,0.2731943754797238,10.691137313842773,5,2.049327087402344,http://localhost:8002,1768819838,,False,1,0551450f,2026-01-19_11-50-38,10.69617772102356,10.69617772102356,140969,sergio-XPS-15-9500,192.168.1.5,10.69617772102356,1,0.4402243626112622,0.3701488279313097,0.40203668237242685,0.2972046540464212,0.33871151213781014,0.8838165530603757,0.7081592028492127,0.0882537861188746,0.15672333775519132,0.701627303389235,greedy,10,5,0551450f
0.061099404730611595,0.2721280502767147,10.488921165466309,5,2.0086814403533935,http://localhost:8002,1768819852,,False,1,e740013a,2026-01-19_11-50-52,10.493494510650635,10.493494510650635,141174,sergio-XPS-15-9500,192.168.1.5,10.493494510650635,1,0.46435985811111974,0.34785224515762775,0.45493529224642276,0.29478569868586896,0.35587921159117397,0.8172744152107332,0.7122588321341333,0.0735916007360217,0.15982046838787856,0.7164721195205754,greedy,5,5,e740013a
0.062362858472938,0.272332407323177,10.604278802871704,5,2.034042978286743,http://localhost:8002,1768819866,,False,1,22c24728,2026-01-19_11-51-06,10.608573198318481,10.608573198318481,141340,sergio-XPS-15-9500,192.168.1.5,10.608573198318481,1,0.579678584169857,0.41597218340706976,0.4252016667747404,0.2679346252767811,0.34868781409745264,0.7747328556811077,0.5639686467419519,0.07445003550177257,0.16554473301217898,0.7073749357717483,greedy,10,5,22c24728
0.06215767332972164,0.2747475932624559,10.546220541000366,5,2.021405267715454,http://localhost:8002,1768819880,,False,1,d1b611a8,2026-01-19_11-51-20,10.550852060317993,10.550852060317993,141520,sergio-XPS-15-9500,192.168.1.5,10.550852060317993,1,0.4422199064362936,0.3610913124264453,0.512759066575697,0.25795910850742676,0.5611259808565064,0.9053873818686548,0.5976970185172742,0.003121661182585389,0.08700122299695832,0.6200011976268031,greedy,10,5,d1b611a8
0.06426741821045164,0.27754887165353204,10.526280164718628,5,2.017490863800049,http://localhost:8002,1768819894,,False,1,a1925725,2026-01-19_11-51-34,10.530900001525879,10.530900001525879,141685,sergio-XPS-15-9500,192.168.1.5,10.530900001525879,1,0.5079166883998535,0.44070967935910216,0.3555775923905935,0.2990878745571421,0.31120640343991984,0.9491605272601941,0.721432583570574,0.044062271648251126,0.1572631030161951,0.5962531429630691,greedy,5,5,a1925725
0.060448802280017165,0.2709457820432465,10.548709630966187,5,2.0212356567382814,http://localhost:8002,1768819907,,False,1,f6248ceb,2026-01-19_11-51-47,10.553314208984375,10.553314208984375,141848,sergio-XPS-15-9500,192.168.1.5,10.553314208984375,1,0.4717256039811322,0.36544351935053254,0.44547752189718304,0.29867816914798173,0.3833038520221923,0.8392790049435077,0.6924094072779299,0.0852529065561854,0.1544529445184886,0.7151769237673308,greedy,5,5,f6248ceb
0.061830952891847354,0.27643497142574114,10.458194017410278,5,2.00508770942688,http://localhost:8002,1768819921,,False,1,9408f008,2026-01-19_11-52-01,10.462894439697266,10.462894439697266,142026,sergio-XPS-15-9500,192.168.1.5,10.462894439697266,1,0.36151386422841214,0.3538388593453238,0.4559692019279934,0.258413183713029,0.39490484466097675,0.8743587585061078,0.7008339670509499,0.08528252345983173,0.1412514911085921,0.7102293742914433,greedy,5,5,9408f008
0.06426139507144008,0.27969442229397773,10.85228157043457,5,2.0829681873321535,http://localhost:8002,1768819935,,False,1,a0aa078a,2026-01-19_11-52-15,10.856995105743408,10.856995105743408,142190,sergio-XPS-15-9500,192.168.1.5,10.856995105743408,1,0.4624158086714028,0.42040393809756477,0.41520125659911294,0.29032442769565125,0.38480963688924097,0.745502857691457,0.5817045834292819,0.045692170174803245,0.17769522993714032,0.6933972538344093,greedy,5,5,a0aa078a
0.06269459198356074,0.27808950345890404,10.585867643356323,5,2.0289974212646484,http://localhost:8002,1768819949,,False,1,324be6ad,2026-01-19_11-52-29,10.590425252914429,10.590425252914429,142377,sergio-XPS-15-9500,192.168.1.5,10.590425252914429,1,0.39019467846190514,0.372308752898106,0.4640373077177259,0.20167201551181882,0.4408716269770253,0.8406520699713839,0.8098310920672391,0.1579316915947745,0.1384207575445601,0.7454573365368217,greedy,5,5,324be6ad
0.07959827118630344,0.2871382933960637,11.532482385635376,5,2.219746446609497,http://localhost:8002,1768819964,,False,1,e1c26fe1,2026-01-19_11-52-44,11.537264823913574,11.537264823913574,142538,sergio-XPS-15-9500,192.168.1.5,11.537264823913574,1,0.3090429790922413,0.33472186465221,0.39720817790586443,0.0041528793175236445,0.3025883785231392,0.9359865988554746,0.4208565345904826,0.09825579905606344,0.08933198214929214,0.5029113260048625,greedy,5,5,e1c26fe1
0.06153670825357198,0.2689836062793151,10.684980630874634,5,2.048065185546875,http://localhost:8002,1768819978,,False,1,871a2974,2026-01-19_11-52-58,10.689571142196655,10.689571142196655,142730,sergio-XPS-15-9500,192.168.1.5,10.689571142196655,1,0.6247643595063705,0.2700409637884238,0.523706372392991,0.26010593479118665,0.5419430667470642,0.8772489609968006,0.866157823298259,0.1525272090916175,0.23282983510183955,0.6005045065411087,greedy,10,5,871a2974
0.06673842132253202,0.2895430656572954,11.181420803070068,5,2.148970937728882,http://localhost:8002,1768819993,,False,1,5aaa2960,2026-01-19_11-53-13,11.186044454574585,11.186044454574585,142902,sergio-XPS-15-9500,192.168.1.5,11.186044454574585,1,0.5312313131533724,0.5274817776501124,0.36246508220473683,0.1487343581575564,0.3926538404095683,0.9516125555915751,0.6733549601019699,0.048249293092278434,0.11205800044575707,0.7992457276130864,greedy,7,5,5aaa2960
0.06397855317924395,0.27562926342642274,10.582021236419678,5,2.0291433334350586,http://localhost:8002,1768820006,,False,1,21bd3de3,2026-01-19_11-53-26,10.586687564849854,10.586687564849854,143089,sergio-XPS-15-9500,192.168.1.5,10.586687564849854,1,0.4768706082264196,0.4116856094728855,0.47401542881269365,0.24184252961783387,0.6689268585545911,0.7706602741028105,0.6152463359675456,0.02384590208270837,0.14958983968802692,0.6832923394286707,greedy,5,5,21bd3de3
0.05928688439040566,0.26340764235199676,10.82849907875061,5,2.0774466037750243,http://localhost:8002,1768820021,,False,1,1557acdd,2026-01-19_11-53-41,10.833132982254028,10.833132982254028,143248,sergio-XPS-15-9500,192.168.1.5,10.833132982254028,1,0.7552574004836203,0.44533911204124527,0.31397183762754305,0.2781958432695631,0.4971448247990278,0.702889696463513,0.5563365487128928,0.10957807143315677,0.1792808875596712,0.7431378339011148,greedy,3,5,1557acdd
0.05996751845943706,0.2656487417441341,11.046596050262451,5,2.1210866928100587,http://localhost:8002,1768820035,,False,1,23e5421b,2026-01-19_11-53-55,11.051404476165771,11.051404476165771,143435,sergio-XPS-15-9500,192.168.1.5,11.051404476165771,1,0.7718089675955625,0.4446379405494256,0.3019967059446066,0.27530868169916184,0.48775088657867727,0.7025268307300849,0.5457135094112008,0.10608020395503459,0.17680901565764098,0.7399221495601584,greedy,3,5,23e5421b
0.05943303923556994,0.2621136461900505,10.89347219467163,5,2.090515375137329,http://localhost:8002,1768820049,,False,1,4662a08f,2026-01-19_11-54-09,10.898061990737915,10.898061990737915,143626,sergio-XPS-15-9500,192.168.1.5,10.898061990737915,1,0.7655197786088256,0.4384608011311873,0.2900656349558717,0.2738896956339715,0.4897956878476248,0.7114900186099934,0.5392251925681772,0.11338377422440528,0.18288699118515803,0.7492268780264275,greedy,3,5,4662a08f
0.059764190418310784,0.26498596833223664,11.022373676300049,5,2.11647310256958,http://localhost:8002,1768820064,,False,1,8339cb3e,2026-01-19_11-54-24,11.026973724365234,11.026973724365234,143832,sergio-XPS-15-9500,192.168.1.5,11.026973724365234,1,0.7686099049266422,0.44630560025029414,0.2948219426310189,0.2727084952650962,0.49027990928339404,0.7249036670847477,0.5450468550932773,0.11187079599626384,0.18133138980677752,0.7495335565594098,greedy,3,5,8339cb3e
0.060684238278697525,0.26432483439151866,12.10981273651123,5,2.3338690757751466,http://localhost:8002,1768820079,,False,1,9c9cf542,2026-01-19_11-54-39,12.114561557769775,12.114561557769775,144014,sergio-XPS-15-9500,192.168.1.5,12.114561557769775,1,0.7532180802163942,0.5128327503981508,0.2570950665245929,0.21228601663917626,0.5702886327992472,0.5874866302046862,0.41605423922305346,0.1393125792842351,0.22050576617777679,0.7624824674521864,greedy,3,20,9c9cf542
0.08014581283242714,0.28932853882106035,10.766591310501099,5,2.0627391815185545,http://localhost:8002,1768820093,,False,1,7b99dc7d,2026-01-19_11-54-53,10.773411512374878,10.773411512374878,144217,sergio-XPS-15-9500,192.168.1.5,10.773411512374878,1,0.8900827816008225,0.43692605130405904,0.28299893768197637,0.25090796326354026,0.45116119804450994,0.7000835777935013,0.5311272120253014,0.10699302785038173,0.2904514002507723,0.7756605791225515,greedy,3,5,7b99dc7d
0.05998922172744085,0.26585145931941695,11.418177604675293,5,2.19525465965271,http://localhost:8002,1768820108,,False,1,889ff391,2026-01-19_11-55-08,11.422764301300049,11.422764301300049,144398,sergio-XPS-15-9500,192.168.1.5,11.422764301300049,1,0.7853225189675154,0.463910613321873,0.23698735272141672,0.27377548391814954,0.6121219754884698,0.551217667291872,0.43571381214714444,0.11657214266943153,0.18871141271799163,0.7335864533748023,greedy,3,5,889ff391
0.3537681802368841,0.4969864100911835,12.881014823913574,5,2.4865323543548583,http://localhost:8002,1768820124,,False,1,7e811d46,2026-01-19_11-55-24,12.88630223274231,12.88630223274231,144607,sergio-XPS-15-9500,192.168.1.5,12.88630223274231,1,0.7266484292255461,0.5415454213873866,0.3301145976622343,0.1865414523299046,0.47980014672018056,0.7370946863942303,0.6321175664041752,0.16199096365481883,0.24575549479858036,0.7988955477215958,beamsearch,3,5,7e811d46
0.08668141149396207,0.3195016810538794,12.23897933959961,5,2.3584585189819336,http://localhost:8002,1768820140,,False,1,aad8a433,2026-01-19_11-55-40,12.244789123535156,12.244789123535156,144837,sergio-XPS-15-9500,192.168.1.5,12.244789123535156,1,0.8890784877906777,0.49729149007901785,0.3022378793797936,0.15068002069309427,0.5217560545383055,0.6246570748018311,0.39540672252266484,0.06113992103803731,0.19740387526722958,0.6691724379280026,greedy,3,20,aad8a433
0.33039603802482187,0.4796702224046533,12.4546537399292,5,2.4026978492736815,http://localhost:8002,1768820156,,False,1,512657a2,2026-01-19_11-55-56,12.45941162109375,12.45941162109375,145063,sergio-XPS-15-9500,192.168.1.5,12.45941162109375,1,0.6232362282312066,0.3918712695091323,0.2051294768906529,0.23628755351196915,0.5886422425865593,0.3680701363856915,0.45704649890130883,0.1172561016305299,0.17265532433475142,0.7657720890343414,beamsearch,3,5,512657a2
0.06198201775009295,0.2639318510923077,10.336721420288086,5,1.9784754753112792,http://localhost:8002,1768820170,,False,1,1da2591c,2026-01-19_11-56-10,10.341253757476807,10.341253757476807,145258,sergio-XPS-15-9500,192.168.1.5,10.341253757476807,1,0.7945748814752798,0.3074609198039082,0.3512850377909583,0.2803387165565871,0.676034214318366,0.5425759112229473,0.4977769366841911,0.1405039691690697,0.18414358174506226,0.6495146967256282,greedy,3,10,1da2591c
0.23930652997356217,0.4047803085409988,13.96639633178711,5,2.70588903427124,http://localhost:8002,1768820187,,False,1,1fc76c61,2026-01-19_11-56-27,13.971062898635864,13.971062898635864,145448,sergio-XPS-15-9500,192.168.1.5,13.971062898635864,1,0.7298730667959007,0.43128174897306926,0.37543194001483676,0.033557047235571416,0.4227439352044997,0.6369762315598249,0.5720837980668902,0.16989421299763682,0.20357556846664004,0.33606535760084727,beamsearch,7,15,1fc76c61
0.05916457749009331,0.2603697639812623,10.936553001403809,5,2.099363851547241,http://localhost:8002,1768820201,,False,1,466fabc4,2026-01-19_11-56-41,10.941264390945435,10.941264390945435,145657,sergio-XPS-15-9500,192.168.1.5,10.941264390945435,1,0.7646655943554652,0.4496059489020273,0.3037692280282893,0.27820217212001197,0.4796795208364998,0.7118325937653041,0.5413221047834652,0.10120472780313837,0.16953835385986285,0.7373278953886837,greedy,3,5,466fabc4
0.06262277504663857,0.2680984132847148,10.939441919326782,5,2.099300193786621,http://localhost:8002,1768820216,,False,1,1d6d1749,2026-01-19_11-56-56,10.943971633911133,10.943971633911133,145853,sergio-XPS-15-9500,192.168.1.5,10.943971633911133,1,0.8234354899576677,0.4593995267054814,0.27033008042371826,0.24452223445450588,0.44951347701495115,0.6907896319414741,0.5324461272026295,0.1235467025491428,0.12932778883432983,0.7317188726093867,greedy,3,5,1d6d1749
0.05970784035209096,0.258759438956101,10.990158319473267,5,2.1104721069335937,http://localhost:8002,1768820230,,False,1,c82e12e9,2026-01-19_11-57-10,10.994841575622559,10.994841575622559,146026,sergio-XPS-15-9500,192.168.1.5,10.994841575622559,1,0.7435671290019616,0.4902723579691337,0.31030673207841203,0.2821781420999702,0.4743635349095276,0.7868678535393907,0.6534237946773291,0.09810216733901932,0.18818934557100567,0.769000804122876,greedy,3,5,c82e12e9
0.07399781605809005,0.2901757233837255,11.241674661636353,5,2.159311056137085,http://localhost:8002,1768820245,,False,1,dc4b8ad0,2026-01-19_11-57-25,11.246280431747437,11.246280431747437,146227,sergio-XPS-15-9500,192.168.1.5,11.246280431747437,1,0.8635855881513506,0.5679840217648511,0.3108576081126515,0.26596438861226535,0.4736117661041297,0.7935405923179888,0.6568791745253106,0.09852706108769861,0.22110037713279163,0.7726420509771701,greedy,3,5,dc4b8ad0
0.059443757821647306,0.2652742693642366,10.932884454727173,5,2.0979042530059813,http://localhost:8002,1768820259,,False,1,dd5c1aa0,2026-01-19_11-57-39,10.937772035598755,10.937772035598755,146415,sergio-XPS-15-9500,192.168.1.5,10.937772035598755,1,0.7497851211362265,0.49340285442914233,0.24486518704295845,0.22169705261942863,0.5344290231994961,0.7343738246174152,0.6273279588084633,0.1401353190181211,0.19391335447188496,0.751931055711065,greedy,3,10,dd5c1aa0
0.33905839374179186,0.46681630291244874,11.817269086837769,5,2.2754374027252195,http://localhost:8002,1768820274,,False,1,3e431bbc,2026-01-19_11-57-54,11.822028636932373,11.822028636932373,146609,sergio-XPS-15-9500,192.168.1.5,11.822028636932373,1,0.7409469255126825,0.486311604635016,0.2426683920471307,0.22749653812474147,0.5339818816411395,0.7675880896677424,0.6136943680830941,0.22201604923294813,0.2076143561269635,0.7773645815175689,beamsearch,3,10,3e431bbc
0.06022704320482961,0.26313388102560387,10.998746633529663,5,2.1118124961853026,http://localhost:8002,1768820288,,False,1,156758d9,2026-01-19_11-58-08,11.003510475158691,11.003510475158691,146797,sergio-XPS-15-9500,192.168.1.5,11.003510475158691,1,0.6958655352045846,0.5130811270234237,0.3398685649368741,0.281609671843136,0.6188171051225511,0.6619539249830828,0.7621944146375241,0.13722873492512194,0.19541317596404653,0.724668083186668,greedy,3,10,156758d9
0.0619061941971184,0.2666708599391416,10.721810817718506,5,2.0556752681732178,http://localhost:8002,1768820303,,False,1,98b752e7,2026-01-19_11-58-23,10.726754426956177,10.726754426956177,146994,sergio-XPS-15-9500,192.168.1.5,10.726754426956177,1,0.8065507370753903,0.479579558894321,0.2710803109658562,0.2200369611680297,0.562424006392253,0.6785297866543542,0.6428102120307683,0.17361026837711904,0.25222880963797256,0.6816772979912098,greedy,3,10,98b752e7
0.060772000212913825,0.2693655727035526,11.828697204589844,5,2.276572847366333,http://localhost:8002,1768820318,,False,1,b76fb991,2026-01-19_11-58-38,11.833409070968628,11.833409070968628,147173,sergio-XPS-15-9500,192.168.1.5,11.833409070968628,1,0.6588562181986706,0.5057274333487476,0.21185595176486843,0.2530909139222912,0.6525256193586906,0.8104014913294882,0.4815502590805036,0.13014894080011688,0.16940039157653397,0.7552923776175787,greedy,3,10,b76fb991
0.07314038576784788,0.3150308431474841,12.541530132293701,5,2.420142650604248,http://localhost:8002,1768820334,,False,1,2cddab16,2026-01-19_11-58-54,12.546295404434204,12.546295404434204,147391,sergio-XPS-15-9500,192.168.1.5,12.546295404434204,1,0.8467938317793842,0.5454167229484307,0.31221025364961774,0.19484970751487457,0.7101321488954703,0.6066858622923857,0.3883609000553786,0.09428410179254802,0.23303430823510501,0.5028771950032019,greedy,3,20,2cddab16
0.061126787276099506,0.2754658344456032,12.830697536468506,5,2.4755293846130373,http://localhost:8002,1768820350,,False,1,c5e9c336,2026-01-19_11-59-10,12.835358381271362,12.835358381271362,147587,sergio-XPS-15-9500,192.168.1.5,12.835358381271362,1,0.7742942276856887,0.45602451871204075,0.2906132981749209,0.28249790167048744,0.49285375099310735,0.7281704754203927,0.30136076169570813,0.11215186859095508,0.18604751676297107,0.7485499894558536,greedy,3,10,c5e9c336
0.059864794050619355,0.2672736064749025,11.629220485687256,5,2.235791301727295,http://localhost:8002,1768820365,,False,1,4746a594,2026-01-19_11-59-25,11.634002208709717,11.634002208709717,147814,sergio-XPS-15-9500,192.168.1.5,11.634002208709717,1,0.7600341581312108,0.4856987064104726,0.25429745004407167,0.23782684371695748,0.5171294186553896,0.7199868218813051,0.5150852975917685,0.1466645033310691,0.21110091695829342,0.7887741773568971,greedy,3,15,4746a594
0.059687361636187354,0.25915782844539953,10.77558970451355,5,2.0677656650543215,http://localhost:8002,1768820379,,False,1,914de1fb,2026-01-19_11-59-39,10.780381202697754,10.780381202697754,148016,sergio-XPS-15-9500,192.168.1.5,10.780381202697754,1,0.7206799644549393,0.4038298181079831,0.22829349882480535,0.26704100913427425,0.40874625247425306,0.7422689086598406,0.5495893868854069,0.1269079072494077,0.1907921420998867,0.7564991275004229,greedy,3,5,914de1fb
0.05944052289142775,0.2603006035896063,11.235510110855103,5,2.1587305068969727,http://localhost:8002,1768820394,,False,1,67d86f75,2026-01-19_11-59-54,11.240439653396606,11.240439653396606,148208,sergio-XPS-15-9500,192.168.1.5,11.240439653396606,1,0.7181780900634192,0.40619737782309295,0.23858930971427372,0.17036711251144926,0.41028051751847794,0.7619291987754846,0.5944586460638401,0.12680370752155648,0.19408677869066687,0.7340530830475422,greedy,3,5,67d86f75
0.05864527764234886,0.2576966365837255,10.711666345596313,5,2.054002857208252,http://localhost:8002,1768820408,,False,1,ec233275,2026-01-19_12-00-08,10.71644115447998,10.71644115447998,148394,sergio-XPS-15-9500,192.168.1.5,10.71644115447998,1,0.7119912570829008,0.4067353312041748,0.22290482686450167,0.13876386837316096,0.4216785745225061,0.7449060175492836,0.6189859060561754,0.12837536724587273,0.16720360936555814,0.6490148035375993,greedy,7,5,ec233275
0.058352281456512646,0.26325850918850957,10.756606340408325,5,2.0632463455200196,http://localhost:8002,1768820422,,False,1,1b85472e,2026-01-19_12-00-22,10.761056900024414,10.761056900024414,148589,sergio-XPS-15-9500,192.168.1.5,10.761056900024414,1,0.6647184009064185,0.42466969296752816,0.21838222053573686,0.1629305080861391,0.7994293119091709,0.6436655189392679,0.6065310919737225,0.14619053351152517,0.1671131734904739,0.6416317933607728,greedy,7,10,1b85472e
0.3660240439441647,0.4947236362577508,13.107557773590088,5,2.53425874710083,http://localhost:8002,1768820439,,False,1,c50724c2,2026-01-19_12-00-39,13.112190961837769,13.112190961837769,148777,sergio-XPS-15-9500,192.168.1.5,13.112190961837769,1,0.6634068628046286,0.4221917610956251,0.22391772412866445,0.1502086057528373,0.7842640466327674,0.6529205282440211,0.5966980952588006,0.18574462350804272,0.16077183234622805,0.6422206751727608,beamsearch,7,5,c50724c2
0.062152982639591625,0.27540347582693964,10.558995485305786,5,2.022567129135132,http://localhost:8002,1768820452,,False,1,881d9f45,2026-01-19_12-00-52,10.563637018203735,10.563637018203735,148992,sergio-XPS-15-9500,192.168.1.5,10.563637018203735,1,0.6346549144056921,0.3809145239465362,0.2823575989757486,0.1284614307850303,0.8067091565131851,0.8337121990108658,0.567787249051487,0.20829411872710996,0.16721474316062188,0.6282852294207945,greedy,7,5,881d9f45
0.05856626938126275,0.2581712321259471,10.876498222351074,5,2.087088108062744,http://localhost:8002,1768820467,,False,1,48fc43e4,2026-01-19_12-01-07,10.88118314743042,10.88118314743042,149163,sergio-XPS-15-9500,192.168.1.5,10.88118314743042,1,0.7042086976838686,0.4025170289737934,0.2160231541556799,0.16549335913941385,0.8858930429274254,0.6801565065140187,0.6745339610780225,0.08262987034261617,0.13360114059916128,0.5889319630704115,greedy,7,15,48fc43e4
0.05976448758711881,0.2592269888370555,10.979224681854248,5,2.107044887542725,http://localhost:8002,1768820481,,False,1,652caf77,2026-01-19_12-01-21,10.983993291854858,10.983993291854858,149348,sergio-XPS-15-9500,192.168.1.5,10.983993291854858,1,0.6988387104713508,0.4289552511338064,0.2166071100318819,0.08342333197598858,0.8547499849878485,0.6078156114278425,0.6796871899662313,0.0591502474857241,0.14761325178795806,0.5982026862890478,greedy,7,15,652caf77
0.059133090191924254,0.2616872288695368,10.93423843383789,5,2.0975637912750242,http://localhost:8002,1768820495,,False,1,2e85880b,2026-01-19_12-01-35,10.93894910812378,10.93894910812378,149544,sergio-XPS-15-9500,192.168.1.5,10.93894910812378,1,0.6774988511926161,0.4026304656490138,0.20196424213945063,0.16637061772902026,0.9069000290827862,0.6717917525978443,0.607813099351824,0.08607375284532315,0.12816482122073206,0.5836410965708964,greedy,7,15,2e85880b
0.058834943191146474,0.258064666499282,10.712863683700562,5,2.055108594894409,http://localhost:8002,1768820509,,False,1,08c06d24,2026-01-19_12-01-49,10.71783971786499,10.71783971786499,149716,sergio-XPS-15-9500,192.168.1.5,10.71783971786499,1,0.676761107149889,0.3948167640336808,0.20446373408896712,0.1252645275302706,0.928745330628802,0.6772167484136661,0.728934789581864,0.07948320492885358,0.12455482683154301,0.5820049881076059,greedy,7,15,08c06d24
0.05934167210765926,0.26507859745022083,10.722304821014404,5,2.0554207801818847,http://localhost:8002,1768820524,,False,1,b3f45b00,2026-01-19_12-02-04,10.727020978927612,10.727020978927612,149910,sergio-XPS-15-9500,192.168.1.5,10.727020978927612,1,0.6791241480460476,0.38507960399360586,0.2008675489682369,0.13136654102633838,0.9452093699034901,0.6808870002862947,0.7451721898503598,0.08065678907057289,0.11084582244266457,0.5764033974919818,greedy,7,15,b3f45b00
65
src/results/raytune_paddle_results_20260119_122609.csv
Normal file
@@ -0,0 +1,65 @@
CER,WER,TIME,PAGES,TIME_PER_PAGE,worker,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore,config/use_doc_orientation_classify,config/use_doc_unwarping,config/textline_orientation,config/text_det_thresh,config/text_det_box_thresh,config/text_det_unclip_ratio,config/text_rec_score_thresh,logdir
0.03506661663316561,0.09890345974963388,11.85569167137146,5,2.223856973648071,http://localhost:8002,1768821470,,False,1,c385d490,2026-01-19_12-17-50,11.864287614822388,11.864287614822388,255694,sergio-XPS-15-9500,192.168.1.5,11.864287614822388,1,False,False,False,0.3694663403739679,0.4296387270337578,0.0,0.1783109083293045,c385d490
0.03599172786858722,0.09831877575011358,3.6901509761810303,5,0.642470121383667,http://localhost:8002,1768821477,,False,1,28a0a423,2026-01-19_12-17-57,3.6944973468780518,3.6944973468780518,255930,sergio-XPS-15-9500,192.168.1.5,3.6944973468780518,1,True,False,False,0.443249796611768,0.4817558265252385,0.0,0.06237975078446407,28a0a423
0.07296898422220219,0.13203708321215762,10.501965999603271,5,2.0055192947387694,http://localhost:8002,1768821491,,False,1,f699b826,2026-01-19_12-18-11,10.506679058074951,10.506679058074951,256056,sergio-XPS-15-9500,192.168.1.5,10.506679058074951,1,False,True,False,0.2851409433291632,0.5181201198120159,0.0,0.5402431853279566,f699b826
0.06341497143231878,0.12432485697376627,10.013647079467773,5,1.9113924980163575,http://localhost:8002,1768821505,,False,1,49e77d45,2026-01-19_12-18-25,10.018975019454956,10.018975019454956,256261,sergio-XPS-15-9500,192.168.1.5,10.018975019454956,1,False,True,False,0.4091020962342421,0.5477675836994064,0.0,0.28125964062929637,49e77d45
0.06363307378397837,0.11080195018785229,10.315315961837769,5,1.9735893249511718,http://localhost:8002,1768821518,,False,1,08dff189,2026-01-19_12-18-38,10.319286346435547,10.319286346435547,256431,sergio-XPS-15-9500,192.168.1.5,10.319286346435547,1,False,True,True,0.4761569778732009,0.47781667917332393,0.0,0.010287859440038183,08dff189
0.00927190028988934,0.08293509652512027,3.394526243209839,5,0.5889779567718506,http://localhost:8002,1768821525,,False,1,2808180e,2026-01-19_12-18-45,3.3984148502349854,3.3984148502349854,256622,sergio-XPS-15-9500,192.168.1.5,3.3984148502349854,1,False,False,True,0.49092093640044654,0.16386227611297105,0.0,0.36495336114676485,2808180e
0.06414858633862171,0.1138840355665884,10.091642618179321,5,1.9286378383636475,http://localhost:8002,1768821539,,False,1,8b33e2a2,2026-01-19_12-18-59,10.095749855041504,10.095749855041504,256746,sergio-XPS-15-9500,192.168.1.5,10.095749855041504,1,False,True,False,0.664057104821503,0.380194482697527,0.0,0.0957856258135195,8b33e2a2
0.04089344159161516,0.11588877886734197,3.399895191192627,5,0.5897929668426514,http://localhost:8002,1768821546,,False,1,2b3b0aad,2026-01-19_12-19-06,3.403998613357544,3.403998613357544,256911,sergio-XPS-15-9500,192.168.1.5,3.403998613357544,1,True,False,False,0.15162885621474814,0.015269709226466177,0.0,0.6005426046606002,2b3b0aad
0.06440335927000067,0.125496108332261,10.158945322036743,5,1.9415269851684571,http://localhost:8002,1768821559,,False,1,8c1998de,2026-01-19_12-19-19,10.162839651107788,10.162839651107788,257030,sergio-XPS-15-9500,192.168.1.5,10.162839651107788,1,True,True,True,0.3692127966881518,0.23308318268023623,0.0,0.3773645637989277,8c1998de
0.0637132502302169,0.11234475429253714,9.987636089324951,5,1.9088503837585449,http://localhost:8002,1768821573,,False,1,52bacbb6,2026-01-19_12-19-33,9.991463661193848,9.991463661193848,257222,sergio-XPS-15-9500,192.168.1.5,9.991463661193848,1,False,True,False,0.6035565410217514,0.21880259661403342,0.0,0.18713153326839937,52bacbb6
0.008343047226538839,0.08349130431035265,3.386183738708496,5,0.5885046482086181,http://localhost:8002,1768821579,,False,1,08c1ee35,2026-01-19_12-19-39,3.39007830619812,3.39007830619812,257399,sergio-XPS-15-9500,192.168.1.5,3.39007830619812,1,True,False,True,0.15926489447447112,0.017648992877564967,0.0,0.44224480118340653,08c1ee35
0.007922541795615114,0.07887346048819885,3.468980550765991,5,0.5967342376708984,http://localhost:8002,1768821586,,False,1,d00c4e76,2026-01-19_12-19-46,3.472960948944092,3.472960948944092,257525,sergio-XPS-15-9500,192.168.1.5,3.472960948944092,1,True,False,True,0.07077078342680466,0.004051086507914577,0.0,0.46605997897727297,d00c4e76
0.016055552163489285,0.08753651728221294,3.3815455436706543,5,0.5863098621368408,http://localhost:8002,1768821593,,False,1,bb72b916,2026-01-19_12-19-53,3.385627031326294,3.385627031326294,257655,sergio-XPS-15-9500,192.168.1.5,3.385627031326294,1,True,False,True,0.00406946269144004,0.024694902295496916,0.0,0.48724120796716147,bb72b916
0.04101062641443912,0.11434949759329069,3.3144912719726562,5,0.5752715110778809,http://localhost:8002,1768821599,,False,1,c12ba2dc,2026-01-19_12-19-59,3.3184001445770264,3.3184001445770264,257771,sergio-XPS-15-9500,192.168.1.5,3.3184001445770264,1,True,False,True,0.11631707320987289,0.690466345723201,0.0,0.6724394280648069,c12ba2dc
0.00877838333494364,0.08577894245301848,3.401432514190674,5,0.589998722076416,http://localhost:8002,1768821606,,False,1,463a2384,2026-01-19_12-20-06,3.4053428173065186,3.4053428173065186,257879,sergio-XPS-15-9500,192.168.1.5,3.4053428173065186,1,True,False,True,0.22358777119494402,0.11342742897015146,0.0,0.42574884909601923,463a2384
0.008258946852964685,0.07832593783541303,3.4435582160949707,5,0.5993115901947021,http://localhost:8002,1768821613,,False,1,9ec8a6c5,2026-01-19_12-20-13,3.447549343109131,3.447549343109131,257998,sergio-XPS-15-9500,192.168.1.5,3.447549343109131,1,True,False,True,0.00914625516134962,0.28951184233224014,0.0,0.4822045024114849,9ec8a6c5
0.016055552163489285,0.08753651728221294,3.357020139694214,5,0.58282470703125,http://localhost:8002,1768821620,,False,1,c5e2ab01,2026-01-19_12-20-20,3.360861301422119,3.360861301422119,258136,sergio-XPS-15-9500,192.168.1.5,3.360861301422119,1,True,False,True,0.003475038037149451,0.29241480396041347,0.0,0.5570331572371645,c5e2ab01
0.009030183622618133,0.06800810511996136,3.4037389755249023,5,0.5921475410461425,http://localhost:8002,1768821627,,False,1,791ed981,2026-01-19_12-20-27,3.4075520038604736,3.4075520038604736,258252,sergio-XPS-15-9500,192.168.1.5,3.4075520038604736,1,True,False,True,0.08655779066151734,0.3187645875435276,0.0,0.2687428540439976,791ed981
0.008664940340048574,0.08581798715920706,3.501950263977051,5,0.6108397006988525,http://localhost:8002,1768821633,,False,1,f8442025,2026-01-19_12-20-33,3.5058133602142334,3.5058133602142334,258364,sergio-XPS-15-9500,192.168.1.5,3.5058133602142334,1,True,False,True,0.26385969784523366,0.10646638343274928,0.0,0.6888529567810926,f8442025
0.013289181242042186,0.08277097527295318,3.2847726345062256,5,0.5695433616638184,http://localhost:8002,1768821640,,False,1,c4cc8356,2026-01-19_12-20-40,3.2885093688964844,3.2885093688964844,258479,sergio-XPS-15-9500,192.168.1.5,3.2885093688964844,1,True,False,True,0.0783907286407576,0.6144374684317566,0.0,0.49431837576833404,c4cc8356
0.008558844366776789,0.08503058558440392,3.376376152038574,5,0.5869657039642334,http://localhost:8002,1768821647,,False,1,fb7bf10e,2026-01-19_12-20-47,3.380413770675659,3.380413770675659,258615,sergio-XPS-15-9500,192.168.1.5,3.380413770675659,1,True,False,True,0.19290877255165814,0.09975349505857617,0.0,0.6114422209758432,fb7bf10e
0.007997676431652,0.07780877475636923,3.3821396827697754,5,0.5890754699707031,http://localhost:8002,1768821654,,False,1,d2036b54,2026-01-19_12-20-54,3.386087417602539,3.386087417602539,258726,sergio-XPS-15-9500,192.168.1.5,3.386087417602539,1,True,False,True,0.045413006981742665,0.014462040606135707,0.0,0.43172761082245126,d2036b54
0.009147368445442098,0.06969651955749985,3.374091148376465,5,0.5859436988830566,http://localhost:8002,1768821660,,False,1,50ea7f3b,2026-01-19_12-21-00,3.3778791427612305,3.3778791427612305,258841,sergio-XPS-15-9500,192.168.1.5,3.3778791427612305,1,True,False,True,0.05615414666061707,0.1767564331348277,0.0,0.294181079680786,50ea7f3b
0.008414440034646826,0.07859969916180594,3.3822972774505615,5,0.5889940738677979,http://localhost:8002,1768821667,,False,1,248f11ad,2026-01-19_12-21-07,3.3861117362976074,3.3861117362976074,258958,sergio-XPS-15-9500,192.168.1.5,3.3861117362976074,1,True,False,True,0.037929131718362014,0.08279922744979032,0.0,0.44895447738110594,248f11ad
0.008631855890798765,0.08171378358546351,3.3687093257904053,5,0.5860745429992675,http://localhost:8002,1768821674,,False,1,ed62f7dc,2026-01-19_12-21-14,3.372666835784912,3.372666835784912,259076,sergio-XPS-15-9500,192.168.1.5,3.372666835784912,1,True,False,True,0.1333628019047363,0.2729950555484231,0.0,0.39746071410829,ed62f7dc
0.008664940340048574,0.08499154087821534,3.371145248413086,5,0.5862448215484619,http://localhost:8002,1768821681,,False,1,d8907a1f,2026-01-19_12-21-21,3.375185012817383,3.375185012817383,259206,sergio-XPS-15-9500,192.168.1.5,3.375185012817383,1,True,False,True,0.2765606196671755,0.060003260056553154,0.0,0.5025665425204284,d8907a1f
0.009147368445442098,0.07229696373274716,3.3624093532562256,5,0.5846651554107666,http://localhost:8002,1768821687,,False,1,ebaac043,2026-01-19_12-21-27,3.366320848464966,3.366320848464966,259323,sergio-XPS-15-9500,192.168.1.5,3.366320848464966,1,True,False,True,0.04919576638833845,0.36820782546645486,0.0,0.32312205105133734,ebaac043
0.008558844366776789,0.08503058558440392,3.3781065940856934,5,0.587260627746582,http://localhost:8002,1768821694,,False,1,a0894bc0,2026-01-19_12-21-34,3.3822152614593506,3.3822152614593506,259443,sergio-XPS-15-9500,192.168.1.5,3.3822152614593506,1,True,False,True,0.1994235733794807,0.15972291414455095,0.0,0.5977644425109412,a0894bc0
0.008024895940958,0.07962534018744696,3.398592710494995,5,0.5916557788848877,http://localhost:8002,1768821701,,False,1,3498c1b8,2026-01-19_12-21-41,3.4023826122283936,3.4023826122283936,259554,sergio-XPS-15-9500,192.168.1.5,3.4023826122283936,1,True,False,True,0.1046266985888523,0.23508200526753675,0.0,0.5467266950434034,3498c1b8
0.008024895940958,0.07962534018744696,3.4101011753082275,5,0.5957276344299316,http://localhost:8002,1768821707,,False,1,00fc5f6a,2026-01-19_12-21-47,3.4141347408294678,3.4141347408294678,259689,sergio-XPS-15-9500,192.168.1.5,3.4141347408294678,1,True,False,True,0.09816375424029757,0.40866092341544563,0.0,0.5397528720422529,00fc5f6a
0.008449143199810622,0.08349130431035265,3.4055111408233643,5,0.5931827545166015,http://localhost:8002,1768821714,,False,1,e98c02d1,2026-01-19_12-21-54,3.409532070159912,3.409532070159912,259816,sergio-XPS-15-9500,192.168.1.5,3.409532070159912,1,True,False,True,0.3140290686317056,0.052614998451672106,0.0,0.6465903750193005,e98c02d1
0.008024895940958,0.07962534018744696,3.3723814487457275,5,0.5866386890411377,http://localhost:8002,1768821721,,False,1,c70f3f43,2026-01-19_12-22-01,3.3762624263763428,3.3762624263763428,259923,sergio-XPS-15-9500,192.168.1.5,3.3762624263763428,1,True,False,True,0.10014126954970229,0.42707748560882025,0.0,0.5502134276128419,c70f3f43
0.008343047226538839,0.08349130431035265,3.3672409057617188,5,0.5856597900390625,http://localhost:8002,1768821728,,False,1,70400fbe,2026-01-19_12-22-08,3.371093511581421,3.371093511581421,260039,sergio-XPS-15-9500,192.168.1.5,3.371093511581421,1,True,False,True,0.16292741177177594,0.4548418182130589,0.0,0.5302300590456391,70400fbe
0.008664940340048574,0.08499154087821534,3.4183735847473145,5,0.5965535163879394,http://localhost:8002,1768821734,,False,1,4dcb599d,2026-01-19_12-22-14,3.4222280979156494,3.4222280979156494,260159,sergio-XPS-15-9500,192.168.1.5,3.4222280979156494,1,True,False,True,0.23726923927972388,0.4074643735298082,0.0,0.41001202937163644,4dcb599d
0.04068873330092939,0.11438501946884572,3.257974624633789,5,0.5640182018280029,http://localhost:8002,1768821741,,False,1,4228b5e1,2026-01-19_12-22-21,3.261892557144165,3.261892557144165,260291,sergio-XPS-15-9500,192.168.1.5,3.261892557144165,1,True,False,False,0.12333092543339132,0.5239761637260665,0.0,0.5745717593014468,4228b5e1
0.06275857947195311,0.12652527218853557,9.750442743301392,5,1.8625127792358398,http://localhost:8002,1768821754,,False,1,3588064b,2026-01-19_12-22-34,9.754103899002075,9.754103899002075,260400,sergio-XPS-15-9500,192.168.1.5,9.754103899002075,1,False,True,True,0.10034065797370648,0.34091325083457025,0.0,0.6394382232363077,3588064b
0.040999537564886945,0.11588877886734197,3.285776138305664,5,0.5690357685089111,http://localhost:8002,1768821761,,False,1,11ccb158,2026-01-19_12-22-41,3.289609670639038,3.289609670639038,260569,sergio-XPS-15-9500,192.168.1.5,3.289609670639038,1,True,False,False,0.32864774599403973,0.14086017880721893,0.0,0.46819585706944256,11ccb158
0.062252142887134154,0.11824393793048431,9.891753673553467,5,1.8906636714935303,http://localhost:8002,1768821774,,False,1,6fc2cbb9,2026-01-19_12-22-54,9.895762920379639,9.895762920379639,260704,sergio-XPS-15-9500,192.168.1.5,9.895762920379639,1,False,True,True,0.059161274748840434,0.21510105294599707,0.0,0.5189526304991655,6fc2cbb9
0.035476033214537156,0.11817641701000778,3.285740613937378,5,0.5687613487243652,http://localhost:8002,1768821781,,False,1,d915205d,2026-01-19_12-23-01,3.289746046066284,3.289746046066284,260873,sergio-XPS-15-9500,192.168.1.5,3.289746046066284,1,True,False,False,0.4165672741815639,0.0010212040152359678,0.0,0.34076033139687656,d915205d
0.0640894002629319,0.11483863284111936,9.806191444396973,5,1.8735287666320801,http://localhost:8002,1768821794,,False,1,2f6a0de8,2026-01-19_12-23-14,9.809982538223267,9.809982538223267,260993,sergio-XPS-15-9500,192.168.1.5,9.809982538223267,1,False,True,True,0.5305871352962446,0.5562291603129679,0.0,0.19677826870589865,2f6a0de8
0.008734210036141653,0.08345578243479762,3.3932855129241943,5,0.590654993057251,http://localhost:8002,1768821801,,False,1,75a6f03e,2026-01-19_12-23-21,3.3974790573120117,3.3974790573120117,261182,sergio-XPS-15-9500,192.168.1.5,3.3974790573120117,1,True,False,True,0.17403705065527203,0.05196087574793615,0.0,0.37230135627667593,75a6f03e
0.008024895940958,0.07962534018744696,3.372239828109741,5,0.586278247833252,http://localhost:8002,1768821807,,False,1,59bdf5af,2026-01-19_12-23-27,3.3761444091796875,3.3761444091796875,261290,sergio-XPS-15-9500,192.168.1.5,3.3761444091796875,1,True,False,True,0.0964007218643779,0.4285920164263687,0.0,0.5544150084923888,59bdf5af
0.007884233436756935,0.07784781946255781,3.391608476638794,5,0.5895267486572265,http://localhost:8002,1768821814,,False,1,181fa700,2026-01-19_12-23-34,3.3955013751983643,3.3955013751983643,261408,sergio-XPS-15-9500,192.168.1.5,3.3955013751983643,1,True,False,True,0.04616218689941105,0.4861882831078568,0.0,0.5658024954699784,181fa700
0.008187554044856696,0.07781229758700277,3.379288911819458,5,0.5891064167022705,http://localhost:8002,1768821821,,False,1,8df7daf7,2026-01-19_12-23-41,3.383202314376831,3.383202314376831,261523,sergio-XPS-15-9500,192.168.1.5,3.383202314376831,1,True,False,True,0.02800972164203512,0.4596234327116702,0.0,0.5894305118437192,8df7daf7
0.0080286377688869,0.07962181735681341,3.3880317211151123,5,0.5899625778198242,http://localhost:8002,1768821828,,False,1,d427a211,2026-01-19_12-23-48,3.3918912410736084,3.3918912410736084,261651,sergio-XPS-15-9500,192.168.1.5,3.3918912410736084,1,True,False,True,0.060058513373542344,0.4968017369460056,0.0,0.4546749796342963,d427a211
0.04089344159161516,0.11588877886734197,3.2276556491851807,5,0.5582141876220703,http://localhost:8002,1768821834,,False,1,c83e898d,2026-01-19_12-23-54,3.2317638397216797,3.2317638397216797,261771,sergio-XPS-15-9500,192.168.1.5,3.2317638397216797,1,False,False,False,0.12734972085227625,0.3933923240644007,0.0,0.6218152533645911,c83e898d
0.07289971452610912,0.1312833201534554,8.918929815292358,5,1.6958380699157716,http://localhost:8002,1768821846,,False,1,34bfaecf,2026-01-19_12-24-06,8.923492193222046,8.923492193222046,261885,sergio-XPS-15-9500,192.168.1.5,8.923492193222046,1,True,True,True,0.02983245257805507,0.5541286918768669,0.0,0.5254000761733085,34bfaecf
0.008664940340048574,0.08424318400960076,3.3413267135620117,5,0.5809893608093262,http://localhost:8002,1768821853,,False,1,d28ff6ad,2026-01-19_12-24-13,3.3452816009521484,3.3452816009521484,262045,sergio-XPS-15-9500,192.168.1.5,3.3452816009521484,1,True,False,True,0.15364693264219786,0.5914356505484054,0.0,0.4346147311057641,d28ff6ad
0.00877838333494364,0.08577894245301848,3.4076058864593506,5,0.5933670043945313,http://localhost:8002,1768821860,,False,1,1bd5239a,2026-01-19_12-24-20,3.4112603664398193,3.4112603664398193,262180,sergio-XPS-15-9500,192.168.1.5,3.4112603664398193,1,True,False,True,0.22332206917987685,0.3526810869908701,0.0,0.5730079634012908,1bd5239a
0.03369141887914488,0.11024529401954712,3.2711544036865234,5,0.5658481121063232,http://localhost:8002,1768821867,,False,1,df514085,2026-01-19_12-24-27,3.2749204635620117,3.2749204635620117,262288,sergio-XPS-15-9500,192.168.1.5,3.2749204635620117,1,True,False,False,0.07573375090561205,0.2490247970846971,0.0,0.39959759235219644,df514085
0.0623615517224065,0.124505989182175,9.822217226028442,5,1.8769143104553223,http://localhost:8002,1768821880,,False,1,05146970,2026-01-19_12-24-40,9.826353549957275,9.826353549957275,262409,sergio-XPS-15-9500,192.168.1.5,9.826353549957275,1,False,True,True,0.01074645265207852,0.13367849913726723,0.0,0.6632577581918868,05146970
0.008024895940958,0.07962534018744696,3.3825182914733887,5,0.5886817455291748,http://localhost:8002,1768821886,,False,1,b670fd4b,2026-01-19_12-24-46,3.3867027759552,3.3867027759552,262594,sergio-XPS-15-9500,192.168.1.5,3.3867027759552,1,True,False,True,0.09944138895292096,0.44624592238486255,0.0,0.5462963698223894,b670fd4b
0.016572945800740084,0.09518707717328821,3.4130094051361084,5,0.5945035457611084,http://localhost:8002,1768821893,,False,1,be5f9b1d,2026-01-19_12-24-53,3.4169981479644775,3.4169981479644775,262711,sergio-XPS-15-9500,192.168.1.5,3.4169981479644775,1,True,False,True,0.6894923163644786,0.4890742911772068,0.0,0.4855884110840981,be5f9b1d
0.008251781930748131,0.08198754491185642,3.367403745651245,5,0.5863472938537597,http://localhost:8002,1768821900,,False,1,1c75b89c,2026-01-19_12-25-00,3.371392011642456,3.371392011642456,262819,sergio-XPS-15-9500,192.168.1.5,3.371392011642456,1,True,False,True,0.1150745104873075,0.32762735447067737,0.0,0.5208070473970087,1c75b89c
0.007922541795615114,0.07887346048819885,3.387901544570923,5,0.5906172752380371,http://localhost:8002,1768821907,,False,1,6340f2d6,2026-01-19_12-25-07,3.391674041748047,3.391674041748047,262936,sergio-XPS-15-9500,192.168.1.5,3.391674041748047,1,True,False,True,0.07997843641478165,0.4088133874043337,0.0,0.5627391657839758,6340f2d6
0.007922541795615114,0.07887346048819885,3.368699312210083,5,0.585447120666504,http://localhost:8002,1768821913,,False,1,7ffe088b,2026-01-19_12-25-13,3.372554302215576,3.372554302215576,263058,sergio-XPS-15-9500,192.168.1.5,3.372554302215576,1,True,False,True,0.07055815208796122,0.07907086437131383,0.0,0.46815861739605075,7ffe088b
0.007922541795615114,0.07887346048819885,3.376523733139038,5,0.5873369693756103,http://localhost:8002,1768821920,,False,1,f252a3e6,2026-01-19_12-25-20,3.3803553581237793,3.3803553581237793,263185,sergio-XPS-15-9500,192.168.1.5,3.3803553581237793,1,True,False,True,0.06870328017999491,0.03579995978472439,0.0,0.5047711345804472,f252a3e6
0.02490382433538609,0.09753449830381603,3.3904788494110107,5,0.5890470027923584,http://localhost:8002,1768821927,,False,1,edee0586,2026-01-19_12-25-27,3.394632577896118,3.394632577896118,263300,sergio-XPS-15-9500,192.168.1.5,3.394632577896118,1,True,False,True,0.0009275348433581271,0.031063654135949786,0.0,0.45979693397354415,edee0586
0.008414440034646826,0.07859969916180594,3.4424312114715576,5,0.5994386196136474,http://localhost:8002,1768821934,,False,1,ef76bf22,2026-01-19_12-25-34,3.446359395980835,3.446359395980835,263418,sergio-XPS-15-9500,192.168.1.5,3.446359395980835,1,True,False,True,0.03189500271483534,0.0016098696097210721,0.0,0.49583062638649,ef76bf22
0.007922541795615114,0.07887346048819885,3.445734977722168,5,0.6011210918426514,http://localhost:8002,1768821941,,False,1,f647f452,2026-01-19_12-25-41,3.449845314025879,3.449845314025879,263537,sergio-XPS-15-9500,192.168.1.5,3.449845314025879,1,True,False,True,0.06868764014547389,0.08690693420543298,0.0,0.42607348522409366,f647f452
0.007922541795615114,0.07887346048819885,3.4003381729125977,5,0.5931215763092041,http://localhost:8002,1768821947,,False,1,92f45b9b,2026-01-19_12-25-47,3.404212713241577,3.404212713241577,263672,sergio-XPS-15-9500,192.168.1.5,3.404212713241577,1,True,False,True,0.0725476612921705,0.08215869338356059,0.0,0.4170900315829183,92f45b9b
0.007922541795615114,0.07887346048819885,3.3902156352996826,5,0.5895231246948243,http://localhost:8002,1768821954,,False,1,7349d65b,2026-01-19_12-25-54,3.3941099643707275,3.3941099643707275,263792,sergio-XPS-15-9500,192.168.1.5,3.3941099643707275,1,True,False,True,0.07327612908475345,0.09511260866628114,0.0,0.42047687042215837,7349d65b
0.008631855890798765,0.08246566328471161,3.3953261375427246,5,0.5909849166870117,http://localhost:8002,1768821961,,False,1,dbe6de3f,2026-01-19_12-26-01,3.39920711517334,3.39920711517334,263908,sergio-XPS-15-9500,192.168.1.5,3.39920711517334,1,True,False,True,0.1407896872320316,0.07713209075208538,0.0,0.38134661262033054,dbe6de3f
0.007922541795615114,0.07887346048819885,3.451122760772705,5,0.6020939826965332,http://localhost:8002,1768821968,,False,1,7d295e31,2026-01-19_12-26-08,3.4549307823181152,3.4549307823181152,264023,sergio-XPS-15-9500,192.168.1.5,3.4549307823181152,1,True,False,True,0.06788051560872134,0.03348309120485185,0.0,0.476817937122221,7d295e31
74
src/run_tuning.py
Normal file
@@ -0,0 +1,74 @@
#!/usr/bin/env python3
"""Run hyperparameter tuning for OCR services."""

import sys
import argparse
from raytune_ocr import (
    check_workers, create_trainable, run_tuner, analyze_results,
    paddle_ocr_payload, doctr_payload, easyocr_payload,
    PADDLE_OCR_SEARCH_SPACE, DOCTR_SEARCH_SPACE, EASYOCR_SEARCH_SPACE,
    PADDLE_OCR_CONFIG_KEYS, DOCTR_CONFIG_KEYS, EASYOCR_CONFIG_KEYS,
)

SERVICES = {
    "paddle": {
        "ports": [8002],
        "payload_fn": paddle_ocr_payload,
        "search_space": PADDLE_OCR_SEARCH_SPACE,
        "config_keys": PADDLE_OCR_CONFIG_KEYS,
        "name": "PaddleOCR",
    },
    "doctr": {
        "ports": [8003],
        "payload_fn": doctr_payload,
        "search_space": DOCTR_SEARCH_SPACE,
        "config_keys": DOCTR_CONFIG_KEYS,
        "name": "DocTR",
    },
    "easyocr": {
        "ports": [8002],
        "payload_fn": easyocr_payload,
        "search_space": EASYOCR_SEARCH_SPACE,
        "config_keys": EASYOCR_CONFIG_KEYS,
        "name": "EasyOCR",
    },
}


def main():
    parser = argparse.ArgumentParser(description="Run OCR hyperparameter tuning")
    parser.add_argument("--service", choices=["paddle", "doctr", "easyocr"], required=True)
    parser.add_argument("--samples", type=int, default=64, help="Number of samples")
    args = parser.parse_args()

    cfg = SERVICES[args.service]
    print(f"\n{'='*50}")
    print(f"Hyperparameter Tuning: {cfg['name']}")
    print(f"Samples: {args.samples}")
    print(f"{'='*50}\n")

    # Check workers; abort early instead of launching a tuner with zero workers
    healthy = check_workers(cfg["ports"], cfg["name"])
    if not healthy:
        sys.exit(f"No healthy {cfg['name']} workers on ports {cfg['ports']}")

    # Create trainable and run tuning
    trainable = create_trainable(cfg["ports"], cfg["payload_fn"])
    results = run_tuner(
        trainable=trainable,
        search_space=cfg["search_space"],
        num_samples=args.samples,
        num_workers=len(healthy),
    )

    # Analyze results and write them to the results folder
    analyze_results(
        results,
        output_folder="results",
        prefix=f"raytune_{args.service}",
        config_keys=cfg["config_keys"],
    )

    print(f"\n{'='*50}")
    print("Tuning complete!")
    print(f"{'='*50}")


if __name__ == "__main__":
    main()
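The `SERVICES` dict in `run_tuning.py` is a small dispatch table keyed by the `--service` flag, so adding a new OCR backend only means adding one entry. A minimal self-contained sketch of the same pattern, with hypothetical stand-in payload builders (the real ones come from `raytune_ocr`):

```python
# Sketch of the dispatch-table pattern used by run_tuning.py.
# The payload builders below are hypothetical stand-ins, not the raytune_ocr API.

def paddle_ocr_payload(image_b64: str) -> dict:
    # Hypothetical: wrap an image for the PaddleOCR HTTP endpoint
    return {"engine": "paddle", "image": image_b64}

def doctr_payload(image_b64: str) -> dict:
    # Hypothetical: wrap an image for the DocTR HTTP endpoint
    return {"engine": "doctr", "image": image_b64}

SERVICES = {
    "paddle": {"ports": [8002], "payload_fn": paddle_ocr_payload},
    "doctr": {"ports": [8003], "payload_fn": doctr_payload},
}

def build_payload(service: str, image_b64: str) -> dict:
    # One lookup per request; an unknown service raises KeyError,
    # mirroring the argparse `choices` validation in run_tuning.py
    return SERVICES[service]["payload_fn"](image_b64)
```

In the repository this is driven through the compose service from the README, e.g. `docker compose -f docker-compose.tuning.paddle.yml run raytune --service paddle --samples 64`.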