PaddleOCR, EasyOCR and DocTR GPU support (#4)
All checks were successful
build_docker / essential (push) Successful in 0s
build_docker / build_cpu (push) Successful in 5m0s
build_docker / build_gpu (push) Successful in 22m55s
build_docker / build_easyocr (push) Successful in 18m47s
build_docker / build_easyocr_gpu (push) Successful in 19m0s
build_docker / build_raytune (push) Successful in 3m27s
build_docker / build_doctr (push) Successful in 19m42s
build_docker / build_doctr_gpu (push) Successful in 14m49s
237
.gitea/workflows/ci.yaml
Normal file
@@ -0,0 +1,237 @@
name: build_docker
run-name: ${{ gitea.event.head_commit.message }}

on:
  pull_request:
    types:
      - opened
      - synchronize
  push:
    branches:
      - main

env:
  PADDLE_VERSION: "3.0.0"

jobs:
  essential:
    runs-on: ubuntu-latest
    outputs:
      Version: 1.0.${{ gitea.run_number }}
      repo: seryus.ddns.net
      image_cpu: seryus.ddns.net/unir/paddle-ocr-cpu
      image_gpu: seryus.ddns.net/unir/paddle-ocr-gpu
      image_easyocr: seryus.ddns.net/unir/easyocr-cpu
      image_easyocr_gpu: seryus.ddns.net/unir/easyocr-gpu
      image_doctr: seryus.ddns.net/unir/doctr-cpu
      image_doctr_gpu: seryus.ddns.net/unir/doctr-gpu
      image_raytune: seryus.ddns.net/unir/raytune
    steps:
      - name: Output version info
        run: |
          echo "## Build Info" >> $GITHUB_STEP_SUMMARY
          echo "Version: 1.0.${{ gitea.run_number }}" >> $GITHUB_STEP_SUMMARY
          echo "Event: ${{ gitea.event_name }}" >> $GITHUB_STEP_SUMMARY

  # PaddleOCR CPU image (amd64 only)
  build_cpu:
    runs-on: ubuntu-latest
    needs: essential
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Gitea Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ needs.essential.outputs.repo }}
          username: username
          password: ${{ secrets.CI_READWRITE }}

      - name: Build and push CPU image
        uses: docker/build-push-action@v5
        with:
          context: src/paddle_ocr
          file: src/paddle_ocr/Dockerfile.cpu
          platforms: linux/amd64
          push: true
          tags: |
            ${{ needs.essential.outputs.image_cpu }}:${{ needs.essential.outputs.Version }}
            ${{ needs.essential.outputs.image_cpu }}:latest

  # PaddleOCR GPU image (amd64 only)
  build_gpu:
    runs-on: ubuntu-latest
    needs: essential
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Gitea Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ needs.essential.outputs.repo }}
          username: username
          password: ${{ secrets.CI_READWRITE }}

      - name: Build and push GPU image
        uses: docker/build-push-action@v5
        with:
          context: src/paddle_ocr
          file: src/paddle_ocr/Dockerfile.gpu
          platforms: linux/amd64
          push: true
          tags: |
            ${{ needs.essential.outputs.image_gpu }}:${{ needs.essential.outputs.Version }}
            ${{ needs.essential.outputs.image_gpu }}:latest

  # EasyOCR CPU image (amd64 only)
  build_easyocr:
    runs-on: ubuntu-latest
    needs: essential
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Gitea Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ needs.essential.outputs.repo }}
          username: username
          password: ${{ secrets.CI_READWRITE }}

      - name: Build and push EasyOCR image
        uses: docker/build-push-action@v5
        with:
          context: src/easyocr_service
          file: src/easyocr_service/Dockerfile
          platforms: linux/amd64
          push: true
          tags: |
            ${{ needs.essential.outputs.image_easyocr }}:${{ needs.essential.outputs.Version }}
            ${{ needs.essential.outputs.image_easyocr }}:latest

  # EasyOCR GPU image (amd64 only)
  build_easyocr_gpu:
    runs-on: ubuntu-latest
    needs: essential
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Gitea Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ needs.essential.outputs.repo }}
          username: username
          password: ${{ secrets.CI_READWRITE }}

      - name: Build and push EasyOCR GPU image
        uses: docker/build-push-action@v5
        with:
          context: src/easyocr_service
          file: src/easyocr_service/Dockerfile.gpu
          platforms: linux/amd64
          push: true
          tags: |
            ${{ needs.essential.outputs.image_easyocr_gpu }}:${{ needs.essential.outputs.Version }}
            ${{ needs.essential.outputs.image_easyocr_gpu }}:latest

  # DocTR CPU image (amd64 only)
  build_doctr:
    runs-on: ubuntu-latest
    needs: essential
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Gitea Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ needs.essential.outputs.repo }}
          username: username
          password: ${{ secrets.CI_READWRITE }}

      - name: Build and push DocTR image
        uses: docker/build-push-action@v5
        with:
          context: src/doctr_service
          file: src/doctr_service/Dockerfile
          platforms: linux/amd64
          push: true
          tags: |
            ${{ needs.essential.outputs.image_doctr }}:${{ needs.essential.outputs.Version }}
            ${{ needs.essential.outputs.image_doctr }}:latest

  # DocTR GPU image (amd64 only)
  build_doctr_gpu:
    runs-on: ubuntu-latest
    needs: essential
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Gitea Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ needs.essential.outputs.repo }}
          username: username
          password: ${{ secrets.CI_READWRITE }}

      - name: Build and push DocTR GPU image
        uses: docker/build-push-action@v5
        with:
          context: src/doctr_service
          file: src/doctr_service/Dockerfile.gpu
          platforms: linux/amd64
          push: true
          tags: |
            ${{ needs.essential.outputs.image_doctr_gpu }}:${{ needs.essential.outputs.Version }}
            ${{ needs.essential.outputs.image_doctr_gpu }}:latest

  # Ray Tune OCR image (amd64 only)
  build_raytune:
    runs-on: ubuntu-latest
    needs: essential
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Gitea Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ needs.essential.outputs.repo }}
          username: username
          password: ${{ secrets.CI_READWRITE }}

      - name: Build and push Ray Tune image
        uses: docker/build-push-action@v5
        with:
          context: src/raytune
          file: src/raytune/Dockerfile
          platforms: linux/amd64
          push: true
          tags: |
            ${{ needs.essential.outputs.image_raytune }}:${{ needs.essential.outputs.Version }}
            ${{ needs.essential.outputs.image_raytune }}:latest
6
.gitignore
vendored
@@ -6,3 +6,9 @@ results
.DS_Store
.claude
node_modules
+src/paddle_ocr/wheels
+src/*.log
+src/output_*.ipynb
+debugset/
+
+src/dataset_hf/
138
README.md
@@ -18,11 +18,15 @@ Optimize PaddleOCR performance for academic documents in Spanish through

## Main Results

+**Table.** *Comparison of OCR metrics between the baseline and optimized configurations.*
+
| Model | CER | Character Accuracy | WER | Word Accuracy |
|-------|-----|--------------------|-----|---------------|
| PaddleOCR (Baseline) | 7.78% | 92.22% | 14.94% | 85.06% |
| **PaddleOCR-HyperAdjust** | **1.49%** | **98.51%** | **7.62%** | **92.38%** |

+*Source: Own elaboration.*

**Improvement achieved:** CER reduced by **80.9%**

### Optimal Configuration Found

@@ -56,6 +60,8 @@ PDF (UNIR academic document)

### Optimization Experiment

+**Table.** *Configuration parameters of the Ray Tune experiment.*
+
| Parameter | Value |
|-----------|-------|
| Number of trials | 64 |
@@ -64,6 +70,8 @@ PDF (UNIR academic document)
| Concurrent trials | 2 |
| Total time | ~6 hours (CPU) |

+*Source: Own elaboration.*

---
## Repository Structure

@@ -113,18 +121,50 @@ MastersThesis/

---

## GPU Performance

An additional validation with GPU acceleration was carried out to assess the practical viability of the approach in production scenarios.

**Table.** *CPU vs GPU performance comparison.*

| Metric | CPU | GPU (RTX 3060) | Speedup |
|--------|-----|----------------|---------|
| Time/page | 69.4 s | 0.55 s | **126x** |
| Full dataset (45 pages) | ~52 min | ~25 s | **126x** |

*Source: Own elaboration.*

### Model Recommendation

**Table.** *Comparison of PaddleOCR models on an RTX 3060.*

| Model | VRAM | Recommendation |
|-------|------|----------------|
| **PP-OCRv5 Mobile** | 0.06 GB | ✓ Recommended |
| PP-OCRv5 Server | 5.3 GB | ✗ Causes OOM on the RTX 3060 |

*Source: Own elaboration.*

**Conclusion:** For hardware with limited VRAM (≤6 GB), the Mobile models offer the best balance between accuracy and resources. GPU acceleration makes real-time processing viable.

---

## Requirements

**Table.** *Main project dependencies and versions used.*

| Component | Version |
|-----------|---------|
-| Python | 3.11.9 |
+| Python | 3.12.3 |
| PaddlePaddle | 3.2.2 |
| PaddleOCR | 3.3.2 |
| Ray | 2.52.1 |
-| Optuna | 4.6.0 |
+| Optuna | 4.7.0 |
| jiwer | (for CER/WER metrics) |
| PyMuPDF | (for PDF conversion) |

*Source: Own elaboration.*

---

## Usage

@@ -155,7 +195,7 @@ python src/paddle_ocr_tuning.py \

## Data Sources

-- **Dataset**: Instructions for preparing the TFE (UNIR), 24 pages
+- **Dataset**: 2 UNIR documents (45 pages total): TFE Instructions (24 pp.) + TFE Template (21 pp.)
- **Ray Tune results (MAIN)**: `src/raytune_paddle_subproc_results_20251207_192320.csv` - 64 optimization trials with all metrics and configurations

---
@@ -234,14 +274,18 @@ python3 apply_content.py

### Input and Output Files

+**Table.** *Generation scripts with their input and output files.*
+
| Script | Input | Output |
|--------|-------|--------|
| `generate_mermaid_figures.py` | `docs/*.md` (```mermaid``` blocks) | `thesis_output/figures/figura_*.png`, `figures_manifest.json` |
| `apply_content.py` | `instructions/plantilla_individual.htm`, `docs/*.md`, `thesis_output/figures/*.png` | `thesis_output/plantilla_individual.htm` |

+*Source: Own elaboration.*

### Automatically Generated Content

-- **30 tables** in APA format (Table X. *Title* + Source: ...)
+- **53 tables** in APA format (Table X. *Title* + Source: ...)
- **8 figures** from Mermaid (Figure X. *Title* + Source: Own elaboration)
- **25 references** in APA format with hanging indent
- **Resumen/Abstract** with keywords

@@ -252,48 +296,70 @@ python3 apply_content.py

## Remaining Work to Complete the TFM

-### Context: Hardware Limitations
+### Context: Hardware

-This work adopted the **hyperparameter optimization** strategy instead of **fine-tuning** because of:
-- **No dedicated GPU**: execution exclusively on CPU
-- **High inference time**: ~69 seconds/page on CPU
-- **Fine-tuning unfeasible**: training deep learning models without a GPU would take prohibitive amounts of time
+This work adopted the **hyperparameter optimization** strategy instead of **fine-tuning** because fine-tuning OCR models requires large labeled datasets and prohibitive training times.

+**Hardware used:**
+- **Optimization (CPU)**: the 64 Ray Tune trials ran on CPU (~69 s/page)
+- **Validation (GPU)**: validated on an RTX 3060, achieving a 126x speedup (0.55 s/page)

+Hyperparameter optimization proved to be an **effective alternative** to fine-tuning, achieving an 80.9% CER reduction without retraining the model.

-### Completed Tasks

-- [x] **docs/ structure following the UNIR template**: all chapters follow the exact numbering (1.1, 1.2, etc.)
-- [x] **Add Mermaid diagrams**: 7 diagrams added (OCR pipeline, Ray Tune architecture, comparison charts)
-- [x] **Generate unified TFM document**: the `apply_content.py` script generates the complete document from docs/
-- [x] **Convert Mermaid to PNG**: the `generate_mermaid_figures.py` script generates figures automatically

-### Pending Tasks

-#### 1. Validation of the Approach (High Priority)
-- [ ] **Cross-validation on other documents**: evaluate the optimal configuration on other Spanish document types (invoices, forms, contracts) to verify generalization
-- [ ] **Extend the dataset**: the current dataset has only 24 pages; build a larger, more diverse corpus (at least 100 pages)
-- [ ] **Ground-truth validation**: manually review the automatically extracted reference text to ensure its accuracy

-#### 2. Additional Experimentation (Medium Priority)
-- [ ] **Explore `text_det_unclip_ratio`**: this parameter was fixed at 0.0; including it in the search space could improve results
-- [ ] **Comparison with fine-tuning** (if GPU access is obtained): quantify the performance gap between hyperparameter optimization and true fine-tuning
-- [ ] **GPU evaluation**: measure inference times with GPU acceleration for production scenarios

-#### 3. Documentation and Presentation (High Priority)
+#### Required for Submission
-- [ ] **Final document review**: open in Word, update indexes (Ctrl+A → F9), adjust figures, save as .docx
- [ ] **Create presentation**: prepare slides for the TFM defense
+- [ ] **Final document review**: verify formatting, indexes and content in Word

-#### 4. Future Extensions (Optional)
-- [ ] **Automatic configuration tool**: develop a tool that automatically determines the optimal configuration for a new document type
-- [ ] **Public benchmark for Spanish**: publish an OCR benchmark for Spanish documents to ease comparison of solutions
-- [ ] **Multi-objective optimization**: consider CER, WER and inference time simultaneously
+#### Optional (Future Improvements)
+- [ ] **Cross-validation**: evaluate the configuration on other documents (invoices, forms)
+- [ ] **Explore `text_det_unclip_ratio`**: parameter fixed at 0.0, could improve results
+- [ ] **Comparison with fine-tuning**: quantify the gap vs true fine-tuning
+- [ ] **Automatic configuration tool**: auto-detect the optimal configuration per document
+- [ ] **Public benchmark for Spanish**: ease comparison of OCR solutions

-### Recommended Next Steps
+#### Completed
+- [x] **docs/ structure following the UNIR template**
+- [x] **Mermaid diagrams**: 8 figures generated
+- [x] **Unified TFM document**: `apply_content.py` script
+- [x] **GPU evaluation**: RTX 3060 - 126x faster (0.55 s/page)

-1. **Immediately**: open the generated document in Word, update indexes (Ctrl+A, F9), save as .docx
-2. **Short term**: validate on 2-3 additional document types to demonstrate generalization
-3. **For the defense**: create a presentation with result visualizations
+### Dataset

+The dataset contains **45 pages** from 2 UNIR documents:
+- `src/dataset/0/`: TFE Instructions (24 pages)
+- `src/dataset/1/`: TFE Template (21 pages)

+#### Hugging Face Format

+The dataset is available in Hugging Face format in `src/dataset_hf/`:

+```
+src/dataset_hf/
+├── README.md          # Dataset card
+├── metadata.jsonl     # Metadata (image_path, text, doc_id, page_num)
+└── data/              # 45 PNG images
+```
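
For illustration, the pairs can be read straight from `metadata.jsonl`; a minimal loading sketch, assuming the Hugging Face `datasets` library (the call is illustrative and not part of this PR):

```python
# Minimal sketch: read the image/text pairs from metadata.jsonl.
# Assumes the `datasets` library is installed; field names follow the
# metadata listed above (image_path, text, doc_id, page_num).
from datasets import load_dataset

ds = load_dataset("json", data_files="src/dataset_hf/metadata.jsonl", split="train")
print(len(ds))  # expected: 45 pages
print(ds[0]["image_path"], ds[0]["doc_id"], ds[0]["page_num"])
```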

+#### Generate/Regenerate the Dataset

+```bash
+# Convert from the original format to HF
+source .venv/bin/activate
+python src/dataset_formatting/convert_to_hf_dataset.py
+
+# Upload to Gitea packages (requires GITEA_TOKEN)
+./src/dataset_formatting/upload-dataset.sh $GITEA_TOKEN
+```

+#### Download the Dataset

+```bash
+# From Gitea packages
+curl -O https://seryus.ddns.net/api/packages/unir/generic/ocr-dataset-spanish/1.0.0/dataset-1.0.0.tar.gz
+tar -xzf dataset-1.0.0.tar.gz -C src/dataset_hf/
+```

---
apply_content.py
@@ -4,9 +4,11 @@
import re
import os
from bs4 import BeautifulSoup, NavigableString
+from latex2mathml.converter import convert as latex_to_mathml

-BASE_DIR = '/Users/sergio/Desktop/MastersThesis'
-TEMPLATE = os.path.join(BASE_DIR, 'thesis_output/plantilla_individual.htm')
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEMPLATE_INPUT = os.path.join(BASE_DIR, 'instructions/plantilla_individual.htm')
+TEMPLATE_OUTPUT = os.path.join(BASE_DIR, 'thesis_output/plantilla_individual.htm')
DOCS_DIR = os.path.join(BASE_DIR, 'docs')

# Global counters for tables and figures
@@ -33,6 +35,32 @@ def md_to_html_para(text):
    text = re.sub(r'\*([^*]+)\*', r'<i>\1</i>', text)
    # Inline code
    text = re.sub(r'`([^`]+)`', r'<span style="font-family:Consolas;font-size:10pt">\1</span>', text)
+    # Links [text](url) -> <a href="url">text</a>
+    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2">\1</a>', text)
    return text

+def convert_latex_formulas(text):
+    """Convert LaTeX formulas to MathML for Word compatibility."""
+    # Block formulas $$...$$
+    def convert_block(match):
+        latex = match.group(1)
+        try:
+            mathml = latex_to_mathml(latex, display="block")
+            return f'<p class=MsoNormal style="text-align:center">{mathml}</p>'
+        except:
+            return match.group(0)  # Keep original if conversion fails
+
+    text = re.sub(r'\$\$([^$]+)\$\$', convert_block, text)
+
+    # Inline formulas $...$
+    def convert_inline(match):
+        latex = match.group(1)
+        try:
+            return latex_to_mathml(latex, display="inline")
+        except:
+            return match.group(0)
+
+    text = re.sub(r'\$([^$]+)\$', convert_inline, text)
+    return text

def extract_table_title(lines, current_index):
@@ -168,6 +196,7 @@ def parse_md_to_html_blocks(md_content):

    # Check if previous line has table title (e.g., **Tabla 1.** *Title*)
    table_title = None
+    alt_title = None  # Alternative title from **bold text:** pattern
    table_source = "Elaboración propia"

    # Look back for table title
@@ -177,6 +206,9 @@ def parse_md_to_html_blocks(md_content):
            # Extract title text
            table_title = re.sub(r'\*+', '', prev_line).strip()
            break
+        elif prev_line.startswith('**') and prev_line.endswith(':**'):
+            # Alternative: **Bold title:** pattern (for informal tables)
+            alt_title = re.sub(r'\*+', '', prev_line).rstrip(':').strip()
        elif prev_line and not prev_line.startswith('|'):
            break

@@ -197,26 +229,30 @@ def parse_md_to_html_blocks(md_content):
    # Word TOC looks for text with Caption style - anchor must be outside main caption text
    bookmark_id = f"_Ref_Tab{table_counter}"
    if table_title:
-        clean_title = table_title.replace(f"Tabla {table_counter}.", "").strip()
+        # Remove any "Tabla X." or "Tabla AX." pattern from the title
+        clean_title = re.sub(r'^Tabla\s+[A-Z]?\d+\.\s*', '', table_title).strip()
+    elif alt_title:
+        # Use alternative title from **bold text:** pattern
+        clean_title = alt_title
    else:
        clean_title = "Tabla de datos."
    html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Tabla \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{table_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')

    # Build table HTML with APA style (horizontal lines only, no vertical)
-    table_html = '<table class=MsoTableGrid border=0 cellspacing=0 cellpadding=0 style="border-collapse:collapse;border:none">'
+    table_html = '<table class=MsoTableGrid border=1 cellspacing=0 cellpadding=0 style="border-collapse:collapse;margin-left:auto;margin-right:auto;mso-table-style-name:\'Plain Table 1\'">'
    for j, tline in enumerate(table_lines):
        cells = [c.strip() for c in tline.split('|')[1:-1]]
        table_html += '<tr>'
        for cell in cells:
            if j == 0:
                # Header row: top and bottom border, bold text
-                table_html += f'<td style="border-top:solid windowtext 1.0pt;border-bottom:solid windowtext 1.0pt;border-left:none;border-right:none;padding:5px"><p class=MsoNormal style="margin:0"><b><span lang=ES>{md_to_html_para(cell)}</span></b></p></td>'
+                table_html += f'<td style="border-top:solid windowtext 1.0pt;border-bottom:solid windowtext 1.0pt;border-left:none;border-right:none;padding:5px"><p class=MsoNormal style="margin:0;text-align:center"><b><span lang=ES>{md_to_html_para(cell)}</span></b></p></td>'
            elif j == len(table_lines) - 1:
                # Last row: bottom border only
-                table_html += f'<td style="border-top:none;border-bottom:solid windowtext 1.0pt;border-left:none;border-right:none;padding:5px"><p class=MsoNormal style="margin:0"><span lang=ES>{md_to_html_para(cell)}</span></p></td>'
+                table_html += f'<td style="border-top:none;border-bottom:solid windowtext 1.0pt;border-left:none;border-right:none;padding:5px"><p class=MsoNormal style="margin:0;text-align:center"><span lang=ES>{md_to_html_para(cell)}</span></p></td>'
            else:
                # Middle rows: no borders
-                table_html += f'<td style="border:none;padding:5px"><p class=MsoNormal style="margin:0"><span lang=ES>{md_to_html_para(cell)}</span></p></td>'
+                table_html += f'<td style="border:none;padding:5px"><p class=MsoNormal style="margin:0;text-align:center"><span lang=ES>{md_to_html_para(cell)}</span></p></td>'
        table_html += '</tr>'
    table_html += '</table>'
    html_blocks.append(table_html)
@@ -240,6 +276,7 @@ def parse_md_to_html_blocks(md_content):
    if re.match(r'^[\-\*\+]\s', line):
        while i < len(lines) and re.match(r'^[\-\*\+]\s', lines[i]):
            item_text = lines[i][2:].strip()
+            item_text = convert_latex_formulas(item_text)
            html_blocks.append(f'<p class=MsoListParagraphCxSpMiddle style="margin-left:36pt;text-indent:-18pt"><span lang=ES style="font-family:Symbol">·</span><span lang=ES style="font-size:7pt"> </span><span lang=ES>{md_to_html_para(item_text)}</span></p>')
            i += 1
        continue
@@ -249,6 +286,7 @@ def parse_md_to_html_blocks(md_content):
        num = 1
        while i < len(lines) and re.match(r'^\d+\.\s', lines[i]):
            item_text = re.sub(r'^\d+\.\s*', '', lines[i]).strip()
+            item_text = convert_latex_formulas(item_text)
            html_blocks.append(f'<p class=MsoListParagraphCxSpMiddle style="margin-left:36pt;text-indent:-18pt"><span lang=ES>{num}.<span style="font-size:7pt"> </span>{md_to_html_para(item_text)}</span></p>')
            num += 1
            i += 1
@@ -273,7 +311,12 @@ def parse_md_to_html_blocks(md_content):
            i += 1

        para_text = ' '.join(para_lines)
-        html_blocks.append(f'<p class=MsoNormal><span lang=ES>{md_to_html_para(para_text)}</span></p>')
+        para_text = convert_latex_formulas(para_text)
+        # Check if paragraph contains MathML (already wrapped)
+        if '<math' in para_text:
+            html_blocks.append(para_text)
+        else:
+            html_blocks.append(f'<p class=MsoNormal><span lang=ES>{md_to_html_para(para_text)}</span></p>')

    return '\n\n'.join(html_blocks)

@@ -365,7 +408,7 @@ def main():
    global table_counter, figure_counter

    print("Reading template...")
-    html_content = read_file(TEMPLATE)
+    html_content = read_file(TEMPLATE_INPUT)
    soup = BeautifulSoup(html_content, 'html.parser')

    print("Reading docs content...")
@@ -595,9 +638,9 @@ def main():

    print("Saving modified template...")
    output_html = str(soup)
-    write_file(TEMPLATE, output_html)
+    write_file(TEMPLATE_OUTPUT, output_html)

-    print(f"✓ Done! Modified: {TEMPLATE}")
+    print(f"✓ Done! Modified: {TEMPLATE_OUTPUT}")
    print("\nTo convert to DOCX:")
    print("1. Open the .htm file in Microsoft Word")
    print("2. Replace [Insertar diagrama Mermaid aquí] placeholders with actual diagrams")
@@ -18,6 +18,8 @@ Document processing in Spanish has particularities that complicate

Table 1 summarizes the main linguistic challenges of OCR in Spanish:

+**Table 1.** *Linguistic challenges specific to OCR in Spanish.*
+
| Challenge | Description | Impact on OCR |
|-----------|-------------|---------------|
| Special characters | ñ, á, é, í, ó, ú, ü, ¿, ¡ | Confusion with similar characters (n/ñ, a/á) |
@@ -25,7 +27,7 @@ Table 1 summarizes the main linguistic challenges of OCR in Spanish:
| Abbreviations | Dr., Sra., Ud., etc. | Internal periods confuse segmentation |
| Proper names | Accents in surnames (García, Martínez) | Databases without Unicode support |

-*Table 1. Linguistic challenges specific to OCR in Spanish. Source: Own elaboration.*
+*Source: Own elaboration.*

Beyond the linguistic aspects, academic and administrative documents in Spanish have typographic features that complicate recognition: font variations between headings, body text and footnotes; tables with borders and cells; institutional logos; watermarks; and graphic elements such as signatures or stamps. These elements introduce noise that can propagate to downstream applications such as named entity extraction or semantic analysis.

@@ -37,6 +39,8 @@ Adapting pretrained models to specific domains typically requires

Table 2 illustrates the typical requirements of different OCR improvement strategies:

+**Table 2.** *Comparison of OCR model improvement strategies.*
+
| Strategy | Data required | Hardware | Time | Expertise |
|----------|---------------|----------|------|-----------|
| Full fine-tuning | >10,000 labeled images | GPU (≥16 GB VRAM) | Days-Weeks | High |
@@ -44,7 +48,7 @@ Table 2 illustrates the typical requirements of different OCR improvement strategies
| Transfer learning | >500 labeled images | GPU (≥8 GB VRAM) | Hours | Medium |
| **Hyperparameter optimization** | **<100 validation images** | **CPU is enough** | **Hours** | **Low-Medium** |

-*Table 2. Comparison of OCR model improvement strategies. Source: Own elaboration.*
+*Source: Own elaboration.*

### The opportunity: optimization without fine-tuning

@@ -88,6 +92,8 @@ A technically superior but impractical solution has limited value. This

This work focuses specifically on:

+**Table 3.** *Delimitation of the scope of the work.*
+
| Aspect | In scope | Out of scope |
|--------|----------|--------------|
| **Document type** | Digital academic documents (PDF) | Scanned documents, manuscripts |
@@ -96,7 +102,7 @@ This work focuses specifically on:
| **Improvement method** | Hyperparameter optimization | Fine-tuning, data augmentation |
| **Hardware** | CPU execution | GPU acceleration |

-*Table 3. Delimitation of the scope of the work. Source: Own elaboration.*
+*Source: Own elaboration.*

### Relevance and beneficiaries
@@ -8,6 +8,8 @@ This chapter establishes the objectives of the work following the SMART

### SMART Justification of the General Objective

+**Table 4.** *SMART justification of the general objective.*
+
| Criterion | Fulfillment |
|-----------|-------------|
| **Specific (S)** | What is to be achieved is clearly defined: optimize PaddleOCR through hyperparameter tuning for documents in Spanish |
@@ -16,6 +18,8 @@ This chapter establishes the objectives of the work following the SMART
| **Relevant (R)** | The impact is demonstrable: it improves text extraction from academic documents without additional infrastructure costs |
| **Time-bound (T)** | The deadline is one semester, corresponding to the TFM |

+*Source: Own elaboration.*

## Specific objectives

### OE1: Compare open-source OCR solutions
@@ -100,45 +104,32 @@ flowchart LR

#### ImageTextDataset Class

-A Python class was implemented to load image-text pairs:
-
-```python
-class ImageTextDataset:
-    def __init__(self, root):
-        # Load (image, text) pairs from paired folders
-
-    def __getitem__(self, idx):
-        # Returns (PIL.Image, str)
-```
+A Python class was implemented to load image-text pairs, returning (PIL.Image, str) tuples from paired folders. The full implementation is available in `src/ocr_benchmark_notebook.ipynb` (see Annex A).

### Phase 2: Comparative Benchmark

#### Evaluated Models

+**Table 5.** *OCR models evaluated in the initial benchmark.*
+
| Model | Version | Configuration |
|-------|---------|---------------|
| EasyOCR | - | Languages: ['es', 'en'] |
| PaddleOCR | PP-OCRv5 | server_det + server_rec models |
| DocTR | - | db_resnet50 + sar_resnet31 |

+*Source: Own elaboration.*

#### Evaluation Metrics

-The `jiwer` library was used to compute:
-
-```python
-from jiwer import wer, cer
-
-def evaluate_text(reference, prediction):
-    return {
-        'WER': wer(reference, prediction),
-        'CER': cer(reference, prediction)
-    }
-```
+The `jiwer` library was used to compute CER and WER by comparing the reference text with the OCR model's prediction. The implementation is available in `src/ocr_benchmark_notebook.ipynb` (see Annex A).

### Phase 3: Search Space

#### Selected Hyperparameters

+**Table 6.** *Hyperparameters selected for optimization.*
+
| Parameter | Type | Range/Values | Description |
|-----------|------|--------------|-------------|
| `use_doc_orientation_classify` | Boolean | [True, False] | Document orientation classification |
@@ -149,76 +140,42
| `text_det_unclip_ratio` | Fixed | 0.0 | Expansion coefficient (fixed) |
| `text_rec_score_thresh` | Continuous | [0.0, 0.7] | Recognition confidence threshold |

+*Source: Own elaboration.*

#### Ray Tune Configuration

-```python
-from ray import tune
-from ray.tune.search.optuna import OptunaSearch
-
-search_space = {
-    "use_doc_orientation_classify": tune.choice([True, False]),
-    "use_doc_unwarping": tune.choice([True, False]),
-    "textline_orientation": tune.choice([True, False]),
-    "text_det_thresh": tune.uniform(0.0, 0.7),
-    "text_det_box_thresh": tune.uniform(0.0, 0.7),
-    "text_det_unclip_ratio": tune.choice([0.0]),
-    "text_rec_score_thresh": tune.uniform(0.0, 0.7),
-}
-
-tuner = tune.Tuner(
-    trainable_paddle_ocr,
-    tune_config=tune.TuneConfig(
-        metric="CER",
-        mode="min",
-        search_alg=OptunaSearch(),
-        num_samples=64,
-        max_concurrent_trials=2
-    )
-)
-```
+The search space was defined using `tune.choice()` for Boolean parameters and `tune.uniform()` for continuous parameters, with OptunaSearch as the optimization algorithm configured to minimize CER over 64 trials. The full implementation is available in `src/raytune/raytune_ocr.py` (see Annex A).
### Phase 4: Optimization Run

#### Execution Architecture

-Due to incompatibilities between Ray and PaddleOCR in the same process, a subprocess-based architecture was implemented:
+A Docker-container-based architecture was implemented to isolate the OCR services and ease reproducibility (see Section 4.2.3 for architecture details).

-```mermaid
----
-title: "Execution architecture with subprocesses"
----
-flowchart LR
-    A["Ray Tune (main process)"]
-
-    A --> B["Subprocess 1: paddle_ocr_tuning.py --config"]
-    B --> B_out["Returns JSON with metrics"]
-
-    A --> C["Subprocess 2: paddle_ocr_tuning.py --config"]
-    C --> C_out["Returns JSON with metrics"]
-```

+#### Running with Docker Compose

-#### Evaluation Script (paddle_ocr_tuning.py)
-
-The script receives hyperparameters via the command line:
+The services are orchestrated with Docker Compose (`src/docker-compose.tuning.*.yml`):

```bash
-python paddle_ocr_tuning.py \
-    --pdf-folder ./dataset \
-    --textline-orientation True \
-    --text-det-box-thresh 0.5 \
-    --text-det-thresh 0.4 \
-    --text-rec-score-thresh 0.6
+# Start the OCR service
+docker compose -f docker-compose.tuning.doctr.yml up -d doctr-gpu
+
+# Run the optimization (64 trials)
+docker compose -f docker-compose.tuning.doctr.yml run raytune --service doctr --samples 64
+
+# Stop the services
+docker compose -f docker-compose.tuning.doctr.yml down
```

-And it returns metrics in JSON format:
+The OCR service exposes a REST API that returns metrics in JSON format:

```json
{
-  "CER": 0.0125,
-  "WER": 0.1040,
-  "TIME": 331.09,
+  "CER": 0.0149,
+  "WER": 0.0762,
+  "TIME": 15.8,
  "PAGES": 5,
-  "TIME_PER_PAGE": 66.12
+  "TIME_PER_PAGE": 3.16
}
```
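
For illustration, a trial's client side could query that API as below; a minimal sketch, where the `/evaluate` endpoint name comes from the architecture notes in this PR but the host, port and payload shape are assumptions:

```python
# Illustrative client call to the OCR service's REST API (not from the repo).
# Host/port and the payload shape are assumed; the parameter names and values
# are the optimized configuration reported later in this document.
import requests

config = {
    "textline_orientation": True,
    "text_det_thresh": 0.4690,
    "text_det_box_thresh": 0.5412,
    "text_rec_score_thresh": 0.6350,
}
resp = requests.post("http://localhost:8000/evaluate", json=config, timeout=600)
metrics = resp.json()  # e.g. {"CER": ..., "WER": ..., "TIME": ..., "PAGES": ..., "TIME_PER_PAGE": ...}
print(metrics["CER"], metrics["TIME_PER_PAGE"])
```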

@@ -235,23 +192,67 @@ And it returns metrics in JSON format:

#### Hardware

+**Table 7.** *Hardware specifications of the development environment.*
+
| Component | Specification |
|-----------|---------------|
-| CPU | Intel Core (model to be specified) |
-| RAM | 16 GB |
-| GPU | Not available (CPU execution) |
+| CPU | AMD Ryzen 7 5800H |
+| RAM | 16 GB DDR4 |
+| GPU | NVIDIA RTX 3060 Laptop (5.66 GB VRAM) |
+| Storage | SSD |

+*Source: Own elaboration.*

#### Software

+**Table 8.** *Software versions used.*
+
| Component | Version |
|-----------|---------|
-| Operating system | Windows 10/11 |
-| Python | 3.11.9 |
+| Operating system | Ubuntu 24.04.3 LTS |
+| Python | 3.12.3 |
| PaddleOCR | 3.3.2 |
| PaddlePaddle | 3.2.2 |
| Ray | 2.52.1 |
-| Optuna | 4.6.0 |
+| Optuna | 4.7.0 |

+*Source: Own elaboration.*

+#### Justification of Local vs Cloud Execution

+The decision to run the experiments on local hardware instead of cloud services is based on an analysis of costs and operational benefits.

+**Table 9.** *GPU costs on cloud platforms.*

+| Platform | GPU | Cost/Hour | Monthly Cost |
+|----------|-----|-----------|--------------|
+| AWS EC2 g4dn.xlarge | NVIDIA T4 (16 GB) | $0.526 | ~$384 |
+| Google Colab Pro | T4/P100 | ~$1.30 | $10 + CU extras |
+| Google Colab Pro+ | T4/V100/A100 | ~$1.30 | $50 + CU extras |

+*Source: Own elaboration from public AWS and Google Cloud prices (January 2026).*

+For this project's specific tasks, the estimated cloud costs would be:

+**Table 10.** *Project cost analysis on cloud platforms.*

+| Task | GPU time | AWS cost | Colab Pro cost |
+|------|----------|----------|----------------|
+| Hyperparameter tuning (64×3 trials) | ~3 hours | ~$1.58 | ~$3.90 |
+| Full evaluation (45 pages) | ~5 min | ~$0.04 | ~$0.11 |
+| Development and debugging (20 hours/month) | 20 hours | ~$10.52 | ~$26.00 |

+*Source: Own elaboration.*

+The advantages of local execution include:

+1. **Zero GPU cost**: the RTX 3060 is already available on the development machine
+2. **No time limits**: AWS and Colab impose session timeouts that interrupt long experiments
+3. **Instant access**: no cloud instance provisioning time
+4. **Local storage**: dataset and results on disk without transfer costs
+5. **Fast iteration**: immediate restart of Docker containers for debugging

+For a research project with multiple hyperparameter tuning iterations, local execution saves roughly $50-100 per month compared to cloud services, besides offering greater flexibility in iteration speed during development.

### Methodological Limitations
@@ -34,6 +34,11 @@ Three open-source OCR solutions representative of the state of the art were selected

*Source: Own elaboration.*

+**Docker images available in the project registry** (a pull sketch follows below):
+- PaddleOCR: `seryus.ddns.net/unir/paddle-ocr-gpu`, `seryus.ddns.net/unir/paddle-ocr-cpu`
+- EasyOCR: `seryus.ddns.net/unir/easyocr-gpu`
+- DocTR: `seryus.ddns.net/unir/doctr-gpu`
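
A minimal way to fetch these images, assuming access to the registry; the tag scheme follows the CI workflow in this PR, which pushes `latest` and a `1.0.N` version:

```bash
# Pull the GPU images published by the CI pipeline (registry names from the list above).
docker pull seryus.ddns.net/unir/paddle-ocr-gpu:latest
docker pull seryus.ddns.net/unir/easyocr-gpu:latest
docker pull seryus.ddns.net/unir/doctr-gpu:latest
```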

### Success Criteria

The criteria established to evaluate the solutions were:

@@ -65,121 +70,25 @@ The document "Instrucciones para la redacción y elaboración del TFE" was used

#### Conversion Process

-The PDF-to-image conversion was performed with PyMuPDF (fitz):
-
-```python
-import fitz  # PyMuPDF
-
-def pdf_to_images(pdf_path, output_dir, dpi=300):
-    doc = fitz.open(pdf_path)
-    for page_num, page in enumerate(doc):
-        # Transformation matrix for 300 DPI
-        mat = fitz.Matrix(dpi/72, dpi/72)
-        pix = page.get_pixmap(matrix=mat)
-        pix.save(f"{output_dir}/page_{page_num:04d}.png")
-```
-
-The 300 DPI resolution was selected as the standard for document OCR, providing enough detail for small characters without producing excessively large files.
+The PDF-to-image conversion was performed with PyMuPDF (fitz) at 300 DPI, the standard resolution for document OCR, which provides enough detail for small characters without producing excessively large files. The implementation is available in `src/ocr_benchmark_notebook.ipynb` (see Annex A).

#### Ground Truth Extraction

-The reference text was extracted directly from the PDF with PyMuPDF:
-
-```python
-def extract_text(pdf_path):
-    doc = fitz.open(pdf_path)
-    text = ""
-    for page in doc:
-        blocks = page.get_text("dict")["blocks"]
-        for block in blocks:
-            if "lines" in block:
-                for line in block["lines"]:
-                    for span in line["spans"]:
-                        text += span["text"]
-                    text += "\n"
-    return text
-```
-
-This approach preserves the line structure of the original document, although it can introduce errors in very complex layouts (nested tables, multi-column text).
+The reference text was extracted directly from the PDF with PyMuPDF, preserving the line structure of the original document. This approach can introduce errors in very complex layouts (nested tables, multi-column text). The implementation is available in `src/ocr_benchmark_notebook.ipynb` (see Annex A).

#### Model Configuration

-According to the code in `ocr_benchmark_notebook.ipynb`:
+The configuration of each model is detailed in `src/ocr_benchmark_notebook.ipynb` (see Annex A):

-**EasyOCR**:
-```python
-import easyocr
-
-easyocr_reader = easyocr.Reader(['es', 'en'])  # Spanish and English
-results = easyocr_reader.readtext(image_path)
-text = ' '.join([r[1] for r in results])
-```
-
-The configuration includes support for Spanish and English, allowing words in both languages that may appear in academic documents (references, technical terms) to be recognized.
+- **EasyOCR**: configured with support for Spanish and English, allowing words in both languages that may appear in academic documents (references, technical terms) to be recognized.

-**PaddleOCR (PP-OCRv5)**:
-```python
-from paddleocr import PaddleOCR
-
-paddleocr_model = PaddleOCR(
-    text_detection_model_name="PP-OCRv5_server_det",
-    text_recognition_model_name="PP-OCRv5_server_rec",
-    use_doc_orientation_classify=False,
-    use_doc_unwarping=False,
-    use_textline_orientation=True,
-)
-
-result = paddleocr_model.predict(image_path)
-text = '\n'.join([line['rec_texts'][0] for line in result[0]['rec_res']])
-```
-
-The "server" models were used, which offer higher accuracy at the cost of longer inference times. The version used was PaddleOCR 3.2.0.
+- **PaddleOCR (PP-OCRv5)**: the "server" models (PP-OCRv5_server_det and PP-OCRv5_server_rec) were used, which offer higher accuracy at the cost of longer inference times. The version used was PaddleOCR 3.2.0.

-**DocTR**:
-```python
-from doctr.models import ocr_predictor
-
-doctr_model = ocr_predictor(
-    det_arch="db_resnet50",
-    reco_arch="sar_resnet31",
-    pretrained=True
-)
-
-result = doctr_model([image])
-text = result.render()
-```
-
-The db_resnet50 architecture was selected for detection and sar_resnet31 for recognition, representing a high-accuracy configuration.
+- **DocTR**: the db_resnet50 architecture was selected for detection and sar_resnet31 for recognition, representing a high-accuracy configuration.

#### Evaluation Metrics

-The `jiwer` library was used to compute CER and WER in a standardized way:
-
-```python
-from jiwer import wer, cer
-
-def evaluate_text(reference, prediction):
-    """
-    Compute error metrics between reference text and prediction.
-
-    Args:
-        reference: ground-truth text
-        prediction: text predicted by the OCR
-
-    Returns:
-        dict with WER and CER
-    """
-    # Basic normalization
-    ref_clean = reference.lower().strip()
-    pred_clean = prediction.lower().strip()
-
-    return {
-        'WER': wer(ref_clean, pred_clean),
-        'CER': cer(ref_clean, pred_clean)
-    }
-```
-
-Lowercase normalization and trimming of leading/trailing whitespace ensure a fair comparison that does not penalize capitalization differences.
+The `jiwer` library was used to compute CER and WER in a standardized way. Lowercase normalization and trimming of leading/trailing whitespace ensure a fair comparison that does not penalize capitalization differences. The implementation is available in `src/ocr_benchmark_notebook.ipynb` (see Annex A).
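
For reference, the standard definitions that `jiwer` implements, where S, D and I are the substitutions, deletions and insertions from the edit-distance alignment and N is the length of the reference in characters or words respectively:

$$\mathrm{CER} = \frac{S + D + I}{N_{\text{chars}}}, \qquad \mathrm{WER} = \frac{S + D + I}{N_{\text{words}}}$$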

### Benchmark Results

@@ -322,7 +231,7 @@ This section has presented:

### Introduction

-This section describes the PaddleOCR hyperparameter optimization process using Ray Tune with the Optuna search algorithm. The experiments were implemented in the notebook `src/paddle_ocr_fine_tune_unir_raytune.ipynb` and the results were stored in `src/raytune_paddle_subproc_results_20251207_192320.csv`.
+This section describes the PaddleOCR hyperparameter optimization process using Ray Tune with the Optuna search algorithm. The experiments were implemented in [`src/run_tuning.py`](https://github.com/seryus/MastersThesis/blob/main/src/run_tuning.py) with the utility library [`src/raytune_ocr.py`](https://github.com/seryus/MastersThesis/blob/main/src/raytune_ocr.py), and the results were stored in [`src/results/`](https://github.com/seryus/MastersThesis/tree/main/src/results).

Hyperparameter optimization is an alternative to traditional fine-tuning that does not require:
- Access to a dedicated GPU
@@ -339,59 +248,68 @@ The experiment ran in the following environment:

| Component | Version/Specification |
|-----------|-----------------------|
-| Operating system | Windows 10/11 |
-| Python | 3.11.9 |
+| Operating system | Ubuntu 24.04.3 LTS |
+| Python | 3.12.3 |
| PaddlePaddle | 3.2.2 |
| PaddleOCR | 3.3.2 |
| Ray | 2.52.1 |
-| Optuna | 4.6.0 |
-| CPU | Intel Core (multi-core) |
-| RAM | 16 GB |
-| GPU | Not available (CPU execution) |
+| Optuna | 4.7.0 |
+| CPU | AMD Ryzen 7 5800H |
+| RAM | 16 GB DDR4 |
+| GPU | NVIDIA RTX 3060 Laptop (5.66 GB VRAM) |

-*Source: Outputs of the notebook `src/paddle_ocr_fine_tune_unir_raytune.ipynb`.*
+*Source: Execution environment configuration. Results in `src/results/` generated by `src/run_tuning.py`.*

#### Execution Architecture

-Due to incompatibilities between Ray and PaddleOCR when run in the same process, a subprocess-based architecture was implemented:
+The Docker-container-based architecture is fundamental for this project because of the dependency conflicts inherent to the different components:

+- **Conflicts between OCR engines**: PaddleOCR, DocTR and EasyOCR have mutually incompatible dependencies (different versions of PyTorch/PaddlePaddle, OpenCV, etc.)
+- **CUDA/cuDNN incompatibilities**: each OCR engine requires specific CUDA and cuDNN versions that cannot coexist in a single virtual environment
+- **Ray Tune isolation**: Ray Tune has its own dependencies that can conflict with the OCR inference libraries

+This containerized architecture runs each component in its own isolated environment, communicating via a REST API:

```mermaid
---
-title: "Execution architecture with subprocesses"
+title: "Execution architecture with Docker Compose"
---
flowchart LR
-    A["Ray Tune (main process)"]
-
-    A --> B["Subprocess 1: paddle_ocr_tuning.py --config"]
-    B --> B_out["Returns JSON with metrics"]
-
-    A --> C["Subprocess 2: paddle_ocr_tuning.py --config"]
-    C --> C_out["Returns JSON with metrics"]
+    subgraph Docker["Docker Compose"]
+        A["RayTune Container"]
+        B["OCR Service Container"]
+    end
+
+    A -->|"HTTP POST /evaluate"| B
+    B -->|"JSON {CER, WER, TIME}"| A
+    A -.->|"Health check /health"| B
```

-The `src/paddle_ocr_tuning.py` script acts as a wrapper that:
-1. Receives hyperparameters via the command line
-2. Initializes PaddleOCR with the specified configuration
-3. Evaluates on the dataset
-4. Returns metrics in JSON format
+The containerized architecture (`src/docker-compose.tuning.*.yml`) provides:
+1. Dependency isolation between Ray Tune and the OCR engines
+2. Automatic health checks to ensure service availability
+3. Communication via REST API (`/health` and `/evaluate` endpoints)
+4. GPU support through nvidia-docker

```bash
-python paddle_ocr_tuning.py \
-    --pdf-folder ./dataset \
-    --textline-orientation True \
-    --text-det-box-thresh 0.5 \
-    --text-det-thresh 0.4 \
-    --text-rec-score-thresh 0.6
+# Start the OCR service with GPU
+docker compose -f docker-compose.tuning.doctr.yml up -d doctr-gpu
+
+# Run the optimization (64 trials)
+docker compose -f docker-compose.tuning.doctr.yml run raytune --service doctr --samples 64
+
+# Stop the services
+docker compose -f docker-compose.tuning.doctr.yml down
```

-Output:
+OCR service response:
```json
{
-  "CER": 0.0125,
-  "WER": 0.1040,
-  "TIME": 331.09,
+  "CER": 0.0149,
+  "WER": 0.0762,
+  "TIME": 15.8,
  "PAGES": 5,
-  "TIME_PER_PAGE": 66.12
+  "TIME_PER_PAGE": 3.16
}
```
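
As an illustration of the health-check handshake described above (endpoint name from the list above; host and port are assumptions):

```bash
# Illustrative readiness probe before launching trials (not from the repo).
curl -sf http://localhost:8000/health && echo "OCR service up"
```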

@@ -411,54 +329,11 @@ For the optimization phase the dataset was extended:

*Source: Own elaboration.*

-The `ImageTextDataset` class in `src/dataset_manager.py` handles loading the image-text pairs:
-
-```python
-class ImageTextDataset:
-    def __init__(self, root):
-        """
-        Load (image, text) pairs from paired folders.
-
-        Expected structure:
-        root/
-            0/
-                img/
-                    page_0001.png
-                txt/
-                    page_0001.txt
-        """
-        self.pairs = []
-        for doc_folder in sorted(os.listdir(root)):
-            img_folder = os.path.join(root, doc_folder, 'img')
-            txt_folder = os.path.join(root, doc_folder, 'txt')
-            # Load pairs...
-
-    def __getitem__(self, idx):
-        img_path, txt_path = self.pairs[idx]
-        return PIL.Image.open(img_path), open(txt_path).read()
-```
+The `ImageTextDataset` class handles loading the image-text pairs from the paired folder structure. The implementation is available in the repository (see Annex A).

#### Search Space

-The search space was defined considering the most relevant hyperparameters identified in the PaddleOCR documentation:
-
-```python
-from ray import tune
-from ray.tune.search.optuna import OptunaSearch
-
-search_space = {
-    # Boolean parameters
-    "use_doc_orientation_classify": tune.choice([True, False]),
-    "use_doc_unwarping": tune.choice([True, False]),
-    "textline_orientation": tune.choice([True, False]),
-
-    # Continuous parameters (thresholds)
-    "text_det_thresh": tune.uniform(0.0, 0.7),
-    "text_det_box_thresh": tune.uniform(0.0, 0.7),
-    "text_det_unclip_ratio": tune.choice([0.0]),  # Fixed
-    "text_rec_score_thresh": tune.uniform(0.0, 0.7),
-}
-```
+The search space was defined considering the most relevant hyperparameters identified in the PaddleOCR documentation, using `tune.choice()` for Boolean parameters and `tune.uniform()` for continuous thresholds. The implementation is available in `src/raytune/raytune_ocr.py` (see Annex A).

**Table 17.** *Detailed description of the search space.*

@@ -484,23 +359,7

#### Ray Tune Configuration

-```python
-tuner = tune.Tuner(
-    trainable_paddle_ocr,
-    tune_config=tune.TuneConfig(
-        metric="CER",
-        mode="min",
-        search_alg=OptunaSearch(),
-        num_samples=64,
-        max_concurrent_trials=2
-    ),
-    run_config=air.RunConfig(
-        verbose=2,
-        log_to_file=False
-    ),
-    param_space=search_space
-)
-```
+Ray Tune was configured with OptunaSearch as the search algorithm, optimizing CER over 64 trials with 2 concurrent executions. The implementation is available in `src/raytune/raytune_ocr.py` (see Annex A).

**Table 18.** *Ray Tune configuration parameters.*

@@ -613,7 +472,7 @@ Optimal configuration:
| text_det_unclip_ratio | 0.0 | 1.5 | -1.5 (fixed) |
| text_rec_score_thresh | **0.6350** | 0.5 | +0.135 |

-*Source: Notebook analysis.*
+*Source: Analysis of [`src/results/`](https://github.com/seryus/MastersThesis/tree/main/src/results) generated by [`src/run_tuning.py`](https://github.com/seryus/MastersThesis/blob/main/src/run_tuning.py).*

#### Correlation Analysis

@@ -628,7 +487,7 @@ The Pearson correlation between the continuous parameters and the metrics was computed
| `text_rec_score_thresh` | -0.161 | Weak negative correlation |
| `text_det_unclip_ratio` | NaN | Zero variance (fixed value) |

-*Source: Notebook analysis.*
+*Source: Analysis of [`src/results/`](https://github.com/seryus/MastersThesis/tree/main/src/results) generated by [`src/run_tuning.py`](https://github.com/seryus/MastersThesis/blob/main/src/run_tuning.py).*

**Table 24.** *Correlation of parameters with WER.*

@@ -638,7 +497,7
| `text_det_box_thresh` | +0.227 | Weak positive correlation |
| `text_rec_score_thresh` | -0.173 | Weak negative correlation |

-*Source: Notebook analysis.*
+*Source: Analysis of [`src/results/`](https://github.com/seryus/MastersThesis/tree/main/src/results) generated by [`src/run_tuning.py`](https://github.com/seryus/MastersThesis/blob/main/src/run_tuning.py).*

**Key finding**: the `text_det_thresh` parameter shows the strongest correlation (-0.52 with both metrics), indicating that higher values of this threshold tend to reduce the error. This threshold controls which pixels are considered "text" in the detector's probability map.
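
A correlation table like the ones above can be reproduced from the trial results with pandas; a sketch, where the CSV file name is hypothetical and the column names are assumed to match the hyperparameter names used in this section:

```python
# Sketch: Pearson correlation of the continuous hyperparameters vs CER/WER
# over the 64 Ray Tune trials. File and column names are assumptions.
import pandas as pd

df = pd.read_csv("src/results/raytune_results.csv")  # hypothetical file name
params = ["text_det_thresh", "text_det_box_thresh", "text_rec_score_thresh"]
corr = df[params + ["CER", "WER"]].corr(method="pearson")
print(corr.loc[params, ["CER", "WER"]])  # one row per parameter
```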
|
||||
|
||||
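The correlations above can be reproduced from the per-trial results; the sketch below uses pandas and assumes a results CSV with `CER`, `WER` and one column per parameter, as produced by `run_tuning.py` (the file name is hypothetical):

```python
import pandas as pd

# Load the per-trial results exported by the tuning run
df = pd.read_csv("results/raytune_paddle_results.csv")  # hypothetical file name

params = ["text_det_thresh", "text_det_box_thresh",
          "text_det_unclip_ratio", "text_rec_score_thresh"]

# Pearson correlation of each continuous parameter with CER and WER
for metric in ["CER", "WER"]:
    print(df[params].corrwith(df[metric], method="pearson"))
```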
The boolean parameter `textline_orientation` proved to have the greatest impact on the error metrics:

| True | 3.76% | 7.12% | 12.73% | 32 |
| False | 12.40% | 14.93% | 21.71% | 32 |

*Source: analysis of [`src/results/`](https://github.com/seryus/MastersThesis/tree/main/src/results) generated by [`src/run_tuning.py`](https://github.com/seryus/MastersThesis/blob/main/src/run_tuning.py).*

**Interpretation:**

#### Evaluation on the Full Dataset

The optimal configuration identified was evaluated on the full 24-page dataset and compared against the baseline configuration (PaddleOCR defaults). The most relevant optimized parameters were `textline_orientation=True`, `text_det_thresh=0.4690`, `text_det_box_thresh=0.5412`, and `text_rec_score_thresh=0.6350`.
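As an illustration, the following is a minimal sketch of how this configuration would be applied when instantiating the pipeline, assuming the constructor parameter names of PaddleOCR 3.x (`use_textline_orientation` being the keyword form of the `textline_orientation` option used by the evaluation service; the input image name is hypothetical):

```python
from paddleocr import PaddleOCR

# Optimized configuration found by Ray Tune
# (parameter names assumed per PaddleOCR 3.x)
ocr = PaddleOCR(
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
    use_textline_orientation=True,
    text_det_thresh=0.4690,
    text_det_box_thresh=0.5412,
    text_det_unclip_ratio=0.0,
    text_rec_score_thresh=0.6350,
)

result = ocr.predict("page_001.png")  # hypothetical input image
```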
**Table 27.** *Baseline vs optimized comparison (24 pages).*

| Configuration | CER | Char. accuracy | WER | Word accuracy |
|---------------|-----|----------------|-----|---------------|
| PaddleOCR (Baseline) | 7.78% | 92.22% | 14.94% | 85.06% |
| PaddleOCR-HyperAdjust | **1.49%** | **98.51%** | **7.62%** | **92.38%** |

*Source: final validation. Code in [`src/run_tuning.py`](https://github.com/seryus/MastersThesis/blob/main/src/run_tuning.py), results in [`src/results/`](https://github.com/seryus/MastersThesis/tree/main/src/results).*

#### Improvement Metrics

```mermaid
---
title: "Error reduction: Baseline vs Optimized"
---
xychart-beta
    x-axis ["CER Baseline", "CER Optimized", "WER Baseline", "WER Optimized"]
    y-axis "Error rate (%)" 0 --> 16
    bar [7.78, 1.49, 14.94, 7.62]
```

*Legend: CER = Character Error Rate, WER = Word Error Rate. Baseline = default PaddleOCR configuration. Optimized = configuration found by Ray Tune.*
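For reference, both metrics can be computed with the `jiwer` library listed in the project dependencies; a minimal sketch, assuming plain-text reference and OCR output strings:

```python
import jiwer

reference = "Índice de contenidos"   # ground-truth text
hypothesis = "Indice de contenidos"  # OCR output

print(f"CER: {jiwer.cer(reference, hypothesis):.4f}")
print(f"WER: {jiwer.wer(reference, hypothesis):.4f}")
```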
#### Practical Impact

**In a typical 10,000-character document:**

- Baseline (7.78% CER): roughly 778 incorrectly recognized characters
- Optimized (1.49% CER): roughly 149 incorrectly recognized characters, about 629 fewer errors per document

This section has presented:

1. **Experiment setup**: Docker Compose architecture, extended dataset, 7-dimensional search space

2. **Statistical results**:
   - Mean CER: 5.25% (std: 11.03%)

4. **Final improvement**: CER reduced from 7.78% to 1.49% (an 80.9% reduction)

**Data sources:**

- [`src/run_tuning.py`](https://github.com/seryus/MastersThesis/blob/main/src/run_tuning.py): main optimization script
- [`src/raytune_ocr.py`](https://github.com/seryus/MastersThesis/blob/main/src/raytune_ocr.py): Ray Tune utilities library
- [`src/results/`](https://github.com/seryus/MastersThesis/tree/main/src/results): CSV results of the trials
## Discussion and Analysis of Results

For Spanish academic documents similar to those evaluated:

**Table 31.** *Recommended configuration for PaddleOCR.*

| Parameter | Value | Priority | Rationale |
|-----------|-------|----------|-----------|
| `textline_orientation` | True | Mandatory | Reduces CER by 69.7% |
| `text_det_thresh` | 0.45 (range: 0.4-0.5) | Recommended | Strong correlation with CER |
| `text_rec_score_thresh` | 0.6 (range: 0.5-0.7) | Recommended | Filters out low-confidence recognitions |
| `text_det_box_thresh` | 0.55 (range: 0.5-0.6) | Optional | Moderate impact |
| `use_doc_orientation_classify` | False | Not recommended | Unnecessary for digital PDFs |
| `use_doc_unwarping` | False | Not recommended | Unnecessary for digital PDFs |

*Source: analysis of the optimization results.*
#### When to Apply This Methodology

**Main result**: the CER < 2% objective was achieved through hyperparameter optimization alone, without requiring fine-tuning or GPU resources.

**Data sources:**

- [`src/run_tuning.py`](https://github.com/seryus/MastersThesis/blob/main/src/run_tuning.py): main optimization script
- [`src/results/`](https://github.com/seryus/MastersThesis/tree/main/src/results): CSV results of the trials

**Docker images:**

- `seryus.ddns.net/unir/paddle-ocr-gpu`: PaddleOCR with GPU support
- `seryus.ddns.net/unir/easyocr-gpu`: EasyOCR with GPU support
- `seryus.ddns.net/unir/doctr-gpu`: DocTR with GPU support
### Validation with GPU Acceleration

To assess the practical viability of the optimized approach in production scenarios, an additional validation was carried out using GPU acceleration. This phase complements the CPU experiments presented above and demonstrates the applicability of the method when hardware with parallel processing capabilities is available.

#### GPU Environment Configuration

**Table 36.** *Specifications of the GPU validation environment.*

| Component | Specification |
|-----------|---------------|
| GPU | NVIDIA GeForce RTX 3060 Laptop |
| VRAM | 5.66 GB |
| CUDA | 12.4 |
| Operating system | Ubuntu 24.04.3 LTS |
| Kernel | 6.14.0-37-generic |

*Source: own elaboration.*

The validation environment represents consumer hardware typical for machine-learning application development, allowing performance to be evaluated under realistic deployment conditions.

#### CPU vs GPU Comparison

Processing time was evaluated using the optimized configuration identified in the previous phase, comparing CPU and GPU performance.

**Table 37.** *Comparative CPU vs GPU performance.*

| Metric | CPU | GPU (RTX 3060) | Speed-up factor |
|--------|-----|----------------|-----------------|
| Time/page | 69.4 s | 0.55 s | **126x** |
| Full dataset (45 pages) | ~52 min | ~25 s | **126x** |

*Source: own elaboration based on the experiments.*

The 126x speed-up obtained with the GPU transforms the practical applicability of the system. While CPU processing restricts usage to batch scenarios without time constraints, GPU speed enables interactive and near-real-time use cases.

#### PaddleOCR Model Comparison

PaddleOCR offers two model variants: Mobile (optimized for resource-constrained devices) and Server (higher accuracy at the cost of higher memory consumption). The viability of both variants was evaluated on the available hardware.

**Table 38.** *Mobile vs Server model comparison on the RTX 3060.*

| Model | Required VRAM | Result | Recommendation |
|-------|---------------|--------|----------------|
| PP-OCRv5 Mobile | 0.06 GB | Works correctly | ✓ Recommended |
| PP-OCRv5 Server | 5.3 GB | OOM on page 2 | ✗ Requires >8 GB VRAM |

*Source: own elaboration.*

The Server models, despite potentially offering higher accuracy, are unusable on hardware with limited VRAM (≤6 GB) due to Out of Memory errors. The Mobile models, with roughly 88 times lower memory consumption, run stably and provide sufficient performance for the evaluated use case.
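A minimal sketch of how the Mobile variants would be selected explicitly, assuming the model-name parameters of the PaddleOCR 3.x constructor:

```python
from paddleocr import PaddleOCR

# Explicitly select the Mobile detection/recognition models
# (assumed PaddleOCR 3.x parameter names); they fit in ~0.06 GB of VRAM
ocr = PaddleOCR(
    text_detection_model_name="PP-OCRv5_mobile_det",
    text_recognition_model_name="PP-OCRv5_mobile_rec",
)
```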
#### Conclusions of the GPU Validation

The GPU-accelerated validation supports the following conclusions:

1. **Significant speed-up**: the GPU provides a 126x speed-up over CPU, making real-time processing viable for interactive applications.

2. **Mobile models recommended**: for hardware with limited VRAM (≤6 GB), the PP-OCRv5 Mobile models offer the best balance between accuracy and resources, running stably without memory errors.

3. **Practical viability**: with a GPU, processing a complete document (45 pages) takes under 30 seconds, validating the applicability in production environments where response time is critical.

4. **Scalability**: the dockerized microservice architecture used for the GPU validation facilitates horizontal deployment, allowing processing to scale on demand.

This validation shows that the configuration optimized with Ray Tune not only improves accuracy (CER: 7.78% → 1.49%) but, combined with GPU acceleration, is practically applicable in real production scenarios.
The main objective of this work was to achieve a CER below 2% on Spanish academic documents. The results obtained confirm that this objective was met:

**Table 39.** *Fulfilment of the CER objective.*

| Metric | Target | Result |
|--------|--------|--------|
| CER | < 2% | **1.49%** |

*Source: own elaboration.*

### Specific Conclusions

**Regarding OE1 (comparison of OCR solutions)**:

**Regarding OE4 (optimization with Ray Tune)**:
- 64 trials were executed with the OptunaSearch algorithm
- The total experiment time was approximately 6 hours (on CPU)
- The Docker-container-based architecture made it possible to overcome incompatibilities between Ray and the OCR engines, while also improving portability and reproducibility

**Regarding OE5 (validation of the configuration)**:
- The optimal configuration was validated on the full 24-page dataset

3. **Automatic ground truth**: automatically extracting the reference text can introduce errors in complex layouts.

4. **Validation in a limited environment**: although GPU validation was performed (126x faster than CPU, at 0.55 s/page), the experiments ran on consumer hardware (RTX 3060). Enterprise-grade hardware could offer better performance.

5. **Unexplored parameter**: `text_det_unclip_ratio` remained fixed at 0.0 throughout the experiment.

3. **Extended dataset**: build a broader and more diverse corpus of Spanish documents.

### Research Lines

1. **Hyperparameter transfer learning**: investigate whether configurations that are optimal for one document type transfer to other domains.
The complete source code and data used in this work are available in the project repository.

The repository includes:

- **Dockerized OCR services**: PaddleOCR, DocTR and EasyOCR with GPU support
- **Evaluation scripts**: tools to evaluate and compare OCR models
- **Tuning scripts**: Ray Tune with Optuna for hyperparameter optimization
- **Dataset**: images and reference texts used
- **Results**: CSV files with the results of the 64 trials per service

## A.2 Repository Structure

```
MastersThesis/
├── docs/                          # Thesis documentation
│   └── metrics/                   # OCR performance metrics
│       ├── metrics.md             # Comparative summary
│       ├── metrics_paddle.md      # PaddleOCR results
│       ├── metrics_doctr.md       # DocTR results
│       └── metrics_easyocr.md     # EasyOCR results
├── src/
│   ├── paddle_ocr/                # PaddleOCR service
│   │   ├── Dockerfile.gpu         # GPU Docker image
│   │   ├── Dockerfile.cpu         # CPU Docker image
│   │   ├── docker-compose.yml     # Docker configuration
│   │   └── main.py                # FastAPI API
│   ├── doctr_service/             # DocTR service
│   │   ├── Dockerfile.gpu
│   │   ├── docker-compose.yml
│   │   └── main.py
│   ├── easyocr_service/           # EasyOCR service
│   │   ├── Dockerfile.gpu
│   │   ├── docker-compose.yml
│   │   └── main.py
│   ├── dataset/                   # Evaluation dataset
│   ├── raytune_ocr.py             # Shared Ray Tune utilities
│   └── results/                   # Tuning results (CSV)
└── .gitea/workflows/ci.yaml       # CI/CD pipeline
```
## A.3 Software Requirements

The following environment and dependencies are required to reproduce the experiments:

### Development System

**Table A1.** *Specifications of the development system.*

| Component | Specification |
|-----------|---------------|
| Operating system | Ubuntu 24.04.3 LTS |
| CPU | AMD Ryzen 7 5800H |
| RAM | 16 GB DDR4 |
| GPU | NVIDIA RTX 3060 Laptop (5.66 GB VRAM) |
| CUDA | 12.4 |

*Source: own elaboration.*

### Dependencies

**Table A2.** *Project dependencies.*

| Component | Version |
|-----------|---------|
| Python | 3.12.3 |
| Docker | 29.1.5 |
| NVIDIA Container Toolkit | Required for GPU |
| Ray | 2.52.1 |
| Optuna | 4.7.0 |

*Source: own elaboration.*
## A.4 Running the OCR Services

### PaddleOCR (Port 8002)

**Docker images:**

- GPU: `seryus.ddns.net/unir/paddle-ocr-gpu`
- CPU: `seryus.ddns.net/unir/paddle-ocr-cpu`

```bash
cd src/paddle_ocr

# GPU (recommended)
docker compose up -d

# CPU (about 126x slower)
docker compose -f docker-compose.cpu-registry.yml up -d
```

### DocTR (Port 8003)

**Docker image:** `seryus.ddns.net/unir/doctr-gpu`

```bash
cd src/doctr_service

# GPU
docker compose up -d
```

### EasyOCR (Port 8002)

**Docker image:** `seryus.ddns.net/unir/easyocr-gpu`

```bash
cd src/easyocr_service

# GPU
docker compose up -d
```
### Checking Service Health

```bash
# Check service health
curl http://localhost:8002/health

# Expected response:
# {"status": "ok", "model_loaded": true, "gpu_name": "NVIDIA GeForce RTX 3060"}
```

## A.5 Using the OCR API

### Evaluating the Full Dataset

```bash
# PaddleOCR - full evaluation
curl -X POST http://localhost:8002/evaluate_full \
  -H "Content-Type: application/json" \
  -d '{
    "pdf_folder": "/app/dataset",
    "save_output": true
  }'
```
### Evaluating with Optimized Hyperparameters

```bash
# PaddleOCR with the optimal configuration
curl -X POST http://localhost:8002/evaluate_full \
  -H "Content-Type: application/json" \
  -d '{
    "pdf_folder": "/app/dataset",
    "use_doc_orientation_classify": true,
    "use_doc_unwarping": false,
    "textline_orientation": true,
    "text_det_thresh": 0.0462,
    "text_det_box_thresh": 0.4862,
    "text_det_unclip_ratio": 0.0,
    "text_rec_score_thresh": 0.5658,
    "save_output": true
  }'
```
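Equivalently, the same endpoint can be called from Python; a minimal sketch using `requests`, with the payload keys mirroring the curl example above:

```python
import requests

payload = {
    "pdf_folder": "/app/dataset",
    "use_doc_orientation_classify": True,
    "use_doc_unwarping": False,
    "textline_orientation": True,
    "text_det_thresh": 0.0462,
    "text_det_box_thresh": 0.4862,
    "text_det_unclip_ratio": 0.0,
    "text_rec_score_thresh": 0.5658,
    "save_output": True,
}

# Long timeout: a full-dataset evaluation can take minutes
resp = requests.post("http://localhost:8002/evaluate_full", json=payload, timeout=3600)
resp.raise_for_status()
print(resp.json())  # aggregated CER/WER metrics returned by the service
```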
## A.6 Hyperparameter Tuning with Ray Tune

### Running the Tuning

```bash
cd src

# Activate the virtual environment
source ../.venv/bin/activate

# PaddleOCR (64 samples)
python -c "
from raytune_ocr import *

ports = [8002]
check_workers(ports, 'PaddleOCR')
trainable = create_trainable(ports, paddle_ocr_payload)
results = run_tuner(trainable, PADDLE_OCR_SEARCH_SPACE, num_samples=64)
analyze_results(results, prefix='raytune_paddle', config_keys=PADDLE_OCR_CONFIG_KEYS)
"
```

### Services and Ports

**Table A3.** *Docker services and ports.*

| Service | Port | Tuning Payload |
|---------|------|----------------|
| PaddleOCR | 8002 | `paddle_ocr_payload` |
| DocTR | 8003 | `doctr_payload` |
| EasyOCR | 8002 | `easyocr_payload` |

*Source: own elaboration.*
## A.7 Performance Metrics

The detailed results of the evaluations and hyperparameter tuning are available in:

- [General metrics](metrics/metrics.md) - comparison of the three services
- [PaddleOCR](metrics/metrics_paddle.md) - best accuracy (7.76% CER baseline, **1.49% optimized**)
- [DocTR](metrics/metrics_doctr.md) - fastest (0.50 s/page)
- [EasyOCR](metrics/metrics_easyocr.md) - intermediate trade-off

### Results Summary

**Table A4.** *Benchmark results summary per service.*

| Service | Baseline CER | Tuned CER | Improvement |
|---------|--------------|-----------|-------------|
| **PaddleOCR** | 8.85% | **7.72%** | 12.8% |
| DocTR | 12.06% | 12.07% | 0% |
| EasyOCR | 11.23% | 11.14% | 0.8% |

*Source: own elaboration.*

## A.8 License

The code is distributed under the MIT license.
242
docs/metrics/metrics.md
Normal file
# OCR Performance Metrics

**Benchmark date:** 2026-01-19
**Test dataset:** 45 pages (2 PDFs)

## System Specifications

| Component | Specification |
|-----------|---------------|
| **Operating system** | Ubuntu 24.04.3 LTS (Noble) |
| **Kernel** | 6.14.0-37-generic |
| **CPU** | AMD Ryzen 7 5800H with Radeon Graphics |
| **RAM** | 16 GB DDR4 |
| **GPU** | NVIDIA GeForce RTX 3060 Laptop GPU |
| **VRAM** | 5.66 GB |
| **CUDA** | 12.4 |
## Local vs Cloud Execution: Rationale

### Cloud GPU Costs

| Platform | GPU | Cost/Hour | Monthly Cost |
|----------|-----|-----------|--------------|
| **AWS EC2 g4dn.xlarge** | NVIDIA T4 (16 GB) | $0.526 | ~$384 |
| **Google Colab Pro** | T4/P100 | ~$1.30 | $10 + CU extras |
| **Google Colab Pro+** | T4/V100/A100 | ~$1.30 | $50 + CU extras |

### Cost Analysis for This Project

| Task | GPU Time | AWS Cost | Colab Pro Cost |
|------|----------|----------|----------------|
| Hyperparameter tuning (64×3 trials) | ~3 hours | ~$1.58 | ~$3.90 |
| Full evaluation (45 pages) | ~5 min | ~$0.04 | ~$0.11 |
| Development/debugging (20 hours/month) | 20 hours | ~$10.52 | ~$26.00 |

### Advantages of Local Execution

1. **Zero GPU cost**: the RTX 3060 is already available on the development machine
2. **No time limits**: AWS and Colab enforce session timeouts
3. **Instant access**: no instance provisioning time
4. **Local storage**: dataset and results on disk without transfer costs
5. **Fast iteration**: immediate restart of Docker containers

### Conclusion

For a research project with multiple tuning iterations and ongoing development, **local execution saves roughly $50-100/month** compared to the cloud, while offering greater flexibility and iteration speed.
## Executive Summary

| Service | CER | WER | Time/Page | Total Time | VRAM |
|---------|-----|-----|-----------|------------|------|
| **PaddleOCR (Mobile)** | **7.76%** | **11.62%** | 0.58 s | 32.0 s | 0.06 GB |
| EasyOCR | 11.23% | 36.36% | 1.88 s | 88.5 s | ~2 GB |
| DocTR | 12.06% | 42.01% | 0.50 s | 28.4 s | ~1 GB |

> **Winner:** PaddleOCR (Mobile) - best accuracy (7.76% CER) with competitive speed.

## Experimental Phases

This document presents results from two distinct experimental phases carried out during the development of the thesis. The first phase corresponds to hyperparameter optimization with Ray Tune, executed on CPU due to the initial hardware constraints. The second phase corresponds to the practical validation with GPU acceleration, assessing viability in production scenarios.

**Table.** *Experimental phases and their characteristics.*

| Phase | Dataset | Hardware | Main Result |
|-------|---------|----------|-------------|
| Optimization (CPU) | 24 pages | CPU | CER: 7.78% → **1.49%** (80.9% improvement) |
| Validation (GPU) | 45 pages | RTX 3060 | CER: 7.76% baseline, 0.55 s/page |

*Source: own elaboration.*

The optimization phase is the **main result of the thesis** (CER 1.49%, accuracy 98.51%). The GPU validation phase confirms the practical viability of the approach, demonstrating a 126x speed-up over CPU.
## OCR Service Comparison

### Accuracy Comparison (CER - lower is better)

```mermaid
xychart-beta
    title "Character Error Rate per Service"
    x-axis ["PaddleOCR", "EasyOCR", "DocTR"]
    y-axis "CER %" 0 --> 15
    bar [7.76, 11.23, 12.06]
```

### Speed Comparison (Time per Page)

```mermaid
xychart-beta
    title "Processing Time per Page (seconds)"
    x-axis ["DocTR", "PaddleOCR", "EasyOCR"]
    y-axis "Seconds" 0 --> 2
    bar [0.50, 0.58, 1.88]
```

### Service Recommendation Flow

```mermaid
flowchart LR
    A{Priority?}
    A -->|Accuracy| B[PaddleOCR]
    A -->|Speed| C[DocTR]
    A -->|Balance| B
    B --> D["7.76% CER<br/>0.58s/page"]
    C --> E["12.06% CER<br/>0.50s/page"]
```

### Key Findings

1. **Best accuracy**: PaddleOCR achieves the lowest error rates (7.76% CER, 11.62% WER)
2. **Best speed**: DocTR is the fastest (0.50 s/page), but 55% less accurate than PaddleOCR
3. **EasyOCR**: the slowest (3.8x slower than PaddleOCR), with intermediate accuracy
4. **VRAM efficiency**: PaddleOCR Mobile uses only 0.06 GB
## Model Configuration

| Service | Detection | Recognition | Suitable for Spanish? |
|---------|-----------|-------------|-----------------------|
| **PaddleOCR** | PP-OCRv5_mobile_det | PP-OCRv5_mobile_rec | Yes |
| **DocTR** | db_resnet50 | crnn_vgg16_bn | No (trained on English/French) |
| **EasyOCR** | CRAFT | latin_g2.pth | Yes |

### Notes on the Models

- **PaddleOCR**: more accurate server models are available but require >5.3 GB VRAM (OOM on the RTX 3060)
- **DocTR**: the `parseq` model was tested as an alternative; it scored 2% worse CER and ran 2x slower. The diacritics problem comes from the training data, not the architecture
- **EasyOCR**: the `latin_g2.pth` model is correct. The problems come from the CRAFT detector, not from recognition

> **Conclusion on fine-tuning:** for Spanish documents, **use PaddleOCR directly**. Fine-tuning DocTR/EasyOCR is not justified given that PaddleOCR already offers 31-36% better accuracy with no additional configuration.

## Error Analysis (from the debugset)

### PaddleOCR (Best - 7.76% CER)

- **Strengths**: preserves line structure, handles Spanish accents well
- **Issues**: minor spacing errors, occasional capitalization differences on accented characters
- **Improvable**: yes - hyperparameter tuning is likely to help

### DocTR (Worst WER - 42.01%)

- **Critical issue**: collapses all text into single lines (structure is lost)
- **Accent issue**: omits diacritics ("Indice" vs "Índice")
- **Improvable**: partially - the structure problem may be model-level

### EasyOCR (36.36% WER)

- **Critical issue**: spurious character insertions (";", "g", "0", "1")
- **Structure loss**: line breaks not preserved
- **Improvable**: yes - detection thresholds are too sensitive
## PaddleOCR Model Comparison (RTX 3060)

| Metric | Server Models | Mobile Models | Winner |
|--------|---------------|---------------|--------|
| **Time** | 2.47 s | 1.08 s | Mobile (2.3x faster) |
| **CER** | 1.82% | 1.42% | Mobile |
| **WER** | 16.14% | 12.20% | Mobile |
| **VRAM** | 5.3 GB (OOM on page 2) | 0.06 GB | Mobile |
| **Multi-page** | No (OOM) | Yes | Mobile |

> **Conclusion:** the Mobile models are recommended - faster, more accurate, and they fit in VRAM.

## CPU vs GPU Performance (PaddleOCR)

Data from `raytune_paddle_subproc_results_20251207_192320.csv` (CPU) vs the RTX 3060 (GPU):

| Metric | CPU | GPU (RTX 3060) | Speed-up |
|--------|-----|----------------|----------|
| **Time/page** | 69.4 s | 0.55 s | **126x faster** |
| **Best CER** | 1.15% | 0.79% | GPU better |
| **45 pages** | ~52 min | ~25 s | **126x faster** |

```mermaid
xychart-beta
    title "Processing Time per Page: CPU vs GPU"
    x-axis ["CPU", "GPU (RTX 3060)"]
    y-axis "Seconds" 0 --> 80
    bar [69.4, 0.55]
```

> **Conclusion:** a GPU is essential for practical OCR use. CPU processing is 126x slower, making it impractical for batch processing.
## Raw Benchmark Data

```json
{
  "timestamp": "2026-01-19T11:00:00.000000",
  "platform": {
    "gpu": "NVIDIA GeForce RTX 3060 Laptop GPU",
    "vram": "5.66 GB",
    "cuda": "12.4"
  },
  "services": {
    "PaddleOCR_Mobile": {
      "port": 8002,
      "models": {"det": "PP-OCRv5_mobile_det", "rec": "PP-OCRv5_mobile_rec"},
      "vram_used": "0.06 GB",
      "results": {
        "CER": 0.0776,
        "WER": 0.1162,
        "PAGES": 45,
        "TIME_PER_PAGE": 0.58,
        "TOTAL_TIME": 32.0
      }
    },
    "DocTR": {
      "port": 8003,
      "models": {"det": "db_resnet50", "rec": "crnn_vgg16_bn"},
      "vram_used": "~1 GB",
      "results": {
        "CER": 0.1206,
        "WER": 0.4201,
        "PAGES": 45,
        "TIME_PER_PAGE": 0.50,
        "TOTAL_TIME": 28.4
      }
    },
    "EasyOCR": {
      "port": 8002,
      "languages": ["es", "en"],
      "vram_used": "~2 GB",
      "results": {
        "CER": 0.1123,
        "WER": 0.3636,
        "PAGES": 45,
        "TIME_PER_PAGE": 1.88,
        "TOTAL_TIME": 88.5
      }
    }
  }
}
```
## Hyperparameter Tuning Results

Per-service tuning results (64 samples each, pages 5-10):

- [PaddleOCR Tuning Results](metrics_paddle.md)
- [DocTR Tuning Results](metrics_doctr.md)
- [EasyOCR Tuning Results](metrics_easyocr.md)

## Next Steps

1. ~~Hyperparameter tuning~~ - completed (64 samples per service)
2. **Full dataset evaluation** - run the best configurations on all 45 pages
3. **Compare** - baseline vs tuned performance on the full dataset
180
docs/metrics/metrics_doctr.md
Normal file
# DocTR Hyperparameter Tuning Results

**Tuning date:** 2026-01-19
**Platform:** NVIDIA RTX 3060 Laptop GPU
**Samples:** 64
**Test pages:** 5-10 (first document)

### Why Only 5 Pages?

Only 5 pages (pages 5-10) were used for hyperparameter tuning because:

1. **Speed**: 64 trials × 5 pages = 320 page evaluations. With 45 pages it would be 2,880 evaluations (~9x longer)
2. **Resource efficiency**: each trial takes ~2-20 seconds on GPU

**Overfitting risk**: tuning hyperparameters on a small subset CAN cause overfitting. Our results confirm this:

- Tuning subset: **38% improvement** (7.43% CER)
- Full dataset: **0% improvement** (12.07% CER)

The complete lack of improvement on the full dataset indicates severe overfitting to pages 5-10, combined with model-level limitations (diacritics handling, line structure) that hyperparameters cannot fix.
## Full Dataset Evaluation (45 pages)

| Metric | Baseline | Tuned | Improvement |
|--------|----------|-------|-------------|
| **CER** | 12.06% | 12.07% | **0%** |
| **WER** | 42.01% | 42.26% | **0%** |
| Time/page | 0.33 s | 0.34 s | - |

> **Note:** the tuning did not generalize to the full dataset. DocTR's problems appear to be model-level (diacritics, line structure).

## Tuning Subset Results (pages 5-10)

| Metric | Baseline | Tuned | Improvement |
|--------|----------|-------|-------------|
| **CER** | 12.06% | **7.43%** | **38%** |
| **WER** | 42.01% | **35.23%** | **16%** |

> Note: the improvements on the tuning subset did not transfer to the full dataset.
## Best Configuration Found

```json
{
  "assume_straight_pages": true,
  "straighten_pages": false,
  "preserve_aspect_ratio": false,
  "symmetric_pad": false,
  "disable_page_orientation": true,
  "disable_crop_orientation": false,
  "resolve_lines": true,
  "resolve_blocks": false,
  "paragraph_break": 0.0977
}
```
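A minimal sketch of how this configuration would be passed to docTR, assuming the `ocr_predictor` keyword arguments of recent docTR releases (the grouping options `resolve_lines`, `resolve_blocks` and `paragraph_break` are assumed to be forwarded to the document builder; the input file name is hypothetical):

```python
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

# Best configuration found by the tuning run
# (kwargs assumed per recent docTR versions)
predictor = ocr_predictor(
    pretrained=True,
    assume_straight_pages=True,
    straighten_pages=False,
    preserve_aspect_ratio=False,
    symmetric_pad=False,
    disable_page_orientation=True,
    disable_crop_orientation=False,
    resolve_lines=True,
    resolve_blocks=False,
    paragraph_break=0.0977,
)

doc = DocumentFile.from_pdf("sample.pdf")  # hypothetical input file
result = predictor(doc)
print(result.render())
```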
## Key Findings

1. **straighten_pages: false** - critical! Setting it to true causes ~79% CER (catastrophic)
2. **assume_straight_pages: true** - works well with straight document scans
3. **resolve_lines: true** - helps preserve line structure
4. **disable_page_orientation: true** - avoids unnecessary rotation

## Parameter Impact

Parameters that improved accuracy:

- `straighten_pages=False` absolutely critical
- `assume_straight_pages=True` present in the best results
- `resolve_lines=True` preserves text structure

Parameters that hurt accuracy:

- `straighten_pages=True` catastrophically bad (~79% CER)
- `resolve_blocks=True` slightly worse than False

## Known Limitations

Even with tuning, DocTR still struggles:

- Omits diacritics (accents) - likely a model-level problem
- Still has a higher WER than PaddleOCR due to structure problems
## Full Dataset Evaluation

**Status:** completed

```bash
curl -X POST http://localhost:8003/evaluate_full \
  -H "Content-Type: application/json" \
  -d '{
    "pdf_folder": "/app/dataset",
    "assume_straight_pages": true,
    "straighten_pages": false,
    "preserve_aspect_ratio": false,
    "symmetric_pad": false,
    "disable_page_orientation": true,
    "disable_crop_orientation": false,
    "resolve_lines": true,
    "resolve_blocks": false,
    "paragraph_break": 0.0977,
    "save_output": true
  }'
```

**Result:** CER 12.07%, WER 42.26%, 0.34 s/page (no improvement over the baseline)

**Conclusion:** DocTR's accuracy problems are model-level and cannot be fixed through hyperparameters.
## Model Configuration

### Current Model

| Component | Model | Status |
|-----------|-------|--------|
| Detection | `db_resnet50` | Correct |
| Recognition | `crnn_vgg16_bn` | Best available option |

The `crnn_vgg16_bn` model was trained mainly on English and French datasets, which explains the systematic loss of Spanish diacritics (á, é, í, ó, ú, ñ).

### Alternative Model Test (parseq)

The `parseq` (transformer) model was tested as an alternative:

| Metric | crnn_vgg16_bn | parseq | Outcome |
|--------|---------------|--------|---------|
| **CER** | 12.07% | 12.32% | **+2% worse** |
| **WER** | 42.26% | 44.0% | **+4% worse** |
| Time/page | 0.34 s | 0.70 s | 2x slower |
| Diacritics | No | No | No improvement |

**Conclusion:** the `parseq` model does not improve Spanish diacritics and is slower. All of DocTR's pre-trained models were trained mainly on English/French; Spanish would require **fine-tuning on a Spanish corpus**.

### No Model Change Recommended

Keep `crnn_vgg16_bn` (faster, slightly better accuracy). The diacritics problems stem from the **training data**, not from the model architecture.
## Debugset Error Analysis

### Observed Errors

| Ground Truth | DocTR | Error Type |
|--------------|-------|------------|
| `bibliográficas` | `bibliograficas` | Omitted diacritic |
| `sección` | `seccion` | Omitted diacritic |
| `Máster` | `Master` | Omitted diacritic |
| `información` | `informacion` | Omitted diacritic |
| `o amplían` | `O amplian` | Incorrect capitalization |
| Separate lines | All on one line | **Structure lost** |

### Critical Issues

1. **Total loss of structure**: all page text collapses into a single line
2. **Systematic diacritic omission**: ALL Spanish accents are lost
3. **Capitalization errors**: `o` → `O` mid-sentence

### Is Fine-tuning Recommended?

**Yes, for diacritics.** DocTR's CRNN model was trained mainly on English and French text, which explains the systematic omission of Spanish accents.

| Problem | Does fine-tuning help? | Explanation |
|---------|------------------------|-------------|
| Diacritics | **Yes** | Training on a Spanish corpus would teach the model the accents |
| Line structure | **No** | Architectural problem of the model, not of training |
| Capitalization | **Partially** | Could improve with suitable training data |
### How to Fine-Tune DocTR

```python
from doctr.models import recognition_predictor
from doctr.datasets import RecognitionDataset

# Load a Spanish dataset
train_set = RecognitionDataset(
    img_folder="path/to/spanish/images",
    labels_path="path/to/spanish/labels.json"
)

# Fine-tune the recognition model
model = recognition_predictor(pretrained=True)
# ... set up training
```

Documentation: https://mindee.github.io/doctr/using_doctr/custom_models_training.html
182
docs/metrics/metrics_easyocr.md
Normal file
# EasyOCR Hyperparameter Tuning Results

**Tuning date:** 2026-01-19
**Platform:** NVIDIA RTX 3060 Laptop GPU
**Samples:** 64
**Test pages:** 5-10 (first document)

### Why Only 5 Pages?

Only 5 pages (pages 5-10) were used for hyperparameter tuning because:

1. **Speed**: 64 trials × 5 pages = 320 page evaluations. With 45 pages it would be 2,880 evaluations (~9x longer)
2. **Resource efficiency**: each trial takes ~10-20 seconds on GPU

**Overfitting risk**: tuning hyperparameters on a small subset CAN cause overfitting. Our results confirm this:

- Tuning subset: **48% improvement** (5.83% CER)
- Full dataset: **0.8% improvement** (11.14% CER)

The minimal improvement on the full dataset indicates that the hyperparameters overfitted pages 5-10. EasyOCR's problems (spurious detections, structure loss) may also be partially model-level.
## Full Dataset Evaluation (45 pages)

| Metric | Baseline | Tuned | Improvement |
|--------|----------|-------|-------------|
| **CER** | 11.23% | 11.14% | **0.8%** |
| **WER** | 36.36% | 36.85% | **-1.3%** |
| Time/page | 1.84 s | 1.94 s | - |

> **Note:** the tuning showed minimal improvement on the full dataset. EasyOCR's problems may be model-level.

## Tuning Subset Results (pages 5-10)

| Metric | Baseline | Tuned | Improvement |
|--------|----------|-------|-------------|
| **CER** | 11.23% | **5.83%** | **48%** |
| **WER** | 36.36% | **26.33%** | **28%** |

> Note: the large improvements on the tuning subset did not transfer to the full dataset.
## Best Configuration Found

```json
{
  "text_threshold": 0.6647,
  "low_text": 0.4247,
  "link_threshold": 0.2184,
  "slope_ths": 0.1629,
  "ycenter_ths": 0.7994,
  "height_ths": 0.6437,
  "width_ths": 0.6065,
  "add_margin": 0.1462,
  "contrast_ths": 0.1671,
  "adjust_contrast": 0.6416,
  "decoder": "greedy",
  "beamWidth": 7,
  "min_size": 10
}
```
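A minimal sketch of how this configuration maps onto the `readtext()` call of the `easyocr` library (the keyword names below are standard `readtext` parameters; the input image name is hypothetical):

```python
import easyocr

reader = easyocr.Reader(["es", "en"], gpu=True)

# Best configuration found by the tuning run, passed to readtext()
results = reader.readtext(
    "page_005.png",          # hypothetical input image
    text_threshold=0.6647,
    low_text=0.4247,
    link_threshold=0.2184,
    slope_ths=0.1629,
    ycenter_ths=0.7994,
    height_ths=0.6437,
    width_ths=0.6065,
    add_margin=0.1462,
    contrast_ths=0.1671,
    adjust_contrast=0.6416,
    decoder="greedy",
    beamWidth=7,
    min_size=10,
)

for bbox, text, confidence in results:
    print(f"{confidence:.2f}  {text}")
```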
## Key Findings

1. **decoder: greedy** - consistently better than beamsearch on this dataset
2. **Higher text_threshold (0.66)** - reduces spurious detections
3. **min_size: 10** - filters out small noise artifacts
4. **Moderate thresholds** - balanced detection sensitivity

## Parameter Impact

Parameters that improved accuracy:

- `decoder="greedy"` consistently outperformed beamsearch
- A higher `text_threshold` (0.6-0.8) reduced noise
- `min_size >= 5` helped filter artifacts

Parameters that hurt accuracy:

- `decoder="beamsearch"` caused ~35-40% CER in many trials
- A very low `text_threshold` (<0.4) detected too much noise
- A high `min_size` (>15) missed some text

## Comparison with Baseline Issues

Original problems identified in the debugset:

- Spurious character insertions - **improved** with higher thresholds
- Structure loss - still present but less severe
## Full Dataset Evaluation

**Status:** completed

```bash
curl -X POST http://localhost:8002/evaluate_full \
  -H "Content-Type: application/json" \
  -d '{
    "pdf_folder": "/app/dataset",
    "text_threshold": 0.6647,
    "low_text": 0.4247,
    "link_threshold": 0.2184,
    "slope_ths": 0.1629,
    "ycenter_ths": 0.7994,
    "height_ths": 0.6437,
    "width_ths": 0.6065,
    "add_margin": 0.1462,
    "contrast_ths": 0.1671,
    "adjust_contrast": 0.6416,
    "decoder": "greedy",
    "beamWidth": 7,
    "min_size": 10,
    "save_output": true
  }'
```

**Result:** CER 11.14%, WER 36.85%, 1.94 s/page (minimal improvement)

**Conclusion:** tuning EasyOCR provided a negligible improvement on the full dataset.
## Model Configuration

### Current Model (Correct for Spanish)

| Component | Model | Status |
|-----------|-------|--------|
| Detection | CRAFT | Correct |
| Recognition | `latin_g2.pth` | Correct for Spanish |
| Languages | `es,en` | Correct |

The `latin_g2.pth` model is optimized for Latin-script languages, including Spanish. **The recognition model is correct** - the observed problems (spurious characters `0`, `;`, `g`) come from the **CRAFT detector**, not from the recognition model.

### No Model Change Required

Unlike DocTR, EasyOCR uses the correct model for Spanish. The problems lie in detection (CRAFT thresholds), not in recognition.
## Debugset Error Analysis

### Observed Errors

| Ground Truth | EasyOCR | Error Type |
|--------------|---------|------------|
| `o figura` | `0 figura` | Letter `o` → digit `0` |
| `tabla o figura` | `tabla 0 figura` | Letter `o` → digit `0` |
| `grupal,` | `grupal;` | Comma → semicolon |
| `páginas,` | `páginas;` | Incorrect punctuation |
| (none) | `g`, `1`, `2` | **Spurious characters inserted** |
| Separate lines | All on one line | **Structure lost** |

### Critical Issues

1. **Spurious characters**: the CRAFT detector inserts false characters (`g`, `1`, `2`, `;`) that do not exist in the document
2. **Letter/digit confusion**: consistently confuses `o` with `0`
3. **Incorrect punctuation**: replaces commas with semicolons
4. **Structure loss**: all text collapses into one line

### Is Fine-tuning Recommended?

**Yes.** EasyOCR has significant problems that could improve with fine-tuning:

| Problem | Does fine-tuning help? | Explanation |
|---------|------------------------|-------------|
| Spurious characters | **Yes** | The CRAFT detector can be trained to reduce false positives |
| `o`/`0` confusion | **Yes** | The recognition model would learn from Spanish context |
| Punctuation | **Yes** | A Spanish corpus would teach correct punctuation patterns |
| Structure | **Partially** | Depends on text-grouping parameters |
### How to Fine-Tune EasyOCR

EasyOCR allows fine-tuning of the recognition model:

```bash
# 1. Prepare the dataset in EasyOCR format
# Structure: images/ + labels.txt (image<tab>text)

# 2. Train the recognition model
python train.py \
  --train_data ./train_data \
  --valid_data ./valid_data \
  --lang_list es en \
  --saved_model ./custom_model
```

Documentation: https://github.com/JaidedAI/EasyOCR/blob/master/custom_model.md

### Recommended Alternative

Given the 11.14% CER and EasyOCR's fundamental problems, it is recommended to **use PaddleOCR** (7.72% CER) rather than invest effort in fine-tuning EasyOCR.
141
docs/metrics/metrics_paddle.md
Normal file
# PaddleOCR Hyperparameter Tuning Results

> **Note:** the results in this document correspond to the GPU validation phase with 45 pages. The official thesis result is a **CER of 1.49%**, obtained in the final 24-page validation with the optimized configuration (see `docs/04_desarrollo_especifico.md`).

**Tuning date:** 2026-01-19
**Platform:** NVIDIA RTX 3060 Laptop GPU
**Samples:** 64
**Test pages:** 5-10 (first document)

### Why Only 5 Pages?

Only 5 pages (pages 5-10) were used for hyperparameter tuning because:

1. **Speed**: 64 trials × 5 pages = 320 page evaluations. With 45 pages it would be 2,880 evaluations (~9x longer)
2. **Resource efficiency**: each trial takes ~3-10 seconds on GPU; the full dataset would take ~1 hour per trial on CPU

**Overfitting risk**: tuning hyperparameters on a small subset CAN cause overfitting. Our results confirm this:

- Tuning subset: **90% improvement** (0.79% CER)
- Full dataset: **12.8% improvement** (7.72% CER)

The dramatic difference shows that the hyperparameters partially overfitted pages 5-10. A larger tuning subset (e.g. 15-20 pages) might yield parameters that generalize better, but would increase tuning time proportionally.
## Full Dataset Evaluation (45 pages)

| Metric | Baseline | Tuned | Improvement |
|--------|----------|-------|-------------|
| **CER** | 8.85% | **7.72%** | **12.8%** |
| **WER** | 13.05% | **11.40%** | **12.6%** |
| Time/page | 0.51 s | 0.55 s | - |

## Tuning Subset Results (pages 5-10)

| Metric | Baseline | Tuned | Improvement |
|--------|----------|-------|-------------|
| **CER** | 7.76% | **0.79%** | **90%** |
| **WER** | 11.62% | **7.78%** | **33%** |

> Note: the tuning subset showed larger improvements, suggesting that some hyperparameters are page-specific.
## Best Configuration Found

```json
{
  "use_doc_orientation_classify": true,
  "use_doc_unwarping": false,
  "textline_orientation": true,
  "text_det_thresh": 0.0462,
  "text_det_box_thresh": 0.4862,
  "text_det_unclip_ratio": 0.0,
  "text_rec_score_thresh": 0.5658
}
```
## Key Findings

1. **textline_orientation: true** - critical for accuracy
2. **use_doc_orientation_classify: true** - helps with page orientation detection
3. **use_doc_unwarping: false** - document unwarping hurts accuracy on this dataset
4. **Low text_det_thresh (0.0462)** - more sensitive text detection helps
5. **Higher text_rec_score_thresh (0.5658)** - filters out low-confidence recognitions

## Parameter Impact

Parameters that improved accuracy:

- `textline_orientation=True` consistently present in the best results
- `use_doc_orientation_classify=True` in the best trials
- Lower `text_det_thresh` values (0.04-0.10)

Parameters that hurt accuracy:

- `use_doc_unwarping=True` increased CER significantly
- A very low `text_det_box_thresh` (<0.01) caused problems
## Full Dataset Evaluation

**Status:** completed

```bash
curl -X POST http://localhost:8002/evaluate_full \
  -H "Content-Type: application/json" \
  -d '{
    "pdf_folder": "/app/dataset",
    "use_doc_orientation_classify": true,
    "use_doc_unwarping": false,
    "textline_orientation": true,
    "text_det_thresh": 0.0462,
    "text_det_box_thresh": 0.4862,
    "text_det_unclip_ratio": 0.0,
    "text_rec_score_thresh": 0.5658,
    "save_output": true
  }'
```

**Result:** CER 7.72%, WER 11.40%, 0.55 s/page
## Model Configuration

### Current Model (Correct for Spanish)

| Component | Model | Status |
|-----------|-------|--------|
| Detection | `PP-OCRv5_mobile_det` | Correct |
| Recognition | `PP-OCRv5_mobile_rec` | Correct |

The PP-OCRv5 mobile models support multiple languages, including Spanish, with good diacritics handling.

### Note on Server Models

PaddleOCR offers more accurate "server" models:

- `PP-OCRv5_server_det` + `PP-OCRv5_server_rec`
- They require ~5.3 GB VRAM

**Limitation:** on the RTX 3060 (5.66 GB VRAM) the server models cause **OOM (Out of Memory)** errors on page 2. The mobile models used (7.72% CER) are the best practical option for this hardware.

For hardware with more VRAM (8+ GB), the server models could improve accuracy.
## Debugset Error Analysis

### Observed Errors

| Ground Truth | PaddleOCR | Error Type |
|--------------|-----------|------------|
| `bibliografía` | `bibliografia` | Omitted accent |
| `amplían` | `amplian` | Omitted accent |
| `, debes` | `，debes` | Chinese Unicode comma |
| Separate lines | Merged footer | Minor structure issue |

### Strengths

- **Preserves line structure**: keeps line breaks correctly
- **Good Spanish handling**: most accents are recognized correctly
- **Low noise**: does not insert spurious characters

### Is Fine-tuning Recommended?

**No.** At 7.72% CER, PaddleOCR already has excellent accuracy for Spanish documents. The observed errors are minor:

- Omitted accents: ~5% of cases
- Unicode punctuation: very occasional
- Impact on readability: minimal

The fine-tuning effort is not justified for marginal gains. For critical use cases requiring <5% CER, consider post-processing with a spell checker.
import re
import subprocess
import json

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DOCS_DIR = os.path.join(BASE_DIR, 'docs')
OUTPUT_DIR = os.path.join(BASE_DIR, 'thesis_output/figures')
MMDC = os.path.join(BASE_DIR, 'node_modules/.bin/mmdc')
153
src/README.md
Normal file
# OCR Hyperparameter Tuning with Ray Tune

This directory contains the Docker setup for running automated hyperparameter optimization on OCR services using Ray Tune with Optuna.

## Prerequisites

- Docker with NVIDIA GPU support (`nvidia-container-toolkit`)
- NVIDIA GPU with CUDA support
## Quick Start

```bash
cd src

# Start PaddleOCR service and run tuning (images pulled from registry)
docker compose -f docker-compose.tuning.paddle.yml up -d paddle-ocr-gpu
docker compose -f docker-compose.tuning.paddle.yml run raytune --service paddle --samples 64
```

## Available Services

| Service | Port | Compose File |
|---------|------|--------------|
| PaddleOCR | 8002 | `docker-compose.tuning.paddle.yml` |
| DocTR | 8003 | `docker-compose.tuning.doctr.yml` |
| EasyOCR | 8002 | `docker-compose.tuning.easyocr.yml` |

**Note:** PaddleOCR and EasyOCR both use port 8002. Run them separately.
## Usage Examples

### PaddleOCR Tuning

```bash
# Start service
docker compose -f docker-compose.tuning.paddle.yml up -d paddle-ocr-gpu

# Wait for the health check to pass (check with)
curl http://localhost:8002/health

# Run tuning (64 samples)
docker compose -f docker-compose.tuning.paddle.yml run raytune --service paddle --samples 64

# Stop service
docker compose -f docker-compose.tuning.paddle.yml down
```

### DocTR Tuning

```bash
docker compose -f docker-compose.tuning.doctr.yml up -d doctr-gpu
curl http://localhost:8003/health
docker compose -f docker-compose.tuning.doctr.yml run raytune --service doctr --samples 64
docker compose -f docker-compose.tuning.doctr.yml down
```

### EasyOCR Tuning

```bash
docker compose -f docker-compose.tuning.easyocr.yml up -d easyocr-gpu
curl http://localhost:8002/health
docker compose -f docker-compose.tuning.easyocr.yml run raytune --service easyocr --samples 64
docker compose -f docker-compose.tuning.easyocr.yml down
```

### Run Multiple Services (PaddleOCR + DocTR)

```bash
# Start both services
docker compose -f docker-compose.tuning.yml up -d paddle-ocr-gpu doctr-gpu

# Run tuning for each
docker compose -f docker-compose.tuning.yml run raytune --service paddle --samples 64
docker compose -f docker-compose.tuning.yml run raytune --service doctr --samples 64

# Stop all
docker compose -f docker-compose.tuning.yml down
```
## Command Line Options
|
||||
|
||||
```bash
|
||||
docker compose -f <compose-file> run raytune --service <service> --samples <n>
|
||||
```
|
||||
|
||||
| Option | Description | Default |
|
||||
|--------|-------------|---------|
|
||||
| `--service` | OCR service: `paddle`, `doctr`, `easyocr` | Required |
|
||||
| `--samples` | Number of hyperparameter trials | 64 |
|
||||
|
||||
## Output
|
||||
|
||||
Results are saved to `src/results/` as CSV files:
|
||||
- `raytune_paddle_results_<timestamp>.csv`
|
||||
- `raytune_doctr_results_<timestamp>.csv`
|
||||
- `raytune_easyocr_results_<timestamp>.csv`
|
||||
|
||||
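For quick inspection of a finished run, a short pandas sketch can rank the trials (the `CER` column name is an assumption based on the metric these services report; adjust it to the actual file):

```python
# Hypothetical inspection of the most recent Ray Tune results CSV.
import glob

import pandas as pd

latest = sorted(glob.glob("results/raytune_paddle_results_*.csv"))[-1]
df = pd.read_csv(latest)
print(df.sort_values("CER").head(5))  # best five trials by character error rate
```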
## Directory Structure

```
src/
├── docker-compose.tuning.yml          # All services (PaddleOCR + DocTR)
├── docker-compose.tuning.paddle.yml   # PaddleOCR only
├── docker-compose.tuning.doctr.yml    # DocTR only
├── docker-compose.tuning.easyocr.yml  # EasyOCR only
├── raytune/
│   ├── Dockerfile
│   ├── requirements.txt
│   ├── raytune_ocr.py
│   └── run_tuning.py
├── dataset/                           # Input images and ground truth
├── results/                           # Output CSV files
└── debugset/                          # Debug output
```

## Docker Images

All images are pre-built and pulled from the registry:

- `seryus.ddns.net/unir/raytune:latest` - Ray Tune tuning service
- `seryus.ddns.net/unir/paddle-ocr-gpu:latest` - PaddleOCR GPU
- `seryus.ddns.net/unir/doctr-gpu:latest` - DocTR GPU
- `seryus.ddns.net/unir/easyocr-gpu:latest` - EasyOCR GPU

### Build locally (development)

```bash
# Build raytune image locally
docker build -t seryus.ddns.net/unir/raytune:latest ./raytune
```

## Troubleshooting

### Service not ready

Wait for the health check to pass before running tuning:

```bash
# Check service health
curl http://localhost:8002/health
# Expected: {"status": "ok", "model_loaded": true, ...}
```
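If a scripted wait is preferred over polling by hand, a small Python loop can block until the model is loaded (a sketch against the `/health` fields shown above; the URL and timeout are placeholders):

```python
# Poll the service health endpoint until the OCR model is loaded or a deadline passes.
import time

import requests

def wait_for_health(url: str = "http://localhost:8002/health", timeout_s: int = 300) -> dict:
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            data = requests.get(url, timeout=5).json()
            if data.get("model_loaded"):
                return data
        except requests.RequestException:
            pass  # service still starting up
        time.sleep(5)
    raise TimeoutError(f"Service at {url} not healthy after {timeout_s}s")
```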
### GPU not detected

Ensure `nvidia-container-toolkit` is installed:

```bash
nvidia-smi  # Should show your GPU
docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
```

### Port already in use

Stop any running OCR services:

```bash
docker compose -f docker-compose.tuning.paddle.yml down
docker compose -f docker-compose.tuning.easyocr.yml down
```
138
src/dataset_formatting/convert_to_hf_dataset.py
Executable file
@@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""Convert custom OCR dataset to Hugging Face format."""

import json
import shutil
from pathlib import Path


def convert_dataset(source_dir: str, output_dir: str):
    """Convert folder-based dataset to HF ImageFolder format."""

    source = Path(source_dir)
    output = Path(output_dir)
    data_dir = output / "data"
    data_dir.mkdir(parents=True, exist_ok=True)

    metadata = []

    for doc_folder in sorted(source.iterdir()):
        if not doc_folder.is_dir():
            continue

        doc_id = doc_folder.name
        img_dir = doc_folder / "img"
        txt_dir = doc_folder / "txt"

        if not img_dir.exists() or not txt_dir.exists():
            continue

        for img_file in sorted(img_dir.glob("*.png")):
            txt_file = txt_dir / f"{img_file.stem}.txt"
            if not txt_file.exists():
                continue

            # Extract page number
            page_num = int(img_file.stem.split("_")[-1])

            # New filename: page_{doc_id}_{page_num:04d}.png
            new_name = f"page_{doc_id}_{page_num:04d}.png"

            # Copy image
            shutil.copy(img_file, data_dir / new_name)

            # Read text
            text = txt_file.read_text(encoding="utf-8").strip()

            # Add metadata entry
            metadata.append({
                "file_name": f"data/{new_name}",
                "text": text,
                "document_id": doc_id,
                "page_number": page_num
            })

    # Write metadata.jsonl
    with open(output / "metadata.jsonl", "w", encoding="utf-8") as f:
        for entry in metadata:
            f.write(json.dumps(entry, ensure_ascii=False) + "\n")

    # Write dataset card
    write_dataset_card(output, len(metadata))

    print(f"Converted {len(metadata)} samples to {output}")


def write_dataset_card(output_dir: Path, num_samples: int):
    """Write HF dataset card."""
    card = f'''---
dataset_info:
  features:
    - name: image
      dtype: image
    - name: text
      dtype: string
    - name: document_id
      dtype: string
    - name: page_number
      dtype: int32
  splits:
    - name: train
      num_examples: {num_samples}
license: cc-by-4.0
language:
  - es
task_categories:
  - image-to-text
tags:
  - ocr
  - spanish
  - academic-documents
  - unir
---

# UNIR OCR Dataset

Dataset de documentos académicos en español para evaluación de sistemas OCR.

## Descripción

- **Idioma**: Español
- **Dominio**: Documentos académicos (instrucciones TFE de UNIR)
- **Formato**: Imágenes PNG (300 DPI) + texto ground truth
- **Total**: {num_samples} pares imagen-texto

## Uso

```python
from datasets import load_dataset

dataset = load_dataset("path/to/dataset")

for sample in dataset["train"]:
    image = sample["image"]
    text = sample["text"]
```

## Estructura

Cada muestra contiene:
- `image`: Imagen de la página (PIL.Image)
- `text`: Texto ground truth extraído del PDF
- `document_id`: ID del documento fuente
- `page_number`: Número de página

## Citación

Parte del TFM "Optimización de Hiperparámetros OCR con Ray Tune" - UNIR 2025
'''
    (output_dir / "README.md").write_text(card, encoding="utf-8")


if __name__ == "__main__":
    import sys

    source = sys.argv[1] if len(sys.argv) > 1 else "src/dataset"
    output = sys.argv[2] if len(sys.argv) > 2 else "src/dataset_hf"

    convert_dataset(source, output)
63
src/dataset_formatting/upload-dataset.sh
Executable file
@@ -0,0 +1,63 @@
#!/bin/bash
# Upload OCR dataset to Gitea generic packages
#
# Usage:
#   ./src/dataset_formatting/upload-dataset.sh [token]
#
# Environment variables:
#   GITEA_TOKEN - Gitea API token

set -e

GITEA_URL="https://seryus.ddns.net"
GITEA_ORG="unir"
PACKAGE_NAME="ocr-dataset-spanish"
VERSION="1.0.0"
DATASET_DIR="src/dataset_hf"
TARBALL="dataset-${VERSION}.tar.gz"

# Get token
TOKEN="${1:-${GITEA_TOKEN}}"
if [ -z "$TOKEN" ]; then
    echo "Error: No token provided"
    echo "Usage: $0 [token]"
    echo "   or set GITEA_TOKEN environment variable"
    exit 1
fi

# Check dataset exists
if [ ! -d "$DATASET_DIR" ]; then
    echo "Error: Dataset not found at $DATASET_DIR"
    echo "Run: python src/dataset_formatting/convert_to_hf_dataset.py first"
    exit 1
fi

# Create tarball
echo "Creating tarball..."
tar -czvf "$TARBALL" -C "$DATASET_DIR" .
echo "Created: $TARBALL ($(du -h "$TARBALL" | cut -f1))"

# Upload
echo "Uploading to Gitea packages..."
echo "  URL: $GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$TARBALL"

HTTP_CODE=$(curl -sS -w "%{http_code}" -o /tmp/upload_response.txt \
    -X PUT \
    -H "Authorization: token $TOKEN" \
    -H "Content-Type: application/octet-stream" \
    --data-binary "@$TARBALL" \
    "$GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$TARBALL")

if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
    echo "Success! Dataset uploaded."
    echo "Download URL: $GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$TARBALL"
    rm "$TARBALL"
elif [ "$HTTP_CODE" = "409" ]; then
    echo "Package version already exists (HTTP 409)"
    rm "$TARBALL"
else
    echo "Error: Upload failed with HTTP $HTTP_CODE"
    cat /tmp/upload_response.txt
    rm "$TARBALL"
    exit 1
fi
@@ -42,4 +42,33 @@ class ImageTextDataset:
         with open(txt_path, "r", encoding="utf-8") as f:
             text = f.read()
 
-        return image, text
+        return image, text
+
+    def get_output_path(self, idx, output_subdir, debugset_root="/app/debugset"):
+        """Get output path for saving OCR result to debugset folder.
+
+        Args:
+            idx: Sample index
+            output_subdir: Subdirectory name (e.g., 'paddle_text', 'doctr_text')
+            debugset_root: Root folder for debug output (default: /app/debugset)
+
+        Returns:
+            Path like /app/debugset/doc1/{output_subdir}/page_001.txt
+        """
+        img_path, _ = self.samples[idx]
+        # img_path: /app/dataset/doc1/img/page_001.png
+        # Extract relative path: doc1/img/page_001.png
+        parts = img_path.split("/dataset/", 1)
+        if len(parts) == 2:
+            rel_path = parts[1]  # doc1/img/page_001.png
+        else:
+            rel_path = os.path.basename(img_path)
+
+        # Replace /img/ with /{output_subdir}/
+        rel_parts = rel_path.rsplit("/img/", 1)
+        doc_folder = rel_parts[0]  # doc1
+        fname = os.path.splitext(rel_parts[1])[0] + ".txt"  # page_001.txt
+
+        out_dir = os.path.join(debugset_root, doc_folder, output_subdir)
+        os.makedirs(out_dir, exist_ok=True)
+        return os.path.join(out_dir, fname)
50
src/docker-compose.tuning.doctr.yml
Normal file
@@ -0,0 +1,50 @@
# docker-compose.tuning.doctr.yml - Ray Tune with DocTR GPU
# Usage:
#   docker compose -f docker-compose.tuning.doctr.yml up -d doctr-gpu
#   docker compose -f docker-compose.tuning.doctr.yml run raytune --service doctr --samples 64
#   docker compose -f docker-compose.tuning.doctr.yml down

services:
  raytune:
    image: seryus.ddns.net/unir/raytune:latest
    command: ["--service", "doctr", "--host", "doctr-gpu", "--port", "8000", "--samples", "64"]
    volumes:
      - ./results:/app/results:rw
    environment:
      - PYTHONUNBUFFERED=1
    depends_on:
      doctr-gpu:
        condition: service_healthy

  doctr-gpu:
    image: seryus.ddns.net/unir/doctr-gpu:latest
    container_name: doctr-gpu-tuning
    ports:
      - "8003:8000"
    volumes:
      - ./dataset:/app/dataset:ro
      - ./debugset:/app/debugset:rw
      - doctr-cache:/root/.cache/doctr
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - DOCTR_DET_ARCH=db_resnet50
      - DOCTR_RECO_ARCH=crnn_vgg16_bn
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 180s

volumes:
  doctr-cache:
    name: doctr-model-cache
51
src/docker-compose.tuning.easyocr.yml
Normal file
@@ -0,0 +1,51 @@
# docker-compose.tuning.easyocr.yml - Ray Tune with EasyOCR GPU
# Usage:
#   docker compose -f docker-compose.tuning.easyocr.yml up -d easyocr-gpu
#   docker compose -f docker-compose.tuning.easyocr.yml run raytune --service easyocr --samples 64
#   docker compose -f docker-compose.tuning.easyocr.yml down
#
# Note: EasyOCR uses port 8002 (same as PaddleOCR). Cannot run simultaneously.

services:
  raytune:
    image: seryus.ddns.net/unir/raytune:latest
    command: ["--service", "easyocr", "--host", "easyocr-gpu", "--port", "8000", "--samples", "64"]
    volumes:
      - ./results:/app/results:rw
    environment:
      - PYTHONUNBUFFERED=1
    depends_on:
      easyocr-gpu:
        condition: service_healthy

  easyocr-gpu:
    image: seryus.ddns.net/unir/easyocr-gpu:latest
    container_name: easyocr-gpu-tuning
    ports:
      - "8002:8000"
    volumes:
      - ./dataset:/app/dataset:ro
      - ./debugset:/app/debugset:rw
      - easyocr-cache:/root/.EasyOCR
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - EASYOCR_LANGUAGES=es,en
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 120s

volumes:
  easyocr-cache:
    name: easyocr-model-cache
50
src/docker-compose.tuning.paddle.yml
Normal file
@@ -0,0 +1,50 @@
# docker-compose.tuning.paddle.yml - Ray Tune with PaddleOCR GPU
# Usage:
#   docker compose -f docker-compose.tuning.paddle.yml up -d paddle-ocr-gpu
#   docker compose -f docker-compose.tuning.paddle.yml run raytune --service paddle --samples 64
#   docker compose -f docker-compose.tuning.paddle.yml down

services:
  raytune:
    image: seryus.ddns.net/unir/raytune:latest
    command: ["--service", "paddle", "--host", "paddle-ocr-gpu", "--port", "8000", "--samples", "64"]
    volumes:
      - ./results:/app/results:rw
    environment:
      - PYTHONUNBUFFERED=1
    depends_on:
      paddle-ocr-gpu:
        condition: service_healthy

  paddle-ocr-gpu:
    image: seryus.ddns.net/unir/paddle-ocr-gpu:latest
    container_name: paddle-ocr-gpu-tuning
    ports:
      - "8002:8000"
    volumes:
      - ./dataset:/app/dataset:ro
      - ./debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

volumes:
  paddlex-cache:
    name: paddlex-model-cache
82
src/docker-compose.tuning.yml
Normal file
@@ -0,0 +1,82 @@
# docker-compose.tuning.yml - Ray Tune with all OCR services (PaddleOCR + DocTR)
# Usage:
#   docker compose -f docker-compose.tuning.yml up -d paddle-ocr-gpu doctr-gpu
#   docker compose -f docker-compose.tuning.yml run raytune --service paddle --samples 64
#   docker compose -f docker-compose.tuning.yml run raytune --service doctr --samples 64
#   docker compose -f docker-compose.tuning.yml down
#
# Note: EasyOCR uses port 8002 (same as PaddleOCR). Use docker-compose.tuning.easyocr.yml separately.

services:
  raytune:
    image: seryus.ddns.net/unir/raytune:latest
    network_mode: host
    shm_size: '5gb'
    volumes:
      - ./results:/app/results:rw
    environment:
      - PYTHONUNBUFFERED=1

  paddle-ocr-gpu:
    image: seryus.ddns.net/unir/paddle-ocr-gpu:latest
    container_name: paddle-ocr-gpu-tuning
    ports:
      - "8002:8000"
    volumes:
      - ./dataset:/app/dataset:ro
      - ./debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  doctr-gpu:
    image: seryus.ddns.net/unir/doctr-gpu:latest
    container_name: doctr-gpu-tuning
    ports:
      - "8003:8000"
    volumes:
      - ./dataset:/app/dataset:ro
      - ./debugset:/app/debugset:rw
      - doctr-cache:/root/.cache/doctr
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - DOCTR_DET_ARCH=db_resnet50
      - DOCTR_RECO_ARCH=crnn_vgg16_bn
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 180s

volumes:
  paddlex-cache:
    name: paddlex-model-cache
  doctr-cache:
    name: doctr-model-cache
109
src/doctr_raytune_rest.ipynb
Normal file
@@ -0,0 +1,109 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "header",
   "metadata": {},
   "source": [
    "# DocTR Hyperparameter Optimization via REST API\n",
    "\n",
    "Uses Ray Tune + Optuna to find optimal DocTR parameters.\n",
    "\n",
    "## Prerequisites\n",
    "\n",
    "```bash\n",
    "cd src/doctr_service\n",
    "docker compose up ocr-cpu # or ocr-gpu\n",
    "```\n",
    "\n",
    "Service runs on port 8003."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "deps",
   "metadata": {},
   "outputs": [],
   "source": "# Pin Ray version for API stability (tune.report takes dict, not kwargs in 2.x)\n%pip install -q \"ray[tune]==2.53.0\" optuna requests pandas"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "setup",
   "metadata": {},
   "outputs": [],
   "source": [
    "from raytune_ocr import (\n",
    "    check_workers, create_trainable, run_tuner, analyze_results, correlation_analysis,\n",
    "    doctr_payload, DOCTR_SEARCH_SPACE, DOCTR_CONFIG_KEYS,\n",
    ")\n",
    "\n",
    "# Worker ports\n",
    "PORTS = [8003]\n",
    "\n",
    "# Check workers are running\n",
    "healthy = check_workers(PORTS, \"DocTR\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "tune",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create trainable and run tuning\n",
    "trainable = create_trainable(PORTS, doctr_payload)\n",
    "\n",
    "results = run_tuner(\n",
    "    trainable=trainable,\n",
    "    search_space=DOCTR_SEARCH_SPACE,\n",
    "    num_samples=64,\n",
    "    num_workers=len(healthy),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "analysis",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analyze results\n",
    "df = analyze_results(\n",
    "    results,\n",
    "    prefix=\"raytune_doctr\",\n",
    "    config_keys=DOCTR_CONFIG_KEYS,\n",
    ")\n",
    "\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "correlation",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Correlation analysis\n",
    "correlation_analysis(df, DOCTR_CONFIG_KEYS)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
49
src/doctr_service/Dockerfile
Normal file
@@ -0,0 +1,49 @@
# Dockerfile - DocTR Tuning REST API
#
# Build:
#   docker build -t doctr-api:latest .
#
# Run:
#   docker run -p 8003:8000 -v ./dataset:/app/dataset doctr-api:latest

FROM python:3.11-slim

LABEL maintainer="Sergio Jimenez"
LABEL description="DocTR Tuning REST API"

WORKDIR /app

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV DOCTR_DET_ARCH=db_resnet50
ENV DOCTR_RECO_ARCH=crnn_vgg16_bn

# Install system dependencies for OpenCV and image processing
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    && rm -rf /var/lib/apt/lists/*

# Copy and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY doctr_tuning_rest.py .
COPY dataset_manager.py .

# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.cache/doctr"]

# Expose API port
EXPOSE 8000

# Health check (longer start period for model download)
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "doctr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
63
src/doctr_service/Dockerfile.gpu
Normal file
@@ -0,0 +1,63 @@
# Dockerfile.gpu - DocTR GPU Dockerfile for amd64/arm64
#
# Build:
#   docker build -t doctr-gpu:latest -f Dockerfile.gpu .
#
# Run:
#   docker run --gpus all -p 8003:8000 -v ./dataset:/app/dataset doctr-gpu:latest

# CUDA 13.0 for Blackwell (sm_121) and GH200/GB200 support
FROM nvidia/cuda:13.0.2-cudnn-runtime-ubuntu24.04

LABEL maintainer="Sergio Jimenez"
LABEL description="DocTR Tuning REST API - GPU/CUDA version"

WORKDIR /app

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV CUDA_VISIBLE_DEVICES=0
ENV DOCTR_DET_ARCH=db_resnet50
ENV DOCTR_RECO_ARCH=crnn_vgg16_bn

# Install Python 3.12 and system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3.12 \
    python3.12-venv \
    python3-pip \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.12 /usr/bin/python

# Copy requirements first for better caching
COPY requirements.txt .

# Install PyTorch with CUDA support first (cu128 index has amd64 + arm64 wheels)
RUN python -m pip install --no-cache-dir --break-system-packages \
    torch torchvision --index-url https://download.pytorch.org/whl/cu128

# Install remaining dependencies from requirements.txt (skip torch, already installed)
RUN grep -v "^torch" requirements.txt | python -m pip install --no-cache-dir --break-system-packages -r /dev/stdin

# Copy application code
COPY doctr_tuning_rest.py .
COPY dataset_manager.py .

# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.cache/doctr"]

# Expose API port
EXPOSE 8000

# Health check (longer start period for model download)
HEALTHCHECK --interval=30s --timeout=10s --start-period=180s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "doctr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
261
src/doctr_service/README.md
Normal file
@@ -0,0 +1,261 @@
# DocTR Tuning REST API

REST API service for DocTR (Document Text Recognition) hyperparameter evaluation. Keeps the model loaded in memory for fast repeated evaluations during hyperparameter search.

## Quick Start

### CPU Version

```bash
cd src/doctr_service

# Build
docker build -t doctr-api:cpu .

# Run
docker run -d -p 8003:8000 \
  -v $(pwd)/../dataset:/app/dataset:ro \
  -v doctr-cache:/root/.cache/doctr \
  doctr-api:cpu

# Test
curl http://localhost:8003/health
```

### GPU Version

```bash
# Build GPU image
docker build -f Dockerfile.gpu -t doctr-api:gpu .

# Run with GPU
docker run -d -p 8003:8000 --gpus all \
  -v $(pwd)/../dataset:/app/dataset:ro \
  -v doctr-cache:/root/.cache/doctr \
  doctr-api:gpu
```

## Files

| File | Description |
|------|-------------|
| `doctr_tuning_rest.py` | FastAPI REST service with 9 tunable hyperparameters |
| `dataset_manager.py` | Dataset loader (shared with other services) |
| `Dockerfile` | CPU-only image (amd64 + arm64) |
| `Dockerfile.gpu` | GPU/CUDA image (amd64 + arm64) |
| `requirements.txt` | Python dependencies |

## API Endpoints

### `GET /health`

Check if the service is ready.

```json
{
  "status": "ok",
  "model_loaded": true,
  "dataset_loaded": true,
  "dataset_size": 24,
  "det_arch": "db_resnet50",
  "reco_arch": "crnn_vgg16_bn",
  "cuda_available": true,
  "device": "cuda",
  "gpu_name": "NVIDIA GB10"
}
```

### `POST /evaluate`

Run OCR evaluation with the given hyperparameters.

**Request (9 tunable parameters):**

```json
{
  "pdf_folder": "/app/dataset",
  "assume_straight_pages": true,
  "straighten_pages": false,
  "preserve_aspect_ratio": true,
  "symmetric_pad": true,
  "disable_page_orientation": false,
  "disable_crop_orientation": false,
  "resolve_lines": true,
  "resolve_blocks": false,
  "paragraph_break": 0.035,
  "start_page": 5,
  "end_page": 10
}
```

**Response:**

```json
{
  "CER": 0.0189,
  "WER": 0.1023,
  "TIME": 52.3,
  "PAGES": 5,
  "TIME_PER_PAGE": 10.46,
  "model_reinitialized": false
}
```

**Note:** `model_reinitialized` indicates whether the model was reloaded due to changed processing flags (adds ~2-5 s of overhead).
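For convenience, a minimal Python client for this endpoint (it mirrors the request and response schemas above; the timeout value is an arbitrary choice):

```python
# Call POST /evaluate on the DocTR service (port 8003 as configured above).
import requests

payload = {
    "pdf_folder": "/app/dataset",
    "assume_straight_pages": True,
    "resolve_lines": True,
    "paragraph_break": 0.035,
    "start_page": 5,
    "end_page": 10,
}

resp = requests.post("http://localhost:8003/evaluate", json=payload, timeout=600)
resp.raise_for_status()
metrics = resp.json()
print(f"CER={metrics['CER']:.4f} WER={metrics['WER']:.4f} ({metrics['PAGES']} pages)")
```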
## Debug Output (debugset)

The `debugset` folder allows saving OCR predictions for debugging and analysis. When `save_output=true` is passed to `/evaluate`, predictions are written to `/app/debugset`.

### Enable Debug Output

```json
{
  "pdf_folder": "/app/dataset",
  "save_output": true,
  "start_page": 5,
  "end_page": 10
}
```

### Output Structure

```
debugset/
├── doc1/
│   └── doctr_text/
│       ├── page_0005.txt
│       ├── page_0006.txt
│       └── ...
├── doc2/
│   └── doctr_text/
│       └── ...
```

Each `.txt` file contains the OCR-extracted text for that page.

### Docker Mount

Add the debugset volume to your docker run command:

```bash
docker run -d -p 8003:8000 \
  -v $(pwd)/../dataset:/app/dataset:ro \
  -v $(pwd)/../debugset:/app/debugset:rw \
  -v doctr-cache:/root/.cache/doctr \
  doctr-api:cpu
```

### Use Cases

- **Compare OCR engines**: run the same pages through PaddleOCR, DocTR, and EasyOCR with `save_output=true`, then diff the results (see the sketch after this list)
- **Debug hyperparameters**: see how different settings affect text extraction
- **Ground truth comparison**: compare predictions against the expected output
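The first use case can be scripted with the standard library; a sketch using `difflib` over two engines' saved predictions (the file paths are examples, following the output structure above):

```python
# Hypothetical page-level diff between two engines' debugset predictions.
import difflib
from pathlib import Path

a = Path("debugset/doc1/doctr_text/page_0005.txt").read_text(encoding="utf-8")
b = Path("debugset/doc1/paddle_text/page_0005.txt").read_text(encoding="utf-8")

for line in difflib.unified_diff(a.splitlines(), b.splitlines(), lineterm="", n=1):
    print(line)
```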
## Hyperparameters

### Processing Flags (Require Model Reinitialization)

| Parameter | Default | Description |
|-----------|---------|-------------|
| `assume_straight_pages` | true | Skip rotation handling for straight documents |
| `straighten_pages` | false | Pre-straighten pages before detection |
| `preserve_aspect_ratio` | true | Maintain document proportions during resize |
| `symmetric_pad` | true | Use symmetric padding when preserving aspect ratio |

**Note:** Changing these flags requires model reinitialization (~2-5 s).

### Orientation Flags

| Parameter | Default | Description |
|-----------|---------|-------------|
| `disable_page_orientation` | false | Skip page orientation classification |
| `disable_crop_orientation` | false | Skip crop orientation detection |

### Output Grouping

| Parameter | Default | Range | Description |
|-----------|---------|-------|-------------|
| `resolve_lines` | true | bool | Group words into lines |
| `resolve_blocks` | false | bool | Group lines into blocks |
| `paragraph_break` | 0.035 | 0.0-1.0 | Minimum space ratio separating paragraphs |

## Model Architecture

DocTR uses a two-stage pipeline:

1. **Detection** (`det_arch`): localizes text regions
   - Default: `db_resnet50` (DBNet with ResNet-50 backbone)
   - Alternatives: `linknet_resnet18`, `db_mobilenet_v3_large`

2. **Recognition** (`reco_arch`): recognizes characters
   - Default: `crnn_vgg16_bn` (CRNN with VGG-16 backbone)
   - Alternatives: `sar_resnet31`, `master`, `vitstr_small`

The architecture is set via environment variables (fixed at startup).
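For orientation, the same two-stage pipeline can be instantiated directly; a sketch using architecture names from the tables above (the lighter detection backbone is just an example, not the service default):

```python
# Instantiate DocTR's two-stage pipeline with explicit architectures.
from doctr.models import ocr_predictor

model = ocr_predictor(
    det_arch="db_mobilenet_v3_large",  # lighter detection alternative
    reco_arch="crnn_vgg16_bn",         # default recognition architecture
    pretrained=True,
)
```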
## GPU Support

### Platform Support

| Platform | CPU | GPU |
|----------|-----|-----|
| Linux x86_64 (amd64) | ✅ | ✅ PyTorch CUDA |
| Linux ARM64 (GH200/GB200/DGX Spark) | ✅ | ✅ PyTorch CUDA (cu128 index) |
| macOS ARM64 (M1/M2) | ✅ | ❌ |

### PyTorch CUDA on ARM64

Unlike PaddlePaddle, PyTorch provides **official ARM64 CUDA wheels** on the cu128 index:

```bash
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
```

This works on both amd64 and arm64 platforms with CUDA support.

### GPU Detection

DocTR automatically uses the GPU when available:

```python
import torch
from doctr.models import ocr_predictor

print(torch.cuda.is_available())  # True if GPU available

# DocTR model moves to GPU
model = ocr_predictor(pretrained=True)
if torch.cuda.is_available():
    model = model.cuda()
```

The `/health` endpoint shows GPU status:

```json
{
  "cuda_available": true,
  "device": "cuda",
  "gpu_name": "NVIDIA GB10",
  "gpu_memory_total": "128.00 GB"
}
```

## Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `DOCTR_DET_ARCH` | `db_resnet50` | Detection architecture |
| `DOCTR_RECO_ARCH` | `crnn_vgg16_bn` | Recognition architecture |
| `CUDA_VISIBLE_DEVICES` | `0` | GPU device selection |

## CI/CD

Built images are available from the registry:

| Image | Architecture |
|-------|--------------|
| `seryus.ddns.net/unir/doctr-cpu:latest` | amd64, arm64 |
| `seryus.ddns.net/unir/doctr-gpu:latest` | amd64, arm64 |

## Sources

- [DocTR Documentation](https://mindee.github.io/doctr/)
- [DocTR GitHub](https://github.com/mindee/doctr)
- [DocTR Model Usage](https://mindee.github.io/doctr/latest/using_doctr/using_models.html)
- [PyTorch ARM64 CUDA Wheels](https://github.com/pytorch/pytorch/issues/160162)
74
src/doctr_service/dataset_manager.py
Normal file
@@ -0,0 +1,74 @@
# Imports
import os

from PIL import Image


class ImageTextDataset:
    def __init__(self, root):
        self.samples = []

        for folder in sorted(os.listdir(root)):
            sub = os.path.join(root, folder)
            img_dir = os.path.join(sub, "img")
            txt_dir = os.path.join(sub, "txt")

            if not (os.path.isdir(img_dir) and os.path.isdir(txt_dir)):
                continue

            for fname in sorted(os.listdir(img_dir)):
                if not fname.lower().endswith((".png", ".jpg", ".jpeg")):
                    continue

                img_path = os.path.join(img_dir, fname)

                # text file must have same name but .txt
                txt_name = os.path.splitext(fname)[0] + ".txt"
                txt_path = os.path.join(txt_dir, txt_name)

                if not os.path.exists(txt_path):
                    continue

                self.samples.append((img_path, txt_path))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, txt_path = self.samples[idx]

        # Load image
        image = Image.open(img_path).convert("RGB")

        # Load text
        with open(txt_path, "r", encoding="utf-8") as f:
            text = f.read()

        return image, text

    def get_output_path(self, idx, output_subdir, debugset_root="/app/debugset"):
        """Get output path for saving OCR result to debugset folder.

        Args:
            idx: Sample index
            output_subdir: Subdirectory name (e.g., 'paddle_text', 'doctr_text')
            debugset_root: Root folder for debug output (default: /app/debugset)

        Returns:
            Path like /app/debugset/doc1/{output_subdir}/page_001.txt
        """
        img_path, _ = self.samples[idx]
        # img_path: /app/dataset/doc1/img/page_001.png
        # Extract relative path: doc1/img/page_001.png
        parts = img_path.split("/dataset/", 1)
        if len(parts) == 2:
            rel_path = parts[1]  # doc1/img/page_001.png
        else:
            rel_path = os.path.basename(img_path)

        # Replace /img/ with /{output_subdir}/
        rel_parts = rel_path.rsplit("/img/", 1)
        doc_folder = rel_parts[0]  # doc1
        fname = os.path.splitext(rel_parts[1])[0] + ".txt"  # page_001.txt

        out_dir = os.path.join(debugset_root, doc_folder, output_subdir)
        os.makedirs(out_dir, exist_ok=True)
        return os.path.join(out_dir, fname)
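As a quick orientation for readers, a minimal usage sketch of this loader outside the service (the `/tmp` path is a placeholder, not a repository default):

```python
# Hypothetical standalone use of ImageTextDataset.
from dataset_manager import ImageTextDataset

ds = ImageTextDataset("/app/dataset")
print(len(ds), "image/text pairs")

image, reference = ds[0]  # PIL.Image and its ground-truth text
out_path = ds.get_output_path(0, "doctr_text", debugset_root="/tmp/debugset")
with open(out_path, "w", encoding="utf-8") as f:
    f.write("predicted text goes here")
```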
63
src/doctr_service/docker-compose.yml
Normal file
@@ -0,0 +1,63 @@
# docker-compose.yml - DocTR REST API
# Usage:
#   CPU: docker compose up ocr-cpu
#   GPU: docker compose up ocr-gpu
#
# Port: 8003

services:
  # CPU-only service
  ocr-cpu:
    image: seryus.ddns.net/unir/doctr-cpu:latest
    container_name: doctr-cpu
    ports:
      - "8003:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - doctr-cache:/root/.cache/doctr
    environment:
      - PYTHONUNBUFFERED=1
      - DOCTR_DET_ARCH=db_resnet50
      - DOCTR_RECO_ARCH=crnn_vgg16_bn
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 180s

  # GPU service (requires NVIDIA Container Toolkit)
  ocr-gpu:
    image: seryus.ddns.net/unir/doctr-gpu:latest
    container_name: doctr-gpu
    ports:
      - "8003:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - doctr-cache:/root/.cache/doctr
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - DOCTR_DET_ARCH=db_resnet50
      - DOCTR_RECO_ARCH=crnn_vgg16_bn
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 180s

volumes:
  doctr-cache:
    name: doctr-model-cache
336
src/doctr_service/doctr_tuning_rest.py
Normal file
@@ -0,0 +1,336 @@
# doctr_tuning_rest.py
# FastAPI REST service for DocTR hyperparameter evaluation
# Usage: uvicorn doctr_tuning_rest:app --host 0.0.0.0 --port 8000

import os
import re
import time
import threading
from typing import Optional
from contextlib import asynccontextmanager

import numpy as np
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field

from doctr.models import ocr_predictor
from jiwer import wer, cer
from dataset_manager import ImageTextDataset


def get_gpu_info() -> dict:
    """Get GPU status information from PyTorch."""
    info = {
        "cuda_available": torch.cuda.is_available(),
        "device": "cuda" if torch.cuda.is_available() else "cpu",
        "gpu_count": 0,
        "gpu_name": None,
        "gpu_memory_total": None,
        "gpu_memory_used": None,
    }

    if info["cuda_available"]:
        try:
            info["gpu_count"] = torch.cuda.device_count()
            if info["gpu_count"] > 0:
                info["gpu_name"] = torch.cuda.get_device_name(0)
                info["gpu_memory_total"] = f"{torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB"
                info["gpu_memory_used"] = f"{torch.cuda.memory_allocated(0) / (1024**3):.2f} GB"
        except Exception as e:
            info["gpu_error"] = str(e)

    return info


# Model configuration via environment variables
DEFAULT_DET_ARCH = os.environ.get("DOCTR_DET_ARCH", "db_resnet50")
DEFAULT_RECO_ARCH = os.environ.get("DOCTR_RECO_ARCH", "crnn_vgg16_bn")


# Global state for model and dataset
class AppState:
    model: Optional[object] = None
    dataset: Optional[ImageTextDataset] = None
    dataset_path: Optional[str] = None
    det_arch: str = DEFAULT_DET_ARCH
    reco_arch: str = DEFAULT_RECO_ARCH
    # Track current model config for cache invalidation
    current_config: Optional[dict] = None
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    lock: threading.Lock = None  # Protects OCR model from concurrent access

    def __init__(self):
        self.lock = threading.Lock()


state = AppState()


def create_model(
    assume_straight_pages: bool = True,
    straighten_pages: bool = False,
    preserve_aspect_ratio: bool = True,
    symmetric_pad: bool = True,
    disable_page_orientation: bool = False,
    disable_crop_orientation: bool = False,
) -> object:
    """Create DocTR model with given configuration."""
    model = ocr_predictor(
        det_arch=state.det_arch,
        reco_arch=state.reco_arch,
        pretrained=True,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
    )

    # Apply orientation settings if supported
    if hasattr(model, 'disable_page_orientation'):
        model.disable_page_orientation = disable_page_orientation
    if hasattr(model, 'disable_crop_orientation'):
        model.disable_crop_orientation = disable_crop_orientation

    # Move to GPU if available
    if state.device == "cuda":
        model = model.cuda()

    return model


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load DocTR model at startup with default configuration."""
    gpu_info = get_gpu_info()
    print("=" * 50)
    print("GPU STATUS")
    print("=" * 50)
    print(f" CUDA available: {gpu_info['cuda_available']}")
    print(f" Device: {gpu_info['device']}")
    if gpu_info['cuda_available']:
        print(f" GPU count: {gpu_info['gpu_count']}")
        print(f" GPU name: {gpu_info['gpu_name']}")
        print(f" GPU memory total: {gpu_info['gpu_memory_total']}")
    print("=" * 50)

    print("Loading DocTR models...")
    print(f" Detection: {state.det_arch}")
    print(f" Recognition: {state.reco_arch}")

    # Load with default config
    state.model = create_model()
    state.current_config = {
        "assume_straight_pages": True,
        "straighten_pages": False,
        "preserve_aspect_ratio": True,
        "symmetric_pad": True,
        "disable_page_orientation": False,
        "disable_crop_orientation": False,
    }

    if gpu_info['cuda_available']:
        gpu_after = get_gpu_info()
        print(f" GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")

    print("Model loaded successfully!")
    yield
    state.model = None
    state.dataset = None


app = FastAPI(
    title="DocTR Tuning API",
    description="REST API for DocTR hyperparameter evaluation",
    version="1.0.0",
    lifespan=lifespan,
)


class EvaluateRequest(BaseModel):
    """Request schema with all tunable DocTR hyperparameters."""
    pdf_folder: str = Field("/app/dataset", description="Path to dataset folder")

    # Processing flags (require model reinit)
    assume_straight_pages: bool = Field(True, description="Skip rotation handling for straight documents")
    straighten_pages: bool = Field(False, description="Pre-straighten pages before detection")
    preserve_aspect_ratio: bool = Field(True, description="Maintain document proportions during resize")
    symmetric_pad: bool = Field(True, description="Use symmetric padding when preserving aspect ratio")

    # Orientation flags
    disable_page_orientation: bool = Field(False, description="Skip page orientation classification")
    disable_crop_orientation: bool = Field(False, description="Skip crop orientation detection")

    # Output grouping
    resolve_lines: bool = Field(True, description="Group words into lines")
    resolve_blocks: bool = Field(False, description="Group lines into blocks")
    paragraph_break: float = Field(0.035, ge=0.0, le=1.0, description="Minimum space ratio separating paragraphs")

    # Page range
    start_page: int = Field(5, ge=0, description="Start page index (inclusive)")
    end_page: int = Field(10, ge=1, description="End page index (exclusive)")
    save_output: bool = Field(False, description="Save OCR predictions to debugset folder")


class EvaluateResponse(BaseModel):
    """Response schema matching CLI output."""
    CER: float
    WER: float
    TIME: float
    PAGES: int
    TIME_PER_PAGE: float
    model_reinitialized: bool = False


class HealthResponse(BaseModel):
    status: str
    model_loaded: bool
    dataset_loaded: bool
    dataset_size: Optional[int] = None
    det_arch: Optional[str] = None
    reco_arch: Optional[str] = None
    cuda_available: Optional[bool] = None
    device: Optional[str] = None
    gpu_name: Optional[str] = None
    gpu_memory_used: Optional[str] = None
    gpu_memory_total: Optional[str] = None


def doctr_result_to_text(result, resolve_lines: bool = True, resolve_blocks: bool = False) -> str:
    """
    Convert DocTR result to plain text.
    Structure: Document -> pages -> blocks -> lines -> words

    Note: line and paragraph separators are collapsed to single spaces
    before metric computation.
    """
    lines = []
    for page in result.pages:
        for block in page.blocks:
            for line in block.lines:
                line_text = " ".join([w.value for w in line.words])
                lines.append(line_text)
            if resolve_blocks:
                lines.append("")  # paragraph separator

    text = " ".join([l for l in lines if l]).strip()
    text = re.sub(r"\s+", " ", text).strip()
    return text


def evaluate_text(reference: str, prediction: str) -> dict:
    """Calculate WER and CER metrics."""
    return {"WER": wer(reference, prediction), "CER": cer(reference, prediction)}


@app.get("/health", response_model=HealthResponse)
def health_check():
    """Check if the service is ready."""
    gpu_info = get_gpu_info()
    return HealthResponse(
        status="ok" if state.model is not None else "initializing",
        model_loaded=state.model is not None,
        dataset_loaded=state.dataset is not None,
        dataset_size=len(state.dataset) if state.dataset else None,
        det_arch=state.det_arch,
        reco_arch=state.reco_arch,
        cuda_available=gpu_info.get("cuda_available"),
        device=gpu_info.get("device"),
        gpu_name=gpu_info.get("gpu_name"),
        gpu_memory_used=gpu_info.get("gpu_memory_used"),
        gpu_memory_total=gpu_info.get("gpu_memory_total"),
    )


@app.post("/evaluate", response_model=EvaluateResponse)
def evaluate(request: EvaluateRequest):
    """
    Evaluate OCR with given hyperparameters.
    Returns CER, WER, and timing metrics.
    Note: Model will be reinitialized if processing flags change.
    """
    if state.model is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    # Load or reload dataset if path changed
    if state.dataset is None or state.dataset_path != request.pdf_folder:
        if not os.path.isdir(request.pdf_folder):
            raise HTTPException(status_code=400, detail=f"Dataset folder not found: {request.pdf_folder}")
        state.dataset = ImageTextDataset(request.pdf_folder)
        state.dataset_path = request.pdf_folder

    if len(state.dataset) == 0:
        raise HTTPException(status_code=400, detail="Dataset is empty")

    # Validate page range
    start = request.start_page
    end = min(request.end_page, len(state.dataset))
    if start >= end:
        raise HTTPException(status_code=400, detail=f"Invalid page range: {start}-{end}")

    cer_list, wer_list = [], []
    time_per_page_list = []
    t0 = time.time()

    # Lock to prevent concurrent OCR access (model is not thread-safe)
    with state.lock:
        # Check if model needs to be reinitialized
        new_config = {
            "assume_straight_pages": request.assume_straight_pages,
            "straighten_pages": request.straighten_pages,
            "preserve_aspect_ratio": request.preserve_aspect_ratio,
            "symmetric_pad": request.symmetric_pad,
            "disable_page_orientation": request.disable_page_orientation,
            "disable_crop_orientation": request.disable_crop_orientation,
        }

        model_reinitialized = False
        if state.current_config != new_config:
            print("Model config changed, reinitializing...")
            state.model = create_model(**new_config)
            state.current_config = new_config
            model_reinitialized = True

        for idx in range(start, end):
            img, ref = state.dataset[idx]
            arr = np.array(img)

            tp0 = time.time()
            # DocTR expects a list of images
            result = state.model([arr])

            pred = doctr_result_to_text(
                result,
                resolve_lines=request.resolve_lines,
                resolve_blocks=request.resolve_blocks,
            )
            time_per_page_list.append(float(time.time() - tp0))

            # Save prediction to debugset if requested
            if request.save_output:
                out_path = state.dataset.get_output_path(idx, "doctr_text")
                with open(out_path, "w", encoding="utf-8") as f:
                    f.write(pred)

            m = evaluate_text(ref, pred)
            cer_list.append(m["CER"])
            wer_list.append(m["WER"])

    return EvaluateResponse(
        CER=float(np.mean(cer_list)) if cer_list else 1.0,
        WER=float(np.mean(wer_list)) if wer_list else 1.0,
        TIME=float(time.time() - t0),
        PAGES=len(cer_list),
        TIME_PER_PAGE=float(np.mean(time_per_page_list)) if time_per_page_list else 0.0,
        model_reinitialized=model_reinitialized,
    )


@app.post("/evaluate_full", response_model=EvaluateResponse)
def evaluate_full(request: EvaluateRequest):
    """Evaluate on ALL pages (ignores start_page/end_page)."""
    request.start_page = 0
    request.end_page = 9999
    return evaluate(request)


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
8
src/doctr_service/requirements.txt
Normal file
@@ -0,0 +1,8 @@
python-doctr[torch]>=0.8.0
fastapi>=0.104.0
uvicorn>=0.24.0
pydantic>=2.0.0
jiwer>=3.0.0
numpy>=1.24.0
pillow>=10.0.0
torch>=2.0.0
109
src/easyocr_raytune_rest.ipynb
Normal file
@@ -0,0 +1,109 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "header",
   "metadata": {},
   "source": [
    "# EasyOCR Hyperparameter Optimization via REST API\n",
    "\n",
    "Uses Ray Tune + Optuna to find optimal EasyOCR parameters.\n",
    "\n",
    "## Prerequisites\n",
    "\n",
    "```bash\n",
    "cd src/easyocr_service\n",
    "docker compose up ocr-cpu # or ocr-gpu\n",
    "```\n",
    "\n",
    "Service runs on port 8002."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "deps",
   "metadata": {},
   "outputs": [],
   "source": "# Pin Ray version for API stability (tune.report takes dict, not kwargs in 2.x)\n%pip install -q \"ray[tune]==2.53.0\" optuna requests pandas"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "setup",
   "metadata": {},
   "outputs": [],
   "source": [
    "from raytune_ocr import (\n",
    "    check_workers, create_trainable, run_tuner, analyze_results, correlation_analysis,\n",
    "    easyocr_payload, EASYOCR_SEARCH_SPACE, EASYOCR_CONFIG_KEYS,\n",
    ")\n",
    "\n",
    "# Worker ports\n",
    "PORTS = [8002]\n",
    "\n",
    "# Check workers are running\n",
    "healthy = check_workers(PORTS, \"EasyOCR\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "tune",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create trainable and run tuning\n",
    "trainable = create_trainable(PORTS, easyocr_payload)\n",
    "\n",
    "results = run_tuner(\n",
    "    trainable=trainable,\n",
    "    search_space=EASYOCR_SEARCH_SPACE,\n",
    "    num_samples=64,\n",
    "    num_workers=len(healthy),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "analysis",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analyze results\n",
    "df = analyze_results(\n",
    "    results,\n",
    "    prefix=\"raytune_easyocr\",\n",
    "    config_keys=EASYOCR_CONFIG_KEYS,\n",
    ")\n",
    "\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "correlation",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Correlation analysis\n",
    "correlation_analysis(df, EASYOCR_CONFIG_KEYS)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
48
src/easyocr_service/Dockerfile
Normal file
@@ -0,0 +1,48 @@
# Dockerfile - EasyOCR Tuning REST API
#
# Build:
#   docker build -t easyocr-api:latest .
#
# Run:
#   docker run -p 8002:8000 -v ./dataset:/app/dataset easyocr-api:latest

FROM python:3.11-slim

LABEL maintainer="Sergio Jimenez"
LABEL description="EasyOCR Tuning REST API"

WORKDIR /app

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV EASYOCR_LANGUAGES=es,en

# Install system dependencies for OpenCV and image processing
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    && rm -rf /var/lib/apt/lists/*

# Copy and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY easyocr_tuning_rest.py .
COPY dataset_manager.py .

# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.EasyOCR"]

# Expose API port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "easyocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
62
src/easyocr_service/Dockerfile.gpu
Normal file
@@ -0,0 +1,62 @@
# Dockerfile.gpu - EasyOCR GPU Dockerfile for amd64/arm64
#
# Build:
#   docker build -t easyocr-gpu:latest -f Dockerfile.gpu .
#
# Run:
#   docker run --gpus all -p 8002:8000 -v ./dataset:/app/dataset easyocr-gpu:latest

# CUDA 13.0 for Blackwell (sm_121) and GH200/GB200 support
FROM nvidia/cuda:13.0.2-cudnn-runtime-ubuntu24.04

LABEL maintainer="Sergio Jimenez"
LABEL description="EasyOCR Tuning REST API - GPU/CUDA version"

WORKDIR /app

# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV CUDA_VISIBLE_DEVICES=0
ENV EASYOCR_LANGUAGES=es,en

# Install Python 3.12 and system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3.12 \
    python3.12-venv \
    python3-pip \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.12 /usr/bin/python

# Copy requirements first for better caching
COPY requirements.txt .

# Install PyTorch with CUDA support first (cu128 index has amd64 + arm64 wheels)
RUN python -m pip install --no-cache-dir --break-system-packages \
    torch torchvision --index-url https://download.pytorch.org/whl/cu128

# Install remaining dependencies from requirements.txt (skip torch, already installed)
RUN grep -v "^torch" requirements.txt | python -m pip install --no-cache-dir --break-system-packages -r /dev/stdin

# Copy application code
COPY easyocr_tuning_rest.py .
COPY dataset_manager.py .

# Volume for dataset and model cache
VOLUME ["/app/dataset", "/root/.EasyOCR"]

# Expose API port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1

# Run the API server
CMD ["uvicorn", "easyocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
248
src/easyocr_service/README.md
Normal file
@@ -0,0 +1,248 @@
|
||||
# EasyOCR Tuning REST API
|
||||
|
||||
REST API service for EasyOCR hyperparameter evaluation. Keeps the model loaded in memory for fast repeated evaluations during hyperparameter search.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### CPU Version
|
||||
|
||||
```bash
|
||||
cd src/easyocr_service
|
||||
|
||||
# Build
|
||||
docker build -t easyocr-api:cpu .
|
||||
|
||||
# Run
|
||||
docker run -d -p 8002:8000 \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v easyocr-cache:/root/.EasyOCR \
|
||||
easyocr-api:cpu
|
||||
|
||||
# Test
|
||||
curl http://localhost:8002/health
|
||||
```
|
||||
|
||||
### GPU Version
|
||||
|
||||
```bash
|
||||
# Build GPU image
|
||||
docker build -f Dockerfile.gpu -t easyocr-api:gpu .
|
||||
|
||||
# Run with GPU
|
||||
docker run -d -p 8002:8000 --gpus all \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v easyocr-cache:/root/.EasyOCR \
|
||||
easyocr-api:gpu
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `easyocr_tuning_rest.py` | FastAPI REST service with 14 tunable hyperparameters |
|
||||
| `dataset_manager.py` | Dataset loader (shared with other services) |
|
||||
| `Dockerfile` | CPU-only image (amd64 + arm64) |
|
||||
| `Dockerfile.gpu` | GPU/CUDA image (amd64 + arm64) |
|
||||
| `requirements.txt` | Python dependencies |
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### `GET /health`
|
||||
|
||||
Check whether the service is ready.
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"model_loaded": true,
|
||||
"dataset_loaded": true,
|
||||
"dataset_size": 24,
|
||||
"languages": ["es", "en"],
|
||||
"cuda_available": true,
|
||||
"device": "cuda",
|
||||
"gpu_name": "NVIDIA GB10"
|
||||
}
|
||||
```
|
||||
|
||||
### `POST /evaluate`
|
||||
|
||||
Run OCR evaluation with given hyperparameters.
|
||||
|
||||
**Request (14 tunable parameters):**
|
||||
```json
|
||||
{
|
||||
"pdf_folder": "/app/dataset",
|
||||
"text_threshold": 0.7,
|
||||
"low_text": 0.4,
|
||||
"link_threshold": 0.4,
|
||||
"slope_ths": 0.1,
|
||||
"ycenter_ths": 0.5,
|
||||
"height_ths": 0.5,
|
||||
"width_ths": 0.5,
|
||||
"add_margin": 0.1,
|
||||
"contrast_ths": 0.1,
|
||||
"adjust_contrast": 0.5,
|
||||
"decoder": "greedy",
|
||||
"beamWidth": 5,
|
||||
"min_size": 10,
|
||||
"rotation_info": null,
|
||||
"start_page": 5,
|
||||
"end_page": 10
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{"CER": 0.0234, "WER": 0.1156, "TIME": 45.2, "PAGES": 5, "TIME_PER_PAGE": 9.04}
|
||||
```
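
Once the container is healthy, evaluations can be scripted from Python. A minimal client sketch, assuming the CPU container mapped to port 8002 as above (`requests` is a client-side dependency, not part of the service image):

```python
import requests

BASE = "http://localhost:8002"

# Block until the model has finished loading
assert requests.get(f"{BASE}/health", timeout=10).json()["model_loaded"]

payload = {
    "pdf_folder": "/app/dataset",
    "text_threshold": 0.7,
    "decoder": "greedy",
    "start_page": 5,
    "end_page": 10,
}
metrics = requests.post(f"{BASE}/evaluate", json=payload, timeout=600).json()
print(metrics["CER"], metrics["WER"], metrics["TIME_PER_PAGE"])
```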
|
||||
|
||||
## Debug Output (debugset)
|
||||
|
||||
The `debugset` folder allows saving OCR predictions for debugging and analysis. When `save_output=True` is passed to `/evaluate`, predictions are written to `/app/debugset`.
|
||||
|
||||
### Enable Debug Output
|
||||
|
||||
```json
|
||||
{
|
||||
"pdf_folder": "/app/dataset",
|
||||
"save_output": true,
|
||||
"start_page": 5,
|
||||
"end_page": 10
|
||||
}
|
||||
```
|
||||
|
||||
### Output Structure
|
||||
|
||||
```
|
||||
debugset/
├── doc1/
│   └── easyocr_text/
│       ├── page_0005.txt
│       ├── page_0006.txt
│       └── ...
├── doc2/
│   └── easyocr_text/
│       └── ...
|
||||
```
|
||||
|
||||
Each `.txt` file contains the OCR-extracted text for that page.
|
||||
|
||||
### Docker Mount
|
||||
|
||||
Add the debugset volume to your docker run command:
|
||||
|
||||
```bash
|
||||
docker run -d -p 8002:8000 \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v $(pwd)/../debugset:/app/debugset:rw \
|
||||
-v easyocr-cache:/root/.EasyOCR \
|
||||
easyocr-api:cpu
|
||||
```
|
||||
|
||||
### Use Cases
|
||||
|
||||
- **Compare OCR engines**: Run the same pages through PaddleOCR, DocTR, and EasyOCR with `save_output=True`, then diff the results (see the sketch after this list)
|
||||
- **Debug hyperparameters**: See how different settings affect text extraction
|
||||
- **Ground truth comparison**: Compare predictions against expected output
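
A minimal comparison sketch, assuming EasyOCR output under `easyocr_text/` (the folder the service writes) and a hypothetical `paddle_text/` folder from the PaddleOCR service; `jiwer` is already in `requirements.txt`:

```python
from pathlib import Path

from jiwer import cer

doc = Path("debugset/doc1")
for page in sorted((doc / "easyocr_text").glob("page_*.txt")):
    other = doc / "paddle_text" / page.name  # hypothetical PaddleOCR output folder
    if other.exists():
        a = page.read_text(encoding="utf-8")
        b = other.read_text(encoding="utf-8")
        print(f"{page.name}: inter-engine CER {cer(a, b):.3f}")
```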
|
||||
|
||||
## Hyperparameters
|
||||
|
||||
### Detection (CRAFT Algorithm)
|
||||
|
||||
| Parameter | Default | Range | Description |
|
||||
|-----------|---------|-------|-------------|
|
||||
| `text_threshold` | 0.7 | 0.0-1.0 | Text confidence threshold |
|
||||
| `low_text` | 0.4 | 0.0-1.0 | Text lower-bound score |
|
||||
| `link_threshold` | 0.4 | 0.0-1.0 | Link confidence threshold |
|
||||
|
||||
### Bounding Box Merging
|
||||
|
||||
| Parameter | Default | Range | Description |
|
||||
|-----------|---------|-------|-------------|
|
||||
| `slope_ths` | 0.1 | 0.0-1.0 | Max slope for merging |
|
||||
| `ycenter_ths` | 0.5 | 0.0-2.0 | Max vertical shift |
|
||||
| `height_ths` | 0.5 | 0.0-2.0 | Max height variance |
|
||||
| `width_ths` | 0.5 | 0.0-2.0 | Max horizontal distance |
|
||||
| `add_margin` | 0.1 | 0.0-1.0 | Bounding box extension |
|
||||
|
||||
### Contrast
|
||||
|
||||
| Parameter | Default | Range | Description |
|
||||
|-----------|---------|-------|-------------|
|
||||
| `contrast_ths` | 0.1 | 0.0-1.0 | Contrast threshold for dual-pass |
|
||||
| `adjust_contrast` | 0.5 | 0.0-1.0 | Target contrast level |
|
||||
|
||||
### Decoder
|
||||
|
||||
| Parameter | Default | Options | Description |
|
||||
|-----------|---------|---------|-------------|
|
||||
| `decoder` | "greedy" | greedy, beamsearch, wordbeamsearch | Decoding method |
|
||||
| `beamWidth` | 5 | 1-20 | Beam width (for beam search) |
|
||||
|
||||
### Other
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `min_size` | 10 | Minimum text box pixels |
|
||||
| `rotation_info` | null | Rotation angles to try: [90, 180, 270] |
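
These ranges map directly onto a hyperparameter search space. A minimal sketch using Ray Tune; the space below is illustrative, and the actual `EASYOCR_SEARCH_SPACE` used by the tuning notebook may use different ranges:

```python
# Illustrative search space over the parameters tabled above; not the
# authoritative EASYOCR_SEARCH_SPACE from the tuning notebook.
from ray import tune

search_space = {
    "text_threshold": tune.uniform(0.3, 0.9),
    "low_text": tune.uniform(0.2, 0.6),
    "link_threshold": tune.uniform(0.2, 0.6),
    "width_ths": tune.uniform(0.0, 2.0),
    "add_margin": tune.uniform(0.0, 0.3),
    "decoder": tune.choice(["greedy", "beamsearch"]),
    "beamWidth": tune.randint(1, 21),  # upper bound exclusive
}
```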
|
||||
|
||||
## GPU Support
|
||||
|
||||
### Platform Support
|
||||
|
||||
| Platform | CPU | GPU |
|
||||
|----------|-----|-----|
|
||||
| Linux x86_64 (amd64) | ✅ | ✅ PyTorch CUDA |
|
||||
| Linux ARM64 (GH200/GB200/DGX Spark) | ✅ | ✅ PyTorch CUDA (cu128 index) |
|
||||
| macOS ARM64 (M1/M2) | ✅ | ❌ |
|
||||
|
||||
### PyTorch CUDA on ARM64
|
||||
|
||||
Unlike PaddlePaddle, PyTorch provides **official ARM64 CUDA wheels** on the cu128 index:
|
||||
|
||||
```bash
|
||||
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
|
||||
```
|
||||
|
||||
This works on both amd64 and arm64 platforms with CUDA support.
|
||||
|
||||
### GPU Detection
|
||||
|
||||
EasyOCR automatically uses the GPU when PyTorch CUDA support is available:
|
||||
|
||||
```python
|
||||
import torch
|
||||
print(torch.cuda.is_available()) # True if GPU available
|
||||
```
|
||||
|
||||
The `/health` endpoint shows GPU status:
|
||||
```json
|
||||
{
|
||||
"cuda_available": true,
|
||||
"device": "cuda",
|
||||
"gpu_name": "NVIDIA GB10",
|
||||
"gpu_memory_total": "128.00 GB"
|
||||
}
|
||||
```
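
To force CPU inference even on a GPU host (useful for A/B timing), the reader can be constructed with `gpu=False`; the service itself passes `torch.cuda.is_available()` for this flag:

```python
import easyocr

# Force CPU inference regardless of CUDA availability
reader = easyocr.Reader(["es", "en"], gpu=False)
```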
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `EASYOCR_LANGUAGES` | `es,en` | Comma-separated language codes |
|
||||
| `CUDA_VISIBLE_DEVICES` | `0` | GPU device selection |
|
||||
|
||||
## CI/CD
|
||||
|
||||
Built images are available from the registry:
|
||||
|
||||
| Image | Architecture |
|
||||
|-------|--------------|
|
||||
| `seryus.ddns.net/unir/easyocr-cpu:latest` | amd64, arm64 |
|
||||
| `seryus.ddns.net/unir/easyocr-gpu:latest` | amd64, arm64 |
|
||||
|
||||
## Sources
|
||||
|
||||
- [EasyOCR Documentation](https://www.jaided.ai/easyocr/documentation/)
|
||||
- [EasyOCR GitHub](https://github.com/JaidedAI/EasyOCR)
|
||||
- [PyTorch ARM64 CUDA Wheels](https://github.com/pytorch/pytorch/issues/160162)
|
||||
74
src/easyocr_service/dataset_manager.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# Imports
|
||||
import os
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class ImageTextDataset:
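    """Pair page images with their ground-truth text files.

    Expected layout (as consumed by __init__ below):
        root/doc1/img/page_0001.png
        root/doc1/txt/page_0001.txt
    """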
|
||||
def __init__(self, root):
|
||||
self.samples = []
|
||||
|
||||
for folder in sorted(os.listdir(root)):
|
||||
sub = os.path.join(root, folder)
|
||||
img_dir = os.path.join(sub, "img")
|
||||
txt_dir = os.path.join(sub, "txt")
|
||||
|
||||
if not (os.path.isdir(img_dir) and os.path.isdir(txt_dir)):
|
||||
continue
|
||||
|
||||
for fname in sorted(os.listdir(img_dir)):
|
||||
if not fname.lower().endswith((".png", ".jpg", ".jpeg")):
|
||||
continue
|
||||
|
||||
img_path = os.path.join(img_dir, fname)
|
||||
|
||||
                # matching text file must have the same basename with a .txt extension
|
||||
txt_name = os.path.splitext(fname)[0] + ".txt"
|
||||
txt_path = os.path.join(txt_dir, txt_name)
|
||||
|
||||
if not os.path.exists(txt_path):
|
||||
continue
|
||||
|
||||
self.samples.append((img_path, txt_path))
|
||||
def __len__(self):
|
||||
return len(self.samples)
|
||||
|
||||
def __getitem__(self, idx):
|
||||
img_path, txt_path = self.samples[idx]
|
||||
|
||||
# Load image
|
||||
image = Image.open(img_path).convert("RGB")
|
||||
|
||||
# Load text
|
||||
with open(txt_path, "r", encoding="utf-8") as f:
|
||||
text = f.read()
|
||||
|
||||
return image, text
|
||||
|
||||
def get_output_path(self, idx, output_subdir, debugset_root="/app/debugset"):
|
||||
"""Get output path for saving OCR result to debugset folder.
|
||||
|
||||
Args:
|
||||
idx: Sample index
|
||||
output_subdir: Subdirectory name (e.g., 'paddle_text', 'doctr_text')
|
||||
debugset_root: Root folder for debug output (default: /app/debugset)
|
||||
|
||||
Returns:
|
||||
Path like /app/debugset/doc1/{output_subdir}/page_001.txt
|
||||
"""
|
||||
img_path, _ = self.samples[idx]
|
||||
# img_path: /app/dataset/doc1/img/page_001.png
|
||||
# Extract relative path: doc1/img/page_001.png
|
||||
parts = img_path.split("/dataset/", 1)
|
||||
if len(parts) == 2:
|
||||
rel_path = parts[1] # doc1/img/page_001.png
|
||||
else:
|
||||
rel_path = os.path.basename(img_path)
|
||||
|
||||
        # Replace /img/ with /{output_subdir}/; fall back gracefully when the
        # path has no img/ component (e.g. only a basename was recovered above)
        rel_parts = rel_path.rsplit("/img/", 1)
        if len(rel_parts) == 2:
            doc_folder, page_name = rel_parts  # doc1, page_001.png
        else:
            doc_folder, page_name = "", rel_parts[0]
        fname = os.path.splitext(page_name)[0] + ".txt"  # page_001.txt
|
||||
|
||||
out_dir = os.path.join(debugset_root, doc_folder, output_subdir)
|
||||
os.makedirs(out_dir, exist_ok=True)
|
||||
return os.path.join(out_dir, fname)
|
||||
61
src/easyocr_service/docker-compose.yml
Normal file
@@ -0,0 +1,61 @@
|
||||
# docker-compose.yml - EasyOCR REST API
|
||||
# Usage:
|
||||
# CPU: docker compose up ocr-cpu
|
||||
# GPU: docker compose up ocr-gpu
|
||||
#
|
||||
# Port: 8002
|
||||
|
||||
services:
|
||||
# CPU-only service
|
||||
ocr-cpu:
|
||||
image: seryus.ddns.net/unir/easyocr-cpu:latest
|
||||
container_name: easyocr-cpu
|
||||
ports:
|
||||
- "8002:8000"
|
||||
volumes:
|
||||
- ../dataset:/app/dataset:ro
|
||||
- ../debugset:/app/debugset:rw
|
||||
- easyocr-cache:/root/.EasyOCR
|
||||
environment:
|
||||
- PYTHONUNBUFFERED=1
|
||||
- EASYOCR_LANGUAGES=es,en
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 120s
|
||||
|
||||
# GPU service (requires NVIDIA Container Toolkit)
|
||||
ocr-gpu:
|
||||
image: seryus.ddns.net/unir/easyocr-gpu:latest
|
||||
container_name: easyocr-gpu
|
||||
ports:
|
||||
- "8002:8000"
|
||||
volumes:
|
||||
- ../dataset:/app/dataset:ro
|
||||
- ../debugset:/app/debugset:rw
|
||||
- easyocr-cache:/root/.EasyOCR
|
||||
environment:
|
||||
- PYTHONUNBUFFERED=1
|
||||
- CUDA_VISIBLE_DEVICES=0
|
||||
- EASYOCR_LANGUAGES=es,en
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 120s
|
||||
|
||||
volumes:
|
||||
easyocr-cache:
|
||||
name: easyocr-model-cache
|
||||
334
src/easyocr_service/easyocr_tuning_rest.py
Normal file
@@ -0,0 +1,334 @@
|
||||
# easyocr_tuning_rest.py
|
||||
# FastAPI REST service for EasyOCR hyperparameter evaluation
|
||||
# Usage: uvicorn easyocr_tuning_rest:app --host 0.0.0.0 --port 8000
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import threading
|
||||
from typing import Optional, List
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
import easyocr
|
||||
from jiwer import wer, cer
|
||||
from dataset_manager import ImageTextDataset
|
||||
|
||||
|
||||
def get_gpu_info() -> dict:
|
||||
"""Get GPU status information from PyTorch."""
|
||||
info = {
|
||||
"cuda_available": torch.cuda.is_available(),
|
||||
"device": "cuda" if torch.cuda.is_available() else "cpu",
|
||||
"gpu_count": 0,
|
||||
"gpu_name": None,
|
||||
"gpu_memory_total": None,
|
||||
"gpu_memory_used": None,
|
||||
}
|
||||
|
||||
if info["cuda_available"]:
|
||||
try:
|
||||
info["gpu_count"] = torch.cuda.device_count()
|
||||
if info["gpu_count"] > 0:
|
||||
info["gpu_name"] = torch.cuda.get_device_name(0)
|
||||
info["gpu_memory_total"] = f"{torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB"
|
||||
info["gpu_memory_used"] = f"{torch.cuda.memory_allocated(0) / (1024**3):.2f} GB"
|
||||
except Exception as e:
|
||||
info["gpu_error"] = str(e)
|
||||
|
||||
return info
|
||||
|
||||
|
||||
# Model configuration via environment variables
|
||||
DEFAULT_LANGUAGES = os.environ.get("EASYOCR_LANGUAGES", "es,en").split(",")
|
||||
|
||||
|
||||
# Global state for model and dataset
|
||||
class AppState:
|
||||
reader: Optional[easyocr.Reader] = None
|
||||
dataset: Optional[ImageTextDataset] = None
|
||||
dataset_path: Optional[str] = None
|
||||
languages: List[str] = DEFAULT_LANGUAGES
|
||||
    lock: Optional[threading.Lock] = None  # Protects the OCR model from concurrent access
|
||||
|
||||
def __init__(self):
|
||||
self.lock = threading.Lock()
|
||||
|
||||
|
||||
state = AppState()
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Load EasyOCR model at startup."""
|
||||
gpu_info = get_gpu_info()
|
||||
print("=" * 50)
|
||||
print("GPU STATUS")
|
||||
print("=" * 50)
|
||||
print(f" CUDA available: {gpu_info['cuda_available']}")
|
||||
print(f" Device: {gpu_info['device']}")
|
||||
if gpu_info['cuda_available']:
|
||||
print(f" GPU count: {gpu_info['gpu_count']}")
|
||||
print(f" GPU name: {gpu_info['gpu_name']}")
|
||||
print(f" GPU memory total: {gpu_info['gpu_memory_total']}")
|
||||
print("=" * 50)
|
||||
|
||||
    print("Loading EasyOCR models...")
|
||||
print(f" Languages: {state.languages}")
|
||||
state.reader = easyocr.Reader(
|
||||
state.languages,
|
||||
gpu=gpu_info['cuda_available'],
|
||||
)
|
||||
|
||||
if gpu_info['cuda_available']:
|
||||
gpu_after = get_gpu_info()
|
||||
print(f" GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")
|
||||
|
||||
print("Model loaded successfully!")
|
||||
yield
|
||||
state.reader = None
|
||||
state.dataset = None
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="EasyOCR Tuning API",
|
||||
description="REST API for EasyOCR hyperparameter evaluation",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
|
||||
class EvaluateRequest(BaseModel):
|
||||
"""Request schema with all tunable EasyOCR hyperparameters."""
|
||||
pdf_folder: str = Field("/app/dataset", description="Path to dataset folder")
|
||||
|
||||
# Detection thresholds (CRAFT algorithm)
|
||||
text_threshold: float = Field(0.7, ge=0.0, le=1.0, description="Text confidence threshold")
|
||||
low_text: float = Field(0.4, ge=0.0, le=1.0, description="Text lower-bound score")
|
||||
link_threshold: float = Field(0.4, ge=0.0, le=1.0, description="Link confidence threshold")
|
||||
|
||||
# Bounding box merging
|
||||
slope_ths: float = Field(0.1, ge=0.0, le=1.0, description="Maximum slope for box merging")
|
||||
ycenter_ths: float = Field(0.5, ge=0.0, le=2.0, description="Maximum vertical shift for merging")
|
||||
height_ths: float = Field(0.5, ge=0.0, le=2.0, description="Maximum height variance for merging")
|
||||
width_ths: float = Field(0.5, ge=0.0, le=2.0, description="Maximum horizontal distance for merging")
|
||||
add_margin: float = Field(0.1, ge=0.0, le=1.0, description="Bounding box extension margin")
|
||||
|
||||
# Contrast handling
|
||||
contrast_ths: float = Field(0.1, ge=0.0, le=1.0, description="Contrast threshold for dual-pass")
|
||||
adjust_contrast: float = Field(0.5, ge=0.0, le=1.0, description="Target contrast adjustment level")
|
||||
|
||||
# Decoder options
|
||||
decoder: str = Field("greedy", description="Decoder type: greedy, beamsearch, wordbeamsearch")
|
||||
beamWidth: int = Field(5, ge=1, le=20, description="Beam width for beam search decoders")
|
||||
|
||||
# Other
|
||||
min_size: int = Field(10, ge=1, description="Minimum text box size in pixels")
|
||||
rotation_info: Optional[List[int]] = Field(None, description="Rotation angles to try: [90, 180, 270]")
|
||||
|
||||
# Page range
|
||||
start_page: int = Field(5, ge=0, description="Start page index (inclusive)")
|
||||
end_page: int = Field(10, ge=1, description="End page index (exclusive)")
|
||||
save_output: bool = Field(False, description="Save OCR predictions to debugset folder")
|
||||
|
||||
|
||||
class EvaluateResponse(BaseModel):
|
||||
"""Response schema matching CLI output."""
|
||||
CER: float
|
||||
WER: float
|
||||
TIME: float
|
||||
PAGES: int
|
||||
TIME_PER_PAGE: float
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
|
||||
status: str
|
||||
model_loaded: bool
|
||||
dataset_loaded: bool
|
||||
dataset_size: Optional[int] = None
|
||||
languages: Optional[List[str]] = None
|
||||
cuda_available: Optional[bool] = None
|
||||
device: Optional[str] = None
|
||||
gpu_name: Optional[str] = None
|
||||
gpu_memory_used: Optional[str] = None
|
||||
gpu_memory_total: Optional[str] = None
|
||||
|
||||
|
||||
def assemble_easyocr_result(result: list) -> str:
|
||||
"""
|
||||
Assemble EasyOCR result into text.
|
||||
EasyOCR returns: [(bbox, text, confidence), ...]
|
||||
"""
|
||||
if not result:
|
||||
return ""
|
||||
|
||||
# Sort by vertical position (y), then horizontal (x)
|
||||
# bbox format: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
|
||||
def get_y_center(item):
|
||||
bbox = item[0]
|
||||
return (bbox[0][1] + bbox[2][1]) / 2
|
||||
|
||||
def get_x(item):
|
||||
return item[0][0][0]
|
||||
|
||||
# Group by lines based on y-center
|
||||
sorted_items = sorted(result, key=lambda x: (get_y_center(x), get_x(x)))
|
||||
|
||||
if not sorted_items:
|
||||
return ""
|
||||
|
||||
# Adaptive line tolerance
|
||||
heights = []
|
||||
for item in sorted_items:
|
||||
bbox = item[0]
|
||||
h = abs(bbox[2][1] - bbox[0][1])
|
||||
heights.append(h)
|
||||
|
||||
median_h = float(np.median(heights)) if heights else 20.0
|
||||
line_tol = max(8.0, 0.6 * median_h)
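    # Chain items into lines: an item joins the current line while its
    # y-center stays within line_tol of the previous item's y-center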
|
||||
|
||||
lines, cur_line, last_y = [], [], None
|
||||
for item in sorted_items:
|
||||
y_center = get_y_center(item)
|
||||
text = item[1]
|
||||
|
||||
if last_y is None or abs(y_center - last_y) <= line_tol:
|
||||
cur_line.append((get_x(item), text))
|
||||
else:
|
||||
cur_line.sort(key=lambda t: t[0])
|
||||
lines.append(" ".join(t[1] for t in cur_line))
|
||||
cur_line = [(get_x(item), text)]
|
||||
last_y = y_center
|
||||
|
||||
if cur_line:
|
||||
cur_line.sort(key=lambda t: t[0])
|
||||
lines.append(" ".join(t[1] for t in cur_line))
|
||||
|
||||
text = " ".join(lines)
|
||||
text = re.sub(r"\s+", " ", text).strip()
|
||||
return text
|
||||
|
||||
|
||||
def evaluate_text(reference: str, prediction: str) -> dict:
|
||||
"""Calculate WER and CER metrics."""
|
||||
return {"WER": wer(reference, prediction), "CER": cer(reference, prediction)}
|
||||
|
||||
|
||||
@app.get("/health", response_model=HealthResponse)
|
||||
def health_check():
|
||||
"""Check if the service is ready."""
|
||||
gpu_info = get_gpu_info()
|
||||
return HealthResponse(
|
||||
status="ok" if state.reader is not None else "initializing",
|
||||
model_loaded=state.reader is not None,
|
||||
dataset_loaded=state.dataset is not None,
|
||||
dataset_size=len(state.dataset) if state.dataset else None,
|
||||
languages=state.languages,
|
||||
cuda_available=gpu_info.get("cuda_available"),
|
||||
device=gpu_info.get("device"),
|
||||
gpu_name=gpu_info.get("gpu_name"),
|
||||
gpu_memory_used=gpu_info.get("gpu_memory_used"),
|
||||
gpu_memory_total=gpu_info.get("gpu_memory_total"),
|
||||
)
|
||||
|
||||
|
||||
@app.post("/evaluate", response_model=EvaluateResponse)
|
||||
def evaluate(request: EvaluateRequest):
|
||||
"""
|
||||
Evaluate OCR with given hyperparameters.
|
||||
Returns CER, WER, and timing metrics.
|
||||
"""
|
||||
if state.reader is None:
|
||||
raise HTTPException(status_code=503, detail="Model not loaded yet")
|
||||
|
||||
# Validate decoder
|
||||
if request.decoder not in ["greedy", "beamsearch", "wordbeamsearch"]:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid decoder: {request.decoder}")
|
||||
|
||||
# Load or reload dataset if path changed
|
||||
if state.dataset is None or state.dataset_path != request.pdf_folder:
|
||||
if not os.path.isdir(request.pdf_folder):
|
||||
raise HTTPException(status_code=400, detail=f"Dataset folder not found: {request.pdf_folder}")
|
||||
state.dataset = ImageTextDataset(request.pdf_folder)
|
||||
state.dataset_path = request.pdf_folder
|
||||
|
||||
if len(state.dataset) == 0:
|
||||
raise HTTPException(status_code=400, detail="Dataset is empty")
|
||||
|
||||
# Validate page range
|
||||
start = request.start_page
|
||||
end = min(request.end_page, len(state.dataset))
|
||||
if start >= end:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid page range: {start}-{end}")
|
||||
|
||||
cer_list, wer_list = [], []
|
||||
time_per_page_list = []
|
||||
t0 = time.time()
|
||||
|
||||
# Lock to prevent concurrent OCR access (model is not thread-safe)
|
||||
with state.lock:
|
||||
for idx in range(start, end):
|
||||
img, ref = state.dataset[idx]
|
||||
arr = np.array(img)
|
||||
|
||||
tp0 = time.time()
|
||||
result = state.reader.readtext(
|
||||
arr,
|
||||
# Detection thresholds
|
||||
text_threshold=request.text_threshold,
|
||||
low_text=request.low_text,
|
||||
link_threshold=request.link_threshold,
|
||||
# Bounding box merging
|
||||
slope_ths=request.slope_ths,
|
||||
ycenter_ths=request.ycenter_ths,
|
||||
height_ths=request.height_ths,
|
||||
width_ths=request.width_ths,
|
||||
add_margin=request.add_margin,
|
||||
# Contrast
|
||||
contrast_ths=request.contrast_ths,
|
||||
adjust_contrast=request.adjust_contrast,
|
||||
# Decoder
|
||||
decoder=request.decoder,
|
||||
beamWidth=request.beamWidth,
|
||||
# Other
|
||||
min_size=request.min_size,
|
||||
rotation_info=request.rotation_info,
|
||||
)
|
||||
|
||||
pred = assemble_easyocr_result(result)
|
||||
time_per_page_list.append(float(time.time() - tp0))
|
||||
|
||||
# Save prediction to debugset if requested
|
||||
if request.save_output:
|
||||
out_path = state.dataset.get_output_path(idx, "easyocr_text")
|
||||
with open(out_path, "w", encoding="utf-8") as f:
|
||||
f.write(pred)
|
||||
|
||||
m = evaluate_text(ref, pred)
|
||||
cer_list.append(m["CER"])
|
||||
wer_list.append(m["WER"])
|
||||
|
||||
return EvaluateResponse(
|
||||
CER=float(np.mean(cer_list)) if cer_list else 1.0,
|
||||
WER=float(np.mean(wer_list)) if wer_list else 1.0,
|
||||
TIME=float(time.time() - t0),
|
||||
PAGES=len(cer_list),
|
||||
TIME_PER_PAGE=float(np.mean(time_per_page_list)) if time_per_page_list else 0.0,
|
||||
)
|
||||
|
||||
|
||||
@app.post("/evaluate_full", response_model=EvaluateResponse)
|
||||
def evaluate_full(request: EvaluateRequest):
|
||||
"""Evaluate on ALL pages (ignores start_page/end_page)."""
|
||||
request.start_page = 0
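    # 9999 is effectively "all pages": evaluate() clamps end_page to the dataset size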
|
||||
request.end_page = 9999
|
||||
return evaluate(request)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
8
src/easyocr_service/requirements.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
easyocr>=1.7.0
|
||||
fastapi>=0.104.0
|
||||
uvicorn>=0.24.0
|
||||
pydantic>=2.0.0
|
||||
jiwer>=3.0.0
|
||||
numpy>=1.24.0
|
||||
pillow>=10.0.0
|
||||
torch>=2.0.0
|
||||
213
src/paddle_ocr/Dockerfile.build-paddle
Normal file
@@ -0,0 +1,213 @@
|
||||
# syntax=docker/dockerfile:1.4
# Dockerfile.build-paddle - Build PaddlePaddle GPU wheel for ARM64
|
||||
#
|
||||
# This Dockerfile compiles PaddlePaddle from source with CUDA support for ARM64.
|
||||
# The resulting wheel can be used in Dockerfile.gpu for ARM64 GPU acceleration.
|
||||
#
|
||||
# Build time: ~1-2 hours with caching, 2-4 hours first build
|
||||
# Output: /output/paddlepaddle_gpu-*.whl
|
||||
#
|
||||
# Usage:
|
||||
# CUDA_ARCH=90 docker compose --profile build run --rm build-paddle
|
||||
#
|
||||
# Features:
|
||||
# - ccache for compiler caching (survives rebuilds)
|
||||
# - Split build stages for better layer caching
|
||||
# - ARM64 -m64 patch applied automatically
|
||||
|
||||
|
||||
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
|
||||
|
||||
LABEL maintainer="Sergio Jimenez"
|
||||
LABEL description="PaddlePaddle GPU wheel builder for ARM64"
|
||||
|
||||
# Build arguments
|
||||
ARG PADDLE_VERSION=v3.0.0
|
||||
ARG PYTHON_VERSION=3.11
|
||||
ARG CUDA_ARCH=90
|
||||
|
||||
# Environment setup
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV CCACHE_DIR=/ccache
|
||||
ENV PATH="/usr/lib/ccache:${PATH}"
|
||||
|
||||
# Install build dependencies + ccache
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python${PYTHON_VERSION} \
|
||||
python${PYTHON_VERSION}-dev \
|
||||
python${PYTHON_VERSION}-venv \
|
||||
python3-pip \
|
||||
build-essential \
|
||||
cmake \
|
||||
ninja-build \
|
||||
git \
|
||||
wget \
|
||||
curl \
|
||||
pkg-config \
|
||||
ccache \
|
||||
libssl-dev \
|
||||
libffi-dev \
|
||||
zlib1g-dev \
|
||||
libbz2-dev \
|
||||
libreadline-dev \
|
||||
libsqlite3-dev \
|
||||
liblzma-dev \
|
||||
libncurses5-dev \
|
||||
libncursesw5-dev \
|
||||
libgflags-dev \
|
||||
libgoogle-glog-dev \
|
||||
libprotobuf-dev \
|
||||
protobuf-compiler \
|
||||
patchelf \
|
||||
libopenblas-dev \
|
||||
liblapack-dev \
|
||||
swig \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
|
||||
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
|
||||
|
||||
# Setup ccache symlinks for CUDA
|
||||
RUN mkdir -p /usr/lib/ccache && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/nvcc && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/gcc && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/g++ && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/cc && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/c++
|
||||
|
||||
# Upgrade pip and install Python build dependencies
|
||||
RUN python -m pip install --upgrade pip setuptools wheel && \
|
||||
python -m pip install numpy protobuf pyyaml requests packaging astor decorator paddle-bfloat opt-einsum
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
# Clone PaddlePaddle repository
|
||||
RUN git clone --depth 1 --branch ${PADDLE_VERSION} https://github.com/PaddlePaddle/Paddle.git
|
||||
|
||||
WORKDIR /build/Paddle
|
||||
|
||||
# Patch for ARM64: Remove -m64 flag (x86_64 specific, causes build failure on aarch64)
|
||||
RUN sed -i 's/-m64//g' cmake/flags.cmake && \
|
||||
sed -i 's/-m64//g' CMakeLists.txt 2>/dev/null || true && \
|
||||
find . -name "*.cmake" -exec sed -i 's/-m64//g' {} \; 2>/dev/null || true && \
|
||||
echo "Patched -m64 flag for ARM64 compatibility"
|
||||
|
||||
# Patch for ARM64: Install sse2neon to translate x86 SSE intrinsics to ARM NEON
|
||||
# sse2neon provides drop-in replacements for x86 SIMD headers
|
||||
RUN git clone --depth 1 https://github.com/DLTcollab/sse2neon.git /tmp/sse2neon && \
|
||||
mkdir -p /usr/local/include/sse2neon && \
|
||||
cp /tmp/sse2neon/sse2neon.h /usr/local/include/sse2neon/ && \
|
||||
rm -rf /tmp/sse2neon && \
|
||||
echo "Installed sse2neon for x86->ARM NEON translation"
|
||||
|
||||
# Create wrapper headers that use sse2neon for ARM64
|
||||
RUN mkdir -p /usr/local/include/x86_stubs && \
    for h in immintrin xmmintrin emmintrin pmmintrin smmintrin; do \
        echo "#ifndef __x86_64__" > /usr/local/include/x86_stubs/${h}.h && \
        echo "#include <sse2neon/sse2neon.h>" >> /usr/local/include/x86_stubs/${h}.h && \
        echo "#else" >> /usr/local/include/x86_stubs/${h}.h && \
        echo "#include_next <${h}.h>" >> /usr/local/include/x86_stubs/${h}.h && \
        echo "#endif" >> /usr/local/include/x86_stubs/${h}.h; \
    done && \
    echo "Created x86 intrinsic wrapper headers for ARM64 using sse2neon"
|
||||
|
||||
# Install additional Python requirements for building
|
||||
RUN pip install -r python/requirements.txt || true
|
||||
|
||||
# Create build directory
|
||||
RUN mkdir -p build
|
||||
WORKDIR /build/Paddle/build
|
||||
|
||||
# Configure CMake for ARM64 + CUDA build
|
||||
# Note: -Wno-class-memaccess fixes Eigen NEON warning on ARM64
|
||||
RUN echo "Building for CUDA architecture: sm_${CUDA_ARCH}" && \
|
||||
cmake .. \
|
||||
-GNinja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DPY_VERSION=${PYTHON_VERSION} \
|
||||
-DWITH_GPU=ON \
|
||||
-DWITH_TESTING=OFF \
|
||||
-DWITH_DISTRIBUTE=OFF \
|
||||
-DWITH_NCCL=OFF \
|
||||
-DWITH_MKL=OFF \
|
||||
-DWITH_MKLDNN=OFF \
|
||||
-DON_INFER=OFF \
|
||||
-DWITH_PYTHON=ON \
|
||||
-DWITH_AVX=OFF \
|
||||
-DCUDA_ARCH_NAME=Manual \
|
||||
-DCUDA_ARCH_BIN="${CUDA_ARCH}" \
|
||||
-DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_CXX_FLAGS="-Wno-class-memaccess -Wno-error=class-memaccess -I/usr/local/include/x86_stubs" \
|
||||
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
|
||||
|
||||
# Build external dependencies first (cacheable layer)
|
||||
RUN --mount=type=cache,target=/ccache \
|
||||
ninja extern_gflags extern_glog extern_protobuf extern_zlib extern_eigen3
|
||||
|
||||
# Build flashattn (heaviest dependency, separate layer for caching)
|
||||
RUN --mount=type=cache,target=/ccache \
|
||||
ninja extern_flashattn
|
||||
|
||||
# Build remaining external dependencies
|
||||
RUN --mount=type=cache,target=/ccache \
|
||||
ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo extern_xbyak
|
||||
|
||||
# Build main PaddlePaddle (with ccache, fallback to fewer jobs if OOM)
|
||||
RUN --mount=type=cache,target=/ccache \
|
||||
ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4
|
||||
|
||||
# Build the Python wheel
|
||||
RUN ninja paddle_python || true
|
||||
|
||||
# Create output directory
|
||||
RUN mkdir -p /output
|
||||
|
||||
# Build wheel package - try multiple methods since PaddlePaddle build structure varies
|
||||
WORKDIR /build/Paddle
|
||||
RUN echo "=== Looking for wheel build method ===" && \
    { ls -la python/ 2>/dev/null || true; } && \
    { ls -la build/python/ 2>/dev/null || true; } && \
|
||||
if [ -f build/python/setup.py ]; then \
|
||||
echo "Using build/python/setup.py" && \
|
||||
cd build/python && python setup.py bdist_wheel; \
|
||||
elif [ -f python/setup.py ]; then \
|
||||
echo "Using python/setup.py" && \
|
||||
cd python && python setup.py bdist_wheel; \
|
||||
else \
|
||||
echo "Looking for existing wheel..." && \
|
||||
find /build -name "paddlepaddle*.whl" -type f 2>/dev/null; \
|
||||
fi
|
||||
|
||||
# Copy wheel to output
|
||||
RUN find /build -name "paddlepaddle*.whl" -type f -exec cp {} /output/ \; && \
|
||||
ls -la /output/ && \
|
||||
if [ ! "$(ls -A /output/*.whl 2>/dev/null)" ]; then \
|
||||
echo "ERROR: No wheel found!" && exit 1; \
|
||||
fi
|
||||
|
||||
# List what was built
|
||||
RUN ls -la /output/ && \
|
||||
echo "=== Build complete ===" && \
|
||||
find /build -name "*.whl" -type f 2>/dev/null
|
||||
|
||||
# Default command: copy wheel to mounted volume
|
||||
CMD ["sh", "-c", "if cp /output/*.whl /wheels/ 2>/dev/null; then echo 'Wheel copied to /wheels/'; else echo 'No wheel found in /output, checking other locations...'; find /build -name '*.whl' -exec cp {} /wheels/ \\; ; fi; ls -la /wheels/"]
|
||||
149
src/paddle_ocr/Dockerfile.build-paddle-cpu
Normal file
@@ -0,0 +1,149 @@
|
||||
# syntax=docker/dockerfile:1.4
# Dockerfile.build-paddle-cpu - Build PaddlePaddle CPU wheel for ARM64
|
||||
#
|
||||
# Required because PyPI wheels don't work on ARM64 (x86 SSE instructions).
|
||||
#
|
||||
# Build time: ~1-2 hours
|
||||
# Output: /output/paddlepaddle-*.whl
|
||||
#
|
||||
# Usage:
|
||||
# docker build -t paddle-builder:cpu-arm64 -f Dockerfile.build-paddle-cpu .
|
||||
# docker run --rm -v ./wheels:/wheels paddle-builder:cpu-arm64
|
||||
|
||||
|
||||
FROM ubuntu:22.04
|
||||
|
||||
LABEL maintainer="Sergio Jimenez"
|
||||
LABEL description="PaddlePaddle CPU wheel builder for ARM64"
|
||||
|
||||
ARG PADDLE_VERSION=v3.0.0
|
||||
ARG PYTHON_VERSION=3.11
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV CCACHE_DIR=/ccache
|
||||
ENV PATH="/usr/lib/ccache:${PATH}"
|
||||
|
||||
# Install build dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python${PYTHON_VERSION} \
|
||||
python${PYTHON_VERSION}-dev \
|
||||
python${PYTHON_VERSION}-venv \
|
||||
python3-pip \
|
||||
build-essential \
|
||||
cmake \
|
||||
ninja-build \
|
||||
git \
|
||||
wget \
|
||||
curl \
|
||||
pkg-config \
|
||||
ccache \
|
||||
libssl-dev \
|
||||
libffi-dev \
|
||||
zlib1g-dev \
|
||||
libbz2-dev \
|
||||
libreadline-dev \
|
||||
libsqlite3-dev \
|
||||
liblzma-dev \
|
||||
libncurses5-dev \
|
||||
libncursesw5-dev \
|
||||
libgflags-dev \
|
||||
libgoogle-glog-dev \
|
||||
libprotobuf-dev \
|
||||
protobuf-compiler \
|
||||
patchelf \
|
||||
libopenblas-dev \
|
||||
liblapack-dev \
|
||||
swig \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
|
||||
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
|
||||
|
||||
# Setup ccache
|
||||
RUN mkdir -p /usr/lib/ccache && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/gcc && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/g++ && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/cc && \
|
||||
ln -sf /usr/bin/ccache /usr/lib/ccache/c++
|
||||
|
||||
RUN python -m pip install --upgrade pip setuptools wheel && \
|
||||
python -m pip install numpy protobuf pyyaml requests packaging astor decorator paddle-bfloat opt-einsum
|
||||
|
||||
WORKDIR /build
|
||||
RUN git clone --depth 1 --branch ${PADDLE_VERSION} https://github.com/PaddlePaddle/Paddle.git
|
||||
|
||||
WORKDIR /build/Paddle
|
||||
|
||||
# Patch -m64 flag (x86_64 specific)
|
||||
RUN sed -i 's/-m64//g' cmake/flags.cmake && \
|
||||
sed -i 's/-m64//g' CMakeLists.txt 2>/dev/null || true && \
|
||||
find . -name "*.cmake" -exec sed -i 's/-m64//g' {} \; 2>/dev/null || true
|
||||
|
||||
# Install sse2neon for x86 SSE -> ARM NEON translation
|
||||
RUN git clone --depth 1 https://github.com/DLTcollab/sse2neon.git /tmp/sse2neon && \
|
||||
mkdir -p /usr/local/include/sse2neon && \
|
||||
cp /tmp/sse2neon/sse2neon.h /usr/local/include/sse2neon/ && \
|
||||
rm -rf /tmp/sse2neon
|
||||
|
||||
# Create x86 intrinsic wrapper headers
|
||||
RUN mkdir -p /usr/local/include/x86_stubs && \
|
||||
for h in immintrin xmmintrin emmintrin pmmintrin smmintrin; do \
|
||||
echo "#ifndef __x86_64__" > /usr/local/include/x86_stubs/${h}.h && \
|
||||
echo "#include <sse2neon/sse2neon.h>" >> /usr/local/include/x86_stubs/${h}.h && \
|
||||
echo "#else" >> /usr/local/include/x86_stubs/${h}.h && \
|
||||
echo "#include_next <${h}.h>" >> /usr/local/include/x86_stubs/${h}.h && \
|
||||
echo "#endif" >> /usr/local/include/x86_stubs/${h}.h; \
|
||||
done
|
||||
|
||||
RUN pip install -r python/requirements.txt || true
|
||||
|
||||
RUN mkdir -p build
|
||||
WORKDIR /build/Paddle/build
|
||||
|
||||
# Configure for CPU-only ARM64 build
|
||||
# WITH_ARM=ON enables ARM NEON optimizations and disables x86-specific code (XBYAK, MKL)
|
||||
RUN cmake .. \
|
||||
-GNinja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DPY_VERSION=${PYTHON_VERSION} \
|
||||
-DWITH_GPU=OFF \
|
||||
-DWITH_ARM=ON \
|
||||
-DWITH_TESTING=OFF \
|
||||
-DWITH_DISTRIBUTE=OFF \
|
||||
-DWITH_NCCL=OFF \
|
||||
-DWITH_MKL=OFF \
|
||||
-DWITH_MKLDNN=OFF \
|
||||
-DWITH_XBYAK=OFF \
|
||||
-DON_INFER=OFF \
|
||||
-DWITH_PYTHON=ON \
|
||||
-DWITH_AVX=OFF \
|
||||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_CXX_FLAGS="-Wno-class-memaccess -Wno-error=class-memaccess -I/usr/local/include/x86_stubs"
|
||||
|
||||
# Build external dependencies
|
||||
RUN --mount=type=cache,target=/ccache \
|
||||
ninja extern_gflags extern_glog extern_protobuf extern_zlib extern_eigen3
|
||||
|
||||
# Note: extern_xbyak excluded - it's x86-only and disabled with WITH_ARM=ON
|
||||
RUN --mount=type=cache,target=/ccache \
|
||||
ninja extern_openblas extern_pybind extern_utf8proc extern_xxhash extern_yaml extern_cryptopp extern_warpctc extern_warprnnt extern_gloo
|
||||
|
||||
# Build PaddlePaddle
|
||||
RUN --mount=type=cache,target=/ccache \
|
||||
ninja -j$(nproc) || ninja -j$(($(nproc)/2)) || ninja -j4
|
||||
|
||||
RUN ninja paddle_python || true
|
||||
|
||||
RUN mkdir -p /output
|
||||
|
||||
WORKDIR /build/Paddle
|
||||
RUN if [ -f build/python/setup.py ]; then \
|
||||
cd build/python && python setup.py bdist_wheel; \
|
||||
elif [ -f python/setup.py ]; then \
|
||||
cd python && python setup.py bdist_wheel; \
|
||||
fi
|
||||
|
||||
RUN find /build -name "paddlepaddle*.whl" -type f -exec cp {} /output/ \; && \
|
||||
ls -la /output/
|
||||
|
||||
CMD ["sh", "-c", "cp /output/*.whl /wheels/ && ls -la /wheels/"]
|
||||
81
src/paddle_ocr/Dockerfile.cpu
Normal file
@@ -0,0 +1,81 @@
|
||||
# Dockerfile.cpu - Multi-stage CPU Dockerfile
|
||||
#
|
||||
# Build base only (push to registry, rarely changes):
|
||||
# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-cpu-base:latest -f Dockerfile.cpu .
|
||||
#
|
||||
# Build deploy (uses base, fast - code only):
|
||||
# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-cpu:latest -f Dockerfile.cpu .
|
||||
#
|
||||
# Or build all at once:
|
||||
# docker build -t paddle-ocr-api:cpu -f Dockerfile.cpu .
|
||||
|
||||
# =============================================================================
|
||||
# STAGE 1: BASE - All dependencies (rarely changes)
|
||||
# =============================================================================
|
||||
FROM python:3.11-slim AS base
|
||||
|
||||
LABEL maintainer="Sergio Jimenez"
|
||||
LABEL description="PaddleOCR Base Image - CPU dependencies"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies for OpenCV and PaddleOCR
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libgl1 \
|
||||
libglib2.0-0 \
|
||||
libsm6 \
|
||||
libxext6 \
|
||||
libxrender1 \
|
||||
libgomp1 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy local wheels directory (may contain ARM64 wheel from build-paddle-cpu)
|
||||
COPY wheels/ /tmp/wheels/
|
||||
|
||||
# Install paddlepaddle: prefer local wheel (ARM64), fallback to PyPI (x86_64)
|
||||
RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
|
||||
echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
|
||||
pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
|
||||
else \
|
||||
echo "=== Installing PaddlePaddle from PyPI (x86_64) ===" && \
|
||||
pip install --no-cache-dir paddlepaddle==3.0.0; \
|
||||
fi && \
|
||||
rm -rf /tmp/wheels
|
||||
|
||||
# Install remaining Python dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# =============================================================================
|
||||
# STAGE 2: DEPLOY - Application code (changes frequently)
|
||||
# =============================================================================
|
||||
FROM base AS deploy
|
||||
|
||||
LABEL description="PaddleOCR Tuning REST API - CPU version"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy application code (this is the only layer that changes frequently)
|
||||
COPY paddle_ocr_tuning_rest.py .
|
||||
COPY dataset_manager.py .
|
||||
|
||||
# Build arguments for models
|
||||
ARG DET_MODEL=PP-OCRv5_server_det
|
||||
ARG REC_MODEL=PP-OCRv5_server_rec
|
||||
|
||||
# Set as environment variables (can be overridden at runtime)
|
||||
ENV PADDLE_DET_MODEL=${DET_MODEL}
|
||||
ENV PADDLE_REC_MODEL=${REC_MODEL}
|
||||
|
||||
# Volume for dataset and model cache
|
||||
VOLUME ["/app/dataset", "/root/.paddlex"]
|
||||
|
||||
# Expose API port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
|
||||
|
||||
# Run the API server
|
||||
CMD ["uvicorn", "paddle_ocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
105
src/paddle_ocr/Dockerfile.gpu
Normal file
@@ -0,0 +1,105 @@
|
||||
# Dockerfile.gpu - Multi-stage GPU Dockerfile
|
||||
#
|
||||
# Build base only (push to registry, rarely changes):
|
||||
# docker build --target base -t seryus.ddns.net/unir/paddle-ocr-gpu-base:latest -f Dockerfile.gpu .
|
||||
#
|
||||
# Build deploy (uses base, fast - code only):
|
||||
# docker build --target deploy -t seryus.ddns.net/unir/paddle-ocr-gpu:latest -f Dockerfile.gpu .
|
||||
#
|
||||
# Or build all at once:
|
||||
# docker build -t paddle-ocr-api:gpu -f Dockerfile.gpu .
|
||||
|
||||
# =============================================================================
|
||||
# STAGE 1: BASE - All dependencies (rarely changes)
|
||||
# =============================================================================
|
||||
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
|
||||
|
||||
LABEL maintainer="Sergio Jimenez"
|
||||
LABEL description="PaddleOCR Base Image - GPU/CUDA dependencies"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Set environment variables
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
# Install Python 3.11 and system dependencies
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3.11 \
|
||||
python3.11-venv \
|
||||
python3-pip \
|
||||
libgl1 \
|
||||
libglib2.0-0 \
|
||||
libsm6 \
|
||||
libxext6 \
|
||||
libxrender1 \
|
||||
libgomp1 \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& ln -sf /usr/bin/python3.11 /usr/bin/python
|
||||
|
||||
# Fix cuDNN library path for ARM64 only (PaddlePaddle looks in /usr/local/cuda/lib64)
|
||||
# x86_64 doesn't need this - PyPI wheel handles paths correctly
|
||||
RUN if [ "$(uname -m)" = "aarch64" ]; then \
|
||||
mkdir -p /usr/local/cuda/lib64 && \
|
||||
ln -sf /usr/lib/aarch64-linux-gnu/libcudnn*.so* /usr/local/cuda/lib64/ && \
|
||||
ln -sf /usr/lib/aarch64-linux-gnu/libcudnn.so.9 /usr/local/cuda/lib64/libcudnn.so && \
|
||||
ldconfig; \
|
||||
fi
|
||||
|
||||
# Copy local wheels directory (may contain ARM64 wheel from build-paddle)
|
||||
COPY wheels/ /tmp/wheels/
|
||||
|
||||
# Install paddlepaddle: prefer local wheel (ARM64), fallback to CUDA index (x86_64)
|
||||
RUN if ls /tmp/wheels/paddlepaddle*.whl 1>/dev/null 2>&1; then \
|
||||
echo "=== Installing PaddlePaddle from local wheel (ARM64) ===" && \
|
||||
python -m pip install --no-cache-dir /tmp/wheels/paddlepaddle*.whl; \
|
||||
else \
|
||||
echo "=== Installing PaddlePaddle from CUDA index (x86_64) ===" && \
|
||||
python -m pip install --no-cache-dir paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/; \
|
||||
fi && \
|
||||
rm -rf /tmp/wheels
|
||||
|
||||
# Install remaining dependencies
|
||||
RUN python -m pip install --no-cache-dir \
|
||||
paddleocr==3.3.2 \
|
||||
jiwer \
|
||||
numpy \
|
||||
fastapi \
|
||||
"uvicorn[standard]" \
|
||||
pydantic \
|
||||
Pillow
|
||||
|
||||
# =============================================================================
|
||||
# STAGE 2: DEPLOY - Application code (changes frequently)
|
||||
# =============================================================================
|
||||
FROM base AS deploy
|
||||
|
||||
LABEL description="PaddleOCR Tuning REST API - GPU/CUDA version"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy application code (this is the only layer that changes frequently)
|
||||
COPY paddle_ocr_tuning_rest.py .
|
||||
COPY dataset_manager.py .
|
||||
|
||||
# Build arguments for models
|
||||
ARG DET_MODEL=PP-OCRv5_server_det
|
||||
ARG REC_MODEL=PP-OCRv5_server_rec
|
||||
|
||||
# Set as environment variables (can be overridden at runtime)
|
||||
ENV PADDLE_DET_MODEL=${DET_MODEL}
|
||||
ENV PADDLE_REC_MODEL=${REC_MODEL}
|
||||
|
||||
# Volume for dataset and model cache
|
||||
VOLUME ["/app/dataset", "/root/.paddlex"]
|
||||
|
||||
# Expose API port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
|
||||
|
||||
# Run the API server
|
||||
CMD ["uvicorn", "paddle_ocr_tuning_rest:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
824
src/paddle_ocr/README.md
Normal file
@@ -0,0 +1,824 @@
|
||||
# PaddleOCR Tuning REST API
|
||||
|
||||
REST API service for PaddleOCR hyperparameter evaluation. Keeps the model loaded in memory for fast repeated evaluations during hyperparameter search.
|
||||
|
||||
## Quick Start with Docker Compose
|
||||
|
||||
Docker Compose manages building and running containers. The `docker-compose.yml` defines two services:
|
||||
- `ocr-cpu` - CPU-only version (works everywhere)
|
||||
- `ocr-gpu` - GPU version (requires NVIDIA GPU + Container Toolkit)
|
||||
|
||||
### Run CPU Version
|
||||
|
||||
```bash
|
||||
cd src/paddle_ocr
|
||||
|
||||
# Build and start (first time takes ~2-3 min to build, ~30s to load model)
|
||||
docker compose up ocr-cpu
|
||||
|
||||
# Or run in background (detached)
|
||||
docker compose up -d ocr-cpu
|
||||
|
||||
# View logs
|
||||
docker compose logs -f ocr-cpu
|
||||
|
||||
# Stop
|
||||
docker compose down
|
||||
```
|
||||
|
||||
### Run GPU Version
|
||||
|
||||
```bash
|
||||
# Requires: NVIDIA GPU + nvidia-container-toolkit installed
|
||||
docker compose up ocr-gpu
|
||||
```
|
||||
|
||||
### Test the API
|
||||
|
||||
Once running, test with:
|
||||
```bash
|
||||
# Check health
|
||||
curl http://localhost:8000/health
|
||||
|
||||
# Or use the test script
|
||||
pip install requests
|
||||
python test.py --url http://localhost:8000
|
||||
```
|
||||
|
||||
### What Docker Compose Does
|
||||
|
||||
```
|
||||
docker compose up ocr-cpu
|
||||
│
|
||||
├─► Builds image from Dockerfile.cpu (if not exists)
|
||||
├─► Creates container "paddle-ocr-cpu"
|
||||
├─► Mounts ../dataset → /app/dataset (your PDF images)
|
||||
├─► Mounts paddlex-cache volume (persists downloaded models)
|
||||
├─► Exposes port 8000
|
||||
└─► Runs: uvicorn paddle_ocr_tuning_rest:app --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
| File | Description |
|
||||
|------|-------------|
|
||||
| `paddle_ocr_tuning_rest.py` | FastAPI REST service |
|
||||
| `dataset_manager.py` | Dataset loader |
|
||||
| `test.py` | API test client |
|
||||
| `Dockerfile.cpu` | CPU-only image (x86_64 + ARM64 with local wheel) |
|
||||
| `Dockerfile.gpu` | GPU/CUDA image (x86_64 + ARM64 with local wheel) |
|
||||
| `Dockerfile.build-paddle` | PaddlePaddle GPU wheel builder for ARM64 |
|
||||
| `Dockerfile.build-paddle-cpu` | PaddlePaddle CPU wheel builder for ARM64 |
|
||||
| `docker-compose.yml` | Service orchestration |
|
||||
| `docker-compose.cpu-registry.yml` | Pull CPU image from registry |
|
||||
| `docker-compose.gpu-registry.yml` | Pull GPU image from registry |
|
||||
| `wheels/` | Local PaddlePaddle wheels (created by build-paddle) |
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### `GET /health`
|
||||
Check whether the service is ready.
|
||||
|
||||
```json
|
||||
{"status": "ok", "model_loaded": true, "dataset_loaded": true, "dataset_size": 24}
|
||||
```
|
||||
|
||||
### `POST /evaluate`
|
||||
Run OCR evaluation with given hyperparameters.
|
||||
|
||||
**Request:**
|
||||
```json
|
||||
{
|
||||
"pdf_folder": "/app/dataset",
|
||||
"textline_orientation": true,
|
||||
"use_doc_orientation_classify": false,
|
||||
"use_doc_unwarping": false,
|
||||
"text_det_thresh": 0.469,
|
||||
"text_det_box_thresh": 0.5412,
|
||||
"text_det_unclip_ratio": 0.0,
|
||||
"text_rec_score_thresh": 0.635,
|
||||
"start_page": 5,
|
||||
"end_page": 10
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{"CER": 0.0115, "WER": 0.0989, "TIME": 330.5, "PAGES": 5, "TIME_PER_PAGE": 66.1}
|
||||
```
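
Because the model stays loaded between requests, parameter sweeps are cheap to script. A minimal sketch, assuming the service on port 8000 as in the compose setup (`requests` installed client-side):

```python
import requests

URL = "http://localhost:8000/evaluate"

best = None
for thresh in (0.3, 0.5, 0.7):
    payload = {
        "pdf_folder": "/app/dataset",
        "text_det_box_thresh": thresh,
        "start_page": 5,
        "end_page": 10,
    }
    m = requests.post(URL, json=payload, timeout=3600).json()
    print(f"text_det_box_thresh={thresh}: CER={m['CER']:.4f}")
    if best is None or m["CER"] < best[1]:
        best = (thresh, m["CER"])
print("best:", best)
```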
|
||||
|
||||
### `POST /evaluate_full`
|
||||
Same as `/evaluate` but runs on ALL pages (ignores start_page/end_page).
|
||||
|
||||
## Debug Output (debugset)
|
||||
|
||||
The `debugset` folder allows saving OCR predictions for debugging and analysis. When `save_output=True` is passed to `/evaluate`, predictions are written to `/app/debugset`.
|
||||
|
||||
### Enable Debug Output
|
||||
|
||||
```json
|
||||
{
|
||||
"pdf_folder": "/app/dataset",
|
||||
"save_output": true,
|
||||
"start_page": 5,
|
||||
"end_page": 10
|
||||
}
|
||||
```
|
||||
|
||||
### Output Structure
|
||||
|
||||
```
|
||||
debugset/
|
||||
├── doc1/
|
||||
│ └── paddle_ocr/
|
||||
│ ├── page_0005.txt
|
||||
│ ├── page_0006.txt
|
||||
│ └── ...
|
||||
├── doc2/
|
||||
│ └── paddle_ocr/
|
||||
│ └── ...
|
||||
```
|
||||
|
||||
Each `.txt` file contains the OCR-extracted text for that page.
|
||||
|
||||
### Docker Mount
|
||||
|
||||
The `debugset` folder is mounted read-write in docker-compose:
|
||||
|
||||
```yaml
|
||||
volumes:
|
||||
- ../debugset:/app/debugset:rw
|
||||
```
|
||||
|
||||
### Use Cases
|
||||
|
||||
- **Compare OCR engines**: Run the same pages through PaddleOCR, DocTR, and EasyOCR with `save_output=True`, then diff the results
|
||||
- **Debug hyperparameters**: See how different settings affect text extraction
|
||||
- **Ground truth comparison**: Compare predictions against expected output
|
||||
|
||||
## Building Images
|
||||
|
||||
### CPU Image (Multi-Architecture)
|
||||
|
||||
```bash
|
||||
# Local build (current architecture)
|
||||
docker build -f Dockerfile.cpu -t paddle-ocr-api:cpu .
|
||||
|
||||
# Multi-arch build with buildx (amd64 + arm64)
|
||||
docker buildx create --name multiarch --use
|
||||
docker buildx build -f Dockerfile.cpu \
|
||||
--platform linux/amd64,linux/arm64 \
|
||||
-t paddle-ocr-api:cpu \
|
||||
--push .
|
||||
```
|
||||
|
||||
### GPU Image (x86_64 + ARM64 with local wheel)
|
||||
|
||||
```bash
|
||||
docker build -f Dockerfile.gpu -t paddle-ocr-api:gpu .
|
||||
```
|
||||
|
||||
> **Note:** PaddlePaddle GPU 3.x packages are **not on PyPI**. The Dockerfile installs from PaddlePaddle's official CUDA index (`paddlepaddle.org.cn/packages/stable/cu126/`). This is handled automatically during build.
|
||||
|
||||
## Running
|
||||
|
||||
### CPU (Any machine)
|
||||
|
||||
```bash
|
||||
docker run -d -p 8000:8000 \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v paddlex-cache:/root/.paddlex \
|
||||
paddle-ocr-api:cpu
|
||||
```
|
||||
|
||||
### GPU (NVIDIA)
|
||||
|
||||
```bash
|
||||
docker run -d -p 8000:8000 --gpus all \
|
||||
-v $(pwd)/../dataset:/app/dataset:ro \
|
||||
-v paddlex-cache:/root/.paddlex \
|
||||
paddle-ocr-api:gpu
|
||||
```

## GPU Support Analysis

### Host System Reference (DGX Spark)

This section documents GPU support findings based on testing on an NVIDIA DGX Spark:

| Component | Value |
|-----------|-------|
| Architecture | ARM64 (aarch64) |
| CPU | NVIDIA Grace (ARM) |
| GPU | NVIDIA GB10 |
| CUDA Version | 13.0 |
| Driver | 580.95.05 |
| OS | Ubuntu 24.04 LTS |
| Container Toolkit | nvidia-container-toolkit 1.18.1 |
| Docker | 28.5.1 |
| Docker Compose | v2.40.0 |

### PaddlePaddle GPU Platform Support

**Note:** PaddlePaddle-GPU does NOT have prebuilt ARM64 wheels on PyPI, but ARM64 support is available via custom-built wheels.

| Platform | CPU | GPU |
|----------|-----|-----|
| Linux x86_64 | ✅ | ✅ CUDA 10.2/11.x/12.x |
| Windows x64 | ✅ | ✅ CUDA 10.2/11.x/12.x |
| macOS x64 | ✅ | ❌ |
| macOS ARM64 (M1/M2) | ✅ | ❌ |
| Linux ARM64 (Jetson/DGX) | ✅ | ⚠️ Limited - see Blackwell note |

**Source:** [PaddlePaddle-GPU PyPI](https://pypi.org/project/paddlepaddle-gpu/) - only `manylinux_x86_64` and `win_amd64` wheels are available on PyPI. ARM64 wheels must be built from source or downloaded from Gitea packages.

### ARM64 GPU Support

ARM64 GPU support is available but requires custom-built wheels:

1. **No prebuilt PyPI wheels**: `pip install paddlepaddle-gpu` fails on ARM64 - no compatible wheels exist on PyPI
2. **Custom wheels work**: This project provides Dockerfiles to build ARM64 GPU wheels from source
3. **CI/CD builds ARM64 GPU images**: Pre-built wheels are available from Gitea packages

**To use GPU on ARM64:**
- Use the pre-built images from the container registry, or
- Build the wheel locally using `Dockerfile.build-paddle` (see Option 2 below), or
- Download the wheel from Gitea packages: `wheels/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl` (see the sketch below)
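
A download-and-install sketch, assuming the Gitea package URLs listed under the CI/CD section below and a token with package read access:

```bash
# Fetch the prebuilt ARM64 GPU wheel and install it
curl -fSL -H "Authorization: token $GITEA_TOKEN" \
  -o paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl \
  "https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-gpu-arm64/3.0.0/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl"
pip install paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl
```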

### ⚠️ Known Limitation: Blackwell GPU (sm_121 / GB10)

**Status: GPU inference does NOT work on NVIDIA Blackwell GPUs (DGX Spark, GB200, etc.)**

#### Symptoms

When running PaddleOCR on Blackwell GPUs:
- CUDA loads successfully ✅
- Basic tensor operations work ✅
- **Detection model outputs constant values** ❌
- 0 text regions detected
- CER/WER = 100% (nothing recognized)

#### Root Cause

**Confirmed:** PaddlePaddle's entire CUDA backend does NOT support Blackwell (sm_121). This is NOT just an inference model problem - even basic operations fail.

**Test Results (January 2026):**

1. **PTX JIT Test** (`CUDA_FORCE_PTX_JIT=1`):
   ```
   OSError: CUDA error(209), no kernel image is available for execution on the device.
   [Hint: 'cudaErrorNoKernelImageForDevice']
   ```
   → Confirmed: No PTX code exists in PaddlePaddle binaries

2. **Dynamic Graph Mode Test** (bypassing inference models):
   ```
   Conv2D + BatchNorm output:
     Output min: 0.0000
     Output max: 0.0000
     Output mean: 0.0000
   Dynamic graph mode: BROKEN (constant output)
   ```
   → Confirmed: Even simple nn.Conv2D produces zeros on Blackwell

**Conclusion:** The issue is PaddlePaddle's compiled CUDA kernels (cubins), not just the inference models. The entire framework was compiled without sm_121 support and without PTX for JIT compilation.

**Why building PaddlePaddle from source doesn't fix it:**

1. ⚠️ Building with `CUDA_ARCH=121` requires CUDA 13.0+ (PaddlePaddle only supports up to CUDA 12.6)
2. ❌ Even if you could build it, PaddleOCR models contain pre-compiled CUDA ops
3. ❌ These model files were exported/compiled targeting sm_80/sm_90 architectures
4. ❌ The model kernels execute on GPU but produce garbage output on sm_121

**To truly fix this**, the PaddlePaddle team would need to:
1. Add sm_121 to their model export pipeline
2. Re-export all PaddleOCR models (PP-OCRv4, PP-OCRv5, etc.) with Blackwell support
3. Release new model versions

This is tracked in [GitHub Issue #17327](https://github.com/PaddlePaddle/PaddleOCR/issues/17327).

#### Debug Script

Use the included debug script to verify this issue:

```bash
docker exec paddle-ocr-gpu python /app/scripts/debug_gpu_detection.py /app/dataset/0/img/page_0001.png
```

Expected output showing the problem:
```
OUTPUT ANALYSIS:
  Shape: (1, 1, 640, 640)
  Min: 0.000010
  Max: 0.000010   # <-- Same as min = constant output
  Mean: 0.000010

DIAGNOSIS:
  PROBLEM: Output is constant - model inference is broken!
  This typically indicates GPU compute capability mismatch.
```

#### Workarounds

1. **Use CPU mode** (recommended):
   ```bash
   docker compose up ocr-cpu
   ```
   The ARM Grace CPU is fast (~2-5 sec/page). This is the reliable option.

2. **Use EasyOCR or DocTR with GPU**:
   These use PyTorch which has official ARM64 CUDA wheels (cu128 index):
   ```bash
   # EasyOCR with GPU on DGX Spark
   docker build -f ../easyocr_service/Dockerfile.gpu -t easyocr-gpu ../easyocr_service
   docker run --gpus all -p 8002:8000 easyocr-gpu
   ```

3. **Wait for PaddlePaddle Blackwell support**:
   Track [GitHub Issue #17327](https://github.com/PaddlePaddle/PaddleOCR/issues/17327) for updates.

#### GPU Support Matrix (Updated)

| GPU Architecture | Compute | CPU | GPU |
|------------------|---------|-----|-----|
| Ampere (A100, A10) | sm_80 | ✅ | ✅ |
| Hopper (H100, H200) | sm_90 | ✅ | ✅ |
| **Blackwell (GB10, GB200)** | sm_121 | ✅ | ❌ Not supported |

#### FAQ: Why Doesn't CUDA Backward Compatibility Work?

**Q: CUDA normally runs older kernels on newer GPUs. Why doesn't this work for Blackwell?**

Per the [NVIDIA Blackwell Compatibility Guide](https://docs.nvidia.com/cuda/blackwell-compatibility-guide/):

CUDA **can** run older code on newer GPUs via **PTX JIT compilation**:
1. PTX (Parallel Thread Execution) is NVIDIA's intermediate representation
2. If an app includes PTX code, the driver JIT-compiles it for the target GPU
3. This allows sm_80 code to run on sm_121

**The problem**: PaddleOCR inference models contain only pre-compiled **cubins** (SASS binary), not PTX. Without PTX, there's nothing to JIT-compile.

We tested PTX JIT (January 2026):
```bash
# Force PTX JIT compilation
docker run --gpus all -e CUDA_FORCE_PTX_JIT=1 paddle-ocr-gpu \
  python /app/scripts/debug_gpu_detection.py /app/dataset/0/img/page_0001.png

# Result:
# OSError: CUDA error(209), no kernel image is available for execution on the device.
```

**Confirmed: No PTX exists** in the PaddlePaddle binaries. The CUDA kernels are cubins-only (SASS binary), compiled for sm_80/sm_90 without PTX fallback.

**Note on sm_121**: Per NVIDIA docs, "sm_121 is the same as sm_120 since the only difference is physically integrated CPU+GPU memory of Spark." The issue is general Blackwell (sm_12x) support, not Spark-specific.
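
You can verify the PTX situation yourself with `cuobjdump` from the CUDA toolkit. A sketch, assuming CUDA tools are on PATH inside the container and that PaddlePaddle's kernels live in the `.so` files under its site-packages directory (the exact path is an assumption):

```bash
# List the fat-binary contents of PaddlePaddle's CUDA libraries.
# Cubins appear under --list-elf; an empty --list-ptx section means
# there is nothing for the driver to JIT-compile.
for so in $(find /usr/local/lib/python3.11/site-packages/paddle -name '*.so'); do
  echo "== $so"
  cuobjdump --list-elf "$so" | head -5
  cuobjdump --list-ptx "$so" | head -5
done
```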

#### FAQ: Does Dynamic Graph Mode Work on Blackwell?

**Q: Can I bypass inference models and use PaddlePaddle's dynamic graph mode?**

**No.** We tested dynamic graph mode (January 2026):
```bash
# Test script runs: paddle.nn.Conv2D + paddle.nn.BatchNorm2D
python /app/scripts/test_dynamic_mode.py

# Result:
# Input shape: [1, 3, 224, 224]
# Output shape: [1, 64, 112, 112]
# Output min: 0.0000
# Output max: 0.0000   # <-- All zeros!
# Output mean: 0.0000
# Dynamic graph mode: BROKEN (constant output)
```

**Conclusion:** The problem isn't limited to inference models. PaddlePaddle's core CUDA kernels (Conv2D, BatchNorm, etc.) produce garbage on sm_121. The entire framework lacks Blackwell support.

#### FAQ: Can I Run AMD64 Containers on ARM64 DGX Spark?

**Q: Can I just run the working x86_64 GPU image via emulation?**

**Short answer: Yes for CPU, No for GPU.**

You can run amd64 containers via QEMU emulation:
```bash
# Install QEMU
sudo apt-get install qemu binfmt-support qemu-user-static
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes

# Run amd64 container
docker run --platform linux/amd64 paddle-ocr-gpu:amd64 ...
```

**But GPU doesn't work:**
- QEMU emulates CPU instructions (x86 → ARM)
- **QEMU user-mode does NOT support GPU passthrough**
- GPU calls from emulated x86 code cannot reach the ARM64 GPU

So even though the amd64 image works on x86_64:
- ❌ No GPU access through QEMU
- ❌ CPU emulation is 10-100x slower than native ARM64
- ❌ Defeats the purpose entirely

| Approach | CPU | GPU | Speed |
|----------|-----|-----|-------|
| ARM64 native (CPU) | ✅ | N/A | Fast (~2-5s/page) |
| ARM64 native (GPU) | ✅ | ❌ Blackwell issue | - |
| AMD64 via QEMU | ⚠️ Works | ❌ No passthrough | 10-100x slower |

### Options for ARM64 Systems

#### Option 1: CPU-Only (Recommended)

Use `Dockerfile.cpu`, which works on ARM64:

```bash
# On DGX Spark
docker compose up ocr-cpu

# Or build directly
docker build -f Dockerfile.cpu -t paddle-ocr-api:cpu .
```

**Performance:** CPU inference on ARM64 Grace is surprisingly fast due to the high core count. Expect ~2-5 seconds per page.
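
A quick way to check the per-page figure on your own hardware, assuming the CPU service is up on port 8000 (the five-page window matches the API's default `start_page`/`end_page`):

```bash
# TIME_PER_PAGE in the response is the per-page average in seconds
curl -s -X POST http://localhost:8000/evaluate \
  -H "Content-Type: application/json" \
  -d '{"pdf_folder": "/app/dataset", "start_page": 5, "end_page": 10}' | python3 -m json.tool
```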

#### Option 2: Build PaddlePaddle from Source (Docker-based)

Use the included Docker builder to compile PaddlePaddle GPU for ARM64:

```bash
cd src/paddle_ocr

# Step 1: Build the PaddlePaddle GPU wheel (one-time, 2-4 hours)
docker compose --profile build run --rm build-paddle

# Verify wheel was created
ls -la wheels/paddlepaddle*.whl

# Step 2: Build the GPU image (uses local wheel)
docker compose build ocr-gpu

# Step 3: Run with GPU
docker compose up ocr-gpu

# Verify GPU is working
docker compose exec ocr-gpu python -c "import paddle; print(paddle.device.is_compiled_with_cuda())"
```

**What this does:**
1. `build-paddle` compiles PaddlePaddle from source inside a CUDA container
2. The wheel is saved to the `./wheels/` directory
3. `Dockerfile.gpu` detects the local wheel and uses it instead of PyPI

**Caveats:**
- Build takes 2-4 hours on first run
- Requires ~20GB disk space during build
- Not officially supported by the PaddlePaddle team
- May need adjustments for future PaddlePaddle versions

See: [GitHub Issue #17327](https://github.com/PaddlePaddle/PaddleOCR/issues/17327)

#### Option 3: Alternative OCR Engines

For ARM64 GPU acceleration, consider alternatives:

| Engine | ARM64 GPU | Notes |
|--------|-----------|-------|
| **Tesseract** | ❌ CPU-only | Good fallback, widely available |
| **EasyOCR** | ⚠️ Via PyTorch | PyTorch has ARM64 GPU support |
| **TrOCR** | ⚠️ Via Transformers | Hugging Face Transformers + PyTorch |
| **docTR** | ⚠️ Via TensorFlow/PyTorch | Both backends have ARM64 support |

EasyOCR with PyTorch is a viable alternative:
```bash
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install easyocr
```
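
Once installed, usage is a few lines. A minimal sketch (the language list and image path are placeholders; `gpu=True` falls back to CPU with a warning when CUDA is unavailable):

```python
import easyocr

# Build the reader once (downloads detection/recognition models on first use),
# then reuse it for every page.
reader = easyocr.Reader(["es"], gpu=True)

# detail=0 returns just the recognized strings instead of (box, text, score)
lines = reader.readtext("page_0001.png", detail=0)
print("\n".join(lines))
```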

### x86_64 GPU Setup (Working)

For x86_64 systems with an NVIDIA GPU, the GPU Docker image works:

```bash
# Verify GPU is accessible
nvidia-smi

# Verify Docker GPU access
docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi

# Build and run GPU version
docker compose up ocr-gpu
```

### GPU Docker Compose Configuration

The `docker-compose.yml` configures GPU access via:

```yaml
deploy:
  resources:
    reservations:
      devices:
        - driver: nvidia
          count: 1
          capabilities: [gpu]
```

This requires Docker Compose v2 and nvidia-container-toolkit.
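
A quick check that both prerequisites are in place (`nvidia-ctk` is the toolkit's CLI):

```bash
docker compose version           # should report v2.x
nvidia-ctk --version             # confirms nvidia-container-toolkit is installed
docker info | grep -i runtime    # the nvidia runtime should be listed
```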

## DGX Spark / ARM64 Quick Start

For ARM64 systems (DGX Spark, Jetson, Graviton), use CPU-only:

```bash
cd src/paddle_ocr

# Build ARM64-native CPU image
docker build -f Dockerfile.cpu -t paddle-ocr-api:arm64 .

# Run
docker run -d -p 8000:8000 \
  -v $(pwd)/../dataset:/app/dataset:ro \
  paddle-ocr-api:arm64

# Test
curl http://localhost:8000/health
```

### Cross-Compile from x86_64

Build ARM64 images from an x86_64 machine:

```bash
# Setup buildx for multi-arch
docker buildx create --name mybuilder --use

# Build ARM64 image from x86_64 machine
docker buildx build -f Dockerfile.cpu \
  --platform linux/arm64 \
  -t paddle-ocr-api:arm64 \
  --load .

# Save and transfer to DGX Spark
docker save paddle-ocr-api:arm64 | gzip > paddle-ocr-arm64.tar.gz
scp paddle-ocr-arm64.tar.gz dgx-spark:~/

# On DGX Spark:
docker load < paddle-ocr-arm64.tar.gz
```

## Using with Ray Tune

### Multi-Worker Setup for Parallel Trials

Run multiple workers for parallel hyperparameter tuning:

```bash
cd src/paddle_ocr

# Start 2 CPU workers (ports 8001-8002; naming the services starts only those two)
sudo docker compose -f docker-compose.workers.yml --profile cpu up -d ocr-cpu-worker-1 ocr-cpu-worker-2

# Or for GPU workers (if supported)
sudo docker compose -f docker-compose.workers.yml --profile gpu up -d

# Check workers are healthy
curl http://localhost:8001/health
curl http://localhost:8002/health
```

Then run the notebook with `max_concurrent_trials=2` to use both workers in parallel; a wiring sketch follows.
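
A minimal wiring sketch, assuming Ray 2.x and the two workers above; the pick-a-worker-by-PID trick is a crude illustration for spreading load, not part of the repo's notebook:

```python
import os
import requests
from ray import tune

# Worker endpoints started by docker-compose.workers.yml (assumes 2 workers)
WORKERS = ["http://localhost:8001/evaluate", "http://localhost:8002/evaluate"]

def trainable(config):
    # Crude load spread: each trial process picks a worker by PID parity.
    url = WORKERS[os.getpid() % len(WORKERS)]
    resp = requests.post(url, json={"pdf_folder": "/app/dataset", **config}, timeout=600)
    resp.raise_for_status()
    tune.report(resp.json())  # forwards CER/WER/TIME to Ray

tuner = tune.Tuner(
    trainable,
    param_space={"text_det_unclip_ratio": tune.uniform(1.0, 2.5)},
    tune_config=tune.TuneConfig(num_samples=8, max_concurrent_trials=2),
)
tuner.fit()
```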

### Single Worker Setup

Update your notebook's `trainable_paddle_ocr` function:

```python
import requests
from ray import tune

API_URL = "http://localhost:8000/evaluate"

def trainable_paddle_ocr(config):
    """Call the OCR API instead of a subprocess."""
    payload = {
        "pdf_folder": "/app/dataset",
        "use_doc_orientation_classify": config.get("use_doc_orientation_classify", False),
        "use_doc_unwarping": config.get("use_doc_unwarping", False),
        "textline_orientation": config.get("textline_orientation", True),
        "text_det_thresh": config.get("text_det_thresh", 0.0),
        "text_det_box_thresh": config.get("text_det_box_thresh", 0.0),
        "text_det_unclip_ratio": config.get("text_det_unclip_ratio", 1.5),
        "text_rec_score_thresh": config.get("text_rec_score_thresh", 0.0),
    }

    try:
        response = requests.post(API_URL, json=payload, timeout=600)
        response.raise_for_status()
        metrics = response.json()
        tune.report(metrics)
    except Exception as e:
        tune.report({"CER": 1.0, "WER": 1.0, "ERROR": str(e)[:500]})
```

## Architecture: Model Lifecycle

The model is loaded **once** at container startup and stays in memory for all requests:

```mermaid
flowchart TB
    subgraph Container["Docker Container Lifecycle"]
        Start([Container Start]) --> Load[Load PaddleOCR Models<br/>~10-30s one-time cost]
        Load --> Ready[API Ready<br/>Models in RAM ~500MB]

        subgraph Requests["Incoming Requests - Models Stay Loaded"]
            Ready --> R1[Request 1] --> Ready
            Ready --> R2[Request 2] --> Ready
            Ready --> RN[Request N...] --> Ready
        end

        Ready --> Stop([Container Stop])
        Stop --> Free[Models Freed]
    end

    style Load fill:#f9f,stroke:#333
    style Ready fill:#9f9,stroke:#333
    style Requests fill:#e8f4ea,stroke:#090
```

**Subprocess vs REST API comparison:**

```mermaid
flowchart LR
    subgraph Subprocess["❌ Subprocess Approach"]
        direction TB
        S1[Trial 1] --> L1[Load Model ~10s]
        L1 --> E1[Evaluate ~60s]
        E1 --> U1[Unload]
        U1 --> S2[Trial 2]
        S2 --> L2[Load Model ~10s]
        L2 --> E2[Evaluate ~60s]
    end

    subgraph REST["✅ REST API Approach"]
        direction TB
        Start2[Start Container] --> Load2[Load Model ~10s]
        Load2 --> Ready2[Model in Memory]
        Ready2 --> T1[Trial 1 ~60s]
        T1 --> Ready2
        Ready2 --> T2[Trial 2 ~60s]
        T2 --> Ready2
        Ready2 --> TN[Trial N ~60s]
    end

    style L1 fill:#faa
    style L2 fill:#faa
    style Load2 fill:#afa
    style Ready2 fill:#afa
```

## Performance Comparison

| Approach | Model Load | Per-Trial Overhead | 64 Trials |
|----------|------------|-------------------|-----------|
| Subprocess (original) | Every trial (~10s) | ~10s | ~7 hours |
| Docker per trial | Every trial (~10s) | ~12-15s | ~7.5 hours |
| **REST API** | **Once** | **~0.1s** | **~5.8 hours** |

The REST API saves over an hour across 64 trials by loading the model only once.

## Troubleshooting

### Model download slow on first run
The first run downloads ~500MB of models. Use the `paddlex-cache` volume to persist them.
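
To confirm the cache is being reused rather than re-downloaded (the volume is named `paddlex-cache` by the `docker run` commands above; compose names it `paddlex-model-cache`):

```bash
docker volume inspect paddlex-cache
# Peek at the cached models from a throwaway container
docker run --rm -v paddlex-cache:/root/.paddlex alpine du -sh /root/.paddlex
```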

### Out of memory
Reduce `max_concurrent_trials` in Ray Tune, or increase the container memory:
```bash
docker run --memory=8g ...
```

### GPU not detected
Ensure the NVIDIA Container Toolkit is installed:
```bash
nvidia-smi  # Should work
docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi  # Should work
```

### PaddlePaddle GPU installation fails
PaddlePaddle 3.x GPU packages are **not available on PyPI**. They must be installed from PaddlePaddle's official index:
```bash
# For CUDA 12.x
pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/

# For CUDA 11.8
pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
```
`Dockerfile.gpu` handles this automatically.

## CI/CD Pipeline

The project includes a Gitea Actions workflow (`.gitea/workflows/ci.yaml`) for automated builds.

### What CI Builds

| Image | Architecture | Source |
|-------|--------------|--------|
| `paddle-ocr-cpu:amd64` | amd64 | PyPI paddlepaddle |
| `paddle-ocr-cpu:arm64` | arm64 | Pre-built wheel from Gitea packages |
| `paddle-ocr-gpu:amd64` | amd64 | paddlepaddle-gpu from the official CUDA index |
| `paddle-ocr-gpu:arm64` | arm64 | Pre-built wheel from Gitea packages |

### ARM64 Wheel Workflow

Since the PyPI wheels don't work on ARM64 (they contain x86 SSE instructions), the wheels must be built from source using sse2neon. They are:

1. Built manually on an ARM64 machine (one-time)
2. Uploaded to Gitea generic packages
3. Downloaded by CI when building ARM64 images

#### Step 1: Build ARM64 Wheels (One-time, on ARM64 machine)

```bash
cd src/paddle_ocr

# Build GPU wheel (requires NVIDIA GPU, takes 1-2 hours)
sudo docker build -t paddle-builder:gpu-arm64 -f Dockerfile.build-paddle .
sudo docker run --rm -v ./wheels:/wheels paddle-builder:gpu-arm64

# Build CPU wheel (no GPU required, takes 1-2 hours)
sudo docker build -t paddle-builder:cpu-arm64 -f Dockerfile.build-paddle-cpu .
sudo docker run --rm -v ./wheels:/wheels paddle-builder:cpu-arm64

# Verify wheels were created
ls -la wheels/paddlepaddle*.whl
# paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl  (GPU)
# paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl      (CPU)
```

#### Step 2: Upload Wheels to Gitea Packages

```bash
export GITEA_TOKEN="your-token-here"

# Upload GPU wheel
curl -X PUT \
  -H "Authorization: token $GITEA_TOKEN" \
  --upload-file wheels/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl \
  "https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-gpu-arm64/3.0.0/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl"

# Upload CPU wheel
curl -X PUT \
  -H "Authorization: token $GITEA_TOKEN" \
  --upload-file wheels/paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl \
  "https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-cpu-arm64/3.0.0/paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl"
```

The wheels are then available at:
```
https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-gpu-arm64/3.0.0/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl
https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-cpu-arm64/3.0.0/paddlepaddle-3.0.0-cp311-cp311-linux_aarch64.whl
```

#### Step 3: CI Builds Images

CI automatically:
1. Downloads the ARM64 wheels from Gitea packages (for arm64 builds only, as sketched below)
2. Builds both CPU and GPU images for amd64 and arm64
3. Pushes to the registry with arch-specific tags
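
A sketch of that download step as a shell command; the step placement inside the workflow is an assumption, but the URL and auth header mirror Step 2 and the `CI_READWRITE` secret listed below:

```bash
# Inside the arm64 build job, before docker build:
mkdir -p wheels
curl -fSL -H "Authorization: token $CI_READWRITE" \
  -o wheels/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl \
  "https://seryus.ddns.net/api/packages/unir/generic/paddlepaddle-gpu-arm64/3.0.0/paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl"
```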

### Required CI Secrets

Configure these in the Gitea repository settings:

| Secret | Description |
|--------|-------------|
| `CI_READWRITE` | Gitea token with registry read/write access |

### Manual Image Push

```bash
# Login to registry
docker login seryus.ddns.net

# Build and push CPU (multi-arch)
docker buildx build -f Dockerfile.cpu \
  --platform linux/amd64,linux/arm64 \
  -t seryus.ddns.net/unir/paddle-ocr-api:cpu \
  --push .

# Build and push GPU (x86_64)
docker build -f Dockerfile.gpu -t seryus.ddns.net/unir/paddle-ocr-api:gpu-amd64 .
docker push seryus.ddns.net/unir/paddle-ocr-api:gpu-amd64

# Build and push GPU (ARM64) - requires the wheel in wheels/
docker buildx build -f Dockerfile.gpu \
  --platform linux/arm64 \
  -t seryus.ddns.net/unir/paddle-ocr-api:gpu-arm64 \
  --push .
```

### Updating the ARM64 Wheels

When PaddlePaddle releases a new version:

1. Update `PADDLE_VERSION` in `Dockerfile.build-paddle` and `Dockerfile.build-paddle-cpu`
2. Rebuild both wheels on an ARM64 machine
3. Upload to Gitea packages with the new version
4. Update `PADDLE_VERSION` in `.gitea/workflows/ci.yaml`
74
src/paddle_ocr/dataset_manager.py
Normal file
@@ -0,0 +1,74 @@
# Imports
import os
from PIL import Image


class ImageTextDataset:
    def __init__(self, root):
        self.samples = []

        for folder in sorted(os.listdir(root)):
            sub = os.path.join(root, folder)
            img_dir = os.path.join(sub, "img")
            txt_dir = os.path.join(sub, "txt")

            if not (os.path.isdir(img_dir) and os.path.isdir(txt_dir)):
                continue

            for fname in sorted(os.listdir(img_dir)):
                if not fname.lower().endswith((".png", ".jpg", ".jpeg")):
                    continue

                img_path = os.path.join(img_dir, fname)

                # The text file must have the same name but a .txt extension
                txt_name = os.path.splitext(fname)[0] + ".txt"
                txt_path = os.path.join(txt_dir, txt_name)

                if not os.path.exists(txt_path):
                    continue

                self.samples.append((img_path, txt_path))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, txt_path = self.samples[idx]

        # Load image
        image = Image.open(img_path).convert("RGB")

        # Load text
        with open(txt_path, "r", encoding="utf-8") as f:
            text = f.read()

        return image, text

    def get_output_path(self, idx, output_subdir, debugset_root="/app/debugset"):
        """Get output path for saving OCR result to debugset folder.

        Args:
            idx: Sample index
            output_subdir: Subdirectory name (e.g., 'paddle_text', 'doctr_text')
            debugset_root: Root folder for debug output (default: /app/debugset)

        Returns:
            Path like /app/debugset/doc1/{output_subdir}/page_001.txt
        """
        img_path, _ = self.samples[idx]
        # img_path: /app/dataset/doc1/img/page_001.png
        # Extract relative path: doc1/img/page_001.png
        parts = img_path.split("/dataset/", 1)
        if len(parts) == 2:
            rel_path = parts[1]  # doc1/img/page_001.png
        else:
            rel_path = os.path.basename(img_path)

        # Replace /img/ with /{output_subdir}/
        rel_parts = rel_path.rsplit("/img/", 1)
        if len(rel_parts) == 2:
            doc_folder = rel_parts[0]  # doc1
            base = rel_parts[1]
        else:
            # Fallback for paths without an /img/ segment (e.g., a bare filename)
            doc_folder = ""
            base = rel_parts[0]
        fname = os.path.splitext(base)[0] + ".txt"  # page_001.txt

        out_dir = os.path.join(debugset_root, doc_folder, output_subdir)
        os.makedirs(out_dir, exist_ok=True)
        return os.path.join(out_dir, fname)
26
src/paddle_ocr/docker-compose.cpu-registry.yml
Normal file
@@ -0,0 +1,26 @@
# docker-compose.cpu-registry.yml - Pull CPU image from registry
# Usage: docker compose -f docker-compose.cpu-registry.yml up

services:
  ocr-cpu:
    image: seryus.ddns.net/unir/paddle-ocr-cpu:latest
    container_name: paddle-ocr-cpu-registry
    ports:
      - "8001:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

volumes:
  paddlex-cache:
    name: paddlex-model-cache
39
src/paddle_ocr/docker-compose.gpu-registry.yml
Normal file
@@ -0,0 +1,39 @@
# docker-compose.gpu-registry.yml - Pull GPU image from registry
# Usage: docker compose -f docker-compose.gpu-registry.yml up
#
# Requires: NVIDIA GPU + nvidia-container-toolkit installed

services:
  ocr-gpu:
    image: seryus.ddns.net/unir/paddle-ocr-gpu:latest
    container_name: paddle-ocr-gpu-registry
    ports:
      - "8002:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
      - ./scripts:/app/scripts:ro
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

volumes:
  paddlex-cache:
    name: paddlex-model-cache
140
src/paddle_ocr/docker-compose.workers.yml
Normal file
@@ -0,0 +1,140 @@
# docker-compose.workers.yml - Multiple PaddleOCR workers for parallel Ray Tune
#
# Usage:
#   GPU (5 workers sharing one GPU):
#     docker compose -f docker-compose.workers.yml --profile gpu up
#
#   CPU (5 workers):
#     docker compose -f docker-compose.workers.yml --profile cpu up
#
#   Start a subset by naming services (e.g., 2 workers):
#     docker compose -f docker-compose.workers.yml --profile cpu up ocr-cpu-worker-1 ocr-cpu-worker-2
#
# Each worker runs on a separate port: 8001, 8002, 8003, 8004, 8005

x-ocr-gpu-common: &ocr-gpu-common
  image: seryus.ddns.net/unir/paddle-ocr-gpu:latest
  volumes:
    - ../dataset:/app/dataset:ro
    - ../debugset:/app/debugset:rw
    - paddlex-cache:/root/.paddlex
  environment:
    - PYTHONUNBUFFERED=1
    - CUDA_VISIBLE_DEVICES=0
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: 1
            capabilities: [gpu]
  restart: unless-stopped
  healthcheck:
    test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 120s

x-ocr-cpu-common: &ocr-cpu-common
  image: seryus.ddns.net/unir/paddle-ocr-cpu:latest
  volumes:
    - ../dataset:/app/dataset:ro
    - ../debugset:/app/debugset:rw
    - paddlex-cache:/root/.paddlex
  environment:
    - PYTHONUNBUFFERED=1
  restart: unless-stopped
  healthcheck:
    test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 120s

services:
  # GPU Workers (gpu profile) - share a single GPU
  ocr-worker-1:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-1
    ports:
      - "8001:8000"
    profiles:
      - gpu

  ocr-worker-2:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-2
    ports:
      - "8002:8000"
    profiles:
      - gpu

  ocr-worker-3:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-3
    ports:
      - "8003:8000"
    profiles:
      - gpu

  ocr-worker-4:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-4
    ports:
      - "8004:8000"
    profiles:
      - gpu

  ocr-worker-5:
    <<: *ocr-gpu-common
    container_name: paddle-ocr-worker-5
    ports:
      - "8005:8000"
    profiles:
      - gpu

  # CPU Workers (cpu profile) - for systems without GPU
  ocr-cpu-worker-1:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-1
    ports:
      - "8001:8000"
    profiles:
      - cpu

  ocr-cpu-worker-2:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-2
    ports:
      - "8002:8000"
    profiles:
      - cpu

  ocr-cpu-worker-3:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-3
    ports:
      - "8003:8000"
    profiles:
      - cpu

  ocr-cpu-worker-4:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-4
    ports:
      - "8004:8000"
    profiles:
      - cpu

  ocr-cpu-worker-5:
    <<: *ocr-cpu-common
    container_name: paddle-ocr-cpu-worker-5
    ports:
      - "8005:8000"
    profiles:
      - cpu

volumes:
  paddlex-cache:
    name: paddlex-model-cache
111
src/paddle_ocr/docker-compose.yml
Normal file
@@ -0,0 +1,111 @@
# docker-compose.yml - PaddleOCR REST API
# Usage:
#   CPU:   docker compose up ocr-cpu
#   GPU:   docker compose up ocr-gpu
#   Test:  docker compose run --rm test
#   Build: CUDA_ARCH=120 docker compose --profile build run --rm build-paddle
#
# Auto-detect CUDA arch before building:
#   export CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -1 | tr -d '.')
#   docker compose --profile build run --rm build-paddle

services:
  # PaddlePaddle GPU wheel builder (ARM64 only, one-time build)
  # Creates ./wheels/paddlepaddle_gpu-*.whl for ARM64 GPU support
  # CUDA_ARCH env var controls the target GPU architecture (default: 120 for Blackwell base)
  build-paddle:
    build:
      context: .
      dockerfile: Dockerfile.build-paddle
      args:
        CUDA_ARCH: ${CUDA_ARCH:-120}
    volumes:
      - ./wheels:/wheels
    profiles:
      - build
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # CPU-only service (works on any architecture)
  ocr-cpu:
    build:
      context: .
      dockerfile: Dockerfile.cpu
      args:
        # Models to bake into image (change before building):
        DET_MODEL: PP-OCRv5_server_det
        REC_MODEL: PP-OCRv5_server_rec
    image: paddle-ocr-api:cpu
    container_name: paddle-ocr-cpu
    ports:
      - "8000:8000"
    volumes:
      - ../dataset:/app/dataset:ro        # Your dataset
      - ../debugset:/app/debugset:rw      # Debug output
      - paddlex-cache:/root/.paddlex      # For additional models at runtime
    environment:
      - PYTHONUNBUFFERED=1
      # Override models at runtime (uncomment to use different models):
      # - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      # - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # GPU service (requires NVIDIA Container Toolkit)
  ocr-gpu:
    build:
      context: .
      dockerfile: Dockerfile.gpu
      args:
        DET_MODEL: PP-OCRv5_server_det
        REC_MODEL: PP-OCRv5_server_rec
    image: paddle-ocr-api:gpu
    container_name: paddle-ocr-gpu
    ports:
      - "8000:8000"
    volumes:
      - ../dataset:/app/dataset:ro
      - ../debugset:/app/debugset:rw
      - paddlex-cache:/root/.paddlex
    environment:
      - PYTHONUNBUFFERED=1
      - CUDA_VISIBLE_DEVICES=0
      # Override models at runtime:
      # - PADDLE_DET_MODEL=PP-OCRv5_mobile_det
      # - PADDLE_REC_MODEL=PP-OCRv5_mobile_rec
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped

  # Test client (runs once and exits)
  test:
    image: python:3.11-slim
    container_name: paddle-ocr-test
    depends_on:
      ocr-cpu:
        condition: service_healthy
    volumes:
      - ./test.py:/app/test.py:ro
    working_dir: /app
    command: >
      sh -c "pip install -q requests && python test.py --url http://ocr-cpu:8000 --dataset /app/dataset"
    network_mode: "service:ocr-cpu"

volumes:
  paddlex-cache:
    name: paddlex-model-cache
340
src/paddle_ocr/paddle_ocr_tuning_rest.py
Normal file
@@ -0,0 +1,340 @@
# paddle_ocr_tuning_rest.py
# FastAPI REST service for PaddleOCR hyperparameter evaluation
# Usage: uvicorn paddle_ocr_tuning_rest:app --host 0.0.0.0 --port 8000

import os
import re
import time
import threading
from typing import Optional
from contextlib import asynccontextmanager

import numpy as np
import paddle
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field

from paddleocr import PaddleOCR
from jiwer import wer, cer
from dataset_manager import ImageTextDataset


def get_gpu_info() -> dict:
    """Get GPU status information from PaddlePaddle."""
    info = {
        "cuda_available": paddle.device.is_compiled_with_cuda(),
        "device": str(paddle.device.get_device()),
        "gpu_count": 0,
        "gpu_name": None,
        "gpu_memory_total": None,
        "gpu_memory_used": None,
    }

    if info["cuda_available"]:
        try:
            info["gpu_count"] = paddle.device.cuda.device_count()
            if info["gpu_count"] > 0:
                # Get GPU properties
                props = paddle.device.cuda.get_device_properties(0)
                info["gpu_name"] = props.name
                info["gpu_memory_total"] = f"{props.total_memory / (1024**3):.2f} GB"

                # Get current memory usage
                mem_reserved = paddle.device.cuda.memory_reserved(0)
                mem_allocated = paddle.device.cuda.memory_allocated(0)
                info["gpu_memory_used"] = f"{mem_allocated / (1024**3):.2f} GB"
                info["gpu_memory_reserved"] = f"{mem_reserved / (1024**3):.2f} GB"
        except Exception as e:
            info["gpu_error"] = str(e)

    return info


# Model configuration via environment variables (with defaults)
DEFAULT_DET_MODEL = os.environ.get("PADDLE_DET_MODEL", "PP-OCRv5_server_det")
DEFAULT_REC_MODEL = os.environ.get("PADDLE_REC_MODEL", "PP-OCRv5_server_rec")


# Global state for model and dataset
class AppState:
    ocr: Optional[PaddleOCR] = None
    dataset: Optional[ImageTextDataset] = None
    dataset_path: Optional[str] = None
    det_model: str = DEFAULT_DET_MODEL
    rec_model: str = DEFAULT_REC_MODEL
    lock: threading.Lock = None  # Protects OCR model from concurrent access

    def __init__(self):
        self.lock = threading.Lock()


state = AppState()

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load OCR model at startup."""
    # Log GPU status
    gpu_info = get_gpu_info()
    print("=" * 50)
    print("GPU STATUS")
    print("=" * 50)
    print(f"  CUDA available: {gpu_info['cuda_available']}")
    print(f"  Device: {gpu_info['device']}")
    if gpu_info['cuda_available']:
        print(f"  GPU count: {gpu_info['gpu_count']}")
        print(f"  GPU name: {gpu_info['gpu_name']}")
        print(f"  GPU memory total: {gpu_info['gpu_memory_total']}")
    print("=" * 50)

    print("Loading PaddleOCR models...")
    print(f"  Detection: {state.det_model}")
    print(f"  Recognition: {state.rec_model}")
    state.ocr = PaddleOCR(
        text_detection_model_name=state.det_model,
        text_recognition_model_name=state.rec_model,
    )

    # Log GPU memory after model load
    if gpu_info['cuda_available']:
        gpu_after = get_gpu_info()
        print(f"  GPU memory after load: {gpu_after.get('gpu_memory_used', 'N/A')}")

    print("Model loaded successfully!")
    yield
    # Cleanup on shutdown
    state.ocr = None
    state.dataset = None


app = FastAPI(
    title="PaddleOCR Tuning API",
    description="REST API for OCR hyperparameter evaluation",
    version="1.0.0",
    lifespan=lifespan,
)

class EvaluateRequest(BaseModel):
    """Request schema matching CLI arguments."""
    pdf_folder: str = Field("/app/dataset", description="Path to dataset folder")
    use_doc_orientation_classify: bool = Field(False, description="Use document orientation classification")
    use_doc_unwarping: bool = Field(False, description="Use document unwarping")
    textline_orientation: bool = Field(True, description="Use textline orientation classification")
    text_det_thresh: float = Field(0.0, ge=0.0, le=1.0, description="Detection pixel threshold")
    text_det_box_thresh: float = Field(0.0, ge=0.0, le=1.0, description="Detection box threshold")
    text_det_unclip_ratio: float = Field(1.5, ge=0.0, description="Text detection expansion coefficient")
    text_rec_score_thresh: float = Field(0.0, ge=0.0, le=1.0, description="Recognition score threshold")
    start_page: int = Field(5, ge=0, description="Start page index (inclusive)")
    end_page: int = Field(10, ge=1, description="End page index (exclusive)")
    save_output: bool = Field(False, description="Save OCR predictions to debugset folder")


class EvaluateResponse(BaseModel):
    """Response schema matching CLI output."""
    CER: float
    WER: float
    TIME: float
    PAGES: int
    TIME_PER_PAGE: float


class HealthResponse(BaseModel):
    status: str
    model_loaded: bool
    dataset_loaded: bool
    dataset_size: Optional[int] = None
    det_model: Optional[str] = None
    rec_model: Optional[str] = None
    # GPU info
    cuda_available: Optional[bool] = None
    device: Optional[str] = None
    gpu_name: Optional[str] = None
    gpu_memory_used: Optional[str] = None
    gpu_memory_total: Optional[str] = None


def _normalize_box_xyxy(box):
    """Normalize bounding box to (x0, y0, x1, y1) format."""
    if isinstance(box, (list, tuple)) and box and isinstance(box[0], (list, tuple)):
        xs = [p[0] for p in box]
        ys = [p[1] for p in box]
        return min(xs), min(ys), max(xs), max(ys)

    if isinstance(box, (list, tuple)):
        if len(box) == 4:
            x0, y0, x1, y1 = box
            return min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1)
        if len(box) == 8:
            xs = box[0::2]
            ys = box[1::2]
            return min(xs), min(ys), max(xs), max(ys)

    raise ValueError(f"Unrecognized box format: {box!r}")
def assemble_from_paddle_result(paddleocr_predict, min_score=0.0, line_tol_factor=0.6):
    """
    Robust line grouping for PaddleOCR outputs.
    Normalizes boxes, groups by line, and returns assembled text.
    """
    boxes_all = []
    for item in paddleocr_predict:
        res = item.json.get("res", {})
        boxes = res.get("rec_boxes", []) or []
        texts = res.get("rec_texts", []) or []
        scores = res.get("rec_scores", None)

        for i, (box, text) in enumerate(zip(boxes, texts)):
            try:
                x0, y0, x1, y1 = _normalize_box_xyxy(box)
            except Exception:
                continue

            y_mid = 0.5 * (y0 + y1)
            score = float(scores[i]) if (scores is not None and i < len(scores)) else 1.0

            t = re.sub(r"\s+", " ", str(text)).strip()
            if not t:
                continue

            boxes_all.append((x0, y0, x1, y1, y_mid, t, score))

    if min_score > 0:
        boxes_all = [b for b in boxes_all if b[6] >= min_score]

    if not boxes_all:
        return ""

    # Adaptive line tolerance
    heights = [b[3] - b[1] for b in boxes_all]
    median_h = float(np.median(heights)) if heights else 20.0
    line_tol = max(8.0, line_tol_factor * median_h)

    # Sort by vertical mid, then x0
    boxes_all.sort(key=lambda b: (b[4], b[0]))

    # Group into lines
    lines, cur, last_y = [], [], None
    for x0, y0, x1, y1, y_mid, text, score in boxes_all:
        if last_y is None or abs(y_mid - last_y) <= line_tol:
            cur.append((x0, text))
        else:
            cur.sort(key=lambda t: t[0])
            lines.append(" ".join(t[1] for t in cur))
            cur = [(x0, text)]
        last_y = y_mid

    if cur:
        cur.sort(key=lambda t: t[0])
        lines.append(" ".join(t[1] for t in cur))

    res = "\n".join(lines)
    res = re.sub(r"\s+\n", "\n", res).strip()
    return res


def evaluate_text(reference: str, prediction: str) -> dict:
    """Calculate WER and CER metrics."""
    return {"WER": wer(reference, prediction), "CER": cer(reference, prediction)}
@app.get("/health", response_model=HealthResponse)
def health_check():
    """Check if the service is ready."""
    gpu_info = get_gpu_info()
    return HealthResponse(
        status="ok" if state.ocr is not None else "initializing",
        model_loaded=state.ocr is not None,
        dataset_loaded=state.dataset is not None,
        dataset_size=len(state.dataset) if state.dataset else None,
        det_model=state.det_model,
        rec_model=state.rec_model,
        cuda_available=gpu_info.get("cuda_available"),
        device=gpu_info.get("device"),
        gpu_name=gpu_info.get("gpu_name"),
        gpu_memory_used=gpu_info.get("gpu_memory_used"),
        gpu_memory_total=gpu_info.get("gpu_memory_total"),
    )


@app.post("/evaluate", response_model=EvaluateResponse)
def evaluate(request: EvaluateRequest):
    """
    Evaluate OCR with given hyperparameters.
    Returns CER, WER, and timing metrics.
    """
    if state.ocr is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    # Load or reload dataset if path changed
    if state.dataset is None or state.dataset_path != request.pdf_folder:
        if not os.path.isdir(request.pdf_folder):
            raise HTTPException(status_code=400, detail=f"Dataset folder not found: {request.pdf_folder}")
        state.dataset = ImageTextDataset(request.pdf_folder)
        state.dataset_path = request.pdf_folder

    if len(state.dataset) == 0:
        raise HTTPException(status_code=400, detail="Dataset is empty")

    # Validate page range
    start = request.start_page
    end = min(request.end_page, len(state.dataset))
    if start >= end:
        raise HTTPException(status_code=400, detail=f"Invalid page range: {start}-{end}")

    cer_list, wer_list = [], []
    time_per_page_list = []
    t0 = time.time()

    # Lock to prevent concurrent OCR access (model is not thread-safe)
    with state.lock:
        for idx in range(start, end):
            img, ref = state.dataset[idx]
            arr = np.array(img)

            tp0 = time.time()
            out = state.ocr.predict(
                arr,
                use_doc_orientation_classify=request.use_doc_orientation_classify,
                use_doc_unwarping=request.use_doc_unwarping,
                use_textline_orientation=request.textline_orientation,
                text_det_thresh=request.text_det_thresh,
                text_det_box_thresh=request.text_det_box_thresh,
                text_det_unclip_ratio=request.text_det_unclip_ratio,
                text_rec_score_thresh=request.text_rec_score_thresh,
            )

            pred = assemble_from_paddle_result(out)
            time_per_page_list.append(float(time.time() - tp0))

            # Save prediction to debugset if requested
            if request.save_output:
                out_path = state.dataset.get_output_path(idx, "paddle_text")
                with open(out_path, "w", encoding="utf-8") as f:
                    f.write(pred)

            m = evaluate_text(ref, pred)
            cer_list.append(m["CER"])
            wer_list.append(m["WER"])

    return EvaluateResponse(
        CER=float(np.mean(cer_list)) if cer_list else 1.0,
        WER=float(np.mean(wer_list)) if wer_list else 1.0,
        TIME=float(time.time() - t0),
        PAGES=len(cer_list),
        TIME_PER_PAGE=float(np.mean(time_per_page_list)) if time_per_page_list else 0.0,
    )


@app.post("/evaluate_full", response_model=EvaluateResponse)
def evaluate_full(request: EvaluateRequest):
    """Evaluate on ALL pages (ignores start_page/end_page)."""
    request.start_page = 0
    request.end_page = 9999  # Will be clamped to dataset size
    return evaluate(request)


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
22
src/paddle_ocr/requirements-gpu.txt
Normal file
@@ -0,0 +1,22 @@
# PaddleOCR REST API - GPU Requirements
# paddlepaddle-gpu 3.x is not on PyPI; install from PaddlePaddle's CUDA index (see README):
#   pip install -r requirements-gpu.txt --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/

# PaddlePaddle (GPU version with CUDA)
paddlepaddle-gpu==3.2.0

# PaddleOCR
paddleocr==3.3.2

# OCR evaluation metrics
jiwer

# Numerical computing
numpy

# REST API framework
fastapi
uvicorn[standard]
pydantic

# Image processing
Pillow
22
src/paddle_ocr/requirements.txt
Normal file
@@ -0,0 +1,22 @@
# PaddleOCR REST API - CPU Requirements
# Install: pip install -r requirements.txt

# PaddlePaddle (CPU version)
paddlepaddle==3.2.2

# PaddleOCR
paddleocr==3.3.2

# OCR evaluation metrics
jiwer

# Numerical computing
numpy

# REST API framework
fastapi
uvicorn[standard]
pydantic

# Image processing (pulled in by paddleocr, but explicit)
Pillow
199
src/paddle_ocr/scripts/debug_gpu_detection.py
Normal file
@@ -0,0 +1,199 @@
#!/usr/bin/env python3
"""
Debug script for GPU OCR detection issues.

This script tests the raw inference output from PaddlePaddle detection models
to diagnose why detection might fail on certain GPU architectures (e.g., Blackwell/sm_121).

Usage:
    docker exec paddle-ocr-gpu python /app/scripts/debug_gpu_detection.py [image_path]

Expected behavior:
- Working GPU: output stats should show min close to 0, max close to 1, mean ~0.1-0.5
- Broken GPU: output stats show constant values (e.g., min=max=mean=0.00001)
"""

import os
import sys

os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'

import numpy as np
import paddle
from PIL import Image


def check_gpu_status():
    """Check GPU availability and properties."""
    print("=" * 60)
    print("GPU STATUS")
    print("=" * 60)
    print(f"Device: {paddle.device.get_device()}")
    print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")

    if paddle.device.is_compiled_with_cuda():
        print(f"GPU count: {paddle.device.cuda.device_count()}")
        if paddle.device.cuda.device_count() > 0:
            props = paddle.device.cuda.get_device_properties(0)
            print(f"GPU name: {props.name}")
            print(f"Compute capability: {props.major}.{props.minor}")
            print(f"Total memory: {props.total_memory / (1024**3):.2f} GB")
    print()


def test_basic_ops():
    """Test basic GPU tensor operations."""
    print("=" * 60)
    print("BASIC GPU OPERATIONS")
    print("=" * 60)

    # Test tensor creation
    x = paddle.randn([2, 3])
    print(f"Tensor place: {x.place}")

    # Test conv2d
    x = paddle.randn([1, 3, 64, 64])
    conv = paddle.nn.Conv2D(3, 16, 3, padding=1)
    y = conv(x)
    print(f"Conv2d output shape: {y.shape}, place: {y.place}")

    # Test softmax
    s = paddle.nn.functional.softmax(y, axis=1)
    print(f"Softmax output shape: {s.shape}")
    print("Basic operations: OK")
    print()
def test_detection_model(image_path: str):
    """Test detection model raw output."""
    print("=" * 60)
    print("DETECTION MODEL TEST")
    print("=" * 60)

    from paddle.inference import Config, create_predictor

    model_dir = '/root/.paddlex/official_models/PP-OCRv4_mobile_det'
    inference_file = f'{model_dir}/inference.json'
    params_file = f'{model_dir}/inference.pdiparams'

    if not os.path.exists(inference_file):
        print(f"Model not found at {model_dir}")
        print("Run PaddleOCR once to download models first.")
        return

    # Create config
    config = Config()
    config.set_prog_file(inference_file)
    config.set_params_file(params_file)
    config.enable_use_gpu(1024, 0)

    print("Creating predictor...")
    predictor = create_predictor(config)

    # Get input/output names
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    print(f"Input names: {input_names}")
    print(f"Output names: {output_names}")

    # Load and preprocess image (convert to RGB so the NCHW transpose
    # doesn't break on RGBA/grayscale inputs)
    img = Image.open(image_path).convert('RGB')
    img = img.resize((640, 640))
    arr = np.array(img).astype('float32')
    arr = arr / 255.0
    arr = arr.transpose(2, 0, 1)[np.newaxis, ...]  # NCHW
    print(f"Input tensor shape: {arr.shape}")

    # Set input
    input_handle = predictor.get_input_handle(input_names[0])
    input_handle.reshape(arr.shape)
    input_handle.copy_from_cpu(arr)

    # Run prediction
    print("Running inference...")
    predictor.run()

    # Get output
    output_handle = predictor.get_output_handle(output_names[0])
    output = output_handle.copy_to_cpu()

    print()
    print("OUTPUT ANALYSIS:")
    print(f"  Shape: {output.shape}")
    print(f"  Min: {output.min():.6f}")
    print(f"  Max: {output.max():.6f}")
    print(f"  Mean: {output.mean():.6f}")
    print(f"  Std: {output.std():.6f}")
    print(f"  Has NaN: {np.isnan(output).any()}")
    print(f"  Has Inf: {np.isinf(output).any()}")

    # Diagnosis
    print()
    print("DIAGNOSIS:")
    if output.min() == output.max():
        print("  PROBLEM: Output is constant - model inference is broken!")
        print("  This typically indicates GPU compute capability mismatch.")
        print("  GB10 (sm_121) may need CUDA 13.0+ for native support.")
    elif output.max() < 0.01:
        print("  PROBLEM: Output values too low - detection will find nothing.")
    elif np.isnan(output).any() or np.isinf(output).any():
        print("  PROBLEM: Output contains NaN/Inf - numerical instability.")
    else:
        print("  OK: Output values look reasonable.")
        print(f"  Detection threshold typically 0.3-0.6, max output is {output.max():.3f}")
def test_paddleocr_output(image_path: str):
    """Test full PaddleOCR pipeline."""
    print()
    print("=" * 60)
    print("PADDLEOCR PIPELINE TEST")
    print("=" * 60)

    from paddleocr import PaddleOCR

    ocr = PaddleOCR(
        text_detection_model_name='PP-OCRv4_mobile_det',
        text_recognition_model_name='PP-OCRv4_mobile_rec',
    )

    img = Image.open(image_path)
    arr = np.array(img)

    out = ocr.predict(arr)
    res = out[0].json['res']

    dt_polys = res.get('dt_polys', [])
    rec_texts = res.get('rec_texts', [])

    print(f"Detection polygons: {len(dt_polys)}")
    print(f"Recognition texts: {len(rec_texts)}")

    if rec_texts:
        print(f"Sample texts: {rec_texts[:5]}")
    else:
        print("No text detected!")


def main():
    # Default test image
    image_path = '/app/dataset/0/img/page_0001.png'
    if len(sys.argv) > 1:
        image_path = sys.argv[1]

    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        print("Usage: python debug_gpu_detection.py [image_path]")
        sys.exit(1)

    print(f"Testing with image: {image_path}")
    print()

    check_gpu_status()
    test_basic_ops()
    test_detection_model(image_path)
    test_paddleocr_output(image_path)


if __name__ == '__main__':
    main()
207
src/paddle_ocr/scripts/test_dynamic_mode.py
Normal file
@@ -0,0 +1,207 @@
#!/usr/bin/env python3
"""
Test PaddleOCR in dynamic graph mode (not inference mode).

Dynamic mode compiles kernels at runtime, which may work on Blackwell.
Inference mode uses pre-compiled kernels, which fail on sm_121.

Usage:
    python test_dynamic_mode.py [image_path]
"""

import os
import sys

os.environ['DISABLE_MODEL_SOURCE_CHECK'] = 'True'
# Force dynamic graph mode
os.environ['FLAGS_enable_pir_api'] = '0'

import numpy as np
import paddle
from PIL import Image


def check_gpu():
    """Check GPU status."""
    print("=" * 60)
    print("GPU STATUS")
    print("=" * 60)
    print(f"Device: {paddle.device.get_device()}")
    print(f"CUDA compiled: {paddle.device.is_compiled_with_cuda()}")

    if paddle.device.is_compiled_with_cuda() and paddle.device.cuda.device_count() > 0:
        props = paddle.device.cuda.get_device_properties(0)
        print(f"GPU: {props.name} (sm_{props.major}{props.minor})")
        print(f"Memory: {props.total_memory / (1024**3):.1f} GB")
    print()
def test_paddleocr_dynamic(image_path: str):
    """Test PaddleOCR with dynamic execution."""
    print("=" * 60)
    print("PADDLEOCR DYNAMIC MODE TEST")
    print("=" * 60)

    # Import PaddleOCR
    from paddleocr import PaddleOCR

    # Try to force dynamic mode by setting use_static=False if available,
    # or by using the model in eval mode directly

    print("Creating PaddleOCR instance...")
    print("(This may download models on first run)")

    try:
        # Create OCR instance - this might still use inference internally
        ocr = PaddleOCR(
            text_detection_model_name='PP-OCRv4_mobile_det',
            text_recognition_model_name='PP-OCRv4_mobile_rec',
            use_angle_cls=False,  # Simplify
            lang='es',
        )

        # Load image
        img = Image.open(image_path)
        arr = np.array(img)
        print(f"Image shape: {arr.shape}")

        # Run prediction
        print("Running OCR prediction...")
        result = ocr.predict(arr)

        # Parse results
        res = result[0].json['res']
        dt_polys = res.get('dt_polys', [])
        rec_texts = res.get('rec_texts', [])

        print()
        print("RESULTS:")
        print(f"  Detected boxes: {len(dt_polys)}")
        print(f"  Recognized texts: {len(rec_texts)}")

        if rec_texts:
            print(f"  First 5 texts: {rec_texts[:5]}")
            return True
        else:
            print("  WARNING: No text recognized!")
            return False

    except Exception as e:
        print(f"ERROR: {e}")
        return False
def test_paddle_dynamic_model():
|
||||
"""Test loading a paddle model in dynamic graph mode."""
|
||||
print()
|
||||
print("=" * 60)
|
||||
print("PADDLE DYNAMIC GRAPH TEST")
|
||||
print("=" * 60)
|
||||
|
||||
# Ensure we're in dynamic mode
|
||||
paddle.disable_static()
|
||||
|
||||
# Test a simple model forward pass
|
||||
print("Testing dynamic graph execution...")
|
||||
|
||||
# Create a simple ResNet-like block
|
||||
x = paddle.randn([1, 3, 224, 224])
|
||||
|
||||
# Conv -> BN -> ReLU
|
||||
conv = paddle.nn.Conv2D(3, 64, 7, stride=2, padding=3)
|
||||
bn = paddle.nn.BatchNorm2D(64)
|
||||
|
||||
# Forward pass (dynamic mode - compiles at runtime)
|
||||
y = conv(x)
|
||||
y = bn(y)
|
||||
y = paddle.nn.functional.relu(y)
|
||||
|
||||
print(f"Input shape: {x.shape}")
|
||||
print(f"Output shape: {y.shape}")
|
||||
print(f"Output min: {y.min().item():.4f}")
|
||||
print(f"Output max: {y.max().item():.4f}")
|
||||
print(f"Output mean: {y.mean().item():.4f}")
|
||||
|
||||
if y.min() != y.max():
|
||||
print("Dynamic graph mode: WORKING")
|
||||
return True
|
||||
else:
|
||||
print("Dynamic graph mode: BROKEN (constant output)")
|
||||
return False
|
||||
|
||||
|
||||
def test_ppocr_model_direct():
|
||||
"""Try loading PPOCRv4 model directly in dynamic mode."""
|
||||
print()
|
||||
print("=" * 60)
|
||||
print("PPOCR MODEL DIRECT LOAD TEST")
|
||||
print("=" * 60)
|
||||
|
||||
try:
|
||||
# Try to import ppocr modules directly
|
||||
# This bypasses the inference predictor
|
||||
from paddleocr.ppocr.modeling.architectures import build_model
|
||||
from paddleocr.ppocr.postprocess import build_post_process
|
||||
from paddleocr.ppocr.utils.save_load import load_model
|
||||
|
||||
print("Direct model import available")
|
||||
|
||||
# Note: This approach requires model config files
|
||||
# which may or may not be bundled with paddleocr
|
||||
|
||||
except ImportError as e:
|
||||
print(f"Direct model import not available: {e}")
|
||||
print("PaddleOCR may only support inference mode")
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def main():
|
||||
# Default test image
|
||||
image_path = '/app/dataset/0/img/page_0001.png'
|
||||
if len(sys.argv) > 1:
|
||||
image_path = sys.argv[1]
|
||||
|
||||
if not os.path.exists(image_path):
|
||||
print(f"Image not found: {image_path}")
|
||||
sys.exit(1)
|
||||
|
||||
print(f"Testing with image: {image_path}")
|
||||
print()
|
||||
|
||||
check_gpu()
|
||||
|
||||
# Test 1: Basic dynamic graph
|
||||
dynamic_works = test_paddle_dynamic_model()
|
||||
|
||||
if not dynamic_works:
|
||||
print("\nDynamic graph mode is broken - GPU likely unsupported")
|
||||
sys.exit(1)
|
||||
|
||||
# Test 2: Direct model load
|
||||
test_ppocr_model_direct()
|
||||
|
||||
# Test 3: PaddleOCR pipeline
|
||||
ocr_works = test_paddleocr_dynamic(image_path)
|
||||
|
||||
print()
|
||||
print("=" * 60)
|
||||
print("SUMMARY")
|
||||
print("=" * 60)
|
||||
print(f"Dynamic graph mode: {'WORKS' if dynamic_works else 'BROKEN'}")
|
||||
print(f"PaddleOCR pipeline: {'WORKS' if ocr_works else 'BROKEN'}")
|
||||
|
||||
if dynamic_works and not ocr_works:
|
||||
print()
|
||||
print("DIAGNOSIS: Dynamic mode works but PaddleOCR fails.")
|
||||
print("This means PaddleOCR internally uses inference predictor")
|
||||
print("which has pre-compiled kernels without Blackwell support.")
|
||||
print()
|
||||
print("Potential solutions:")
|
||||
print("1. Modify PaddleOCR to use dynamic mode")
|
||||
print("2. Use ONNX export + ONNXRuntime")
|
||||
print("3. Wait for PaddlePaddle Blackwell support")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
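The docstring above hinges on the distinction between dynamic-graph execution (kernels selected at call time) and the static/inference path (pre-compiled kernels that fail on sm_121). A minimal sketch of that distinction outside PaddleOCR, assuming only a working PaddlePaddle install and the stock `paddle.static` API — this is an illustration, not part of the PR:

```python
# Sketch: run the same matmul in dynamic and static graph mode.
# Diverging results on Blackwell would implicate the static/inference
# kernel path rather than the GPU itself. (Assumption: paddlepaddle-gpu
# is installed and a CUDA device is visible.)
import numpy as np
import paddle

x_np = np.random.rand(4, 4).astype("float32")

# Dynamic graph: kernels are resolved at call time.
paddle.disable_static()
dyn = paddle.matmul(paddle.to_tensor(x_np), paddle.to_tensor(x_np)).numpy()

# Static graph: build a program first, then execute it.
paddle.enable_static()
main_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog):
    x = paddle.static.data(name="x", shape=[4, 4], dtype="float32")
    y = paddle.matmul(x, x)
exe = paddle.static.Executor()
exe.run(paddle.static.default_startup_program())
(static_out,) = exe.run(main_prog, feed={"x": x_np}, fetch_list=[y])
paddle.disable_static()

print("max |dynamic - static| =", np.abs(dyn - static_out).max())
```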
69
src/paddle_ocr/scripts/upload-wheel.sh
Executable file
@@ -0,0 +1,69 @@
#!/bin/bash
# Upload PaddlePaddle ARM64 wheel to Gitea generic packages
#
# Usage:
#   ./scripts/upload-wheel.sh [wheel_file] [token]
#
# Environment variables (alternative to arguments):
#   GITEA_TOKEN - Gitea API token
#   WHEEL_FILE  - Path to wheel file (default: auto-detect in wheels/)

set -e

GITEA_URL="https://seryus.ddns.net"
GITEA_ORG="unir"
PACKAGE_NAME="paddlepaddle-gpu-arm64"

# Get wheel file
WHEEL_FILE="${1:-${WHEEL_FILE:-$(ls wheels/paddlepaddle*.whl 2>/dev/null | head -1)}}"
if [ -z "$WHEEL_FILE" ] || [ ! -f "$WHEEL_FILE" ]; then
    echo "Error: No wheel file found"
    echo "Usage: $0 [wheel_file] [token]"
    echo "   or set WHEEL_FILE environment variable"
    exit 1
fi

# Get token
TOKEN="${2:-${GITEA_TOKEN}}"
if [ -z "$TOKEN" ]; then
    echo "Error: No token provided"
    echo "Usage: $0 [wheel_file] [token]"
    echo "   or set GITEA_TOKEN environment variable"
    exit 1
fi

# Extract version from wheel filename
# Format: paddlepaddle_gpu-3.0.0-cp311-cp311-linux_aarch64.whl
# (sed -n ... p prints only on a match, so VERSION stays empty if the
# filename does not follow the expected pattern)
FILENAME=$(basename "$WHEEL_FILE")
VERSION=$(echo "$FILENAME" | sed -nE 's/paddlepaddle[_-]gpu-([0-9.]+)-.*/\1/p')

if [ -z "$VERSION" ]; then
    echo "Error: Could not extract version from filename: $FILENAME"
    exit 1
fi

echo "Uploading wheel to Gitea packages..."
echo "  File: $WHEEL_FILE"
echo "  Package: $PACKAGE_NAME"
echo "  Version: $VERSION"
echo "  URL: $GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$FILENAME"

# Upload using PUT request
HTTP_CODE=$(curl -sS -w "%{http_code}" -o /tmp/upload_response.txt \
    -X PUT \
    -H "Authorization: token $TOKEN" \
    -H "Content-Type: application/octet-stream" \
    --data-binary "@$WHEEL_FILE" \
    "$GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$FILENAME")

if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
    echo "Success! Wheel uploaded."
    echo "Download URL: $GITEA_URL/api/packages/$GITEA_ORG/generic/$PACKAGE_NAME/$VERSION/$FILENAME"
elif [ "$HTTP_CODE" = "409" ]; then
    echo "Package version already exists (HTTP 409)"
    echo "To update, delete the existing version first in the Gitea UI"
else
    echo "Error: Upload failed with HTTP $HTTP_CODE"
    cat /tmp/upload_response.txt
    exit 1
fi
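For the reverse direction, a minimal sketch of fetching the uploaded wheel back from the same Gitea generic-package endpoint. The version and filename below are illustrative placeholders; only the URL layout and the token header mirror the script above:

```python
# Sketch: download a wheel from the Gitea generic package registry.
# Assumptions: same GITEA_URL/org/package layout as upload-wheel.sh,
# and a token with read access in the GITEA_TOKEN environment variable.
import os
import requests

GITEA_URL = "https://seryus.ddns.net"
PKG = "paddlepaddle-gpu-arm64"
VERSION = "3.0.0"  # hypothetical version for illustration
FILENAME = f"paddlepaddle_gpu-{VERSION}-cp311-cp311-linux_aarch64.whl"

url = f"{GITEA_URL}/api/packages/unir/generic/{PKG}/{VERSION}/{FILENAME}"
resp = requests.get(
    url,
    headers={"Authorization": f"token {os.environ['GITEA_TOKEN']}"},
    stream=True,
    timeout=60,
)
resp.raise_for_status()
with open(FILENAME, "wb") as f:
    for chunk in resp.iter_content(chunk_size=1 << 20):
        f.write(chunk)
print(f"Downloaded {FILENAME}")
```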
114
src/paddle_ocr/test.py
Normal file
@@ -0,0 +1,114 @@
# test.py - Simple client to test PaddleOCR REST API
# Usage: python test.py [--url URL] [--dataset PATH]

import argparse
import requests
import time
import sys


def wait_for_health(url: str, timeout: int = 120) -> bool:
    """Wait for API to be ready."""
    health_url = f"{url}/health"
    start = time.time()

    print(f"Waiting for API at {health_url}...")
    while time.time() - start < timeout:
        try:
            resp = requests.get(health_url, timeout=5)
            if resp.status_code == 200:
                data = resp.json()
                if data.get("model_loaded"):
                    print(f"API ready! Model loaded in {time.time() - start:.1f}s")
                    return True
                print(f"  Model loading... ({time.time() - start:.0f}s)")
        except requests.exceptions.ConnectionError:
            print(f"  Connecting... ({time.time() - start:.0f}s)")
        except Exception as e:
            print(f"  Error: {e}")
        time.sleep(2)

    print("Timeout waiting for API")
    return False


def test_evaluate(url: str, config: dict) -> dict:
    """Run evaluation with given config."""
    eval_url = f"{url}/evaluate"

    print(f"\nTesting config: {config}")
    start = time.time()

    resp = requests.post(eval_url, json=config, timeout=600)
    resp.raise_for_status()

    result = resp.json()
    elapsed = time.time() - start

    print(f"Results (took {elapsed:.1f}s):")
    print(f"  CER: {result['CER']:.4f} ({result['CER']*100:.2f}%)")
    print(f"  WER: {result['WER']:.4f} ({result['WER']*100:.2f}%)")
    print(f"  Pages: {result['PAGES']}")
    print(f"  Time/page: {result['TIME_PER_PAGE']:.2f}s")

    return result


def main():
    parser = argparse.ArgumentParser(description="Test PaddleOCR REST API")
    parser.add_argument("--url", default="http://localhost:8001", help="API base URL")
    parser.add_argument("--dataset", default="/app/dataset", help="Dataset path (inside container)")
    parser.add_argument("--skip-health", action="store_true", help="Skip health check wait")
    args = parser.parse_args()

    # Wait for API to be ready
    if not args.skip_health:
        if not wait_for_health(args.url):
            sys.exit(1)

    # Test 1: Baseline config (default PaddleOCR)
    print("\n" + "="*50)
    print("TEST 1: Baseline Configuration")
    print("="*50)
    baseline = test_evaluate(args.url, {
        "pdf_folder": args.dataset,
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "textline_orientation": False,  # Baseline: disabled
        "text_det_thresh": 0.0,
        "text_det_box_thresh": 0.0,
        "text_det_unclip_ratio": 1.5,
        "text_rec_score_thresh": 0.0,
        "start_page": 5,
        "end_page": 10,
    })

    # Test 2: Optimized config (from Ray Tune results)
    print("\n" + "="*50)
    print("TEST 2: Optimized Configuration")
    print("="*50)
    optimized = test_evaluate(args.url, {
        "pdf_folder": args.dataset,
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "textline_orientation": True,  # KEY: enabled
        "text_det_thresh": 0.4690,
        "text_det_box_thresh": 0.5412,
        "text_det_unclip_ratio": 0.0,
        "text_rec_score_thresh": 0.6350,
        "start_page": 5,
        "end_page": 10,
    })

    # Summary
    print("\n" + "="*50)
    print("SUMMARY")
    print("="*50)
    cer_reduction = (1 - optimized["CER"] / baseline["CER"]) * 100 if baseline["CER"] > 0 else 0
    print(f"Baseline CER:  {baseline['CER']*100:.2f}%")
    print(f"Optimized CER: {optimized['CER']*100:.2f}%")
    print(f"Improvement:   {cer_reduction:.1f}% reduction in errors")


if __name__ == "__main__":
    main()
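Note that the SUMMARY block above reports a relative reduction in errors, not a percentage-point difference. A quick sanity check of the formula, with illustrative numbers:

```python
# Relative error reduction: going from 8% to 4% CER halves the errors,
# so the script reports a 50% improvement, not a 4% one.
baseline_cer, optimized_cer = 0.08, 0.04
reduction = (1 - optimized_cer / baseline_cer) * 100
print(f"{reduction:.1f}% reduction in errors")  # -> 50.0% reduction in errors
```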
0
src/paddle_ocr/wheels/.gitkeep
Normal file
87
src/paddle_ocr_raytune_rest.ipynb
Normal file
@@ -0,0 +1,87 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "header",
   "metadata": {},
   "source": [
    "# PaddleOCR Hyperparameter Optimization via REST API\n",
    "\n",
    "Uses Ray Tune + Optuna to find optimal PaddleOCR parameters.\n",
    "\n",
    "## Prerequisites\n",
    "\n",
    "```bash\n",
    "cd src/paddle_ocr\n",
    "docker compose -f docker-compose.workers.yml up  # GPU workers on 8001-8002\n",
    "# or: docker compose -f docker-compose.workers.yml --profile cpu up\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "deps",
   "metadata": {},
   "outputs": [],
   "source": "# Pin Ray version for API stability (tune.report takes dict, not kwargs in 2.x)\n%pip install -q \"ray[tune]==2.53.0\" optuna requests pandas"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "setup",
   "metadata": {},
   "outputs": [],
   "source": "from raytune_ocr import (\n    check_workers, create_trainable, run_tuner, analyze_results, correlation_analysis,\n    paddle_ocr_payload, PADDLE_OCR_SEARCH_SPACE, PADDLE_OCR_CONFIG_KEYS,\n)\n\n# Worker ports (3 workers to avoid OOM)\nPORTS = [8001, 8002, 8003]\n\n# Check workers are running\nhealthy = check_workers(PORTS, \"PaddleOCR\")"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "tune",
   "metadata": {},
   "outputs": [],
   "source": "# Create trainable and run tuning\ntrainable = create_trainable(PORTS, paddle_ocr_payload)\n\nresults = run_tuner(\n    trainable=trainable,\n    search_space=PADDLE_OCR_SEARCH_SPACE,\n    num_samples=128,\n    num_workers=len(healthy),\n)"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "analysis",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analyze results\n",
    "df = analyze_results(\n",
    "    results,\n",
    "    prefix=\"raytune_paddle\",\n",
    "    config_keys=PADDLE_OCR_CONFIG_KEYS,\n",
    ")\n",
    "\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "correlation",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Correlation analysis\n",
    "correlation_analysis(df, PADDLE_OCR_CONFIG_KEYS)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
18
src/raytune/Dockerfile
Normal file
@@ -0,0 +1,18 @@
FROM python:3.12-slim

WORKDIR /app

# Install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY raytune_ocr.py .
COPY run_tuning.py .

# Create results directory
RUN mkdir -p /app/results

ENV PYTHONUNBUFFERED=1

ENTRYPOINT ["python", "run_tuning.py"]
131
src/raytune/README.md
Normal file
@@ -0,0 +1,131 @@
# Ray Tune OCR Hyperparameter Optimization

Docker-based hyperparameter tuning for OCR services using Ray Tune with Optuna search.

## Structure

```
raytune/
├── Dockerfile          # Python 3.12-slim with Ray Tune + Optuna
├── requirements.txt    # Dependencies
├── raytune_ocr.py      # Shared utilities and search spaces
├── run_tuning.py       # CLI entry point
└── README.md
```

## Quick Start

```bash
cd src

# Build the raytune image
docker compose -f docker-compose.tuning.paddle.yml build raytune

# Or pull from registry
docker pull seryus.ddns.net/unir/raytune:latest
```

## Usage

### PaddleOCR Tuning

```bash
# Start the PaddleOCR service
docker compose -f docker-compose.tuning.paddle.yml up -d paddle-ocr-gpu

# Wait for the health check, then run tuning
docker compose -f docker-compose.tuning.paddle.yml run raytune --service paddle --samples 64

# Stop when done
docker compose -f docker-compose.tuning.paddle.yml down
```

### DocTR Tuning

```bash
docker compose -f docker-compose.tuning.doctr.yml up -d doctr-gpu
docker compose -f docker-compose.tuning.doctr.yml run raytune --service doctr --samples 64
docker compose -f docker-compose.tuning.doctr.yml down
```

### EasyOCR Tuning

```bash
# Note: EasyOCR uses port 8002 (same as PaddleOCR), so the two cannot run simultaneously.
docker compose -f docker-compose.tuning.easyocr.yml up -d easyocr-gpu
docker compose -f docker-compose.tuning.easyocr.yml run raytune --service easyocr --samples 64
docker compose -f docker-compose.tuning.easyocr.yml down
```

## CLI Options

```
python run_tuning.py --service {paddle,doctr,easyocr} --samples N
```

| Option     | Description                          | Default |
|------------|--------------------------------------|---------|
| --service  | OCR service to tune (required)       | -       |
| --samples  | Number of hyperparameter trials      | 64      |

## Search Spaces

### PaddleOCR
- `use_doc_orientation_classify`: [True, False]
- `use_doc_unwarping`: [True, False]
- `textline_orientation`: [True, False]
- `text_det_thresh`: uniform(0.0, 0.7)
- `text_det_box_thresh`: uniform(0.0, 0.7)
- `text_rec_score_thresh`: uniform(0.0, 0.7)

### DocTR
- `assume_straight_pages`: [True, False]
- `straighten_pages`: [True, False]
- `preserve_aspect_ratio`: [True, False]
- `symmetric_pad`: [True, False]
- `disable_page_orientation`: [True, False]
- `disable_crop_orientation`: [True, False]
- `resolve_lines`: [True, False]
- `resolve_blocks`: [True, False]
- `paragraph_break`: uniform(0.01, 0.1)

### EasyOCR
- `text_threshold`: uniform(0.3, 0.9)
- `low_text`: uniform(0.2, 0.6)
- `link_threshold`: uniform(0.2, 0.6)
- `slope_ths`: uniform(0.0, 0.3)
- `ycenter_ths`: uniform(0.3, 1.0)
- `height_ths`: uniform(0.3, 1.0)
- `width_ths`: uniform(0.3, 1.0)
- `add_margin`: uniform(0.0, 0.3)
- `contrast_ths`: uniform(0.05, 0.3)
- `adjust_contrast`: uniform(0.3, 0.8)
- `decoder`: ["greedy", "beamsearch"]
- `beamWidth`: [3, 5, 7, 10]
- `min_size`: [5, 10, 15, 20]

## Output

Results are saved to `src/results/` as CSV files:
- `raytune_paddle_results_YYYYMMDD_HHMMSS.csv`
- `raytune_doctr_results_YYYYMMDD_HHMMSS.csv`
- `raytune_easyocr_results_YYYYMMDD_HHMMSS.csv`

Each row contains:
- Configuration parameters (prefixed with `config/`)
- Metrics: CER, WER, TIME, PAGES, TIME_PER_PAGE
- Worker URL used for the trial
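A minimal sketch for inspecting one of these CSVs offline, assuming only the columns listed above (the filename matches the DocTR results file committed further down in this PR):

```python
import pandas as pd

# Load a saved run and pull out the best trial by CER.
df = pd.read_csv("src/results/raytune_doctr_results_20260119_121445.csv")
best = df.loc[df["CER"].idxmin()]
print(f"Best CER {best['CER']:.4f} / WER {best['WER']:.4f} on {best['worker']}")
print(best.filter(like="config/"))  # the trial's hyperparameters
```
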
## Network Mode

The raytune container uses `network_mode: host` to access OCR services on localhost ports:
- PaddleOCR: port 8002
- DocTR: port 8003
- EasyOCR: port 8002 (conflicts with PaddleOCR)

## Dependencies

- ray[tune]==2.52.1
- optuna==4.7.0
- requests>=2.28.0
- pandas>=2.0.0
371
src/raytune/raytune_ocr.py
Normal file
@@ -0,0 +1,371 @@
# raytune_ocr.py
# Shared Ray Tune utilities for OCR hyperparameter optimization
#
# Usage:
#   from raytune_ocr import check_workers, create_trainable, run_tuner, analyze_results
#
# Environment variables:
#   OCR_HOST: Host for OCR services (default: localhost)

import os
from datetime import datetime
from typing import List, Dict, Any, Callable, Optional

import requests
import pandas as pd

import ray
from ray import tune
from ray.tune.search.optuna import OptunaSearch


def check_workers(
    ports: List[int],
    service_name: str = "OCR",
    timeout: int = 180,
    interval: int = 5,
) -> List[str]:
    """
    Wait for workers to be fully ready (model + dataset loaded) and return healthy URLs.

    Args:
        ports: List of port numbers to check
        service_name: Name for error messages
        timeout: Max seconds to wait for each worker
        interval: Seconds between retries

    Returns:
        List of healthy worker URLs

    Raises:
        RuntimeError: if no healthy workers are found after the timeout
    """
    import time

    host = os.environ.get("OCR_HOST", "localhost")
    worker_urls = [f"http://{host}:{port}" for port in ports]
    healthy_workers = []

    for url in worker_urls:
        print(f"Waiting for {url}...")
        start = time.time()

        while time.time() - start < timeout:
            try:
                health = requests.get(f"{url}/health", timeout=10).json()
                model_ok = health.get('model_loaded', False)
                dataset_ok = health.get('dataset_loaded', False)

                if health.get('status') == 'ok' and model_ok:
                    gpu = health.get('gpu_name', 'CPU')
                    print(f"✓ {url}: ready ({gpu})")
                    healthy_workers.append(url)
                    break

                elapsed = int(time.time() - start)
                print(f"  [{elapsed}s] model={model_ok} dataset={dataset_ok}")
            except requests.exceptions.RequestException:
                elapsed = int(time.time() - start)
                print(f"  [{elapsed}s] not reachable")

            time.sleep(interval)
        else:
            print(f"✗ {url}: timeout after {timeout}s")

    if not healthy_workers:
        raise RuntimeError(
            f"No healthy {service_name} workers found.\n"
            f"Checked ports: {ports}"
        )

    print(f"\n{len(healthy_workers)}/{len(worker_urls)} workers ready\n")
    return healthy_workers


def create_trainable(ports: List[int], payload_fn: Callable[[Dict], Dict]) -> Callable:
    """
    Factory to create a trainable function for Ray Tune.

    Args:
        ports: List of worker ports for load balancing
        payload_fn: Function that takes a config dict and returns the API payload dict

    Returns:
        Trainable function for Ray Tune

    Note:
        Ray Tune 2.x API: tune.report(metrics_dict) - pass a dict directly, NOT kwargs.
        See: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.report.html
    """
    def trainable(config):
        import os
        import random
        import requests
        from ray.tune import report  # Ray 2.x: report(dict), not report(**kwargs)

        host = os.environ.get("OCR_HOST", "localhost")
        api_url = f"http://{host}:{random.choice(ports)}"
        payload = payload_fn(config)

        try:
            response = requests.post(f"{api_url}/evaluate", json=payload, timeout=None)
            response.raise_for_status()
            metrics = response.json()
            metrics["worker"] = api_url
            report(metrics)  # Ray 2.x API: pass the dict directly
        except Exception as e:
            report({  # Ray 2.x API: pass the dict directly
                "CER": 1.0,
                "WER": 1.0,
                "TIME": 0.0,
                "PAGES": 0,
                "TIME_PER_PAGE": 0,
                "worker": api_url,
                "ERROR": str(e)[:500]
            })

    return trainable


def run_tuner(
    trainable: Callable,
    search_space: Dict[str, Any],
    num_samples: int = 64,
    num_workers: int = 1,
    metric: str = "CER",
    mode: str = "min",
) -> tune.ResultGrid:
    """
    Initialize Ray and run hyperparameter tuning.

    Args:
        trainable: Trainable function from create_trainable()
        search_space: Dict of parameter names to tune.* search spaces
        num_samples: Number of trials to run
        num_workers: Max concurrent trials
        metric: Metric to optimize
        mode: "min" or "max"

    Returns:
        Ray Tune ResultGrid
    """
    ray.init(
        ignore_reinit_error=True,
        include_dashboard=False,
        configure_logging=False,
        _metrics_export_port=0,  # Disable metrics export to avoid connection warnings
    )
    print(f"Ray Tune ready (version: {ray.__version__})")

    tuner = tune.Tuner(
        trainable,
        tune_config=tune.TuneConfig(
            metric=metric,
            mode=mode,
            search_alg=OptunaSearch(),
            num_samples=num_samples,
            max_concurrent_trials=num_workers,
        ),
        param_space=search_space,
    )

    return tuner.fit()


def analyze_results(
    results: tune.ResultGrid,
    output_folder: str = "results",
    prefix: str = "raytune",
    config_keys: Optional[List[str]] = None,
) -> pd.DataFrame:
    """
    Analyze and save tuning results.

    Args:
        results: Ray Tune ResultGrid
        output_folder: Directory to save CSV
        prefix: Filename prefix
        config_keys: List of config keys to show in best result (without 'config/' prefix)

    Returns:
        Results DataFrame
    """
    os.makedirs(output_folder, exist_ok=True)
    df = results.get_dataframe()

    # Save to CSV
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{prefix}_results_{timestamp}.csv"
    filepath = os.path.join(output_folder, filename)
    df.to_csv(filepath, index=False)
    print(f"Results saved: {filepath}")

    # Best configuration
    best = df.loc[df["CER"].idxmin()]
    print(f"\nBest CER: {best['CER']:.6f}")
    print(f"Best WER: {best['WER']:.6f}")

    if config_keys:
        print("\nOptimal Configuration:")
        for key in config_keys:
            col = f"config/{key}"
            if col in best:
                val = best[col]
                if isinstance(val, float):
                    print(f"  {key}: {val:.4f}")
                else:
                    print(f"  {key}: {val}")

    return df


def correlation_analysis(df: pd.DataFrame, param_keys: List[str]) -> None:
    """
    Print correlation of numeric parameters with CER/WER.

    Args:
        df: Results DataFrame
        param_keys: List of config keys (without 'config/' prefix)
    """
    param_cols = [f"config/{k}" for k in param_keys if f"config/{k}" in df.columns]
    numeric_cols = [c for c in param_cols if df[c].dtype in ['float64', 'int64']]

    if not numeric_cols:
        print("No numeric parameters for correlation analysis")
        return

    corr_cer = df[numeric_cols + ["CER"]].corr()["CER"].sort_values(ascending=False)
    corr_wer = df[numeric_cols + ["WER"]].corr()["WER"].sort_values(ascending=False)

    print("Correlation with CER:")
    print(corr_cer)
    print("\nCorrelation with WER:")
    print(corr_wer)


# =============================================================================
# OCR-specific payload functions
# =============================================================================

def paddle_ocr_payload(config: Dict) -> Dict:
    """Create payload for PaddleOCR API. Uses pages 5-10 (first doc) for tuning."""
    return {
        "pdf_folder": "/app/dataset",
        "use_doc_orientation_classify": config.get("use_doc_orientation_classify", False),
        "use_doc_unwarping": config.get("use_doc_unwarping", False),
        "textline_orientation": config.get("textline_orientation", True),
        "text_det_thresh": config.get("text_det_thresh", 0.0),
        "text_det_box_thresh": config.get("text_det_box_thresh", 0.0),
        "text_det_unclip_ratio": config.get("text_det_unclip_ratio", 1.5),
        "text_rec_score_thresh": config.get("text_rec_score_thresh", 0.0),
        "start_page": 5,
        "end_page": 10,
        "save_output": False,
    }


def doctr_payload(config: Dict) -> Dict:
    """Create payload for DocTR API. Uses pages 5-10 (first doc) for tuning."""
    return {
        "pdf_folder": "/app/dataset",
        "assume_straight_pages": config.get("assume_straight_pages", True),
        "straighten_pages": config.get("straighten_pages", False),
        "preserve_aspect_ratio": config.get("preserve_aspect_ratio", True),
        "symmetric_pad": config.get("symmetric_pad", True),
        "disable_page_orientation": config.get("disable_page_orientation", False),
        "disable_crop_orientation": config.get("disable_crop_orientation", False),
        "resolve_lines": config.get("resolve_lines", True),
        "resolve_blocks": config.get("resolve_blocks", False),
        "paragraph_break": config.get("paragraph_break", 0.035),
        "start_page": 5,
        "end_page": 10,
        "save_output": False,
    }


def easyocr_payload(config: Dict) -> Dict:
    """Create payload for EasyOCR API. Uses pages 5-10 (first doc) for tuning."""
    return {
        "pdf_folder": "/app/dataset",
        "text_threshold": config.get("text_threshold", 0.7),
        "low_text": config.get("low_text", 0.4),
        "link_threshold": config.get("link_threshold", 0.4),
        "slope_ths": config.get("slope_ths", 0.1),
        "ycenter_ths": config.get("ycenter_ths", 0.5),
        "height_ths": config.get("height_ths", 0.5),
        "width_ths": config.get("width_ths", 0.5),
        "add_margin": config.get("add_margin", 0.1),
        "contrast_ths": config.get("contrast_ths", 0.1),
        "adjust_contrast": config.get("adjust_contrast", 0.5),
        "decoder": config.get("decoder", "greedy"),
        "beamWidth": config.get("beamWidth", 5),
        "min_size": config.get("min_size", 10),
        "start_page": 5,
        "end_page": 10,
        "save_output": False,
    }


# =============================================================================
# Search spaces
# =============================================================================

PADDLE_OCR_SEARCH_SPACE = {
    "use_doc_orientation_classify": tune.choice([True, False]),
    "use_doc_unwarping": tune.choice([True, False]),
    "textline_orientation": tune.choice([True, False]),
    "text_det_thresh": tune.uniform(0.0, 0.7),
    "text_det_box_thresh": tune.uniform(0.0, 0.7),
    "text_det_unclip_ratio": tune.choice([0.0]),
    "text_rec_score_thresh": tune.uniform(0.0, 0.7),
}

DOCTR_SEARCH_SPACE = {
    "assume_straight_pages": tune.choice([True, False]),
    "straighten_pages": tune.choice([True, False]),
    "preserve_aspect_ratio": tune.choice([True, False]),
    "symmetric_pad": tune.choice([True, False]),
    "disable_page_orientation": tune.choice([True, False]),
    "disable_crop_orientation": tune.choice([True, False]),
    "resolve_lines": tune.choice([True, False]),
    "resolve_blocks": tune.choice([True, False]),
    "paragraph_break": tune.uniform(0.01, 0.1),
}

EASYOCR_SEARCH_SPACE = {
    "text_threshold": tune.uniform(0.3, 0.9),
    "low_text": tune.uniform(0.2, 0.6),
    "link_threshold": tune.uniform(0.2, 0.6),
    "slope_ths": tune.uniform(0.0, 0.3),
    "ycenter_ths": tune.uniform(0.3, 1.0),
    "height_ths": tune.uniform(0.3, 1.0),
    "width_ths": tune.uniform(0.3, 1.0),
    "add_margin": tune.uniform(0.0, 0.3),
    "contrast_ths": tune.uniform(0.05, 0.3),
    "adjust_contrast": tune.uniform(0.3, 0.8),
    "decoder": tune.choice(["greedy", "beamsearch"]),
    "beamWidth": tune.choice([3, 5, 7, 10]),
    "min_size": tune.choice([5, 10, 15, 20]),
}


# =============================================================================
# Config keys for results display
# =============================================================================

PADDLE_OCR_CONFIG_KEYS = [
    "use_doc_orientation_classify", "use_doc_unwarping", "textline_orientation",
    "text_det_thresh", "text_det_box_thresh", "text_det_unclip_ratio", "text_rec_score_thresh",
]

DOCTR_CONFIG_KEYS = [
    "assume_straight_pages", "straighten_pages", "preserve_aspect_ratio", "symmetric_pad",
    "disable_page_orientation", "disable_crop_orientation", "resolve_lines", "resolve_blocks",
    "paragraph_break",
]

EASYOCR_CONFIG_KEYS = [
    "text_threshold", "low_text", "link_threshold", "slope_ths", "ycenter_ths",
    "height_ths", "width_ths", "add_margin", "contrast_ths", "adjust_contrast",
    "decoder", "beamWidth", "min_size",
]
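Taken together, the module is meant to be wired up as below. This is a minimal sketch for a hypothetical fourth service: the payload key, port, and search space are invented for illustration, and only the imported helpers are real:

```python
from ray import tune

from raytune_ocr import check_workers, create_trainable, run_tuner, analyze_results


def my_ocr_payload(config):
    # Hypothetical /evaluate payload; only 'pdf_folder' mirrors the real ones.
    return {"pdf_folder": "/app/dataset", "some_thresh": config.get("some_thresh", 0.5)}


MY_SEARCH_SPACE = {"some_thresh": tune.uniform(0.0, 1.0)}

ports = [8004]  # hypothetical worker port
healthy = check_workers(ports, "MyOCR")
trainable = create_trainable(ports, my_ocr_payload)
results = run_tuner(trainable, MY_SEARCH_SPACE, num_samples=8, num_workers=len(healthy))
df = analyze_results(results, prefix="raytune_myocr", config_keys=["some_thresh"])
```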
4
src/raytune/requirements.txt
Normal file
@@ -0,0 +1,4 @@
ray[tune]==2.52.1
optuna==4.7.0
requests>=2.28.0
pandas>=2.0.0
80
src/raytune/run_tuning.py
Normal file
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""Run hyperparameter tuning for OCR services."""

import os
import argparse
from raytune_ocr import (
    check_workers, create_trainable, run_tuner, analyze_results,
    paddle_ocr_payload, doctr_payload, easyocr_payload,
    PADDLE_OCR_SEARCH_SPACE, DOCTR_SEARCH_SPACE, EASYOCR_SEARCH_SPACE,
    PADDLE_OCR_CONFIG_KEYS, DOCTR_CONFIG_KEYS, EASYOCR_CONFIG_KEYS,
)

SERVICES = {
    "paddle": {
        "payload_fn": paddle_ocr_payload,
        "search_space": PADDLE_OCR_SEARCH_SPACE,
        "config_keys": PADDLE_OCR_CONFIG_KEYS,
        "name": "PaddleOCR",
    },
    "doctr": {
        "payload_fn": doctr_payload,
        "search_space": DOCTR_SEARCH_SPACE,
        "config_keys": DOCTR_CONFIG_KEYS,
        "name": "DocTR",
    },
    "easyocr": {
        "payload_fn": easyocr_payload,
        "search_space": EASYOCR_SEARCH_SPACE,
        "config_keys": EASYOCR_CONFIG_KEYS,
        "name": "EasyOCR",
    },
}


def main():
    parser = argparse.ArgumentParser(description="Run OCR hyperparameter tuning")
    parser.add_argument("--service", choices=["paddle", "doctr", "easyocr"], required=True)
    parser.add_argument("--host", type=str, default="localhost", help="OCR service host")
    parser.add_argument("--port", type=int, default=8000, help="OCR service port")
    parser.add_argument("--samples", type=int, default=64, help="Number of samples")
    args = parser.parse_args()

    # Set environment variable for the raytune_ocr module
    os.environ["OCR_HOST"] = args.host

    cfg = SERVICES[args.service]
    ports = [args.port]

    print(f"\n{'='*50}")
    print(f"Hyperparameter Tuning: {cfg['name']}")
    print(f"Host: {args.host}:{args.port}")
    print(f"Samples: {args.samples}")
    print(f"{'='*50}\n")

    # Check workers
    healthy = check_workers(ports, cfg["name"])

    # Create trainable and run tuning
    trainable = create_trainable(ports, cfg["payload_fn"])
    results = run_tuner(
        trainable=trainable,
        search_space=cfg["search_space"],
        num_samples=args.samples,
        num_workers=len(healthy),
    )

    # Analyze results
    analyze_results(
        results,
        output_folder="results",
        prefix=f"raytune_{args.service}",
        config_keys=cfg["config_keys"],
    )

    print(f"\n{'='*50}")
    print("Tuning complete!")
    print(f"{'='*50}")


if __name__ == "__main__":
    main()
365
src/raytune_ocr.py
Normal file
@@ -0,0 +1,365 @@
# raytune_ocr.py
# Shared Ray Tune utilities for OCR hyperparameter optimization
#
# Usage:
#   from raytune_ocr import check_workers, create_trainable, run_tuner, analyze_results

import os
from datetime import datetime
from typing import List, Dict, Any, Callable, Optional

import requests
import pandas as pd

import ray
from ray import tune
from ray.tune.search.optuna import OptunaSearch


def check_workers(
    ports: List[int],
    service_name: str = "OCR",
    timeout: int = 180,
    interval: int = 5,
) -> List[str]:
    """
    Wait for workers to be fully ready (model + dataset loaded) and return healthy URLs.

    Args:
        ports: List of port numbers to check
        service_name: Name for error messages
        timeout: Max seconds to wait for each worker
        interval: Seconds between retries

    Returns:
        List of healthy worker URLs

    Raises:
        RuntimeError: if no healthy workers are found after the timeout
    """
    import time

    worker_urls = [f"http://localhost:{port}" for port in ports]
    healthy_workers = []

    for url in worker_urls:
        print(f"Waiting for {url}...")
        start = time.time()

        while time.time() - start < timeout:
            try:
                health = requests.get(f"{url}/health", timeout=10).json()
                model_ok = health.get('model_loaded', False)
                dataset_ok = health.get('dataset_loaded', False)

                if health.get('status') == 'ok' and model_ok:
                    gpu = health.get('gpu_name', 'CPU')
                    print(f"✓ {url}: ready ({gpu})")
                    healthy_workers.append(url)
                    break

                elapsed = int(time.time() - start)
                print(f"  [{elapsed}s] model={model_ok} dataset={dataset_ok}")
            except requests.exceptions.RequestException:
                elapsed = int(time.time() - start)
                print(f"  [{elapsed}s] not reachable")

            time.sleep(interval)
        else:
            print(f"✗ {url}: timeout after {timeout}s")

    if not healthy_workers:
        raise RuntimeError(
            f"No healthy {service_name} workers found.\n"
            f"Checked ports: {ports}"
        )

    print(f"\n{len(healthy_workers)}/{len(worker_urls)} workers ready\n")
    return healthy_workers


def create_trainable(ports: List[int], payload_fn: Callable[[Dict], Dict]) -> Callable:
    """
    Factory to create a trainable function for Ray Tune.

    Args:
        ports: List of worker ports for load balancing
        payload_fn: Function that takes a config dict and returns the API payload dict

    Returns:
        Trainable function for Ray Tune

    Note:
        Ray Tune 2.x API: tune.report(metrics_dict) - pass a dict directly, NOT kwargs.
        See: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.report.html
    """
    def trainable(config):
        import random
        import requests
        from ray.tune import report  # Ray 2.x: report(dict), not report(**kwargs)

        api_url = f"http://localhost:{random.choice(ports)}"
        payload = payload_fn(config)

        try:
            response = requests.post(f"{api_url}/evaluate", json=payload, timeout=None)
            response.raise_for_status()
            metrics = response.json()
            metrics["worker"] = api_url
            report(metrics)  # Ray 2.x API: pass the dict directly
        except Exception as e:
            report({  # Ray 2.x API: pass the dict directly
                "CER": 1.0,
                "WER": 1.0,
                "TIME": 0.0,
                "PAGES": 0,
                "TIME_PER_PAGE": 0,
                "worker": api_url,
                "ERROR": str(e)[:500]
            })

    return trainable


def run_tuner(
    trainable: Callable,
    search_space: Dict[str, Any],
    num_samples: int = 64,
    num_workers: int = 1,
    metric: str = "CER",
    mode: str = "min",
) -> tune.ResultGrid:
    """
    Initialize Ray and run hyperparameter tuning.

    Args:
        trainable: Trainable function from create_trainable()
        search_space: Dict of parameter names to tune.* search spaces
        num_samples: Number of trials to run
        num_workers: Max concurrent trials
        metric: Metric to optimize
        mode: "min" or "max"

    Returns:
        Ray Tune ResultGrid
    """
    ray.init(
        ignore_reinit_error=True,
        include_dashboard=False,
        configure_logging=False,
        _metrics_export_port=0,  # Disable metrics export to avoid connection warnings
    )
    print(f"Ray Tune ready (version: {ray.__version__})")

    tuner = tune.Tuner(
        trainable,
        tune_config=tune.TuneConfig(
            metric=metric,
            mode=mode,
            search_alg=OptunaSearch(),
            num_samples=num_samples,
            max_concurrent_trials=num_workers,
        ),
        param_space=search_space,
    )

    return tuner.fit()


def analyze_results(
    results: tune.ResultGrid,
    output_folder: str = "results",
    prefix: str = "raytune",
    config_keys: Optional[List[str]] = None,
) -> pd.DataFrame:
    """
    Analyze and save tuning results.

    Args:
        results: Ray Tune ResultGrid
        output_folder: Directory to save CSV
        prefix: Filename prefix
        config_keys: List of config keys to show in best result (without 'config/' prefix)

    Returns:
        Results DataFrame
    """
    os.makedirs(output_folder, exist_ok=True)
    df = results.get_dataframe()

    # Save to CSV
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{prefix}_results_{timestamp}.csv"
    filepath = os.path.join(output_folder, filename)
    df.to_csv(filepath, index=False)
    print(f"Results saved: {filepath}")

    # Best configuration
    best = df.loc[df["CER"].idxmin()]
    print(f"\nBest CER: {best['CER']:.6f}")
    print(f"Best WER: {best['WER']:.6f}")

    if config_keys:
        print("\nOptimal Configuration:")
        for key in config_keys:
            col = f"config/{key}"
            if col in best:
                val = best[col]
                if isinstance(val, float):
                    print(f"  {key}: {val:.4f}")
                else:
                    print(f"  {key}: {val}")

    return df


def correlation_analysis(df: pd.DataFrame, param_keys: List[str]) -> None:
    """
    Print correlation of numeric parameters with CER/WER.

    Args:
        df: Results DataFrame
        param_keys: List of config keys (without 'config/' prefix)
    """
    param_cols = [f"config/{k}" for k in param_keys if f"config/{k}" in df.columns]
    numeric_cols = [c for c in param_cols if df[c].dtype in ['float64', 'int64']]

    if not numeric_cols:
        print("No numeric parameters for correlation analysis")
        return

    corr_cer = df[numeric_cols + ["CER"]].corr()["CER"].sort_values(ascending=False)
    corr_wer = df[numeric_cols + ["WER"]].corr()["WER"].sort_values(ascending=False)

    print("Correlation with CER:")
    print(corr_cer)
    print("\nCorrelation with WER:")
    print(corr_wer)


# =============================================================================
# OCR-specific payload functions
# =============================================================================

def paddle_ocr_payload(config: Dict) -> Dict:
    """Create payload for PaddleOCR API. Uses pages 5-10 (first doc) for tuning."""
    return {
        "pdf_folder": "/app/dataset",
        "use_doc_orientation_classify": config.get("use_doc_orientation_classify", False),
        "use_doc_unwarping": config.get("use_doc_unwarping", False),
        "textline_orientation": config.get("textline_orientation", True),
        "text_det_thresh": config.get("text_det_thresh", 0.0),
        "text_det_box_thresh": config.get("text_det_box_thresh", 0.0),
        "text_det_unclip_ratio": config.get("text_det_unclip_ratio", 1.5),
        "text_rec_score_thresh": config.get("text_rec_score_thresh", 0.0),
        "start_page": 5,
        "end_page": 10,
        "save_output": False,
    }


def doctr_payload(config: Dict) -> Dict:
    """Create payload for DocTR API. Uses pages 5-10 (first doc) for tuning."""
    return {
        "pdf_folder": "/app/dataset",
        "assume_straight_pages": config.get("assume_straight_pages", True),
        "straighten_pages": config.get("straighten_pages", False),
        "preserve_aspect_ratio": config.get("preserve_aspect_ratio", True),
        "symmetric_pad": config.get("symmetric_pad", True),
        "disable_page_orientation": config.get("disable_page_orientation", False),
        "disable_crop_orientation": config.get("disable_crop_orientation", False),
        "resolve_lines": config.get("resolve_lines", True),
        "resolve_blocks": config.get("resolve_blocks", False),
        "paragraph_break": config.get("paragraph_break", 0.035),
        "start_page": 5,
        "end_page": 10,
        "save_output": False,
    }


def easyocr_payload(config: Dict) -> Dict:
    """Create payload for EasyOCR API. Uses pages 5-10 (first doc) for tuning."""
    return {
        "pdf_folder": "/app/dataset",
        "text_threshold": config.get("text_threshold", 0.7),
        "low_text": config.get("low_text", 0.4),
        "link_threshold": config.get("link_threshold", 0.4),
        "slope_ths": config.get("slope_ths", 0.1),
        "ycenter_ths": config.get("ycenter_ths", 0.5),
        "height_ths": config.get("height_ths", 0.5),
        "width_ths": config.get("width_ths", 0.5),
        "add_margin": config.get("add_margin", 0.1),
        "contrast_ths": config.get("contrast_ths", 0.1),
        "adjust_contrast": config.get("adjust_contrast", 0.5),
        "decoder": config.get("decoder", "greedy"),
        "beamWidth": config.get("beamWidth", 5),
        "min_size": config.get("min_size", 10),
        "start_page": 5,
        "end_page": 10,
        "save_output": False,
    }


# =============================================================================
# Search spaces
# =============================================================================

PADDLE_OCR_SEARCH_SPACE = {
    "use_doc_orientation_classify": tune.choice([True, False]),
    "use_doc_unwarping": tune.choice([True, False]),
    "textline_orientation": tune.choice([True, False]),
    "text_det_thresh": tune.uniform(0.0, 0.7),
    "text_det_box_thresh": tune.uniform(0.0, 0.7),
    "text_det_unclip_ratio": tune.choice([0.0]),
    "text_rec_score_thresh": tune.uniform(0.0, 0.7),
}

DOCTR_SEARCH_SPACE = {
    "assume_straight_pages": tune.choice([True, False]),
    "straighten_pages": tune.choice([True, False]),
    "preserve_aspect_ratio": tune.choice([True, False]),
    "symmetric_pad": tune.choice([True, False]),
    "disable_page_orientation": tune.choice([True, False]),
    "disable_crop_orientation": tune.choice([True, False]),
    "resolve_lines": tune.choice([True, False]),
    "resolve_blocks": tune.choice([True, False]),
    "paragraph_break": tune.uniform(0.01, 0.1),
}

EASYOCR_SEARCH_SPACE = {
    "text_threshold": tune.uniform(0.3, 0.9),
    "low_text": tune.uniform(0.2, 0.6),
    "link_threshold": tune.uniform(0.2, 0.6),
    "slope_ths": tune.uniform(0.0, 0.3),
    "ycenter_ths": tune.uniform(0.3, 1.0),
    "height_ths": tune.uniform(0.3, 1.0),
    "width_ths": tune.uniform(0.3, 1.0),
    "add_margin": tune.uniform(0.0, 0.3),
    "contrast_ths": tune.uniform(0.05, 0.3),
    "adjust_contrast": tune.uniform(0.3, 0.8),
    "decoder": tune.choice(["greedy", "beamsearch"]),
    "beamWidth": tune.choice([3, 5, 7, 10]),
    "min_size": tune.choice([5, 10, 15, 20]),
}


# =============================================================================
# Config keys for results display
# =============================================================================

PADDLE_OCR_CONFIG_KEYS = [
    "use_doc_orientation_classify", "use_doc_unwarping", "textline_orientation",
    "text_det_thresh", "text_det_box_thresh", "text_det_unclip_ratio", "text_rec_score_thresh",
]

DOCTR_CONFIG_KEYS = [
    "assume_straight_pages", "straighten_pages", "preserve_aspect_ratio", "symmetric_pad",
    "disable_page_orientation", "disable_crop_orientation", "resolve_lines", "resolve_blocks",
    "paragraph_break",
]

EASYOCR_CONFIG_KEYS = [
    "text_threshold", "low_text", "link_threshold", "slope_ths", "ycenter_ths",
    "height_ths", "width_ths", "add_margin", "contrast_ths", "adjust_contrast",
    "decoder", "beamWidth", "min_size",
]
65
src/results/raytune_doctr_results_20260119_121445.csv
Normal file
@@ -0,0 +1,65 @@
CER,WER,TIME,PAGES,TIME_PER_PAGE,model_reinitialized,worker,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore,config/assume_straight_pages,config/straighten_pages,config/preserve_aspect_ratio,config/symmetric_pad,config/disable_page_orientation,config/disable_crop_orientation,config/resolve_lines,config/resolve_blocks,config/paragraph_break,logdir
0.07954940376147028,0.3649854752864963,20.5652813911438,5,3.1847062587738035,True,http://localhost:8003,1768820798,,False,1,c4fcad75,2026-01-19_12-06-38,20.57081699371338,20.57081699371338,154230,sergio-XPS-15-9500,192.168.1.5,20.57081699371338,1,False,False,False,True,True,True,True,False,0.09422103316797548,c4fcad75
0.7921346697142901,1.0545568452820837,10.610254287719727,5,0.7967870712280274,True,http://localhost:8003,1768820812,,False,1,1c83f678,2026-01-19_12-06-52,10.619899988174438,10.619899988174438,156506,sergio-XPS-15-9500,192.168.1.5,10.619899988174438,1,True,True,False,True,False,True,True,False,0.05901661817569934,1c83f678
0.7923201620478004,1.0636912186759604,5.9415740966796875,5,0.7310843944549561,True,http://localhost:8003,1768820822,,False,1,9c50442a,2026-01-19_12-07-02,5.945918560028076,5.945918560028076,158305,sergio-XPS-15-9500,192.168.1.5,5.945918560028076,1,True,True,True,True,False,False,True,False,0.011733102672610147,9c50442a
0.7666925478783123,1.0440873928467642,11.626950025558472,5,1.8842015743255616,True,http://localhost:8003,1768820837,,False,1,68ca7089,2026-01-19_12-07-17,11.63136100769043,11.63136100769043,160066,sergio-XPS-15-9500,192.168.1.5,11.63136100769043,1,False,True,True,True,True,False,True,False,0.05057045374185024,68ca7089
0.07451994486755961,0.3515575293610934,3.9680063724517822,5,0.3743919849395752,True,http://localhost:8003,1768820844,,False,1,8a8806b7,2026-01-19_12-07-24,3.97230863571167,3.97230863571167,162728,sergio-XPS-15-9500,192.168.1.5,3.97230863571167,1,True,False,True,False,True,False,False,False,0.023697921154561794,8a8806b7
0.7657432112619441,1.0344358563738436,11.93731951713562,5,1.942223310470581,True,http://localhost:8003,1768820859,,False,1,f96be72a,2026-01-19_12-07-39,11.941577672958374,11.941577672958374,163962,sergio-XPS-15-9500,192.168.1.5,11.941577672958374,1,False,True,False,True,True,False,False,False,0.08588425427021348,f96be72a
0.7918824188958541,1.0538014427522018,5.69057035446167,5,0.7088402271270752,True,http://localhost:8003,1768820868,,False,1,a832050e,2026-01-19_12-07-48,5.69484543800354,5.69484543800354,166633,sergio-XPS-15-9500,192.168.1.5,5.69484543800354,1,True,True,True,False,False,True,False,True,0.048746351477152096,a832050e
0.08002835367212643,0.35831740099305937,8.193880081176758,5,1.19890398979187,True,http://localhost:8003,1768820880,,False,1,9719423a,2026-01-19_12-08-00,8.198804140090942,8.198804140090942,168390,sergio-XPS-15-9500,192.168.1.5,8.198804140090942,1,False,False,True,True,True,False,True,True,0.05352825040305834,9719423a
0.7921346697142901,1.0545568452820837,5.68590521812439,5,0.7114005088806152,True,http://localhost:8003,1768820889,,False,1,fddb15d7,2026-01-19_12-08-09,5.691095352172852,5.691095352172852,170482,sergio-XPS-15-9500,192.168.1.5,5.691095352172852,1,True,True,False,False,False,True,True,True,0.07145403499389562,fddb15d7
0.0743152533045929,0.3522593474791794,3.8518898487091064,5,0.36159796714782716,True,http://localhost:8003,1768820896,,False,1,9a805553,2026-01-19_12-08-16,3.8564491271972656,3.8564491271972656,172258,sergio-XPS-15-9500,192.168.1.5,3.8564491271972656,1,True,False,False,False,True,False,True,False,0.09773705213878954,9a805553
0.0743152533045929,0.3522593474791794,2.2390947341918945,5,0.361072301864624,False,http://localhost:8003,1768820902,,False,1,791b8e38,2026-01-19_12-08-22,2.2431814670562744,2.2431814670562744,173474,sergio-XPS-15-9500,192.168.1.5,2.2431814670562744,1,True,False,False,False,True,False,False,True,0.09837572708177385,791b8e38
0.0743152533045929,0.3522593474791794,2.245297431945801,5,0.36272416114807127,False,http://localhost:8003,1768820907,,False,1,7c60350c,2026-01-19_12-08-27,2.2497620582580566,2.2497620582580566,174686,sergio-XPS-15-9500,192.168.1.5,2.2497620582580566,1,True,False,False,False,True,False,False,True,0.09836846418124921,7c60350c
0.0743152533045929,0.3522593474791794,2.276707172393799,5,0.3691234111785889,False,http://localhost:8003,1768820913,,False,1,aa5f6e40,2026-01-19_12-08-33,2.2811028957366943,2.2811028957366943,175886,sergio-XPS-15-9500,192.168.1.5,2.2811028957366943,1,True,False,False,False,True,False,False,True,0.0782267000165494,aa5f6e40
0.0743152533045929,0.3522593474791794,2.436836004257202,5,0.3974581241607666,False,http://localhost:8003,1768820919,,False,1,be96a2fd,2026-01-19_12-08-39,2.4409751892089844,2.4409751892089844,177093,sergio-XPS-15-9500,192.168.1.5,2.4409751892089844,1,True,False,False,False,True,False,False,True,0.0988530443174727,be96a2fd
0.0743152533045929,0.3522593474791794,6.658028841018677,5,1.2390151023864746,False,http://localhost:8003,1768820930,,False,1,8fd4d954,2026-01-19_12-08-50,7.324350118637085,7.324350118637085,178357,sergio-XPS-15-9500,192.168.1.5,7.324350118637085,1,True,False,False,False,True,False,True,False,0.07133875696594016,8fd4d954
0.0743152533045929,0.3522593474791794,2.495633840560913,5,0.38510971069335936,False,http://localhost:8003,1768820936,,False,1,8684a874,2026-01-19_12-08-56,2.9056968688964844,2.9056968688964844,179613,sergio-XPS-15-9500,192.168.1.5,2.9056968688964844,1,True,False,False,False,True,False,False,True,0.08646785623262251,8684a874
0.0743152533045929,0.3522593474791794,2.3146378993988037,5,0.37464404106140137,False,http://localhost:8003,1768820942,,False,1,d70fd3a2,2026-01-19_12-09-02,2.3187525272369385,2.3187525272369385,180827,sergio-XPS-15-9500,192.168.1.5,2.3187525272369385,1,True,False,False,False,True,False,False,True,0.03514348927961332,d70fd3a2
0.0743152533045929,0.3522593474791794,2.498570680618286,5,0.37261042594909666,False,http://localhost:8003,1768820947,,False,1,abaaebf8,2026-01-19_12-09-07,2.5029470920562744,2.5029470920562744,182032,sergio-XPS-15-9500,192.168.1.5,2.5029470920562744,1,True,False,False,False,True,False,True,False,0.08582540859307525,abaaebf8
0.07954940376147028,0.3649854752864963,9.604491949081421,5,1.1755133152008057,True,http://localhost:8003,1768820961,,False,1,5d48a7dd,2026-01-19_12-09-21,9.60886025428772,9.60886025428772,183260,sergio-XPS-15-9500,192.168.1.5,9.60886025428772,1,False,False,False,False,False,True,True,False,0.06547108266017204,5d48a7dd
0.0743152533045929,0.3522593474791794,3.8762130737304688,5,0.3647763729095459,True,http://localhost:8003,1768820968,,False,1,d9253804,2026-01-19_12-09-28,3.880464792251587,3.880464792251587,185371,sergio-XPS-15-9500,192.168.1.5,3.880464792251587,1,True,False,False,False,True,False,False,True,0.038626059833236914,d9253804
0.0743152533045929,0.3522593474791794,2.2411227226257324,5,0.36162853240966797,False,http://localhost:8003,1768820974,,False,1,fdb1a71c,2026-01-19_12-09-34,2.245361566543579,2.245361566543579,186592,sergio-XPS-15-9500,192.168.1.5,2.245361566543579,1,True,False,False,False,True,False,True,True,0.07944221200021404,fdb1a71c
0.0743152533045929,0.3522593474791794,2.248905897140503,5,0.36235270500183103,False,http://localhost:8003,1768820979,,False,1,52a8f206,2026-01-19_12-09-39,2.2539358139038086,2.2539358139038086,187800,sergio-XPS-15-9500,192.168.1.5,2.2539358139038086,1,True,False,False,False,True,False,False,True,0.09978431631107665,52a8f206
0.0743152533045929,0.3522593474791794,2.3572463989257812,5,0.3830925941467285,False,http://localhost:8003,1768820985,,False,1,02249971,2026-01-19_12-09-45,2.3617091178894043,2.3617091178894043,189026,sergio-XPS-15-9500,192.168.1.5,2.3617091178894043,1,True,False,False,False,True,False,False,True,0.09205180265684457,02249971
0.0743152533045929,0.3522593474791794,2.3035998344421387,5,0.3717160701751709,False,http://localhost:8003,1768820991,,False,1,c3e1ed25,2026-01-19_12-09-51,2.3079335689544678,2.3079335689544678,190242,sergio-XPS-15-9500,192.168.1.5,2.3079335689544678,1,True,False,False,False,True,False,False,True,0.09144355766189398,c3e1ed25
0.0743152533045929,0.3522593474791794,2.239521026611328,5,0.3606713771820068,False,http://localhost:8003,1768820996,,False,1,e77efbfe,2026-01-19_12-09-56,2.243769407272339,2.243769407272339,191450,sergio-XPS-15-9500,192.168.1.5,2.243769407272339,1,True,False,False,False,True,False,False,True,0.07895438944798339,e77efbfe
0.0743152533045929,0.3522593474791794,2.289245843887329,5,0.36969733238220215,False,http://localhost:8003,1768821002,,False,1,b63d705d,2026-01-19_12-10-02,2.293459177017212,2.293459177017212,192658,sergio-XPS-15-9500,192.168.1.5,2.293459177017212,1,True,False,False,False,True,False,False,True,0.0979702817184504,b63d705d
0.0813627928214657,0.373642333504444,8.17723536491394,5,1.1997929096221924,True,http://localhost:8003,1768821014,,False,1,c2b49d5f,2026-01-19_12-10-14,8.18145489692688,8.18145489692688,193870,sergio-XPS-15-9500,192.168.1.5,8.18145489692688,1,False,False,True,False,False,True,False,False,0.08848271842661322,c2b49d5f
0.0743152533045929,0.3522593474791794,4.016183137893677,5,0.3882882595062256,True,http://localhost:8003,1768821021,,False,1,751e8805,2026-01-19_12-10-21,4.020460605621338,4.020460605621338,195986,sergio-XPS-15-9500,192.168.1.5,4.020460605621338,1,True,False,False,False,True,False,False,True,0.07226323202056684,751e8805
0.0743152533045929,0.3522593474791794,2.4951744079589844,5,0.4111031532287598,False,http://localhost:8003,1768821027,,False,1,55997272,2026-01-19_12-10-27,2.4997806549072266,2.4997806549072266,197225,sergio-XPS-15-9500,192.168.1.5,2.4997806549072266,1,True,False,False,False,True,False,True,False,0.0821608621907378,55997272
0.07954940376147028,0.3649854752864963,7.8914878368377686,5,1.1408625602722169,True,http://localhost:8003,1768821039,,False,1,c72c5c81,2026-01-19_12-10-39,7.895885229110718,7.895885229110718,198438,sergio-XPS-15-9500,192.168.1.5,7.895885229110718,1,False,False,False,True,True,True,True,False,0.09498694151430796,c72c5c81
0.0743152533045929,0.3522593474791794,3.8655266761779785,5,0.362445068359375,True,http://localhost:8003,1768821046,,False,1,4a75d77c,2026-01-19_12-10-46,3.869797706604004,3.869797706604004,200555,sergio-XPS-15-9500,192.168.1.5,3.869797706604004,1,True,False,False,False,True,False,False,True,0.09294736151174086,4a75d77c
0.0743152533045929,0.3522593474791794,2.3493363857269287,5,0.38149294853210447,False,http://localhost:8003,1768821051,,False,1,c2308a71,2026-01-19_12-10-51,2.353856325149536,2.353856325149536,201775,sergio-XPS-15-9500,192.168.1.5,2.353856325149536,1,True,False,False,False,True,False,False,True,0.07646901408730243,c2308a71
0.0743152533045929,0.3522593474791794,2.2967300415039062,5,0.37166876792907716,False,http://localhost:8003,1768821057,,False,1,b39b4bbc,2026-01-19_12-10-57,2.300992012023926,2.300992012023926,202985,sergio-XPS-15-9500,192.168.1.5,2.300992012023926,1,True,False,False,False,True,False,False,True,0.06310895025982477,b39b4bbc
0.0743152533045929,0.3522593474791794,2.3306691646575928,5,0.37825717926025393,False,http://localhost:8003,1768821063,,False,1,5c179d0f,2026-01-19_12-11-03,2.3352127075195312,2.3352127075195312,204198,sergio-XPS-15-9500,192.168.1.5,2.3352127075195312,1,True,False,False,False,True,False,False,True,0.09214745705658531,5c179d0f
0.7921346697142901,1.0545568452820837,5.84848165512085,5,0.728736686706543,True,http://localhost:8003,1768821072,,False,1,54b75cc8,2026-01-19_12-11-12,5.8533689975738525,5.8533689975738525,205410,sergio-XPS-15-9500,192.168.1.5,5.8533689975738525,1,True,True,False,True,False,False,False,True,0.09992602021030114,54b75cc8
0.07451994486755961,0.3515575293610934,3.878021717071533,5,0.3628075122833252,True,http://localhost:8003,1768821080,,False,1,bb5ac038,2026-01-19_12-11-20,3.8829312324523926,3.8829312324523926,207185,sergio-XPS-15-9500,192.168.1.5,3.8829312324523926,1,True,False,True,False,True,False,True,False,0.08279050013235793,bb5ac038
0.7921346697142901,1.0545568452820837,5.78171968460083,5,0.7050829410552979,True,http://localhost:8003,1768821089,,False,1,f1c7000c,2026-01-19_12-11-29,5.786619186401367,5.786619186401367,208408,sergio-XPS-15-9500,192.168.1.5,5.786619186401367,1,True,True,False,True,True,True,False,False,0.0882484211859766,f1c7000c
0.0813627928214657,0.373642333504444,7.883875608444214,5,1.1401109218597412,True,http://localhost:8003,1768821100,,False,1,5f64114a,2026-01-19_12-11-40,7.887973070144653,7.887973070144653,210166,sergio-XPS-15-9500,192.168.1.5,7.887973070144653,1,False,False,True,False,False,False,True,True,0.09581281484761522,5f64114a
0.7921346697142901,1.0545568452820837,5.837187051773071,5,0.7390849590301514,True,http://localhost:8003,1768821109,,False,1,deb231ab,2026-01-19_12-11-49,5.842226028442383,5.842226028442383,212276,sergio-XPS-15-9500,192.168.1.5,5.842226028442383,1,True,True,False,True,True,False,False,False,0.014903696838843121,deb231ab
0.07451994486755961,0.3515575293610934,3.8521182537078857,5,0.357759428024292,True,http://localhost:8003,1768821116,,False,1,8e1ad60c,2026-01-19_12-11-56,3.856376886367798,3.856376886367798,214039,sergio-XPS-15-9500,192.168.1.5,3.856376886367798,1,True,False,True,False,True,False,False,True,0.07474982974728585,8e1ad60c
0.7657432112619441,1.0344358563738436,11.567106246948242,5,1.8771627426147461,True,http://localhost:8003,1768821131,,False,1,5c7a850a,2026-01-19_12-12-11,11.572225332260132,11.572225332260132,215255,sergio-XPS-15-9500,192.168.1.5,11.572225332260132,1,False,True,False,False,False,True,True,True,0.06667565158056586,5c7a850a
0.0743152533045929,0.3522593474791794,3.854253053665161,5,0.36142959594726565,True,http://localhost:8003,1768821139,,False,1,41600dca,2026-01-19_12-12-19,3.858793020248413,3.858793020248413,217924,sergio-XPS-15-9500,192.168.1.5,3.858793020248413,1,True,False,False,False,True,False,False,True,0.09516963566481865,41600dca
0.0743152533045929,0.3522593474791794,2.2381088733673096,5,0.3609159469604492,False,http://localhost:8003,1768821144,,False,1,55291f18,2026-01-19_12-12-24,2.242400646209717,2.242400646209717,219141,sergio-XPS-15-9500,192.168.1.5,2.242400646209717,1,True,False,False,False,True,False,False,True,0.09955056101622099,55291f18
0.0743152533045929,0.3522593474791794,2.247992515563965,5,0.3638484477996826,False,http://localhost:8003,1768821150,,False,1,e05da7a3,2026-01-19_12-12-30,2.2522785663604736,2.2522785663604736,220353,sergio-XPS-15-9500,192.168.1.5,2.2522785663604736,1,True,False,False,False,True,False,False,True,0.08881643587450277,e05da7a3
0.0743152533045929,0.3522593474791794,2.240065336227417,5,0.3607933521270752,False,http://localhost:8003,1768821155,,False,1,6773b6ef,2026-01-19_12-12-35,2.244333267211914,2.244333267211914,221554,sergio-XPS-15-9500,192.168.1.5,2.244333267211914,1,True,False,False,False,True,False,False,True,0.08162246894892994,6773b6ef
0.0743152533045929,0.3522593474791794,2.228623628616333,5,0.3605500221252441,False,http://localhost:8003,1768821161,,False,1,88f82273,2026-01-19_12-12-41,2.233116626739502,2.233116626739502,222761,sergio-XPS-15-9500,192.168.1.5,2.233116626739502,1,True,False,False,False,True,False,False,False,0.0576590087821367,88f82273
0.0743152533045929,0.3522593474791794,3.8948147296905518,5,0.36910762786865237,True,http://localhost:8003,1768821168,,False,1,122e7c9a,2026-01-19_12-12-48,3.898893356323242,3.898893356323242,223988,sergio-XPS-15-9500,192.168.1.5,3.898893356323242,1,True,False,False,True,True,False,True,True,0.046132117836141115,122e7c9a
0.07451994486755961,0.3515575293610934,3.8418056964874268,5,0.3551186084747314,True,http://localhost:8003,1768821175,,False,1,6944e329,2026-01-19_12-12-55,3.846059799194336,3.846059799194336,225216,sergio-XPS-15-9500,192.168.1.5,3.846059799194336,1,True,False,True,False,True,False,False,True,0.08553768973696241,6944e329
0.7921346697142901,1.0545568452820837,5.819804906845093,5,0.7136962413787842,True,http://localhost:8003,1768821185,,False,1,65fe9972,2026-01-19_12-13-05,5.825164794921875,5.825164794921875,226432,sergio-XPS-15-9500,192.168.1.5,5.825164794921875,1,True,True,False,False,True,False,False,False,0.09616135139330068,65fe9972
0.07954940376147028,0.3649854752864963,7.82697319984436,5,1.1294140815734863,True,http://localhost:8003,1768821196,,False,1,e0bb2fe1,2026-01-19_12-13-16,7.831338882446289,7.831338882446289,228191,sergio-XPS-15-9500,192.168.1.5,7.831338882446289,1,False,False,False,False,True,True,True,True,0.09002271724335277,e0bb2fe1
0.0743152533045929,0.3522593474791794,3.8710319995880127,5,0.36251654624938967,True,http://localhost:8003,1768821203,,False,1,13b36f19,2026-01-19_12-13-23,3.875239849090576,3.875239849090576,230300,sergio-XPS-15-9500,192.168.1.5,3.875239849090576,1,True,False,False,False,False,False,False,True,0.0857237854212837,13b36f19
0.0743152533045929,0.3522593474791794,3.875215768814087,5,0.36308274269104,True,http://localhost:8003,1768821210,,False,1,9c6b5628,2026-01-19_12-13-30,3.8797342777252197,3.8797342777252197,231521,sergio-XPS-15-9500,192.168.1.5,3.8797342777252197,1,True,False,False,False,True,False,True,False,0.06880465434613751,9c6b5628
0.0743152533045929,0.3522593474791794,2.2376744747161865,5,0.36208286285400393,False,http://localhost:8003,1768821216,,False,1,4b6d70bb,2026-01-19_12-13-36,2.242083787918091,2.242083787918091,232738,sergio-XPS-15-9500,192.168.1.5,2.242083787918091,1,True,False,False,False,True,False,True,False,0.06024639917014255,4b6d70bb
0.0743152533045929,0.3522593474791794,2.2306642532348633,5,0.359661865234375,False,http://localhost:8003,1768821221,,False,1,ca9acee8,2026-01-19_12-13-41,2.234971046447754,2.234971046447754,233946,sergio-XPS-15-9500,192.168.1.5,2.234971046447754,1,True,False,False,False,True,False,True,False,0.0935005319022256,ca9acee8
0.0743152533045929,0.3522593474791794,2.229747772216797,5,0.3594185829162598,False,http://localhost:8003,1768821227,,False,1,75b5c78b,2026-01-19_12-13-47,2.2341864109039307,2.2341864109039307,235172,sergio-XPS-15-9500,192.168.1.5,2.2341864109039307,1,True,False,False,False,True,False,True,False,0.05253849882367517,75b5c78b
0.0743152533045929,0.3522593474791794,2.2397162914276123,5,0.3618612289428711,False,http://localhost:8003,1768821233,,False,1,44bf33c9,2026-01-19_12-13-53,2.243781566619873,2.243781566619873,236376,sergio-XPS-15-9500,192.168.1.5,2.243781566619873,1,True,False,False,False,True,False,True,False,0.07878420224854064,44bf33c9
0.0743152533045929,0.3522593474791794,2.2368643283843994,5,0.36055378913879393,False,http://localhost:8003,1768821238,,False,1,f435b3b2,2026-01-19_12-13-58,2.240933895111084,2.240933895111084,237583,sergio-XPS-15-9500,192.168.1.5,2.240933895111084,1,True,False,False,False,True,False,False,True,0.07116860558400767,f435b3b2
0.0743152533045929,0.3522593474791794,2.265198230743408,5,0.36513686180114746,False,http://localhost:8003,1768821244,,False,1,8217f139,2026-01-19_12-14-04,2.2695438861846924,2.2695438861846924,238784,sergio-XPS-15-9500,192.168.1.5,2.2695438861846924,1,True,False,False,False,True,False,True,False,0.09707599413052871,8217f139
0.0743152533045929,0.3522593474791794,2.2422447204589844,5,0.3608452320098877,False,http://localhost:8003,1768821249,,False,1,efe10aca,2026-01-19_12-14-09,2.246490240097046,2.246490240097046,239994,sergio-XPS-15-9500,192.168.1.5,2.246490240097046,1,True,False,False,False,True,False,False,True,0.0391565433402237,efe10aca
0.08002835367212643,0.35831740099305937,8.202797412872314,5,1.1076955318450927,True,http://localhost:8003,1768821261,,False,1,3f085082,2026-01-19_12-14-21,8.2071533203125,8.2071533203125,241216,sergio-XPS-15-9500,192.168.1.5,8.2071533203125,1,False,False,True,True,True,True,True,False,0.0835804142411709,3f085082
0.0743152533045929,0.3522593474791794,3.885773181915283,5,0.3617554664611816,True,http://localhost:8003,1768821268,,False,1,ca26375b,2026-01-19_12-14-28,3.890075922012329,3.890075922012329,243329,sergio-XPS-15-9500,192.168.1.5,3.890075922012329,1,True,False,False,False,True,False,False,True,0.09060074015212932,ca26375b
0.0743152533045929,0.3522593474791794,2.2462470531463623,5,0.3624699592590332,False,http://localhost:8003,1768821274,,False,1,69643aea,2026-01-19_12-14-34,2.2505128383636475,2.2505128383636475,244551,sergio-XPS-15-9500,192.168.1.5,2.2505128383636475,1,True,False,False,False,True,False,False,True,0.07530859871726936,69643aea
0.0743152533045929,0.3522593474791794,2.263847827911377,5,0.3658243179321289,False,http://localhost:8003,1768821279,,False,1,4cae77fc,2026-01-19_12-14-39,2.267988681793213,2.267988681793213,245765,sergio-XPS-15-9500,192.168.1.5,2.267988681793213,1,True,False,False,False,True,False,False,True,0.08801626009397175,4cae77fc
0.0743152533045929,0.3522593474791794,2.2468783855438232,5,0.3630548000335693,False,http://localhost:8003,1768821285,,False,1,6b987e08,2026-01-19_12-14-45,2.2512388229370117,2.2512388229370117,246985,sergio-XPS-15-9500,192.168.1.5,2.2512388229370117,1,True,False,False,False,True,False,False,True,0.09792932706586027,6b987e08
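For reviewers who want a quick read of these sweeps without scrolling the raw rows, here is a minimal sketch (assuming pandas is installed; the path names the EasyOCR results file added just below) that loads one of this PR's result CSVs and ranks its trials by CER:

# Minimal sketch: rank the Ray Tune trials in one of this PR's result CSVs
# by CER (character error rate). Assumes pandas; the path matches the
# EasyOCR results file added in the next hunk.
import pandas as pd

df = pd.read_csv("src/results/raytune_easyocr_results_20260119_120204.csv")
cols = ["trial_id", "CER", "WER", "TIME_PER_PAGE"]
# Print the five best trials by CER, with their WER and per-page latency.
print(df.sort_values("CER").head(5)[cols].to_string(index=False))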
65
src/results/raytune_easyocr_results_20260119_120204.csv
Normal file
@@ -0,0 +1,65 @@
CER,WER,TIME,PAGES,TIME_PER_PAGE,worker,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore,config/text_threshold,config/low_text,config/link_threshold,config/slope_ths,config/ycenter_ths,config/height_ths,config/width_ths,config/add_margin,config/contrast_ths,config/adjust_contrast,config/decoder,config/beamWidth,config/min_size,logdir
0.3871430382852802,0.5182750384528632,19.13978934288025,5,3.7033697605133056,http://localhost:8002,1768819587,,False,1,0ba51edc,2026-01-19_11-46-27,19.150158882141113,19.150158882141113,137518,sergio-XPS-15-9500,192.168.1.5,19.150158882141113,1,0.5066472683346976,0.3124874041155775,0.2640094851170725,0.11369463817649797,0.47012928436448354,0.7140749654136573,0.643133477191141,0.11910600147231132,0.2722183833676177,0.7684200450221536,beamsearch,3,20,0ba51edc
0.062223201197885825,0.26399044299206303,11.377342224121094,5,2.1752953052520754,http://localhost:8002,1768819602,,False,1,c2ddb294,2026-01-19_11-46-42,11.383038759231567,11.383038759231567,137840,sergio-XPS-15-9500,192.168.1.5,11.383038759231567,1,0.4175797290692802,0.5963231402122613,0.36874666681089985,0.223680908941245,0.459921344533471,0.9160307499007694,0.9279619232072562,0.12298366234684793,0.11516147921112997,0.6668263919581685,greedy,10,20,c2ddb294
0.39700544361882206,0.5264527267179566,12.552152156829834,5,2.4208834171295166,http://localhost:8002,1768819617,,False,1,e82ff347,2026-01-19_11-46-57,12.557852029800415,12.557852029800415,138037,sergio-XPS-15-9500,192.168.1.5,12.557852029800415,1,0.8540537965666715,0.294588934626999,0.5092574060096554,0.2836712766196415,0.6190202697962148,0.810073297090729,0.955177616687997,0.10497968516826324,0.20957208332268756,0.7475085753710696,beamsearch,7,10,e82ff347
0.07781775834482615,0.3051087241758982,12.261723279953003,5,2.362420177459717,http://localhost:8002,1768819633,,False,1,532bade0,2026-01-19_11-47-13,12.265946626663208,12.265946626663208,138237,sergio-XPS-15-9500,192.168.1.5,12.265946626663208,1,0.8141250315590343,0.479912630164245,0.2027669826029772,0.11444262905063128,0.7404783620983263,0.301871563170945,0.35514852924629375,0.27832075427107744,0.2643837228077205,0.7950403527209229,greedy,3,10,532bade0
0.3487860557598165,0.5005453336802469,12.705831289291382,5,2.4508751392364503,http://localhost:8002,1768819649,,False,1,7d15d320,2026-01-19_11-47-29,12.712336301803589,12.712336301803589,138464,sergio-XPS-15-9500,192.168.1.5,12.712336301803589,1,0.3225669850847642,0.2716721665537871,0.26115345621898345,0.2438651926519595,0.6194544051054931,0.5792394844360738,0.4710319694788726,0.13213212713543926,0.1990327712555196,0.3304729155445536,beamsearch,7,10,7d15d320
0.3356719522269469,0.47356787280835055,13.11896562576294,5,2.5329070568084715,http://localhost:8002,1768819666,,False,1,9d244107,2026-01-19_11-47-46,13.124910593032837,13.124910593032837,138659,sergio-XPS-15-9500,192.168.1.5,13.124910593032837,1,0.34889752108886873,0.39007345640142954,0.22641510809759163,0.17907271838822,0.8644844159597871,0.6275871303293161,0.9722853596788665,0.25555008849029126,0.20043175984558798,0.7707927516030697,beamsearch,7,10,9d244107
0.2742112621871928,0.43639473613327356,13.743902206420898,5,2.6569590091705324,http://localhost:8002,1768819683,,False,1,f160d61d,2026-01-19_11-48-03,13.750498533248901,13.750498533248901,138904,sergio-XPS-15-9500,192.168.1.5,13.750498533248901,1,0.8392454366146391,0.3155621572041812,0.4873405945675176,0.08582733675720434,0.9790121644393985,0.4062417762545848,0.6466326123022476,0.19715070089301498,0.23503015353761492,0.41517636715917056,beamsearch,10,15,f160d61d
0.09848790101332737,0.32483251468294605,10.20632028579712,5,1.9527865886688232,http://localhost:8002,1768819697,,False,1,1be1e7a5,2026-01-19_11-48-17,10.210542917251587,10.210542917251587,139116,sergio-XPS-15-9500,192.168.1.5,10.210542917251587,1,0.833246186868533,0.22457589994570235,0.32254503276757784,0.23399561843072308,0.30165921403980517,0.8658122652407174,0.47250440785836867,0.2238860017234068,0.1886386486304371,0.4576817046304348,greedy,3,10,1be1e7a5
0.4136569417819424,0.5311620590036745,14.170307874679565,5,2.746191930770874,http://localhost:8002,1768819714,,False,1,0746a065,2026-01-19_11-48-34,14.175411701202393,14.175411701202393,139318,sergio-XPS-15-9500,192.168.1.5,14.175411701202393,1,0.6782050871534447,0.22844595642210797,0.2858119663327552,0.0823237135063647,0.9612792593924089,0.665348992884313,0.8626670975336155,0.04300909760808497,0.270098820639789,0.45556228770798246,beamsearch,10,5,0746a065
0.4517379831360281,0.5799232118269153,15.184179544448853,5,2.9479862689971923,http://localhost:8002,1768819733,,False,1,ef6faf9d,2026-01-19_11-48-53,15.188986778259277,15.188986778259277,139517,sergio-XPS-15-9500,192.168.1.5,15.188986778259277,1,0.39558213831954714,0.5599422938176799,0.3313024647230755,0.11634655299660798,0.8823955834187702,0.6660518255567262,0.796016060076042,0.1299041367034449,0.2152856765400713,0.6606446175138574,beamsearch,10,10,ef6faf9d
0.0795526147054266,0.34016478642481734,11.83824896812439,5,2.2789079189300536,http://localhost:8002,1768819748,,False,1,e584ad1a,2026-01-19_11-49-08,11.842672109603882,11.842672109603882,139771,sergio-XPS-15-9500,192.168.1.5,11.842672109603882,1,0.521503445317256,0.5967505351644852,0.4313761698948889,0.18235873322120522,0.425714368894258,0.9959973340677325,0.7683261374584024,0.018826411104235885,0.09775666402707693,0.628476421820741,greedy,5,20,e584ad1a
0.09113684668662517,0.3330104965172591,13.415843725204468,5,2.595126819610596,http://localhost:8002,1768819765,,False,1,933eaf3b,2026-01-19_11-49-25,13.420702457427979,13.420702457427979,139980,sergio-XPS-15-9500,192.168.1.5,13.420702457427979,1,0.6841928895220837,0.4987357892894665,0.3892687916541862,0.013496416992424515,0.7313608327277628,0.30075189594812957,0.32892055287409155,0.2910230441279402,0.12231738001404545,0.6542796585827699,greedy,3,20,933eaf3b
0.07683542859531813,0.29422679092874626,12.476734638214111,5,2.407120943069458,http://localhost:8002,1768819781,,False,1,5cc050c0,2026-01-19_11-49-41,12.481242179870605,12.481242179870605,140188,sergio-XPS-15-9500,192.168.1.5,12.481242179870605,1,0.7076826224292139,0.4751142111109723,0.5719253650216765,0.20726075894486198,0.7574616804022614,0.48759940016947356,0.34266143931551063,0.18447732850058915,0.05055007965981624,0.5684478612561757,greedy,5,15,5cc050c0
0.06306661910327489,0.2898453031979762,11.470694541931152,5,2.2064542293548586,http://localhost:8002,1768819796,,False,1,d3c4733b,2026-01-19_11-49-56,11.4755117893219,11.4755117893219,140395,sergio-XPS-15-9500,192.168.1.5,11.4755117893219,1,0.6620107715544297,0.46192225302253637,0.5999869164872036,0.22619461913686095,0.5081500315391371,0.475339433636797,0.5106649520736647,0.18343269541739415,0.05344530818183559,0.5503520865389809,greedy,5,15,d3c4733b
0.062270483694448396,0.28136185456156826,11.204349517822266,5,2.1529050350189207,http://localhost:8002,1768819810,,False,1,b45ad82b,2026-01-19_11-50-10,11.209157705307007,11.209157705307007,140574,sergio-XPS-15-9500,192.168.1.5,11.209157705307007,1,0.5677170679516823,0.39248586783769635,0.5772785270028471,0.27599118000336537,0.5077328211777172,0.9804901966926808,0.4977991183990612,0.07239471385409058,0.1374763382905679,0.553666724679821,greedy,5,15,b45ad82b
0.05996048766984661,0.26719903989315885,10.76261305809021,5,2.0644459247589113,http://localhost:8002,1768819824,,False,1,8acf6ec9,2026-01-19_11-50-24,10.767472267150879,10.767472267150879,140781,sergio-XPS-15-9500,192.168.1.5,10.767472267150879,1,0.45865724369035377,0.3694009035940602,0.4151173065881186,0.2983365466960818,0.3631913446659816,0.9980863757691772,0.5845159135795941,0.0721946556655992,0.1459278780476781,0.694791501629087,greedy,5,5,8acf6ec9
0.06099161461125324,0.2731943754797238,10.691137313842773,5,2.049327087402344,http://localhost:8002,1768819838,,False,1,0551450f,2026-01-19_11-50-38,10.69617772102356,10.69617772102356,140969,sergio-XPS-15-9500,192.168.1.5,10.69617772102356,1,0.4402243626112622,0.3701488279313097,0.40203668237242685,0.2972046540464212,0.33871151213781014,0.8838165530603757,0.7081592028492127,0.0882537861188746,0.15672333775519132,0.701627303389235,greedy,10,5,0551450f
0.061099404730611595,0.2721280502767147,10.488921165466309,5,2.0086814403533935,http://localhost:8002,1768819852,,False,1,e740013a,2026-01-19_11-50-52,10.493494510650635,10.493494510650635,141174,sergio-XPS-15-9500,192.168.1.5,10.493494510650635,1,0.46435985811111974,0.34785224515762775,0.45493529224642276,0.29478569868586896,0.35587921159117397,0.8172744152107332,0.7122588321341333,0.0735916007360217,0.15982046838787856,0.7164721195205754,greedy,5,5,e740013a
0.062362858472938,0.272332407323177,10.604278802871704,5,2.034042978286743,http://localhost:8002,1768819866,,False,1,22c24728,2026-01-19_11-51-06,10.608573198318481,10.608573198318481,141340,sergio-XPS-15-9500,192.168.1.5,10.608573198318481,1,0.579678584169857,0.41597218340706976,0.4252016667747404,0.2679346252767811,0.34868781409745264,0.7747328556811077,0.5639686467419519,0.07445003550177257,0.16554473301217898,0.7073749357717483,greedy,10,5,22c24728
0.06215767332972164,0.2747475932624559,10.546220541000366,5,2.021405267715454,http://localhost:8002,1768819880,,False,1,d1b611a8,2026-01-19_11-51-20,10.550852060317993,10.550852060317993,141520,sergio-XPS-15-9500,192.168.1.5,10.550852060317993,1,0.4422199064362936,0.3610913124264453,0.512759066575697,0.25795910850742676,0.5611259808565064,0.9053873818686548,0.5976970185172742,0.003121661182585389,0.08700122299695832,0.6200011976268031,greedy,10,5,d1b611a8
0.06426741821045164,0.27754887165353204,10.526280164718628,5,2.017490863800049,http://localhost:8002,1768819894,,False,1,a1925725,2026-01-19_11-51-34,10.530900001525879,10.530900001525879,141685,sergio-XPS-15-9500,192.168.1.5,10.530900001525879,1,0.5079166883998535,0.44070967935910216,0.3555775923905935,0.2990878745571421,0.31120640343991984,0.9491605272601941,0.721432583570574,0.044062271648251126,0.1572631030161951,0.5962531429630691,greedy,5,5,a1925725
0.060448802280017165,0.2709457820432465,10.548709630966187,5,2.0212356567382814,http://localhost:8002,1768819907,,False,1,f6248ceb,2026-01-19_11-51-47,10.553314208984375,10.553314208984375,141848,sergio-XPS-15-9500,192.168.1.5,10.553314208984375,1,0.4717256039811322,0.36544351935053254,0.44547752189718304,0.29867816914798173,0.3833038520221923,0.8392790049435077,0.6924094072779299,0.0852529065561854,0.1544529445184886,0.7151769237673308,greedy,5,5,f6248ceb
0.061830952891847354,0.27643497142574114,10.458194017410278,5,2.00508770942688,http://localhost:8002,1768819921,,False,1,9408f008,2026-01-19_11-52-01,10.462894439697266,10.462894439697266,142026,sergio-XPS-15-9500,192.168.1.5,10.462894439697266,1,0.36151386422841214,0.3538388593453238,0.4559692019279934,0.258413183713029,0.39490484466097675,0.8743587585061078,0.7008339670509499,0.08528252345983173,0.1412514911085921,0.7102293742914433,greedy,5,5,9408f008
0.06426139507144008,0.27969442229397773,10.85228157043457,5,2.0829681873321535,http://localhost:8002,1768819935,,False,1,a0aa078a,2026-01-19_11-52-15,10.856995105743408,10.856995105743408,142190,sergio-XPS-15-9500,192.168.1.5,10.856995105743408,1,0.4624158086714028,0.42040393809756477,0.41520125659911294,0.29032442769565125,0.38480963688924097,0.745502857691457,0.5817045834292819,0.045692170174803245,0.17769522993714032,0.6933972538344093,greedy,5,5,a0aa078a
0.06269459198356074,0.27808950345890404,10.585867643356323,5,2.0289974212646484,http://localhost:8002,1768819949,,False,1,324be6ad,2026-01-19_11-52-29,10.590425252914429,10.590425252914429,142377,sergio-XPS-15-9500,192.168.1.5,10.590425252914429,1,0.39019467846190514,0.372308752898106,0.4640373077177259,0.20167201551181882,0.4408716269770253,0.8406520699713839,0.8098310920672391,0.1579316915947745,0.1384207575445601,0.7454573365368217,greedy,5,5,324be6ad
0.07959827118630344,0.2871382933960637,11.532482385635376,5,2.219746446609497,http://localhost:8002,1768819964,,False,1,e1c26fe1,2026-01-19_11-52-44,11.537264823913574,11.537264823913574,142538,sergio-XPS-15-9500,192.168.1.5,11.537264823913574,1,0.3090429790922413,0.33472186465221,0.39720817790586443,0.0041528793175236445,0.3025883785231392,0.9359865988554746,0.4208565345904826,0.09825579905606344,0.08933198214929214,0.5029113260048625,greedy,5,5,e1c26fe1
0.06153670825357198,0.2689836062793151,10.684980630874634,5,2.048065185546875,http://localhost:8002,1768819978,,False,1,871a2974,2026-01-19_11-52-58,10.689571142196655,10.689571142196655,142730,sergio-XPS-15-9500,192.168.1.5,10.689571142196655,1,0.6247643595063705,0.2700409637884238,0.523706372392991,0.26010593479118665,0.5419430667470642,0.8772489609968006,0.866157823298259,0.1525272090916175,0.23282983510183955,0.6005045065411087,greedy,10,5,871a2974
0.06673842132253202,0.2895430656572954,11.181420803070068,5,2.148970937728882,http://localhost:8002,1768819993,,False,1,5aaa2960,2026-01-19_11-53-13,11.186044454574585,11.186044454574585,142902,sergio-XPS-15-9500,192.168.1.5,11.186044454574585,1,0.5312313131533724,0.5274817776501124,0.36246508220473683,0.1487343581575564,0.3926538404095683,0.9516125555915751,0.6733549601019699,0.048249293092278434,0.11205800044575707,0.7992457276130864,greedy,7,5,5aaa2960
0.06397855317924395,0.27562926342642274,10.582021236419678,5,2.0291433334350586,http://localhost:8002,1768820006,,False,1,21bd3de3,2026-01-19_11-53-26,10.586687564849854,10.586687564849854,143089,sergio-XPS-15-9500,192.168.1.5,10.586687564849854,1,0.4768706082264196,0.4116856094728855,0.47401542881269365,0.24184252961783387,0.6689268585545911,0.7706602741028105,0.6152463359675456,0.02384590208270837,0.14958983968802692,0.6832923394286707,greedy,5,5,21bd3de3
0.05928688439040566,0.26340764235199676,10.82849907875061,5,2.0774466037750243,http://localhost:8002,1768820021,,False,1,1557acdd,2026-01-19_11-53-41,10.833132982254028,10.833132982254028,143248,sergio-XPS-15-9500,192.168.1.5,10.833132982254028,1,0.7552574004836203,0.44533911204124527,0.31397183762754305,0.2781958432695631,0.4971448247990278,0.702889696463513,0.5563365487128928,0.10957807143315677,0.1792808875596712,0.7431378339011148,greedy,3,5,1557acdd
0.05996751845943706,0.2656487417441341,11.046596050262451,5,2.1210866928100587,http://localhost:8002,1768820035,,False,1,23e5421b,2026-01-19_11-53-55,11.051404476165771,11.051404476165771,143435,sergio-XPS-15-9500,192.168.1.5,11.051404476165771,1,0.7718089675955625,0.4446379405494256,0.3019967059446066,0.27530868169916184,0.48775088657867727,0.7025268307300849,0.5457135094112008,0.10608020395503459,0.17680901565764098,0.7399221495601584,greedy,3,5,23e5421b
0.05943303923556994,0.2621136461900505,10.89347219467163,5,2.090515375137329,http://localhost:8002,1768820049,,False,1,4662a08f,2026-01-19_11-54-09,10.898061990737915,10.898061990737915,143626,sergio-XPS-15-9500,192.168.1.5,10.898061990737915,1,0.7655197786088256,0.4384608011311873,0.2900656349558717,0.2738896956339715,0.4897956878476248,0.7114900186099934,0.5392251925681772,0.11338377422440528,0.18288699118515803,0.7492268780264275,greedy,3,5,4662a08f
0.059764190418310784,0.26498596833223664,11.022373676300049,5,2.11647310256958,http://localhost:8002,1768820064,,False,1,8339cb3e,2026-01-19_11-54-24,11.026973724365234,11.026973724365234,143832,sergio-XPS-15-9500,192.168.1.5,11.026973724365234,1,0.7686099049266422,0.44630560025029414,0.2948219426310189,0.2727084952650962,0.49027990928339404,0.7249036670847477,0.5450468550932773,0.11187079599626384,0.18133138980677752,0.7495335565594098,greedy,3,5,8339cb3e
0.060684238278697525,0.26432483439151866,12.10981273651123,5,2.3338690757751466,http://localhost:8002,1768820079,,False,1,9c9cf542,2026-01-19_11-54-39,12.114561557769775,12.114561557769775,144014,sergio-XPS-15-9500,192.168.1.5,12.114561557769775,1,0.7532180802163942,0.5128327503981508,0.2570950665245929,0.21228601663917626,0.5702886327992472,0.5874866302046862,0.41605423922305346,0.1393125792842351,0.22050576617777679,0.7624824674521864,greedy,3,20,9c9cf542
0.08014581283242714,0.28932853882106035,10.766591310501099,5,2.0627391815185545,http://localhost:8002,1768820093,,False,1,7b99dc7d,2026-01-19_11-54-53,10.773411512374878,10.773411512374878,144217,sergio-XPS-15-9500,192.168.1.5,10.773411512374878,1,0.8900827816008225,0.43692605130405904,0.28299893768197637,0.25090796326354026,0.45116119804450994,0.7000835777935013,0.5311272120253014,0.10699302785038173,0.2904514002507723,0.7756605791225515,greedy,3,5,7b99dc7d
0.05998922172744085,0.26585145931941695,11.418177604675293,5,2.19525465965271,http://localhost:8002,1768820108,,False,1,889ff391,2026-01-19_11-55-08,11.422764301300049,11.422764301300049,144398,sergio-XPS-15-9500,192.168.1.5,11.422764301300049,1,0.7853225189675154,0.463910613321873,0.23698735272141672,0.27377548391814954,0.6121219754884698,0.551217667291872,0.43571381214714444,0.11657214266943153,0.18871141271799163,0.7335864533748023,greedy,3,5,889ff391
0.3537681802368841,0.4969864100911835,12.881014823913574,5,2.4865323543548583,http://localhost:8002,1768820124,,False,1,7e811d46,2026-01-19_11-55-24,12.88630223274231,12.88630223274231,144607,sergio-XPS-15-9500,192.168.1.5,12.88630223274231,1,0.7266484292255461,0.5415454213873866,0.3301145976622343,0.1865414523299046,0.47980014672018056,0.7370946863942303,0.6321175664041752,0.16199096365481883,0.24575549479858036,0.7988955477215958,beamsearch,3,5,7e811d46
0.08668141149396207,0.3195016810538794,12.23897933959961,5,2.3584585189819336,http://localhost:8002,1768820140,,False,1,aad8a433,2026-01-19_11-55-40,12.244789123535156,12.244789123535156,144837,sergio-XPS-15-9500,192.168.1.5,12.244789123535156,1,0.8890784877906777,0.49729149007901785,0.3022378793797936,0.15068002069309427,0.5217560545383055,0.6246570748018311,0.39540672252266484,0.06113992103803731,0.19740387526722958,0.6691724379280026,greedy,3,20,aad8a433
0.33039603802482187,0.4796702224046533,12.4546537399292,5,2.4026978492736815,http://localhost:8002,1768820156,,False,1,512657a2,2026-01-19_11-55-56,12.45941162109375,12.45941162109375,145063,sergio-XPS-15-9500,192.168.1.5,12.45941162109375,1,0.6232362282312066,0.3918712695091323,0.2051294768906529,0.23628755351196915,0.5886422425865593,0.3680701363856915,0.45704649890130883,0.1172561016305299,0.17265532433475142,0.7657720890343414,beamsearch,3,5,512657a2
0.06198201775009295,0.2639318510923077,10.336721420288086,5,1.9784754753112792,http://localhost:8002,1768820170,,False,1,1da2591c,2026-01-19_11-56-10,10.341253757476807,10.341253757476807,145258,sergio-XPS-15-9500,192.168.1.5,10.341253757476807,1,0.7945748814752798,0.3074609198039082,0.3512850377909583,0.2803387165565871,0.676034214318366,0.5425759112229473,0.4977769366841911,0.1405039691690697,0.18414358174506226,0.6495146967256282,greedy,3,10,1da2591c
0.23930652997356217,0.4047803085409988,13.96639633178711,5,2.70588903427124,http://localhost:8002,1768820187,,False,1,1fc76c61,2026-01-19_11-56-27,13.971062898635864,13.971062898635864,145448,sergio-XPS-15-9500,192.168.1.5,13.971062898635864,1,0.7298730667959007,0.43128174897306926,0.37543194001483676,0.033557047235571416,0.4227439352044997,0.6369762315598249,0.5720837980668902,0.16989421299763682,0.20357556846664004,0.33606535760084727,beamsearch,7,15,1fc76c61
0.05916457749009331,0.2603697639812623,10.936553001403809,5,2.099363851547241,http://localhost:8002,1768820201,,False,1,466fabc4,2026-01-19_11-56-41,10.941264390945435,10.941264390945435,145657,sergio-XPS-15-9500,192.168.1.5,10.941264390945435,1,0.7646655943554652,0.4496059489020273,0.3037692280282893,0.27820217212001197,0.4796795208364998,0.7118325937653041,0.5413221047834652,0.10120472780313837,0.16953835385986285,0.7373278953886837,greedy,3,5,466fabc4
0.06262277504663857,0.2680984132847148,10.939441919326782,5,2.099300193786621,http://localhost:8002,1768820216,,False,1,1d6d1749,2026-01-19_11-56-56,10.943971633911133,10.943971633911133,145853,sergio-XPS-15-9500,192.168.1.5,10.943971633911133,1,0.8234354899576677,0.4593995267054814,0.27033008042371826,0.24452223445450588,0.44951347701495115,0.6907896319414741,0.5324461272026295,0.1235467025491428,0.12932778883432983,0.7317188726093867,greedy,3,5,1d6d1749
0.05970784035209096,0.258759438956101,10.990158319473267,5,2.1104721069335937,http://localhost:8002,1768820230,,False,1,c82e12e9,2026-01-19_11-57-10,10.994841575622559,10.994841575622559,146026,sergio-XPS-15-9500,192.168.1.5,10.994841575622559,1,0.7435671290019616,0.4902723579691337,0.31030673207841203,0.2821781420999702,0.4743635349095276,0.7868678535393907,0.6534237946773291,0.09810216733901932,0.18818934557100567,0.769000804122876,greedy,3,5,c82e12e9
0.07399781605809005,0.2901757233837255,11.241674661636353,5,2.159311056137085,http://localhost:8002,1768820245,,False,1,dc4b8ad0,2026-01-19_11-57-25,11.246280431747437,11.246280431747437,146227,sergio-XPS-15-9500,192.168.1.5,11.246280431747437,1,0.8635855881513506,0.5679840217648511,0.3108576081126515,0.26596438861226535,0.4736117661041297,0.7935405923179888,0.6568791745253106,0.09852706108769861,0.22110037713279163,0.7726420509771701,greedy,3,5,dc4b8ad0
0.059443757821647306,0.2652742693642366,10.932884454727173,5,2.0979042530059813,http://localhost:8002,1768820259,,False,1,dd5c1aa0,2026-01-19_11-57-39,10.937772035598755,10.937772035598755,146415,sergio-XPS-15-9500,192.168.1.5,10.937772035598755,1,0.7497851211362265,0.49340285442914233,0.24486518704295845,0.22169705261942863,0.5344290231994961,0.7343738246174152,0.6273279588084633,0.1401353190181211,0.19391335447188496,0.751931055711065,greedy,3,10,dd5c1aa0
0.33905839374179186,0.46681630291244874,11.817269086837769,5,2.2754374027252195,http://localhost:8002,1768820274,,False,1,3e431bbc,2026-01-19_11-57-54,11.822028636932373,11.822028636932373,146609,sergio-XPS-15-9500,192.168.1.5,11.822028636932373,1,0.7409469255126825,0.486311604635016,0.2426683920471307,0.22749653812474147,0.5339818816411395,0.7675880896677424,0.6136943680830941,0.22201604923294813,0.2076143561269635,0.7773645815175689,beamsearch,3,10,3e431bbc
0.06022704320482961,0.26313388102560387,10.998746633529663,5,2.1118124961853026,http://localhost:8002,1768820288,,False,1,156758d9,2026-01-19_11-58-08,11.003510475158691,11.003510475158691,146797,sergio-XPS-15-9500,192.168.1.5,11.003510475158691,1,0.6958655352045846,0.5130811270234237,0.3398685649368741,0.281609671843136,0.6188171051225511,0.6619539249830828,0.7621944146375241,0.13722873492512194,0.19541317596404653,0.724668083186668,greedy,3,10,156758d9
0.0619061941971184,0.2666708599391416,10.721810817718506,5,2.0556752681732178,http://localhost:8002,1768820303,,False,1,98b752e7,2026-01-19_11-58-23,10.726754426956177,10.726754426956177,146994,sergio-XPS-15-9500,192.168.1.5,10.726754426956177,1,0.8065507370753903,0.479579558894321,0.2710803109658562,0.2200369611680297,0.562424006392253,0.6785297866543542,0.6428102120307683,0.17361026837711904,0.25222880963797256,0.6816772979912098,greedy,3,10,98b752e7
0.060772000212913825,0.2693655727035526,11.828697204589844,5,2.276572847366333,http://localhost:8002,1768820318,,False,1,b76fb991,2026-01-19_11-58-38,11.833409070968628,11.833409070968628,147173,sergio-XPS-15-9500,192.168.1.5,11.833409070968628,1,0.6588562181986706,0.5057274333487476,0.21185595176486843,0.2530909139222912,0.6525256193586906,0.8104014913294882,0.4815502590805036,0.13014894080011688,0.16940039157653397,0.7552923776175787,greedy,3,10,b76fb991
0.07314038576784788,0.3150308431474841,12.541530132293701,5,2.420142650604248,http://localhost:8002,1768820334,,False,1,2cddab16,2026-01-19_11-58-54,12.546295404434204,12.546295404434204,147391,sergio-XPS-15-9500,192.168.1.5,12.546295404434204,1,0.8467938317793842,0.5454167229484307,0.31221025364961774,0.19484970751487457,0.7101321488954703,0.6066858622923857,0.3883609000553786,0.09428410179254802,0.23303430823510501,0.5028771950032019,greedy,3,20,2cddab16
0.061126787276099506,0.2754658344456032,12.830697536468506,5,2.4755293846130373,http://localhost:8002,1768820350,,False,1,c5e9c336,2026-01-19_11-59-10,12.835358381271362,12.835358381271362,147587,sergio-XPS-15-9500,192.168.1.5,12.835358381271362,1,0.7742942276856887,0.45602451871204075,0.2906132981749209,0.28249790167048744,0.49285375099310735,0.7281704754203927,0.30136076169570813,0.11215186859095508,0.18604751676297107,0.7485499894558536,greedy,3,10,c5e9c336
0.059864794050619355,0.2672736064749025,11.629220485687256,5,2.235791301727295,http://localhost:8002,1768820365,,False,1,4746a594,2026-01-19_11-59-25,11.634002208709717,11.634002208709717,147814,sergio-XPS-15-9500,192.168.1.5,11.634002208709717,1,0.7600341581312108,0.4856987064104726,0.25429745004407167,0.23782684371695748,0.5171294186553896,0.7199868218813051,0.5150852975917685,0.1466645033310691,0.21110091695829342,0.7887741773568971,greedy,3,15,4746a594
0.059687361636187354,0.25915782844539953,10.77558970451355,5,2.0677656650543215,http://localhost:8002,1768820379,,False,1,914de1fb,2026-01-19_11-59-39,10.780381202697754,10.780381202697754,148016,sergio-XPS-15-9500,192.168.1.5,10.780381202697754,1,0.7206799644549393,0.4038298181079831,0.22829349882480535,0.26704100913427425,0.40874625247425306,0.7422689086598406,0.5495893868854069,0.1269079072494077,0.1907921420998867,0.7564991275004229,greedy,3,5,914de1fb
0.05944052289142775,0.2603006035896063,11.235510110855103,5,2.1587305068969727,http://localhost:8002,1768820394,,False,1,67d86f75,2026-01-19_11-59-54,11.240439653396606,11.240439653396606,148208,sergio-XPS-15-9500,192.168.1.5,11.240439653396606,1,0.7181780900634192,0.40619737782309295,0.23858930971427372,0.17036711251144926,0.41028051751847794,0.7619291987754846,0.5944586460638401,0.12680370752155648,0.19408677869066687,0.7340530830475422,greedy,3,5,67d86f75
0.05864527764234886,0.2576966365837255,10.711666345596313,5,2.054002857208252,http://localhost:8002,1768820408,,False,1,ec233275,2026-01-19_12-00-08,10.71644115447998,10.71644115447998,148394,sergio-XPS-15-9500,192.168.1.5,10.71644115447998,1,0.7119912570829008,0.4067353312041748,0.22290482686450167,0.13876386837316096,0.4216785745225061,0.7449060175492836,0.6189859060561754,0.12837536724587273,0.16720360936555814,0.6490148035375993,greedy,7,5,ec233275
0.058352281456512646,0.26325850918850957,10.756606340408325,5,2.0632463455200196,http://localhost:8002,1768820422,,False,1,1b85472e,2026-01-19_12-00-22,10.761056900024414,10.761056900024414,148589,sergio-XPS-15-9500,192.168.1.5,10.761056900024414,1,0.6647184009064185,0.42466969296752816,0.21838222053573686,0.1629305080861391,0.7994293119091709,0.6436655189392679,0.6065310919737225,0.14619053351152517,0.1671131734904739,0.6416317933607728,greedy,7,10,1b85472e
0.3660240439441647,0.4947236362577508,13.107557773590088,5,2.53425874710083,http://localhost:8002,1768820439,,False,1,c50724c2,2026-01-19_12-00-39,13.112190961837769,13.112190961837769,148777,sergio-XPS-15-9500,192.168.1.5,13.112190961837769,1,0.6634068628046286,0.4221917610956251,0.22391772412866445,0.1502086057528373,0.7842640466327674,0.6529205282440211,0.5966980952588006,0.18574462350804272,0.16077183234622805,0.6422206751727608,beamsearch,7,5,c50724c2
0.062152982639591625,0.27540347582693964,10.558995485305786,5,2.022567129135132,http://localhost:8002,1768820452,,False,1,881d9f45,2026-01-19_12-00-52,10.563637018203735,10.563637018203735,148992,sergio-XPS-15-9500,192.168.1.5,10.563637018203735,1,0.6346549144056921,0.3809145239465362,0.2823575989757486,0.1284614307850303,0.8067091565131851,0.8337121990108658,0.567787249051487,0.20829411872710996,0.16721474316062188,0.6282852294207945,greedy,7,5,881d9f45
0.05856626938126275,0.2581712321259471,10.876498222351074,5,2.087088108062744,http://localhost:8002,1768820467,,False,1,48fc43e4,2026-01-19_12-01-07,10.88118314743042,10.88118314743042,149163,sergio-XPS-15-9500,192.168.1.5,10.88118314743042,1,0.7042086976838686,0.4025170289737934,0.2160231541556799,0.16549335913941385,0.8858930429274254,0.6801565065140187,0.6745339610780225,0.08262987034261617,0.13360114059916128,0.5889319630704115,greedy,7,15,48fc43e4
0.05976448758711881,0.2592269888370555,10.979224681854248,5,2.107044887542725,http://localhost:8002,1768820481,,False,1,652caf77,2026-01-19_12-01-21,10.983993291854858,10.983993291854858,149348,sergio-XPS-15-9500,192.168.1.5,10.983993291854858,1,0.6988387104713508,0.4289552511338064,0.2166071100318819,0.08342333197598858,0.8547499849878485,0.6078156114278425,0.6796871899662313,0.0591502474857241,0.14761325178795806,0.5982026862890478,greedy,7,15,652caf77
0.059133090191924254,0.2616872288695368,10.93423843383789,5,2.0975637912750242,http://localhost:8002,1768820495,,False,1,2e85880b,2026-01-19_12-01-35,10.93894910812378,10.93894910812378,149544,sergio-XPS-15-9500,192.168.1.5,10.93894910812378,1,0.6774988511926161,0.4026304656490138,0.20196424213945063,0.16637061772902026,0.9069000290827862,0.6717917525978443,0.607813099351824,0.08607375284532315,0.12816482122073206,0.5836410965708964,greedy,7,15,2e85880b
0.058834943191146474,0.258064666499282,10.712863683700562,5,2.055108594894409,http://localhost:8002,1768820509,,False,1,08c06d24,2026-01-19_12-01-49,10.71783971786499,10.71783971786499,149716,sergio-XPS-15-9500,192.168.1.5,10.71783971786499,1,0.676761107149889,0.3948167640336808,0.20446373408896712,0.1252645275302706,0.928745330628802,0.6772167484136661,0.728934789581864,0.07948320492885358,0.12455482683154301,0.5820049881076059,greedy,7,15,08c06d24
0.05934167210765926,0.26507859745022083,10.722304821014404,5,2.0554207801818847,http://localhost:8002,1768820524,,False,1,b3f45b00,2026-01-19_12-02-04,10.727020978927612,10.727020978927612,149910,sergio-XPS-15-9500,192.168.1.5,10.727020978927612,1,0.6791241480460476,0.38507960399360586,0.2008675489682369,0.13136654102633838,0.9452093699034901,0.6808870002862947,0.7451721898503598,0.08065678907057289,0.11084582244266457,0.5764033974919818,greedy,7,15,b3f45b00
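The config/ columns in the file above are the EasyOCR knobs the sweep explored. As a hypothetical reconstruction (not the actual tuning script in this PR), a Ray Tune search space consistent with those columns could look like the sketch below; the parameter names mirror the CSV headers, and every range or choice set is inferred from the values observed in the rows:

# Hypothetical search space matching the config/ columns of
# raytune_easyocr_results_20260119_120204.csv. Bounds and choice sets
# are inferred from observed trial values, not taken from the real code.
from ray import tune

easyocr_space = {
    "text_threshold": tune.uniform(0.3, 0.9),
    "low_text": tune.uniform(0.2, 0.6),
    "link_threshold": tune.uniform(0.2, 0.6),
    "slope_ths": tune.uniform(0.0, 0.3),
    "ycenter_ths": tune.uniform(0.3, 1.0),
    "height_ths": tune.uniform(0.3, 1.0),
    "width_ths": tune.uniform(0.3, 1.0),
    "add_margin": tune.uniform(0.0, 0.3),
    "contrast_ths": tune.uniform(0.05, 0.3),
    "adjust_contrast": tune.uniform(0.3, 0.8),
    "decoder": tune.choice(["greedy", "beamsearch"]),
    "beamWidth": tune.choice([3, 5, 7, 10]),
    "min_size": tune.choice([5, 10, 15, 20]),
}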
65
src/results/raytune_paddle_results_20260119_122609.csv
Normal file
@@ -0,0 +1,65 @@
CER,WER,TIME,PAGES,TIME_PER_PAGE,worker,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore,config/use_doc_orientation_classify,config/use_doc_unwarping,config/textline_orientation,config/text_det_thresh,config/text_det_box_thresh,config/text_det_unclip_ratio,config/text_rec_score_thresh,logdir
0.03506661663316561,0.09890345974963388,11.85569167137146,5,2.223856973648071,http://localhost:8002,1768821470,,False,1,c385d490,2026-01-19_12-17-50,11.864287614822388,11.864287614822388,255694,sergio-XPS-15-9500,192.168.1.5,11.864287614822388,1,False,False,False,0.3694663403739679,0.4296387270337578,0.0,0.1783109083293045,c385d490
0.03599172786858722,0.09831877575011358,3.6901509761810303,5,0.642470121383667,http://localhost:8002,1768821477,,False,1,28a0a423,2026-01-19_12-17-57,3.6944973468780518,3.6944973468780518,255930,sergio-XPS-15-9500,192.168.1.5,3.6944973468780518,1,True,False,False,0.443249796611768,0.4817558265252385,0.0,0.06237975078446407,28a0a423
0.07296898422220219,0.13203708321215762,10.501965999603271,5,2.0055192947387694,http://localhost:8002,1768821491,,False,1,f699b826,2026-01-19_12-18-11,10.506679058074951,10.506679058074951,256056,sergio-XPS-15-9500,192.168.1.5,10.506679058074951,1,False,True,False,0.2851409433291632,0.5181201198120159,0.0,0.5402431853279566,f699b826
0.06341497143231878,0.12432485697376627,10.013647079467773,5,1.9113924980163575,http://localhost:8002,1768821505,,False,1,49e77d45,2026-01-19_12-18-25,10.018975019454956,10.018975019454956,256261,sergio-XPS-15-9500,192.168.1.5,10.018975019454956,1,False,True,False,0.4091020962342421,0.5477675836994064,0.0,0.28125964062929637,49e77d45
0.06363307378397837,0.11080195018785229,10.315315961837769,5,1.9735893249511718,http://localhost:8002,1768821518,,False,1,08dff189,2026-01-19_12-18-38,10.319286346435547,10.319286346435547,256431,sergio-XPS-15-9500,192.168.1.5,10.319286346435547,1,False,True,True,0.4761569778732009,0.47781667917332393,0.0,0.010287859440038183,08dff189
0.00927190028988934,0.08293509652512027,3.394526243209839,5,0.5889779567718506,http://localhost:8002,1768821525,,False,1,2808180e,2026-01-19_12-18-45,3.3984148502349854,3.3984148502349854,256622,sergio-XPS-15-9500,192.168.1.5,3.3984148502349854,1,False,False,True,0.49092093640044654,0.16386227611297105,0.0,0.36495336114676485,2808180e
0.06414858633862171,0.1138840355665884,10.091642618179321,5,1.9286378383636475,http://localhost:8002,1768821539,,False,1,8b33e2a2,2026-01-19_12-18-59,10.095749855041504,10.095749855041504,256746,sergio-XPS-15-9500,192.168.1.5,10.095749855041504,1,False,True,False,0.664057104821503,0.380194482697527,0.0,0.0957856258135195,8b33e2a2
0.04089344159161516,0.11588877886734197,3.399895191192627,5,0.5897929668426514,http://localhost:8002,1768821546,,False,1,2b3b0aad,2026-01-19_12-19-06,3.403998613357544,3.403998613357544,256911,sergio-XPS-15-9500,192.168.1.5,3.403998613357544,1,True,False,False,0.15162885621474814,0.015269709226466177,0.0,0.6005426046606002,2b3b0aad
0.06440335927000067,0.125496108332261,10.158945322036743,5,1.9415269851684571,http://localhost:8002,1768821559,,False,1,8c1998de,2026-01-19_12-19-19,10.162839651107788,10.162839651107788,257030,sergio-XPS-15-9500,192.168.1.5,10.162839651107788,1,True,True,True,0.3692127966881518,0.23308318268023623,0.0,0.3773645637989277,8c1998de
0.0637132502302169,0.11234475429253714,9.987636089324951,5,1.9088503837585449,http://localhost:8002,1768821573,,False,1,52bacbb6,2026-01-19_12-19-33,9.991463661193848,9.991463661193848,257222,sergio-XPS-15-9500,192.168.1.5,9.991463661193848,1,False,True,False,0.6035565410217514,0.21880259661403342,0.0,0.18713153326839937,52bacbb6
0.008343047226538839,0.08349130431035265,3.386183738708496,5,0.5885046482086181,http://localhost:8002,1768821579,,False,1,08c1ee35,2026-01-19_12-19-39,3.39007830619812,3.39007830619812,257399,sergio-XPS-15-9500,192.168.1.5,3.39007830619812,1,True,False,True,0.15926489447447112,0.017648992877564967,0.0,0.44224480118340653,08c1ee35
0.007922541795615114,0.07887346048819885,3.468980550765991,5,0.5967342376708984,http://localhost:8002,1768821586,,False,1,d00c4e76,2026-01-19_12-19-46,3.472960948944092,3.472960948944092,257525,sergio-XPS-15-9500,192.168.1.5,3.472960948944092,1,True,False,True,0.07077078342680466,0.004051086507914577,0.0,0.46605997897727297,d00c4e76
0.016055552163489285,0.08753651728221294,3.3815455436706543,5,0.5863098621368408,http://localhost:8002,1768821593,,False,1,bb72b916,2026-01-19_12-19-53,3.385627031326294,3.385627031326294,257655,sergio-XPS-15-9500,192.168.1.5,3.385627031326294,1,True,False,True,0.00406946269144004,0.024694902295496916,0.0,0.48724120796716147,bb72b916
0.04101062641443912,0.11434949759329069,3.3144912719726562,5,0.5752715110778809,http://localhost:8002,1768821599,,False,1,c12ba2dc,2026-01-19_12-19-59,3.3184001445770264,3.3184001445770264,257771,sergio-XPS-15-9500,192.168.1.5,3.3184001445770264,1,True,False,True,0.11631707320987289,0.690466345723201,0.0,0.6724394280648069,c12ba2dc
0.00877838333494364,0.08577894245301848,3.401432514190674,5,0.589998722076416,http://localhost:8002,1768821606,,False,1,463a2384,2026-01-19_12-20-06,3.4053428173065186,3.4053428173065186,257879,sergio-XPS-15-9500,192.168.1.5,3.4053428173065186,1,True,False,True,0.22358777119494402,0.11342742897015146,0.0,0.42574884909601923,463a2384
0.008258946852964685,0.07832593783541303,3.4435582160949707,5,0.5993115901947021,http://localhost:8002,1768821613,,False,1,9ec8a6c5,2026-01-19_12-20-13,3.447549343109131,3.447549343109131,257998,sergio-XPS-15-9500,192.168.1.5,3.447549343109131,1,True,False,True,0.00914625516134962,0.28951184233224014,0.0,0.4822045024114849,9ec8a6c5
0.016055552163489285,0.08753651728221294,3.357020139694214,5,0.58282470703125,http://localhost:8002,1768821620,,False,1,c5e2ab01,2026-01-19_12-20-20,3.360861301422119,3.360861301422119,258136,sergio-XPS-15-9500,192.168.1.5,3.360861301422119,1,True,False,True,0.003475038037149451,0.29241480396041347,0.0,0.5570331572371645,c5e2ab01
0.009030183622618133,0.06800810511996136,3.4037389755249023,5,0.5921475410461425,http://localhost:8002,1768821627,,False,1,791ed981,2026-01-19_12-20-27,3.4075520038604736,3.4075520038604736,258252,sergio-XPS-15-9500,192.168.1.5,3.4075520038604736,1,True,False,True,0.08655779066151734,0.3187645875435276,0.0,0.2687428540439976,791ed981
0.008664940340048574,0.08581798715920706,3.501950263977051,5,0.6108397006988525,http://localhost:8002,1768821633,,False,1,f8442025,2026-01-19_12-20-33,3.5058133602142334,3.5058133602142334,258364,sergio-XPS-15-9500,192.168.1.5,3.5058133602142334,1,True,False,True,0.26385969784523366,0.10646638343274928,0.0,0.6888529567810926,f8442025
0.013289181242042186,0.08277097527295318,3.2847726345062256,5,0.5695433616638184,http://localhost:8002,1768821640,,False,1,c4cc8356,2026-01-19_12-20-40,3.2885093688964844,3.2885093688964844,258479,sergio-XPS-15-9500,192.168.1.5,3.2885093688964844,1,True,False,True,0.0783907286407576,0.6144374684317566,0.0,0.49431837576833404,c4cc8356
0.008558844366776789,0.08503058558440392,3.376376152038574,5,0.5869657039642334,http://localhost:8002,1768821647,,False,1,fb7bf10e,2026-01-19_12-20-47,3.380413770675659,3.380413770675659,258615,sergio-XPS-15-9500,192.168.1.5,3.380413770675659,1,True,False,True,0.19290877255165814,0.09975349505857617,0.0,0.6114422209758432,fb7bf10e
0.007997676431652,0.07780877475636923,3.3821396827697754,5,0.5890754699707031,http://localhost:8002,1768821654,,False,1,d2036b54,2026-01-19_12-20-54,3.386087417602539,3.386087417602539,258726,sergio-XPS-15-9500,192.168.1.5,3.386087417602539,1,True,False,True,0.045413006981742665,0.014462040606135707,0.0,0.43172761082245126,d2036b54
0.009147368445442098,0.06969651955749985,3.374091148376465,5,0.5859436988830566,http://localhost:8002,1768821660,,False,1,50ea7f3b,2026-01-19_12-21-00,3.3778791427612305,3.3778791427612305,258841,sergio-XPS-15-9500,192.168.1.5,3.3778791427612305,1,True,False,True,0.05615414666061707,0.1767564331348277,0.0,0.294181079680786,50ea7f3b
0.008414440034646826,0.07859969916180594,3.3822972774505615,5,0.5889940738677979,http://localhost:8002,1768821667,,False,1,248f11ad,2026-01-19_12-21-07,3.3861117362976074,3.3861117362976074,258958,sergio-XPS-15-9500,192.168.1.5,3.3861117362976074,1,True,False,True,0.037929131718362014,0.08279922744979032,0.0,0.44895447738110594,248f11ad
0.008631855890798765,0.08171378358546351,3.3687093257904053,5,0.5860745429992675,http://localhost:8002,1768821674,,False,1,ed62f7dc,2026-01-19_12-21-14,3.372666835784912,3.372666835784912,259076,sergio-XPS-15-9500,192.168.1.5,3.372666835784912,1,True,False,True,0.1333628019047363,0.2729950555484231,0.0,0.39746071410829,ed62f7dc
0.008664940340048574,0.08499154087821534,3.371145248413086,5,0.5862448215484619,http://localhost:8002,1768821681,,False,1,d8907a1f,2026-01-19_12-21-21,3.375185012817383,3.375185012817383,259206,sergio-XPS-15-9500,192.168.1.5,3.375185012817383,1,True,False,True,0.2765606196671755,0.060003260056553154,0.0,0.5025665425204284,d8907a1f
0.009147368445442098,0.07229696373274716,3.3624093532562256,5,0.5846651554107666,http://localhost:8002,1768821687,,False,1,ebaac043,2026-01-19_12-21-27,3.366320848464966,3.366320848464966,259323,sergio-XPS-15-9500,192.168.1.5,3.366320848464966,1,True,False,True,0.04919576638833845,0.36820782546645486,0.0,0.32312205105133734,ebaac043
0.008558844366776789,0.08503058558440392,3.3781065940856934,5,0.587260627746582,http://localhost:8002,1768821694,,False,1,a0894bc0,2026-01-19_12-21-34,3.3822152614593506,3.3822152614593506,259443,sergio-XPS-15-9500,192.168.1.5,3.3822152614593506,1,True,False,True,0.1994235733794807,0.15972291414455095,0.0,0.5977644425109412,a0894bc0
0.008024895940958,0.07962534018744696,3.398592710494995,5,0.5916557788848877,http://localhost:8002,1768821701,,False,1,3498c1b8,2026-01-19_12-21-41,3.4023826122283936,3.4023826122283936,259554,sergio-XPS-15-9500,192.168.1.5,3.4023826122283936,1,True,False,True,0.1046266985888523,0.23508200526753675,0.0,0.5467266950434034,3498c1b8
0.008024895940958,0.07962534018744696,3.4101011753082275,5,0.5957276344299316,http://localhost:8002,1768821707,,False,1,00fc5f6a,2026-01-19_12-21-47,3.4141347408294678,3.4141347408294678,259689,sergio-XPS-15-9500,192.168.1.5,3.4141347408294678,1,True,False,True,0.09816375424029757,0.40866092341544563,0.0,0.5397528720422529,00fc5f6a
0.008449143199810622,0.08349130431035265,3.4055111408233643,5,0.5931827545166015,http://localhost:8002,1768821714,,False,1,e98c02d1,2026-01-19_12-21-54,3.409532070159912,3.409532070159912,259816,sergio-XPS-15-9500,192.168.1.5,3.409532070159912,1,True,False,True,0.3140290686317056,0.052614998451672106,0.0,0.6465903750193005,e98c02d1
0.008024895940958,0.07962534018744696,3.3723814487457275,5,0.5866386890411377,http://localhost:8002,1768821721,,False,1,c70f3f43,2026-01-19_12-22-01,3.3762624263763428,3.3762624263763428,259923,sergio-XPS-15-9500,192.168.1.5,3.3762624263763428,1,True,False,True,0.10014126954970229,0.42707748560882025,0.0,0.5502134276128419,c70f3f43
0.008343047226538839,0.08349130431035265,3.3672409057617188,5,0.5856597900390625,http://localhost:8002,1768821728,,False,1,70400fbe,2026-01-19_12-22-08,3.371093511581421,3.371093511581421,260039,sergio-XPS-15-9500,192.168.1.5,3.371093511581421,1,True,False,True,0.16292741177177594,0.4548418182130589,0.0,0.5302300590456391,70400fbe
0.008664940340048574,0.08499154087821534,3.4183735847473145,5,0.5965535163879394,http://localhost:8002,1768821734,,False,1,4dcb599d,2026-01-19_12-22-14,3.4222280979156494,3.4222280979156494,260159,sergio-XPS-15-9500,192.168.1.5,3.4222280979156494,1,True,False,True,0.23726923927972388,0.4074643735298082,0.0,0.41001202937163644,4dcb599d
0.04068873330092939,0.11438501946884572,3.257974624633789,5,0.5640182018280029,http://localhost:8002,1768821741,,False,1,4228b5e1,2026-01-19_12-22-21,3.261892557144165,3.261892557144165,260291,sergio-XPS-15-9500,192.168.1.5,3.261892557144165,1,True,False,False,0.12333092543339132,0.5239761637260665,0.0,0.5745717593014468,4228b5e1
0.06275857947195311,0.12652527218853557,9.750442743301392,5,1.8625127792358398,http://localhost:8002,1768821754,,False,1,3588064b,2026-01-19_12-22-34,9.754103899002075,9.754103899002075,260400,sergio-XPS-15-9500,192.168.1.5,9.754103899002075,1,False,True,True,0.10034065797370648,0.34091325083457025,0.0,0.6394382232363077,3588064b
0.040999537564886945,0.11588877886734197,3.285776138305664,5,0.5690357685089111,http://localhost:8002,1768821761,,False,1,11ccb158,2026-01-19_12-22-41,3.289609670639038,3.289609670639038,260569,sergio-XPS-15-9500,192.168.1.5,3.289609670639038,1,True,False,False,0.32864774599403973,0.14086017880721893,0.0,0.46819585706944256,11ccb158
0.062252142887134154,0.11824393793048431,9.891753673553467,5,1.8906636714935303,http://localhost:8002,1768821774,,False,1,6fc2cbb9,2026-01-19_12-22-54,9.895762920379639,9.895762920379639,260704,sergio-XPS-15-9500,192.168.1.5,9.895762920379639,1,False,True,True,0.059161274748840434,0.21510105294599707,0.0,0.5189526304991655,6fc2cbb9
0.035476033214537156,0.11817641701000778,3.285740613937378,5,0.5687613487243652,http://localhost:8002,1768821781,,False,1,d915205d,2026-01-19_12-23-01,3.289746046066284,3.289746046066284,260873,sergio-XPS-15-9500,192.168.1.5,3.289746046066284,1,True,False,False,0.4165672741815639,0.0010212040152359678,0.0,0.34076033139687656,d915205d
0.0640894002629319,0.11483863284111936,9.806191444396973,5,1.8735287666320801,http://localhost:8002,1768821794,,False,1,2f6a0de8,2026-01-19_12-23-14,9.809982538223267,9.809982538223267,260993,sergio-XPS-15-9500,192.168.1.5,9.809982538223267,1,False,True,True,0.5305871352962446,0.5562291603129679,0.0,0.19677826870589865,2f6a0de8
0.008734210036141653,0.08345578243479762,3.3932855129241943,5,0.590654993057251,http://localhost:8002,1768821801,,False,1,75a6f03e,2026-01-19_12-23-21,3.3974790573120117,3.3974790573120117,261182,sergio-XPS-15-9500,192.168.1.5,3.3974790573120117,1,True,False,True,0.17403705065527203,0.05196087574793615,0.0,0.37230135627667593,75a6f03e
0.008024895940958,0.07962534018744696,3.372239828109741,5,0.586278247833252,http://localhost:8002,1768821807,,False,1,59bdf5af,2026-01-19_12-23-27,3.3761444091796875,3.3761444091796875,261290,sergio-XPS-15-9500,192.168.1.5,3.3761444091796875,1,True,False,True,0.0964007218643779,0.4285920164263687,0.0,0.5544150084923888,59bdf5af
0.007884233436756935,0.07784781946255781,3.391608476638794,5,0.5895267486572265,http://localhost:8002,1768821814,,False,1,181fa700,2026-01-19_12-23-34,3.3955013751983643,3.3955013751983643,261408,sergio-XPS-15-9500,192.168.1.5,3.3955013751983643,1,True,False,True,0.04616218689941105,0.4861882831078568,0.0,0.5658024954699784,181fa700
0.008187554044856696,0.07781229758700277,3.379288911819458,5,0.5891064167022705,http://localhost:8002,1768821821,,False,1,8df7daf7,2026-01-19_12-23-41,3.383202314376831,3.383202314376831,261523,sergio-XPS-15-9500,192.168.1.5,3.383202314376831,1,True,False,True,0.02800972164203512,0.4596234327116702,0.0,0.5894305118437192,8df7daf7
0.0080286377688869,0.07962181735681341,3.3880317211151123,5,0.5899625778198242,http://localhost:8002,1768821828,,False,1,d427a211,2026-01-19_12-23-48,3.3918912410736084,3.3918912410736084,261651,sergio-XPS-15-9500,192.168.1.5,3.3918912410736084,1,True,False,True,0.060058513373542344,0.4968017369460056,0.0,0.4546749796342963,d427a211
0.04089344159161516,0.11588877886734197,3.2276556491851807,5,0.5582141876220703,http://localhost:8002,1768821834,,False,1,c83e898d,2026-01-19_12-23-54,3.2317638397216797,3.2317638397216797,261771,sergio-XPS-15-9500,192.168.1.5,3.2317638397216797,1,False,False,False,0.12734972085227625,0.3933923240644007,0.0,0.6218152533645911,c83e898d
0.07289971452610912,0.1312833201534554,8.918929815292358,5,1.6958380699157716,http://localhost:8002,1768821846,,False,1,34bfaecf,2026-01-19_12-24-06,8.923492193222046,8.923492193222046,261885,sergio-XPS-15-9500,192.168.1.5,8.923492193222046,1,True,True,True,0.02983245257805507,0.5541286918768669,0.0,0.5254000761733085,34bfaecf
0.008664940340048574,0.08424318400960076,3.3413267135620117,5,0.5809893608093262,http://localhost:8002,1768821853,,False,1,d28ff6ad,2026-01-19_12-24-13,3.3452816009521484,3.3452816009521484,262045,sergio-XPS-15-9500,192.168.1.5,3.3452816009521484,1,True,False,True,0.15364693264219786,0.5914356505484054,0.0,0.4346147311057641,d28ff6ad
0.00877838333494364,0.08577894245301848,3.4076058864593506,5,0.5933670043945313,http://localhost:8002,1768821860,,False,1,1bd5239a,2026-01-19_12-24-20,3.4112603664398193,3.4112603664398193,262180,sergio-XPS-15-9500,192.168.1.5,3.4112603664398193,1,True,False,True,0.22332206917987685,0.3526810869908701,0.0,0.5730079634012908,1bd5239a
0.03369141887914488,0.11024529401954712,3.2711544036865234,5,0.5658481121063232,http://localhost:8002,1768821867,,False,1,df514085,2026-01-19_12-24-27,3.2749204635620117,3.2749204635620117,262288,sergio-XPS-15-9500,192.168.1.5,3.2749204635620117,1,True,False,False,0.07573375090561205,0.2490247970846971,0.0,0.39959759235219644,df514085
0.0623615517224065,0.124505989182175,9.822217226028442,5,1.8769143104553223,http://localhost:8002,1768821880,,False,1,05146970,2026-01-19_12-24-40,9.826353549957275,9.826353549957275,262409,sergio-XPS-15-9500,192.168.1.5,9.826353549957275,1,False,True,True,0.01074645265207852,0.13367849913726723,0.0,0.6632577581918868,05146970
0.008024895940958,0.07962534018744696,3.3825182914733887,5,0.5886817455291748,http://localhost:8002,1768821886,,False,1,b670fd4b,2026-01-19_12-24-46,3.3867027759552,3.3867027759552,262594,sergio-XPS-15-9500,192.168.1.5,3.3867027759552,1,True,False,True,0.09944138895292096,0.44624592238486255,0.0,0.5462963698223894,b670fd4b
0.016572945800740084,0.09518707717328821,3.4130094051361084,5,0.5945035457611084,http://localhost:8002,1768821893,,False,1,be5f9b1d,2026-01-19_12-24-53,3.4169981479644775,3.4169981479644775,262711,sergio-XPS-15-9500,192.168.1.5,3.4169981479644775,1,True,False,True,0.6894923163644786,0.4890742911772068,0.0,0.4855884110840981,be5f9b1d
0.008251781930748131,0.08198754491185642,3.367403745651245,5,0.5863472938537597,http://localhost:8002,1768821900,,False,1,1c75b89c,2026-01-19_12-25-00,3.371392011642456,3.371392011642456,262819,sergio-XPS-15-9500,192.168.1.5,3.371392011642456,1,True,False,True,0.1150745104873075,0.32762735447067737,0.0,0.5208070473970087,1c75b89c
0.007922541795615114,0.07887346048819885,3.387901544570923,5,0.5906172752380371,http://localhost:8002,1768821907,,False,1,6340f2d6,2026-01-19_12-25-07,3.391674041748047,3.391674041748047,262936,sergio-XPS-15-9500,192.168.1.5,3.391674041748047,1,True,False,True,0.07997843641478165,0.4088133874043337,0.0,0.5627391657839758,6340f2d6
0.007922541795615114,0.07887346048819885,3.368699312210083,5,0.585447120666504,http://localhost:8002,1768821913,,False,1,7ffe088b,2026-01-19_12-25-13,3.372554302215576,3.372554302215576,263058,sergio-XPS-15-9500,192.168.1.5,3.372554302215576,1,True,False,True,0.07055815208796122,0.07907086437131383,0.0,0.46815861739605075,7ffe088b
0.007922541795615114,0.07887346048819885,3.376523733139038,5,0.5873369693756103,http://localhost:8002,1768821920,,False,1,f252a3e6,2026-01-19_12-25-20,3.3803553581237793,3.3803553581237793,263185,sergio-XPS-15-9500,192.168.1.5,3.3803553581237793,1,True,False,True,0.06870328017999491,0.03579995978472439,0.0,0.5047711345804472,f252a3e6
0.02490382433538609,0.09753449830381603,3.3904788494110107,5,0.5890470027923584,http://localhost:8002,1768821927,,False,1,edee0586,2026-01-19_12-25-27,3.394632577896118,3.394632577896118,263300,sergio-XPS-15-9500,192.168.1.5,3.394632577896118,1,True,False,True,0.0009275348433581271,0.031063654135949786,0.0,0.45979693397354415,edee0586
0.008414440034646826,0.07859969916180594,3.4424312114715576,5,0.5994386196136474,http://localhost:8002,1768821934,,False,1,ef76bf22,2026-01-19_12-25-34,3.446359395980835,3.446359395980835,263418,sergio-XPS-15-9500,192.168.1.5,3.446359395980835,1,True,False,True,0.03189500271483534,0.0016098696097210721,0.0,0.49583062638649,ef76bf22
0.007922541795615114,0.07887346048819885,3.445734977722168,5,0.6011210918426514,http://localhost:8002,1768821941,,False,1,f647f452,2026-01-19_12-25-41,3.449845314025879,3.449845314025879,263537,sergio-XPS-15-9500,192.168.1.5,3.449845314025879,1,True,False,True,0.06868764014547389,0.08690693420543298,0.0,0.42607348522409366,f647f452
0.007922541795615114,0.07887346048819885,3.4003381729125977,5,0.5931215763092041,http://localhost:8002,1768821947,,False,1,92f45b9b,2026-01-19_12-25-47,3.404212713241577,3.404212713241577,263672,sergio-XPS-15-9500,192.168.1.5,3.404212713241577,1,True,False,True,0.0725476612921705,0.08215869338356059,0.0,0.4170900315829183,92f45b9b
0.007922541795615114,0.07887346048819885,3.3902156352996826,5,0.5895231246948243,http://localhost:8002,1768821954,,False,1,7349d65b,2026-01-19_12-25-54,3.3941099643707275,3.3941099643707275,263792,sergio-XPS-15-9500,192.168.1.5,3.3941099643707275,1,True,False,True,0.07327612908475345,0.09511260866628114,0.0,0.42047687042215837,7349d65b
0.008631855890798765,0.08246566328471161,3.3953261375427246,5,0.5909849166870117,http://localhost:8002,1768821961,,False,1,dbe6de3f,2026-01-19_12-26-01,3.39920711517334,3.39920711517334,263908,sergio-XPS-15-9500,192.168.1.5,3.39920711517334,1,True,False,True,0.1407896872320316,0.07713209075208538,0.0,0.38134661262033054,dbe6de3f
0.007922541795615114,0.07887346048819885,3.451122760772705,5,0.6020939826965332,http://localhost:8002,1768821968,,False,1,7d295e31,2026-01-19_12-26-08,3.4549307823181152,3.4549307823181152,264023,sergio-XPS-15-9500,192.168.1.5,3.4549307823181152,1,True,False,True,0.06788051560872134,0.03348309120485185,0.0,0.476817937122221,7d295e31
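The rows above are trial records from the Ray Tune results CSV written by the tuning script shown below (the column header is not visible in this excerpt). Loading them back for inspection is straightforward with pandas; a minimal sketch, assuming a hypothetical output path results/raytune_paddle.csv (the script writes under results/ with a raytune_<service> prefix):

    import pandas as pd

    # Hypothetical path; adjust to the CSV actually produced by analyze_results.
    df = pd.read_csv("results/raytune_paddle.csv")

    # Quick summary of the numeric columns (CER, latency, etc.).
    print(df.describe())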
74
src/run_tuning.py
Normal file
@@ -0,0 +1,74 @@
#!/usr/bin/env python3
"""Run hyperparameter tuning for OCR services."""

import sys
import argparse
from raytune_ocr import (
    check_workers, create_trainable, run_tuner, analyze_results,
    paddle_ocr_payload, doctr_payload, easyocr_payload,
    PADDLE_OCR_SEARCH_SPACE, DOCTR_SEARCH_SPACE, EASYOCR_SEARCH_SPACE,
    PADDLE_OCR_CONFIG_KEYS, DOCTR_CONFIG_KEYS, EASYOCR_CONFIG_KEYS,
)

SERVICES = {
    "paddle": {
        "ports": [8002],
        "payload_fn": paddle_ocr_payload,
        "search_space": PADDLE_OCR_SEARCH_SPACE,
        "config_keys": PADDLE_OCR_CONFIG_KEYS,
        "name": "PaddleOCR",
    },
    "doctr": {
        "ports": [8003],
        "payload_fn": doctr_payload,
        "search_space": DOCTR_SEARCH_SPACE,
        "config_keys": DOCTR_CONFIG_KEYS,
        "name": "DocTR",
    },
    "easyocr": {
        "ports": [8002],
        "payload_fn": easyocr_payload,
        "search_space": EASYOCR_SEARCH_SPACE,
        "config_keys": EASYOCR_CONFIG_KEYS,
        "name": "EasyOCR",
    },
}

def main():
    parser = argparse.ArgumentParser(description="Run OCR hyperparameter tuning")
    parser.add_argument("--service", choices=["paddle", "doctr", "easyocr"], required=True)
    parser.add_argument("--samples", type=int, default=64, help="Number of samples")
    args = parser.parse_args()

    cfg = SERVICES[args.service]
    print(f"\n{'='*50}")
    print(f"Hyperparameter Tuning: {cfg['name']}")
    print(f"Samples: {args.samples}")
    print(f"{'='*50}\n")

    # Check workers
    healthy = check_workers(cfg["ports"], cfg["name"])

    # Create trainable and run tuning
    trainable = create_trainable(cfg["ports"], cfg["payload_fn"])
    results = run_tuner(
        trainable=trainable,
        search_space=cfg["search_space"],
        num_samples=args.samples,
        num_workers=len(healthy),
    )

    # Analyze results
    df = analyze_results(
        results,
        output_folder="results",
        prefix=f"raytune_{args.service}",
        config_keys=cfg["config_keys"],
    )

    print(f"\n{'='*50}")
    print("Tuning complete!")
    print(f"{'='*50}")

if __name__ == "__main__":
    main()
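For reference, a typical invocation of the script above (assuming the raytune_ocr package is importable and an OCR worker is listening on the service's port, e.g. the PaddleOCR worker on localhost:8002 seen in the result rows) would be:

    python src/run_tuning.py --service paddle --samples 64

The --samples flag maps directly to Ray Tune's num_samples, i.e. the number of hyperparameter configurations tried, while the number of parallel workers is derived from how many service ports pass the health check.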
Before Width: | Height: | Size: 29 KiB  After Width: | Height: | Size: 28 KiB
Before Width: | Height: | Size: 16 KiB  After Width: | Height: | Size: 16 KiB
Before Width: | Height: | Size: 18 KiB  After Width: | Height: | Size: 21 KiB
Before Width: | Height: | Size: 44 KiB
Before Width: | Height: | Size: 44 KiB
@@ -21,27 +21,17 @@
   },
   {
     "file": "figura_5.png",
-    "title": "Arquitectura de ejecución con subprocesos",
+    "title": "Arquitectura de ejecución con Docker Compose",
     "index": 5
   },
   {
     "file": "figura_6.png",
-    "title": "Arquitectura de ejecución con subprocesos",
+    "title": "Impacto de textline_orientation en CER",
     "index": 6
   },
   {
     "file": "figura_7.png",
-    "title": "Impacto de textline_orientation en CER",
+    "title": "Reducción de errores: Baseline vs Optimizado",
     "index": 7
   },
   {
     "file": "figura_8.png",
     "title": "Comparación Baseline vs Optimizado (24 páginas)",
     "index": 8
   },
   {
     "file": "figura_9.png",
     "title": "Estructura del repositorio del proyecto",
     "index": 9
   }
 ]
@@ -1,2 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<a:clrMap xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" bg1="lt1" tx1="dk1" bg2="lt2" tx2="dk2" accent1="accent1" accent2="accent2" accent3="accent3" accent4="accent4" accent5="accent5" accent6="accent6" hlink="hlink" folHlink="folHlink"/>
@@ -1,15 +0,0 @@
<xml xmlns:o="urn:schemas-microsoft-com:office:office">
 <o:MainFile HRef="../plantilla_individual.htm"/>
 <o:File HRef="item0013.xml"/>
 <o:File HRef="props014.xml"/>
 <o:File HRef="item0015.xml"/>
 <o:File HRef="props016.xml"/>
 <o:File HRef="item0017.xml"/>
 <o:File HRef="props018.xml"/>
 <o:File HRef="themedata.thmx"/>
 <o:File HRef="colorschememapping.xml"/>
 <o:File HRef="image001.png"/>
 <o:File HRef="image002.gif"/>
 <o:File HRef="header.htm"/>
 <o:File HRef="filelist.xml"/>
</xml>
Before Width: | Height: | Size: 10 KiB
Before Width: | Height: | Size: 3.9 KiB
Before Width: | Height: | Size: 3.9 KiB
Before Width: | Height: | Size: 23 KiB
Before Width: | Height: | Size: 16 KiB
Before Width: | Height: | Size: 13 KiB
Before Width: | Height: | Size: 25 KiB
@@ -1,258 +0,0 @@
<?xml version="1.0" encoding="utf-8"?><ct:contentTypeSchema ct:_="" ma:_="" ma:contentTypeName="Documento" ma:contentTypeID="0x010100DF3D7C797EA12745A270EF30E38719B9" ma:contentTypeVersion="19" ma:contentTypeDescription="Crear nuevo documento." ma:contentTypeScope="" ma:versionID="227b02526234ef39b0b78895a9d90cf5" xmlns:ct="http://schemas.microsoft.com/office/2006/metadata/contentType" xmlns:ma="http://schemas.microsoft.com/office/2006/metadata/properties/metaAttributes">
[auto-generated SharePoint/Office content-type schema: MediaService*, SharedWithUsers/SharedWithDetails, TaxCatchAll, Dublin Core core-properties and PartnerControls definitions; 258 lines removed]
</ct:contentTypeSchema>
@@ -1 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?><b:Sources SelectedStyle="\APASixthEditionOfficeOnline.xsl" StyleName="APA" Version="6" xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography"><b:Source><b:Tag>Dor81</b:Tag><b:SourceType>JournalArticle</b:SourceType><b:Guid>{D7C468B5-5E32-4254-9330-6DB2DDB01037}</b:Guid><b:Title>There's a S.M.A.R.T. way to write management's goals and objectives</b:Title><b:Year>1981</b:Year><b:Author><b:Author><b:NameList><b:Person><b:Last>Doran</b:Last><b:First>G.</b:First><b:Middle>T.</b:Middle></b:Person></b:NameList></b:Author></b:Author><b:JournalName>Management Review (AMA FORUM)</b:JournalName><b:Pages>35-36</b:Pages><b:Volume>70</b:Volume><b:RefOrder>1</b:RefOrder></b:Source></b:Sources>
@@ -1 +0,0 @@
<?xml version="1.0" encoding="utf-8"?><p:properties xmlns:p="http://schemas.microsoft.com/office/2006/metadata/properties" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:pc="http://schemas.microsoft.com/office/infopath/2007/PartnerControls"><documentManagement><lcf76f155ced4ddcb4097134ff3c332f xmlns="27c1adeb-3674-457c-b08c-8a73f31b6e23"><Terms xmlns="http://schemas.microsoft.com/office/infopath/2007/PartnerControls"></Terms></lcf76f155ced4ddcb4097134ff3c332f><TaxCatchAll xmlns="0a70e875-3d35-4be2-921f-7117c31bab9b" xsi:nil="true"/><_Flow_SignoffStatus xmlns="27c1adeb-3674-457c-b08c-8a73f31b6e23" xsi:nil="true"/></documentManagement></p:properties>
@@ -1 +0,0 @@
<?mso-contentType?><FormTemplates xmlns="http://schemas.microsoft.com/sharepoint/v3/contenttype/forms"><Display>DocumentLibraryForm</Display><Edit>DocumentLibraryForm</Edit><New>DocumentLibraryForm</New></FormTemplates>
@@ -1,258 +0,0 @@
[verbatim duplicate of the 258-line auto-generated content-type schema above; removed]
@@ -1 +0,0 @@
[verbatim duplicate of the bibliography Sources entry above; removed]
@@ -1 +0,0 @@
[verbatim duplicate of the FormTemplates entry above; removed]
@@ -1,258 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?><ct:contentTypeSchema ct:_="" ma:_="" ma:contentTypeName="Documento" ma:contentTypeID="0x010100DF3D7C797EA12745A270EF30E38719B9" ma:contentTypeVersion="19" ma:contentTypeDescription="Crear nuevo documento." ma:contentTypeScope="" ma:versionID="227b02526234ef39b0b78895a9d90cf5" xmlns:ct="http://schemas.microsoft.com/office/2006/metadata/contentType" xmlns:ma="http://schemas.microsoft.com/office/2006/metadata/properties/metaAttributes">
|
||||
<xsd:schema targetNamespace="http://schemas.microsoft.com/office/2006/metadata/properties" ma:root="true" ma:fieldsID="3c939c8607e2f594db8bbb23634dd059" ns2:_="" ns3:_="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:p="http://schemas.microsoft.com/office/2006/metadata/properties" xmlns:ns2="0a70e875-3d35-4be2-921f-7117c31bab9b" xmlns:ns3="27c1adeb-3674-457c-b08c-8a73f31b6e23">
|
||||
<xsd:import namespace="0a70e875-3d35-4be2-921f-7117c31bab9b"/>
|
||||
<xsd:import namespace="27c1adeb-3674-457c-b08c-8a73f31b6e23"/>
|
||||
<xsd:element name="properties">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="documentManagement">
|
||||
<xsd:complexType>
|
||||
<xsd:all>
|
||||
<xsd:element ref="ns2:SharedWithUsers" minOccurs="0"/>
|
||||
<xsd:element ref="ns2:SharedWithDetails" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaServiceMetadata" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaServiceFastMetadata" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaServiceAutoKeyPoints" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaServiceKeyPoints" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaServiceAutoTags" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaServiceOCR" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaServiceGenerationTime" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaServiceEventHashCode" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaServiceDateTaken" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaLengthInSeconds" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaServiceLocation" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:lcf76f155ced4ddcb4097134ff3c332f" minOccurs="0"/>
|
||||
<xsd:element ref="ns2:TaxCatchAll" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaServiceSearchProperties" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:_Flow_SignoffStatus" minOccurs="0"/>
|
||||
<xsd:element ref="ns3:MediaServiceObjectDetectorVersions" minOccurs="0"/>
|
||||
</xsd:all>
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:sequence>
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:schema>
|
||||
<xsd:schema targetNamespace="0a70e875-3d35-4be2-921f-7117c31bab9b" elementFormDefault="qualified" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dms="http://schemas.microsoft.com/office/2006/documentManagement/types" xmlns:pc="http://schemas.microsoft.com/office/infopath/2007/PartnerControls">
|
||||
<xsd:import namespace="http://schemas.microsoft.com/office/2006/documentManagement/types"/>
|
||||
<xsd:import namespace="http://schemas.microsoft.com/office/infopath/2007/PartnerControls"/>
|
||||
<xsd:element name="SharedWithUsers" ma:index="8" nillable="true" ma:displayName="Compartido con" ma:internalName="SharedWithUsers" ma:readOnly="true">
|
||||
<xsd:complexType>
|
||||
<xsd:complexContent>
|
||||
<xsd:extension base="dms:UserMulti">
|
||||
<xsd:sequence>
|
||||
<xsd:element name="UserInfo" minOccurs="0" maxOccurs="unbounded">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="DisplayName" type="xsd:string" minOccurs="0"/>
|
||||
<xsd:element name="AccountId" type="dms:UserId" minOccurs="0" nillable="true"/>
|
||||
<xsd:element name="AccountType" type="xsd:string" minOccurs="0"/>
|
||||
</xsd:sequence>
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:sequence>
|
||||
</xsd:extension>
|
||||
</xsd:complexContent>
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="SharedWithDetails" ma:index="9" nillable="true" ma:displayName="Detalles de uso compartido" ma:internalName="SharedWithDetails" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Note">
|
||||
<xsd:maxLength value="255"/>
|
||||
</xsd:restriction>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="TaxCatchAll" ma:index="23" nillable="true" ma:displayName="Taxonomy Catch All Column" ma:hidden="true" ma:list="{c7f67346-78c9-4c4d-b954-8d350fdf60db}" ma:internalName="TaxCatchAll" ma:showField="CatchAllData" ma:web="0a70e875-3d35-4be2-921f-7117c31bab9b">
|
||||
<xsd:complexType>
|
||||
<xsd:complexContent>
|
||||
<xsd:extension base="dms:MultiChoiceLookup">
|
||||
<xsd:sequence>
|
||||
<xsd:element name="Value" type="dms:Lookup" maxOccurs="unbounded" minOccurs="0" nillable="true"/>
|
||||
</xsd:sequence>
|
||||
</xsd:extension>
|
||||
</xsd:complexContent>
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:schema>
|
||||
<xsd:schema targetNamespace="27c1adeb-3674-457c-b08c-8a73f31b6e23" elementFormDefault="qualified" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:dms="http://schemas.microsoft.com/office/2006/documentManagement/types" xmlns:pc="http://schemas.microsoft.com/office/infopath/2007/PartnerControls">
|
||||
<xsd:import namespace="http://schemas.microsoft.com/office/2006/documentManagement/types"/>
|
||||
<xsd:import namespace="http://schemas.microsoft.com/office/infopath/2007/PartnerControls"/>
|
||||
<xsd:element name="MediaServiceMetadata" ma:index="10" nillable="true" ma:displayName="MediaServiceMetadata" ma:hidden="true" ma:internalName="MediaServiceMetadata" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Note"/>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="MediaServiceFastMetadata" ma:index="11" nillable="true" ma:displayName="MediaServiceFastMetadata" ma:hidden="true" ma:internalName="MediaServiceFastMetadata" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Note"/>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="MediaServiceAutoKeyPoints" ma:index="12" nillable="true" ma:displayName="MediaServiceAutoKeyPoints" ma:hidden="true" ma:internalName="MediaServiceAutoKeyPoints" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Note"/>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="MediaServiceKeyPoints" ma:index="13" nillable="true" ma:displayName="KeyPoints" ma:internalName="MediaServiceKeyPoints" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Note">
|
||||
<xsd:maxLength value="255"/>
|
||||
</xsd:restriction>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="MediaServiceAutoTags" ma:index="14" nillable="true" ma:displayName="Tags" ma:internalName="MediaServiceAutoTags" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Text"/>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="MediaServiceOCR" ma:index="15" nillable="true" ma:displayName="Extracted Text" ma:internalName="MediaServiceOCR" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Note">
|
||||
<xsd:maxLength value="255"/>
|
||||
</xsd:restriction>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="MediaServiceGenerationTime" ma:index="16" nillable="true" ma:displayName="MediaServiceGenerationTime" ma:hidden="true" ma:internalName="MediaServiceGenerationTime" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Text"/>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="MediaServiceEventHashCode" ma:index="17" nillable="true" ma:displayName="MediaServiceEventHashCode" ma:hidden="true" ma:internalName="MediaServiceEventHashCode" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Text"/>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="MediaServiceDateTaken" ma:index="18" nillable="true" ma:displayName="MediaServiceDateTaken" ma:hidden="true" ma:internalName="MediaServiceDateTaken" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Text"/>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="MediaLengthInSeconds" ma:index="19" nillable="true" ma:displayName="Length (seconds)" ma:internalName="MediaLengthInSeconds" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Unknown"/>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="MediaServiceLocation" ma:index="20" nillable="true" ma:displayName="Location" ma:internalName="MediaServiceLocation" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Text"/>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="lcf76f155ced4ddcb4097134ff3c332f" ma:index="22" nillable="true" ma:taxonomy="true" ma:internalName="lcf76f155ced4ddcb4097134ff3c332f" ma:taxonomyFieldName="MediaServiceImageTags" ma:displayName="Etiquetas de imagen" ma:readOnly="false" ma:fieldId="{5cf76f15-5ced-4ddc-b409-7134ff3c332f}" ma:taxonomyMulti="true" ma:sspId="17631b59-e624-4eb7-963c-219f14f887a3" ma:termSetId="09814cd3-568e-fe90-9814-8d621ff8fb84" ma:anchorId="fba54fb3-c3e1-fe81-a776-ca4b69148c4d" ma:open="true" ma:isKeyword="false">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element ref="pc:Terms" minOccurs="0" maxOccurs="1"></xsd:element>
|
||||
</xsd:sequence>
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="MediaServiceSearchProperties" ma:index="24" nillable="true" ma:displayName="MediaServiceSearchProperties" ma:hidden="true" ma:internalName="MediaServiceSearchProperties" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Note"/>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="_Flow_SignoffStatus" ma:index="25" nillable="true" ma:displayName="Estado de aprobación" ma:internalName="Estado_x0020_de_x0020_aprobaci_x00f3_n">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Text"/>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
<xsd:element name="MediaServiceObjectDetectorVersions" ma:index="26" nillable="true" ma:displayName="MediaServiceObjectDetectorVersions" ma:description="" ma:hidden="true" ma:indexed="true" ma:internalName="MediaServiceObjectDetectorVersions" ma:readOnly="true">
|
||||
<xsd:simpleType>
|
||||
<xsd:restriction base="dms:Text"/>
|
||||
</xsd:simpleType>
|
||||
</xsd:element>
|
||||
</xsd:schema>
|
||||
<xsd:schema targetNamespace="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" elementFormDefault="qualified" attributeFormDefault="unqualified" blockDefault="#all" xmlns="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:odoc="http://schemas.microsoft.com/internal/obd">
|
||||
<xsd:import namespace="http://purl.org/dc/elements/1.1/" schemaLocation="http://dublincore.org/schemas/xmls/qdc/2003/04/02/dc.xsd"/>
|
||||
<xsd:import namespace="http://purl.org/dc/terms/" schemaLocation="http://dublincore.org/schemas/xmls/qdc/2003/04/02/dcterms.xsd"/>
|
||||
<xsd:element name="coreProperties" type="CT_coreProperties"/>
|
||||
<xsd:complexType name="CT_coreProperties">
|
||||
<xsd:all>
|
||||
<xsd:element ref="dc:creator" minOccurs="0" maxOccurs="1"/>
|
||||
<xsd:element ref="dcterms:created" minOccurs="0" maxOccurs="1"/>
|
||||
<xsd:element ref="dc:identifier" minOccurs="0" maxOccurs="1"/>
|
||||
<xsd:element name="contentType" minOccurs="0" maxOccurs="1" type="xsd:string" ma:index="0" ma:displayName="Tipo de contenido"/>
|
||||
<xsd:element ref="dc:title" minOccurs="0" maxOccurs="1" ma:index="4" ma:displayName="Título"/>
|
||||
<xsd:element ref="dc:subject" minOccurs="0" maxOccurs="1"/>
|
||||
<xsd:element ref="dc:description" minOccurs="0" maxOccurs="1"/>
|
||||
<xsd:element name="keywords" minOccurs="0" maxOccurs="1" type="xsd:string"/>
|
||||
<xsd:element ref="dc:language" minOccurs="0" maxOccurs="1"/>
|
||||
<xsd:element name="category" minOccurs="0" maxOccurs="1" type="xsd:string"/>
|
||||
<xsd:element name="version" minOccurs="0" maxOccurs="1" type="xsd:string"/>
|
||||
<xsd:element name="revision" minOccurs="0" maxOccurs="1" type="xsd:string">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
This value indicates the number of saves or revisions. The application is responsible for updating this value after each revision.
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="lastModifiedBy" minOccurs="0" maxOccurs="1" type="xsd:string"/>
|
||||
<xsd:element ref="dcterms:modified" minOccurs="0" maxOccurs="1"/>
|
||||
<xsd:element name="contentStatus" minOccurs="0" maxOccurs="1" type="xsd:string"/>
|
||||
</xsd:all>
|
||||
</xsd:complexType>
|
||||
</xsd:schema>
|
||||
<xs:schema targetNamespace="http://schemas.microsoft.com/office/infopath/2007/PartnerControls" elementFormDefault="qualified" attributeFormDefault="unqualified" xmlns:pc="http://schemas.microsoft.com/office/infopath/2007/PartnerControls" xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="Person">
<xs:complexType>
<xs:sequence>
<xs:element ref="pc:DisplayName" minOccurs="0"></xs:element>
<xs:element ref="pc:AccountId" minOccurs="0"></xs:element>
<xs:element ref="pc:AccountType" minOccurs="0"></xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="DisplayName" type="xs:string"></xs:element>
<xs:element name="AccountId" type="xs:string"></xs:element>
<xs:element name="AccountType" type="xs:string"></xs:element>
<xs:element name="BDCAssociatedEntity">
<xs:complexType>
<xs:sequence>
<xs:element ref="pc:BDCEntity" minOccurs="0" maxOccurs="unbounded"></xs:element>
</xs:sequence>
<xs:attribute ref="pc:EntityNamespace"></xs:attribute>
<xs:attribute ref="pc:EntityName"></xs:attribute>
<xs:attribute ref="pc:SystemInstanceName"></xs:attribute>
<xs:attribute ref="pc:AssociationName"></xs:attribute>
</xs:complexType>
</xs:element>
<xs:attribute name="EntityNamespace" type="xs:string"></xs:attribute>
<xs:attribute name="EntityName" type="xs:string"></xs:attribute>
<xs:attribute name="SystemInstanceName" type="xs:string"></xs:attribute>
<xs:attribute name="AssociationName" type="xs:string"></xs:attribute>
<xs:element name="BDCEntity">
<xs:complexType>
<xs:sequence>
<xs:element ref="pc:EntityDisplayName" minOccurs="0"></xs:element>
<xs:element ref="pc:EntityInstanceReference" minOccurs="0"></xs:element>
<xs:element ref="pc:EntityId1" minOccurs="0"></xs:element>
<xs:element ref="pc:EntityId2" minOccurs="0"></xs:element>
<xs:element ref="pc:EntityId3" minOccurs="0"></xs:element>
<xs:element ref="pc:EntityId4" minOccurs="0"></xs:element>
<xs:element ref="pc:EntityId5" minOccurs="0"></xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="EntityDisplayName" type="xs:string"></xs:element>
<xs:element name="EntityInstanceReference" type="xs:string"></xs:element>
<xs:element name="EntityId1" type="xs:string"></xs:element>
<xs:element name="EntityId2" type="xs:string"></xs:element>
<xs:element name="EntityId3" type="xs:string"></xs:element>
<xs:element name="EntityId4" type="xs:string"></xs:element>
<xs:element name="EntityId5" type="xs:string"></xs:element>
<xs:element name="Terms">
<xs:complexType>
<xs:sequence>
<xs:element ref="pc:TermInfo" minOccurs="0" maxOccurs="unbounded"></xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="TermInfo">
<xs:complexType>
<xs:sequence>
<xs:element ref="pc:TermName" minOccurs="0"></xs:element>
<xs:element ref="pc:TermId" minOccurs="0"></xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="TermName" type="xs:string"></xs:element>
<xs:element name="TermId" type="xs:string"></xs:element>
</xs:schema>
</ct:contentTypeSchema>
@@ -1 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?><b:Sources SelectedStyle="\APASixthEditionOfficeOnline.xsl" StyleName="APA" Version="6" xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography"><b:Source><b:Tag>Dor81</b:Tag><b:SourceType>JournalArticle</b:SourceType><b:Guid>{D7C468B5-5E32-4254-9330-6DB2DDB01037}</b:Guid><b:Title>There's a S.M.A.R.T. way to write management's goals and objectives</b:Title><b:Year>1981</b:Year><b:Author><b:Author><b:NameList><b:Person><b:Last>Doran</b:Last><b:First>G.</b:First><b:Middle>T.</b:Middle></b:Person></b:NameList></b:Author></b:Author><b:JournalName>Management Review (AMA FORUM)</b:JournalName><b:Pages>35-36</b:Pages><b:Volume>70</b:Volume><b:RefOrder>1</b:RefOrder></b:Source></b:Sources>
@@ -1 +0,0 @@
<?mso-contentType?><FormTemplates xmlns="http://schemas.microsoft.com/sharepoint/v3/contenttype/forms"><Display>DocumentLibraryForm</Display><Edit>DocumentLibraryForm</Edit><New>DocumentLibraryForm</New></FormTemplates>
@@ -1,2 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<ds:datastoreItem ds:itemID="{B3A822E2-E694-47D5-9E22-DA4B12671ABB}" xmlns:ds="http://schemas.openxmlformats.org/officeDocument/2006/customXml"><ds:schemaRefs><ds:schemaRef ds:uri="http://schemas.microsoft.com/office/2006/metadata/contentType"/><ds:schemaRef ds:uri="http://schemas.microsoft.com/office/2006/metadata/properties/metaAttributes"/><ds:schemaRef ds:uri="http://www.w3.org/2001/XMLSchema"/><ds:schemaRef ds:uri="http://schemas.microsoft.com/office/2006/metadata/properties"/><ds:schemaRef ds:uri="0a70e875-3d35-4be2-921f-7117c31bab9b"/><ds:schemaRef ds:uri="27c1adeb-3674-457c-b08c-8a73f31b6e23"/><ds:schemaRef ds:uri="http://schemas.microsoft.com/office/2006/documentManagement/types"/><ds:schemaRef ds:uri="http://schemas.microsoft.com/office/infopath/2007/PartnerControls"/><ds:schemaRef ds:uri="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"/><ds:schemaRef ds:uri="http://purl.org/dc/elements/1.1/"/><ds:schemaRef ds:uri="http://purl.org/dc/terms/"/><ds:schemaRef ds:uri="http://schemas.microsoft.com/internal/obd"/></ds:schemaRefs></ds:datastoreItem>
@@ -1,2 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<ds:datastoreItem ds:itemID="{3CBD5336-2C2D-4DA8-8EBD-C205328B54AF}" xmlns:ds="http://schemas.openxmlformats.org/officeDocument/2006/customXml"><ds:schemaRefs><ds:schemaRef ds:uri="http://schemas.openxmlformats.org/officeDocument/2006/bibliography"/></ds:schemaRefs></ds:datastoreItem>
@@ -1,2 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<ds:datastoreItem ds:itemID="{DB456AF2-52F5-44D8-AEC6-B5F9D96C377E}" xmlns:ds="http://schemas.openxmlformats.org/officeDocument/2006/customXml"><ds:schemaRefs><ds:schemaRef ds:uri="http://schemas.microsoft.com/office/2006/metadata/properties"/><ds:schemaRef ds:uri="http://schemas.microsoft.com/office/infopath/2007/PartnerControls"/><ds:schemaRef ds:uri="27c1adeb-3674-457c-b08c-8a73f31b6e23"/><ds:schemaRef ds:uri="0a70e875-3d35-4be2-921f-7117c31bab9b"/></ds:schemaRefs></ds:datastoreItem>
@@ -1,2 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<ds:datastoreItem ds:itemID="{BE74C307-52FE-48C3-92C2-E1552852BAAA}" xmlns:ds="http://schemas.openxmlformats.org/officeDocument/2006/customXml"><ds:schemaRefs><ds:schemaRef ds:uri="http://schemas.microsoft.com/sharepoint/v3/contenttype/forms"/></ds:schemaRefs></ds:datastoreItem>
@@ -1,2 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<ds:datastoreItem ds:itemID="{B3A822E2-E694-47D5-9E22-DA4B12671ABB}" xmlns:ds="http://schemas.openxmlformats.org/officeDocument/2006/customXml"><ds:schemaRefs><ds:schemaRef ds:uri="http://schemas.microsoft.com/office/2006/metadata/contentType"/><ds:schemaRef ds:uri="http://schemas.microsoft.com/office/2006/metadata/properties/metaAttributes"/><ds:schemaRef ds:uri="http://www.w3.org/2001/XMLSchema"/><ds:schemaRef ds:uri="http://schemas.microsoft.com/office/2006/metadata/properties"/><ds:schemaRef ds:uri="0a70e875-3d35-4be2-921f-7117c31bab9b"/><ds:schemaRef ds:uri="27c1adeb-3674-457c-b08c-8a73f31b6e23"/><ds:schemaRef ds:uri="http://schemas.microsoft.com/office/2006/documentManagement/types"/><ds:schemaRef ds:uri="http://schemas.microsoft.com/office/infopath/2007/PartnerControls"/><ds:schemaRef ds:uri="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"/><ds:schemaRef ds:uri="http://purl.org/dc/elements/1.1/"/><ds:schemaRef ds:uri="http://purl.org/dc/terms/"/><ds:schemaRef ds:uri="http://schemas.microsoft.com/internal/obd"/></ds:schemaRefs></ds:datastoreItem>
@@ -1,2 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<ds:datastoreItem ds:itemID="{3CBD5336-2C2D-4DA8-8EBD-C205328B54AF}" xmlns:ds="http://schemas.openxmlformats.org/officeDocument/2006/customXml"><ds:schemaRefs><ds:schemaRef ds:uri="http://schemas.openxmlformats.org/officeDocument/2006/bibliography"/></ds:schemaRefs></ds:datastoreItem>