diff --git a/.claudeignore b/.claudeignore new file mode 100644 index 0000000..b3d2cc0 --- /dev/null +++ b/.claudeignore @@ -0,0 +1,6 @@ +~$*.docx +results/ +__pycache__/ +dataset +results +.DS_Store diff --git a/.gitignore b/.gitignore index 100b6f6..686d80f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ ~$*.docx results/ -__pycache__/* +__pycache__/ +dataset +results +.DS_Store +.claude +node_modules diff --git a/README.md b/README.md index 805e5a6..ac8da34 100644 --- a/README.md +++ b/README.md @@ -1,53 +1,311 @@ -# Sistema OCR multimotor con IA para PDFs escaneados en español +# Optimización de Hiperparámetros OCR con Ray Tune para Documentos Académicos en Español -**Trabajo Fin de Máster (TFM) – Tipo 2: Desarrollo de Software** -**Líneas:** Percepción computacional · Aprendizaje automático -**Autor:** Sergio Jiménez Jiménez · **UNIR** · **Año:** 2025 +**Trabajo Fin de Máster (TFM) – Máster Universitario en Inteligencia Artificial** +**Líneas:** Percepción computacional · Aprendizaje automático +**Autor:** Sergio Jiménez Jiménez · **UNIR** · **Año:** 2025 -> Extracción de texto desde **PDFs escaneados** en **español** mediante **motores OCR basados en IA** (EasyOCR · PaddleOCR · DocTR). -> Se excluyen soluciones clásicas como **Tesseract** o propietarias como **ABBYY**, centrando el proyecto en modelos neuronales modernos. +> Optimización sistemática de hiperparámetros de **PaddleOCR (PP-OCRv5)** mediante **Ray Tune** con **Optuna** para mejorar el reconocimiento óptico de caracteres en documentos académicos en español. --- -## 🧭 Objetivo +## Objetivo -Desarrollar y evaluar un **sistema OCR multimotor** capaz de: -- Procesar PDFs escaneados extremo a extremo (**PDF → Imagen → Preprocesado → OCR → Evaluación**). -- **Reducir el CER al menos un 15 %** respecto a una línea base neuronal (EasyOCR). -- Mantener **tiempos por página** adecuados y un pipeline **modular y reproducible**. 
+Optimizar el rendimiento de PaddleOCR para documentos académicos en español mediante ajuste de hiperparámetros, alcanzando un **CER inferior al 2%** sin requerir fine-tuning del modelo ni recursos GPU dedicados. -**Métricas principales:** -- **CER** (*Character Error Rate*) -- **WER** (*Word Error Rate*) -- **Latencia por página* +**Resultado alcanzado:** CER = **1.49%** (objetivo cumplido) --- -## 🧩 Alcance y diseño +## Resultados Principales -- **Idioma:** español (texto impreso, no manuscrito). -- **Entrada:** PDFs escaneados con calidad variable, ruido o inclinación. -- **Motores evaluados:** - - **EasyOCR** – baseline neuronal ligera. - - **PaddleOCR (PP-OCR)** – referencia industrial multilingüe. - - **DocTR (Mindee)** – arquitectura PyTorch modular con salida estructurada. -- **Evaluación:** CER, WER y latencia promedio por página. +| Modelo | CER | Precisión Caracteres | WER | Precisión Palabras | +|--------|-----|---------------------|-----|-------------------| +| PaddleOCR (Baseline) | 7.78% | 92.22% | 14.94% | 85.06% | +| **PaddleOCR-HyperAdjust** | **1.49%** | **98.51%** | **7.62%** | **92.38%** | ---- +**Mejora obtenida:** Reducción del CER en un **80.9%** -## 🏗️ Arquitectura del sistema +### Configuración Óptima Encontrada -```text -PDF (escaneado) - └─► Conversión a imagen (PyMuPDF / pdf2image) - └─► Preprocesado (OpenCV) - └─► OCR (EasyOCR | PaddleOCR | DocTR) - └─► Evaluación (CER · WER · latencia) +```python +config_optimizada = { + "textline_orientation": True, # CRÍTICO - reduce CER ~70% + "use_doc_orientation_classify": False, + "use_doc_unwarping": False, + "text_det_thresh": 0.4690, # Correlación -0.52 con CER + "text_det_box_thresh": 0.5412, + "text_det_unclip_ratio": 0.0, + "text_rec_score_thresh": 0.6350, +} ``` -## 🔜 Próximos pasos +--- -1. Ajustar parámetros y arquitecturas en DocTR (detector y reconocedor). -2. Añadir métricas de latencia. -3. Incorporar postprocesamiento lingüístico (corrección ortográfica). -4. 
Explorar TrOCR o MMOCR como comparación avanzada en la segunda fase. +## Metodología + +### Pipeline de Trabajo + +``` +PDF (académico UNIR) + └─► Conversión a imagen (PyMuPDF, 300 DPI) + └─► Extracción de ground truth + └─► OCR con PaddleOCR (PP-OCRv5) + └─► Evaluación (CER, WER con jiwer) + └─► Optimización (Ray Tune + Optuna) +``` + +### Experimento de Optimización + +| Parámetro | Valor | +|-----------|-------| +| Número de trials | 64 | +| Algoritmo de búsqueda | OptunaSearch (TPE) | +| Métrica objetivo | CER (minimizar) | +| Trials concurrentes | 2 | +| Tiempo total | ~6 horas (CPU) | + +--- + +## Estructura del Repositorio + +``` +MastersThesis/ +├── docs/ # Capítulos del TFM en Markdown (estructura UNIR) +│ ├── 00_resumen.md # Resumen + Abstract + Keywords +│ ├── 01_introduccion.md # Cap. 1: Introducción (1.1-1.3) +│ ├── 02_contexto_estado_arte.md # Cap. 2: Contexto y estado del arte (2.1-2.3) +│ ├── 03_objetivos_metodologia.md # Cap. 3: Objetivos y metodología (3.1-3.4) +│ ├── 04_desarrollo_especifico.md # Cap. 4: Desarrollo específico (4.1-4.3) +│ ├── 05_conclusiones_trabajo_futuro.md # Cap. 5: Conclusiones (5.1-5.2) +│ ├── 06_referencias_bibliograficas.md # Referencias bibliográficas (APA) +│ └── 07_anexo_a.md # Anexo A: Código fuente y datos +├── thesis_output/ # Documento final generado +│ ├── plantilla_individual.htm # TFM completo (abrir en Word) +│ └── figures/ # Figuras generadas desde Mermaid +│ ├── figura_1.png ... 
figura_7.png +│ └── figures_manifest.json +├── src/ +│ ├── paddle_ocr_fine_tune_unir_raytune.ipynb # Experimento principal +│ ├── paddle_ocr_tuning.py # Script de evaluación CLI +│ ├── dataset_manager.py # Clase ImageTextDataset +│ ├── prepare_dataset.ipynb # Preparación del dataset +│ └── raytune_paddle_subproc_results_*.csv # Resultados de 64 trials +├── results/ # Resultados de benchmarks +├── instructions/ # Plantilla e instrucciones UNIR +│ ├── instrucciones.pdf +│ ├── plantilla_individual.pdf +│ └── plantilla_individual.htm +├── apply_content.py # Genera documento TFM desde docs/ + plantilla +├── generate_mermaid_figures.py # Convierte diagramas Mermaid a PNG +├── ocr_benchmark_notebook.ipynb # Benchmark comparativo inicial +└── README.md +``` + +--- + +## Hallazgos Clave + +1. **`textline_orientation=True` es crítico**: Reduce el CER en un 69.7%. Para documentos con layouts mixtos (tablas, encabezados), la clasificación de orientación de línea es esencial. + +2. **Umbral `text_det_thresh` importante**: Correlación -0.52 con CER. Valores óptimos entre 0.4-0.5. Valores < 0.1 causan fallos catastróficos (CER >40%). + +3. **Componentes innecesarios para PDFs digitales**: `use_doc_orientation_classify` y `use_doc_unwarping` no mejoran el rendimiento en documentos académicos digitales. 
+ +--- + +## Requisitos + +| Componente | Versión | +|------------|---------| +| Python | 3.11.9 | +| PaddlePaddle | 3.2.2 | +| PaddleOCR | 3.3.2 | +| Ray | 2.52.1 | +| Optuna | 4.6.0 | +| jiwer | (para métricas CER/WER) | +| PyMuPDF | (para conversión PDF) | + +--- + +## Uso + +### Preparar dataset +```bash +# Ejecutar prepare_dataset.ipynb para convertir PDF a imágenes y extraer ground truth +jupyter notebook src/prepare_dataset.ipynb +``` + +### Ejecutar optimización +```bash +# Ejecutar el notebook principal de Ray Tune +jupyter notebook src/paddle_ocr_fine_tune_unir_raytune.ipynb +``` + +### Evaluación individual +```bash +python src/paddle_ocr_tuning.py \ + --pdf-folder ./dataset \ + --textline-orientation True \ + --text-det-thresh 0.469 \ + --text-det-box-thresh 0.541 \ + --text-rec-score-thresh 0.635 +``` + +--- + +## Fuentes de Datos + +- **Dataset**: Instrucciones para la elaboración del TFE (UNIR), 24 páginas +- **Resultados Ray Tune (PRINCIPAL)**: `src/raytune_paddle_subproc_results_20251207_192320.csv` - 64 trials de optimización con todas las métricas y configuraciones + +--- + +## Generación del Documento TFM + +### Prerrequisitos + +```bash +# Instalar dependencias de Python +pip install beautifulsoup4 + +# Instalar mermaid-cli para generación de figuras +npm install @mermaid-js/mermaid-cli +``` + +### Flujo de Generación del Documento + +El documento TFM se genera en **3 pasos** que deben ejecutarse en orden: + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ PASO 1: generate_mermaid_figures.py │ +│ ────────────────────────────────────────────────────────────────── │ +│ • Lee diagramas Mermaid de docs/*.md │ +│ • Genera thesis_output/figures/figura_*.png │ +│ • Crea figures_manifest.json con títulos │ +└─────────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────────┐ +│ PASO 2: apply_content.py │ +│ 
────────────────────────────────────────────────────────────────── │ +│ • Lee plantilla desde instructions/plantilla_individual.htm │ +│ • Inserta contenido de docs/*.md en cada capítulo │ +│ • Genera tablas con formato APA y figuras con referencias │ +│ • Guarda en thesis_output/plantilla_individual.htm │ +└─────────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────────┐ +│ PASO 3: Abrir en Microsoft Word │ +│ ────────────────────────────────────────────────────────────────── │ +│ • Abrir thesis_output/plantilla_individual.htm │ +│ • Ctrl+A → F9 para actualizar índices (contenidos/figuras/tablas) │ +│ • Guardar como TFM_Sergio_Jimenez.docx │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +### Comandos de Generación + +```bash +# Desde el directorio raíz del proyecto: + +# PASO 1: Generar figuras PNG desde diagramas Mermaid +python3 generate_mermaid_figures.py +# Output: thesis_output/figures/figura_1.png ... figura_8.png + +# PASO 2: Aplicar contenido de docs/ a la plantilla UNIR +python3 apply_content.py +# Output: thesis_output/plantilla_individual.htm + +# PASO 3: Abrir en Word y finalizar documento +# - Abrir thesis_output/plantilla_individual.htm en Microsoft Word +# - Ctrl+A → F9 para actualizar todos los índices +# - IMPORTANTE: Ajustar manualmente el tamaño de las imágenes para legibilidad +# (seleccionar imagen → clic derecho → Tamaño y posición → ajustar al ancho de página) +# - Guardar como .docx +``` + +### Notas Importantes para Edición en Word + +1. **Ajuste de imágenes**: Las figuras Mermaid pueden requerir ajuste manual de tamaño para ser legibles. Seleccionar cada imagen y ajustar al ancho de texto (~16cm). + +2. **Actualización de índices**: Después de cualquier cambio, usar Ctrl+A → F9 para regenerar índices. + +3. **Formato de código**: Los bloques de código usan Consolas 9pt. Verificar que no se corten líneas largas. 
+ +### Archivos de Entrada y Salida + +| Script | Entrada | Salida | +|--------|---------|--------| +| `generate_mermaid_figures.py` | `docs/*.md` (bloques ```mermaid```) | `thesis_output/figures/figura_*.png`, `figures_manifest.json` | +| `apply_content.py` | `instructions/plantilla_individual.htm`, `docs/*.md`, `thesis_output/figures/*.png` | `thesis_output/plantilla_individual.htm` | + +### Contenido Generado Automáticamente + +- **30 tablas** con formato APA (Tabla X. *Título* + Fuente: ...) +- **8 figuras** desde Mermaid (Figura X. *Título* + Fuente: Elaboración propia) +- **25 referencias** en formato APA con sangría francesa +- **Resumen/Abstract** con palabras clave +- **Índices** actualizables (contenidos, figuras, tablas) +- Eliminación automática de textos de instrucción de la plantilla + +--- + +## Trabajo Pendiente para Completar el TFM + +### Contexto: Limitaciones de Hardware + +Este trabajo adoptó la estrategia de **optimización de hiperparámetros** en lugar de **fine-tuning** debido a: +- **Sin GPU dedicada**: Ejecución exclusivamente en CPU +- **Tiempo de inferencia elevado**: ~69 segundos/página en CPU +- **Fine-tuning inviable**: Entrenar modelos de deep learning sin GPU requeriría tiempos prohibitivos + +La optimización de hiperparámetros demostró ser una **alternativa efectiva** al fine-tuning, logrando una reducción del 80.9% en el CER sin reentrenar el modelo. + +### Tareas Completadas + +- [x] **Estructura docs/ según plantilla UNIR**: Todos los capítulos siguen numeración exacta (1.1, 1.2, etc.) +- [x] **Añadir diagramas Mermaid**: 7 diagramas añadidos (pipeline OCR, arquitectura Ray Tune, gráficos de comparación) +- [x] **Generar documento TFM unificado**: Script `apply_content.py` genera documento completo desde docs/ +- [x] **Convertir Mermaid a PNG**: Script `generate_mermaid_figures.py` genera figuras automáticamente + +### Tareas Pendientes + +#### 1. 
Validación del Enfoque (Prioridad Alta) +- [ ] **Validación cruzada en otros documentos**: Evaluar la configuración óptima en otros tipos de documentos en español (facturas, formularios, contratos) para verificar generalización +- [ ] **Ampliar el dataset**: El dataset actual tiene solo 24 páginas. Construir un corpus más amplio y diverso (mínimo 100 páginas) +- [ ] **Validación del ground truth**: Revisar manualmente el texto de referencia extraído automáticamente para asegurar su exactitud + +#### 2. Experimentación Adicional (Prioridad Media) +- [ ] **Explorar `text_det_unclip_ratio`**: Este parámetro quedó fijado en 0.0. Incluirlo en el espacio de búsqueda podría mejorar resultados +- [ ] **Comparativa con fine-tuning** (si se obtiene acceso a GPU): Cuantificar la brecha de rendimiento entre optimización de hiperparámetros y fine-tuning real +- [ ] **Evaluación con GPU**: Medir tiempos de inferencia con aceleración GPU para escenarios de producción + +#### 3. Documentación y Presentación (Prioridad Alta) +- [ ] **Crear presentación**: Preparar slides para la defensa del TFM +- [ ] **Revisión final del documento**: Verificar formato, índices y contenido en Word + +#### 4. Extensiones Futuras (Opcional) +- [ ] **Herramienta de configuración automática**: Desarrollar una herramienta que determine automáticamente la configuración óptima para un nuevo tipo de documento +- [ ] **Benchmark público para español**: Publicar un benchmark de OCR para documentos en español que facilite comparación de soluciones +- [ ] **Optimización multi-objetivo**: Considerar CER, WER y tiempo de inferencia simultáneamente + +### Recomendación de Próximos Pasos + +1. **Inmediato**: Abrir documento generado en Word, actualizar índices (Ctrl+A, F9), guardar como .docx +2. **Corto plazo**: Validar en 2-3 tipos de documentos adicionales para demostrar generalización +3. 
**Para la defensa**: Crear presentación con visualizaciones de resultados + +--- + +## Licencia + +Este proyecto es parte de un Trabajo Fin de Máster académico. + +--- + +## Referencias + +- [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) +- [Ray Tune](https://docs.ray.io/en/latest/tune/index.html) +- [Optuna](https://optuna.org/) +- [jiwer](https://github.com/jitsi/jiwer) diff --git a/TFM_Sergio_Jimenez_OCR.docx b/TFM_Sergio_Jimenez_OCR.docx new file mode 100644 index 0000000..fdb3bda Binary files /dev/null and b/TFM_Sergio_Jimenez_OCR.docx differ diff --git a/TFM_Sergio_Jimenez_OCR.pdf b/TFM_Sergio_Jimenez_OCR.pdf new file mode 100644 index 0000000..2a0f2ff Binary files /dev/null and b/TFM_Sergio_Jimenez_OCR.pdf differ diff --git a/apply_content.py b/apply_content.py new file mode 100644 index 0000000..367e92c --- /dev/null +++ b/apply_content.py @@ -0,0 +1,609 @@ +#!/usr/bin/env python3 +"""Replace template content with thesis content from docs/ folder using BeautifulSoup.""" + +import re +import os +from bs4 import BeautifulSoup, NavigableString + +BASE_DIR = '/Users/sergio/Desktop/MastersThesis' +TEMPLATE = os.path.join(BASE_DIR, 'thesis_output/plantilla_individual.htm') +DOCS_DIR = os.path.join(BASE_DIR, 'docs') + +# Global counters for tables and figures +table_counter = 0 +figure_counter = 0 + +def read_file(path): + try: + with open(path, 'r', encoding='utf-8') as f: + return f.read() + except UnicodeDecodeError: + with open(path, 'r', encoding='latin-1') as f: + return f.read() + +def write_file(path, content): + with open(path, 'w', encoding='utf-8') as f: + f.write(content) + +def md_to_html_para(text): + """Convert markdown inline formatting to HTML.""" + # Bold + text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text) + # Italic + text = re.sub(r'\*([^*]+)\*', r'\1', text) + # Inline code + text = re.sub(r'`([^`]+)`', r'\1', text) + return text + +def extract_table_title(lines, current_index): + """Look for table title in preceding lines (e.g., **Tabla 
1.** *Title*).""" + # Check previous non-empty lines for table title + for i in range(current_index - 1, max(0, current_index - 5), -1): + line = lines[i].strip() + if line.startswith('**Tabla') or line.startswith('*Tabla'): + return line + if line and not line.startswith('|'): + break + return None + +def extract_figure_title_from_mermaid(lines, current_index): + """Extract title from mermaid diagram or preceding text.""" + # Look for title in mermaid content + for i in range(current_index + 1, min(len(lines), current_index + 20)): + line = lines[i].strip() + if line.startswith('```'): + break + if 'title' in line.lower(): + # Extract title from: title "Some Title" + match = re.search(r'title\s+["\']([^"\']+)["\']', line) + if match: + return match.group(1) + + # Check preceding lines for figure reference + for i in range(current_index - 1, max(0, current_index - 3), -1): + line = lines[i].strip() + if line.startswith('**Figura') or 'Figura' in line: + return line + + return None + +def parse_md_to_html_blocks(md_content): + """Convert markdown content to HTML blocks with template styles.""" + global table_counter, figure_counter + + html_blocks = [] + lines = md_content.split('\n') + i = 0 + + while i < len(lines): + line = lines[i] + + # Skip empty lines + if not line.strip(): + i += 1 + continue + + # Mermaid diagram - convert to figure with actual image + if line.strip().startswith('```mermaid'): + figure_counter += 1 + mermaid_lines = [] + i += 1 + while i < len(lines) and not lines[i].strip() == '```': + mermaid_lines.append(lines[i]) + i += 1 + + # Try to extract title from mermaid content (YAML format: title: "...") + mermaid_content = '\n'.join(mermaid_lines) + # Match YAML format: title: "Title" or title: 'Title' + title_match = re.search(r'title:\s*["\']([^"\']+)["\']', mermaid_content) + if not title_match: + # Fallback to non-YAML format: title "Title" + title_match = re.search(r'title\s+["\']?([^"\'"\n]+)["\']?', mermaid_content) + if title_match: + 
fig_title = title_match.group(1).strip() + else: + fig_title = f"Diagrama {figure_counter}" + + # Check if the generated PNG exists + fig_file = f'figures/figura_{figure_counter}.png' + fig_path = os.path.join(BASE_DIR, 'thesis_output', fig_file) + + # Create figure with MsoCaption class and proper Word SEQ field for cross-reference + # Format: "Figura X." in bold, title in italic (per UNIR guidelines) + # Word TOC looks for text with Caption style - anchor must be outside main caption text + bookmark_id = f"_Ref_Fig{figure_counter}" + html_blocks.append(f'''
Figura {figure_counter}. {fig_title}
''') + + if os.path.exists(fig_path): + # Use Word-compatible width in cm (A4 text area is ~16cm wide, use ~12cm max) + html_blocks.append(f'''[Insertar diagrama Mermaid aquí]
''') + + html_blocks.append(f'''Fuente: Elaboración propia.
''') + html_blocks.append('{code}
Tabla {table_counter}. {clean_title}
''') + + # Build table HTML with APA style (horizontal lines only, no vertical) + table_html = '{md_to_html_para(cell)} | '
+ elif j == len(table_lines) - 1:
+ # Last row: bottom border only
+ table_html += f'{md_to_html_para(cell)} | '
+ else:
+ # Middle rows: no borders
+ table_html += f'{md_to_html_para(cell)} | '
+ table_html += '
Fuente: {table_source}.
') + html_blocks.append('{md_to_html_para(quote_text)}
') + continue + + # Bullet list + if re.match(r'^[\-\*\+]\s', line): + while i < len(lines) and re.match(r'^[\-\*\+]\s', lines[i]): + item_text = lines[i][2:].strip() + html_blocks.append(f'· {md_to_html_para(item_text)}
') + i += 1 + continue + + # Numbered list + if re.match(r'^\d+\.\s', line): + num = 1 + while i < len(lines) and re.match(r'^\d+\.\s', lines[i]): + item_text = re.sub(r'^\d+\.\s*', '', lines[i]).strip() + html_blocks.append(f'{num}. {md_to_html_para(item_text)}
') + num += 1 + i += 1 + continue + + # Skip lines that are just table/figure titles (they'll be handled with the table/figure) + if line.strip().startswith('**Tabla') or line.strip().startswith('*Tabla'): + i += 1 + continue + if line.strip().startswith('**Figura') or line.strip().startswith('*Figura'): + i += 1 + continue + if line.strip().startswith('*Fuente:') or line.strip().startswith('Fuente:'): + i += 1 + continue + + # Regular paragraph + para_lines = [line] + i += 1 + while i < len(lines) and lines[i].strip() and not lines[i].startswith('#') and not lines[i].startswith('```') and not lines[i].startswith('>') and not re.match(r'^[\-\*\+]\s', lines[i]) and not re.match(r'^\d+\.\s', lines[i]) and '|' not in lines[i]: + para_lines.append(lines[i]) + i += 1 + + para_text = ' '.join(para_lines) + html_blocks.append(f'{md_to_html_para(para_text)}
') + + return '\n\n'.join(html_blocks) + +def extract_section_content(md_content): + """Extract content from markdown, skipping the first # header.""" + md_content = re.sub(r'^#\s+[^\n]+\n+', '', md_content, count=1) + return parse_md_to_html_blocks(md_content) + +def find_section_element(soup, keyword): + """Find element containing keyword (h1 or special paragraph classes).""" + # First try h1 + for h1 in soup.find_all('h1'): + text = h1.get_text() + if keyword.lower() in text.lower(): + return h1 + + # Try special paragraph classes for unnumbered sections + for p in soup.find_all('p', class_=['Ttulo1sinnumerar', 'Anexo', 'MsoNormal']): + text = p.get_text() + if keyword.lower() in text.lower(): + classes = p.get('class', []) + if 'Ttulo1sinnumerar' in classes or 'Anexo' in classes: + return p + if re.match(r'^\d+\.?\s', text.strip()): + return p + return None + +def remove_elements_between(start_elem, end_elem): + """Remove all elements between start and end (exclusive).""" + current = start_elem.next_sibling + elements_to_remove = [] + while current and current != end_elem: + elements_to_remove.append(current) + current = current.next_sibling + for elem in elements_to_remove: + if hasattr(elem, 'decompose'): + elem.decompose() + elif isinstance(elem, NavigableString): + elem.extract() + +def format_references(refs_content): + """Format references with proper MsoBibliography style.""" + refs_content = refs_content.replace('# Referencias bibliográficas {.unnumbered}', '').strip() + refs_html = '' + + for line in refs_content.split('\n\n'): + line = line.strip() + if not line: + continue + + # Apply markdown formatting + formatted = md_to_html_para(line) + + # Use MsoBibliography style with hanging indent (36pt indent, -36pt text-indent) + refs_html += f'''{formatted}
\n''' + + return refs_html + +def extract_resumen_parts(resumen_content): + """Extract Spanish resumen and English abstract from 00_resumen.md""" + parts = resumen_content.split('---') + + spanish_part = parts[0] if len(parts) > 0 else '' + english_part = parts[1] if len(parts) > 1 else '' + + # Extract Spanish content + spanish_text = '' + spanish_keywords = '' + if '**Palabras clave:**' in spanish_part: + text_part, kw_part = spanish_part.split('**Palabras clave:**') + spanish_text = text_part.replace('# Resumen', '').strip() + spanish_keywords = kw_part.strip() + else: + spanish_text = spanish_part.replace('# Resumen', '').strip() + + # Extract English content + english_text = '' + english_keywords = '' + if '**Keywords:**' in english_part: + text_part, kw_part = english_part.split('**Keywords:**') + english_text = text_part.replace('# Abstract', '').strip() + english_keywords = kw_part.strip() + else: + english_text = english_part.replace('# Abstract', '').strip() + + return spanish_text, spanish_keywords, english_text, english_keywords + +def main(): + global table_counter, figure_counter + + print("Reading template...") + html_content = read_file(TEMPLATE) + soup = BeautifulSoup(html_content, 'html.parser') + + print("Reading docs content...") + docs = { + 'resumen': read_file(os.path.join(DOCS_DIR, '00_resumen.md')), + 'intro': read_file(os.path.join(DOCS_DIR, '01_introduccion.md')), + 'contexto': read_file(os.path.join(DOCS_DIR, '02_contexto_estado_arte.md')), + 'objetivos': read_file(os.path.join(DOCS_DIR, '03_objetivos_metodologia.md')), + 'desarrollo': read_file(os.path.join(DOCS_DIR, '04_desarrollo_especifico.md')), + 'conclusiones': read_file(os.path.join(DOCS_DIR, '05_conclusiones_trabajo_futuro.md')), + 'referencias': read_file(os.path.join(DOCS_DIR, '06_referencias_bibliograficas.md')), + 'anexo': read_file(os.path.join(DOCS_DIR, '07_anexo_a.md')), + } + + # Extract resumen and abstract + spanish_text, spanish_kw, english_text, english_kw = 
extract_resumen_parts(docs['resumen']) + + # Replace title + print("Replacing title...") + for elem in soup.find_all(string=re.compile(r'Título del TFE', re.IGNORECASE)): + elem.replace_with(elem.replace('Título del TFE', 'Optimización de Hiperparámetros OCR con Ray Tune para Documentos Académicos en Español')) + + # Replace Resumen section + print("Replacing Resumen...") + resumen_title = soup.find('p', class_='Ttulondices', string=re.compile(r'Resumen')) + if resumen_title: + # Find and replace content after Resumen title until Abstract + current = resumen_title.find_next_sibling() + elements_to_remove = [] + while current: + text = current.get_text() if hasattr(current, 'get_text') else str(current) + if 'Abstract' in text and current.name == 'p' and 'Ttulondices' in str(current.get('class', [])): + break + elements_to_remove.append(current) + current = current.find_next_sibling() + + for elem in elements_to_remove: + if hasattr(elem, 'decompose'): + elem.decompose() + + # Insert new resumen content + resumen_html = f'''{spanish_text}
+Palabras clave: {spanish_kw}
+{english_text}
+Keywords: {english_kw}
+Universidad
+Internacional de La Rioja
Escuela
+Superior de Ingeniería y
 Tecnología
 Máster Universitario
+en Inteligencia artificial
 Optimización de Hiperparámetros OCR
+con Ray Tune para Documentos Académicos en Español
|
+ Trabajo fin de
+ estudio presentado por: |
+
+  Sergio Jiménez Jiménez |
+
|
+ Tipo de
+ trabajo: |
+
+ Desarrollo
+ Software |
+
|
+ Director/a: |
+
+ Javier Rodrigo
+  Villazón Terrazas |
+
|
+ Fecha: |
+
+ 06.10.2025 |
+
Resumen
+ +En este
+apartado se introducirá un breve resumen en español del trabajo realizado
+(extensión entre 150 y 300 palabras). Este resumen debe incluir el objetivo o
+propósito de la investigación, la metodología, los resultados y las
+conclusiones.
El resumen
+debe contener lo qué se ha pretendido realizar (objetivo o propósito de la
+investigación), cómo se ha realizado (método o proceso desarrollado) y para qué
+se ha realizado (resultados y conclusiones).
 Importante: La extensión mínima en un TFE individual es de 50 páginas, sin contar +portada, resumen, abstract, índices y anexos.
+ +Palabras clave: (De 3 a 5 palabras) Descriptores
+del trabajo que lo enmarcan en unas temáticas determinadas. Serán los
+utilizados para localizar tu trabajo si llega a ser publicado.
Abstract
+ +En +este apartado se introducirá un breve resumen en inglés del trabajo +realizado (extensión entre 150 y 300 palabras). Este resumen debe incluir el +objetivo o propósito de la investigación, la metodología, los resultados y las +conclusiones.
+ +Keywords: (De 3 a 5 palabras en inglés)
+ +Índice de contenidos
+ + + + + +1.2. Planteamiento
+del trabajo3
2. Contexto
+y estado del arte4
3. Objetivos
+concretos y metodología de trabajo6
 4. Desarrollo específico de la contribución9
5. Conclusiones
+y trabajo futuro13
 5.2. Líneas
+de trabajo futuro13
 Anexo A. Código
+fuente y datos analizados15
+Índice de figuras
Figura 1. Ejemplo
+de figura realizada para nuestro trabajo.2
+Índice de tablas
Tabla 1. Ejemplo
+de tabla con sus principales elementos.2
 El primer capítulo es siempre +una introducción. En ella debes resumir de forma esquemática pero +suficientemente clara lo esencial de cada una de las partes del trabajo. La +lectura de este primer capítulo ha de dar una primera idea clara de lo que se +pretendía, las conclusiones a las que se ha llegado y del procedimiento +seguido.
+ +Como tal, es uno de los +capítulos más importantes de la memoria. Las ideas +principales a transmitir son la identificación del problema a tratar, la +justificación de su importancia, los objetivos generales (a grandes rasgos) y +un adelanto de la contribución que esperas hacer.
+ +Típicamente una introducción +tiene tres apartados: Motivación, Planteamiento del trabajo, Estructura del +trabajo. (Texto Normal del menú de estilos.)
+ +Ejemplo de nota al pie[1].
+ +En este apartado se deberá +presentar el problema de estudio al que se quiere dar solución y justificar su +importancia para la comunidad educativa y científica.
+ +La lectura de este apartado +debe dar una idea clara de las razones, motivos e intereses que han llevado a +la elección de este tema. Recuerda que para poder justificar este trabajo debe +haber referencias a la investigación previa sobre el tema objeto de estudio, +independientemente de que luego se profundice en otros apartados.
+ +Las siguientes preguntas +puedan ayudar a la redacción de este apartado:
+ + ¿Cuál es el problema que quieres tratar?
+ + ¿Cuáles crees que son las causas?
+ + ¿Por qué es relevante el problema?
+ +A continuación, se indica con +un ejemplo cómo deben introducirse los títulos y las fuentes en Tablas y Figuras.
+ +Tabla 1. Ejemplo de tabla con sus principales
+elementos.
Fuente: American Psychological Association, +2020a.
+ + + +Figura 1. Ejemplo de figura realizada para nuestro
+trabajo.
Fuente: +American Psychological Association, 2020b.
+ +Se debe plantear, de forma +breve, el problema / necesidad detectada de la que se parte para proponer la +propuesta y la finalidad del TFE. Los objetivos se van a plantear +posteriormente, pero en este apartado debe quedar claro qué te planteas con la +intervención.
+ +Es necesario que los temas +escogidos tengan una vinculación directa con la ingeniería de software, el +desarrollo web y/o la ciberseguridad y, por tanto, el tema trabajado debe estar +en consonancia con la titulación.
+ +Las siguientes preguntas +puedan ayudar a la redacción de este apartado:
+ + ¿Cómo se podría solucionar el problema?
+ + +¿Qué es lo que se propone? Aquí +describes tus objetivos en términos generales.
+ +Aquí describes brevemente lo +que vas a contar en cada uno de los capítulos siguientes.
+ +Después de la introducción, se suele describir el contexto de
+aplicación. Suele ser un capítulo (o dos en ciertos casos) en el que se estudia
+a fondo el dominio de aplicación, citando numerosas referencias. Debe aportar
+un buen resumen del conocimiento que ya existe en el campo de los problemas
+habituales identificados.
 Es conveniente que revises los estudios actuales publicados en la línea
+elegida, y deberás consultar diferentes fuentes. No es suficiente con la
+consulta on-line, es necesario acudir a la biblioteca y consultar manuales
+técnicos.
 Hay que tener presente los autores de referencia en la temática del
+trabajo de investigación. Si se ha excluido a alguno de los relevantes hay que
+justificar adecuadamente su exclusión. Si por la extensión del trabajo no se
+puede señalar a todos los autores, habrá que justificar por qué se han elegido
+unos y se ha prescindido de otros.
 La organización específica en +secciones dependerá estrechamente el trabajo concreto que vayas a realizar. En +este punto será fundamental la colaboración con tu DIRECTOR, él podrá +asesorarte y guiarte, aunque siempre debes tener claro que el trabajo fundamental +es tuyo.
+ +El capítulo debería concluir
+con una última sección de resumen de conclusiones, resumiendo las principales
+averiguaciones del estudio del dominio y cómo van a afectar al desarrollo
+específico del trabajo.
 Recuerda que para citar trabajos de diferentes autores es fundamental e +imprescindible seguir el formato APA, según se describe en +el documento Normativa_APA.pdf disponible en el apartado de Documentación del +Aula de información general del Máster Universitario en Inteligencia Artificial +(MIA). No se debe mencionar, ni utilizar ninguna fuente, sin citarla apropiadamente.
+ +Estado del arte (base +teórica): antecedentes, estudios actuales, comparativa de herramientas +existentes, etc.
+ +Conclusiones (nexo de unión de lo investigado con el trabajo a realizar).
+ +Este tercer capítulo es el +puente entre el estudio del dominio y la contribución a realizar. Según el +trabajo concreto, el bloque se puede organizar de distintas formas, pero hay +tres elementos que deben estar presentes con mayor o menor detalle: (1) objetivo +general, (2) objetivos específicos y (3) metodología de trabajo.
+ +Es muy importante, por no +decir imprescindible, que los objetivos (general y específicos) sean SMART +(Doran, 1981) según la idea de George T. Doran que utilizó la palabra smart (inteligente en inglés) para definir las +características de un objetivo:
+ +S: Specific / Específico: que +exprese claramente qué es exactamente +lo que se quiere conseguir.
+ +M: Measurable / Medible: que se +puedan establecer medidas que determinen el éxito o fracaso y también el +progreso en la consecución del objetivo.
+ +A: Attainable / Alcanzable: que +sea viable su consecución en base al esfuerzo, tiempo y recursos disponibles +para conseguirlo.
+ +R: Relevant / Relevante: que +tenga un impacto demostrable, es decir que sea útil para un propósito concreto.
+ +T: Time-Related / Con un tiempo +determinado: que se pueda llevar a cabo en una fecha determinada.
+ +Los trabajos aplicados se +centran en conseguir un impacto concreto, demostrando la efectividad de una +tecnología, proponiendo una nueva metodología o aportando nuevas herramientas +tecnológicas. El objetivo por tanto no debe ser sin más crear una herramienta +o proponer una metodología, sino que debe centrarse en conseguir un efecto +observable. Además, como se ha dicho antes el objetivo general debe ser SMART.
+ +Ejemplo de objetivo general +SMART: Mejorar el servicio de audio guía de un museo convirtiéndolo en una guía +interactiva controlada por voz y valorada positivamente, un mínimo 4 sobre 5, +por los visitantes del museo.
+ +Este objetivo descrito +anteriormente podría dar lugar a un trabajo de tipo 2 (desarrollo de software) +que plantease el desarrollo de un bot conversacional +que procesara la señal de voz recogida a través del micrófono y a través de +técnicas de procesamiento del lenguaje natural fuera capaz de mantener una +conversación con el visitante para determinar el contenido en el que está +interesado o resolver las posibles dudas o preguntas que pudiera tener a lo +largo de su visita.
+ +Independientemente del tipo de +trabajo, la hipótesis o el objetivo general típicamente se dividirán en un +conjunto de objetivos más específicos analizables por separado. Estos objetivos +específicos suelen ser explicaciones de los diferentes pasos o tareas a seguir +en la consecución del objetivo general.
+ +Con los objetivos específicos +has de concretar qué pretendes conseguir. Estos objetivos que deben ser SMART +se formulan con un verbo en infinitivo más el contenido del objeto de estudio. +Se suelen usar viñetas para cada uno de los objetivos. Se pueden utilizar +fórmulas verbales, como las siguientes:
+ +Los objetivos específicos +suelen ser alrededor de 5. Normalmente uno o dos sobre el marco teórico o +estado del arte y dos o tres sobre el desarrollo específico de la contribución.
+ +En un trabajo como el anterior +se incluirían objetivos específicos tales como:
+ +De cara a alcanzar los objetivos específicos (y con ellos el objetivo +general o la validación/refutación de la hipótesis), será necesario realizar +una serie de pasos. La metodología del trabajo debe describir qué pasos se van +a dar, el porqué de cada paso, qué instrumentos se van a utilizar, cómo se van +a analizar los resultados, etc.
+ +4. Desarrollo
+específico de la contribución
Tipo 1. Piloto experimental
Capítulo 4 - Descripción
+detallada del experimento
En el capítulo de Objetivos y Metodología del Trabajo ya habrás +descrito a grandes rasgos la metodología experimental que vas a seguir. Pero si +tu trabajo se centra en describir un piloto, deberás dedicar un capítulo a +describir con todo detalle las características del piloto. Como mínimo querrás +mencionar:
+ + +Qué tecnologías se utilizaron +(incluyendo justificación de por qué se emplearon y descripciones detalladas de +las mismas).
+ + +Cómo se organizó el piloto
+ + +Qué personas participaron (con +datos demográficos)
+ + +Qué técnicas de evaluación +automática se emplearon.
+ + +Cómo transcurrió el experimento.
+ + +Qué instrumentos de seguimiento y +evaluación se utilizaron.
+ + +Qué tipo de análisis estadísticos +se ha empleado (si procede).
+ +Capítulo 5 - Descripción de
+los resultados
En el siguiente capítulo deberás detallar los resultados obtenidos, con +tablas de resumen, gráficas de resultados, identificación de datos relevantes, +etc. Es una exposición objetiva, sin valorar los resultados ni justificarlos.
+ +Capítulo 6 - Discusión
Tras la presentación objetiva de los resultados, querrás aportar una +discusión de los mismos. En este capítulo puedes +discutir la relevancia de los resultados, presentar posibles explicaciones para +los datos anómalos y resaltar aquellos datos que sean particularmente +relevantes para el análisis del experimento.
+ +Tipo 2. Desarrollo de
+software
En un trabajo de desarrollo de software es importante justificar los +criterios de diseño seguidos para desarrollar el programa, seguido de la +descripción detallada del producto resultante y finalmente una evaluación de la +calidad y aplicabilidad del producto. Esto suele verse reflejado en la +siguiente estructura de capítulos:
+ +Capítulo 4 - Identificación
+de requisitos
En este capítulo se debe indicar el trabajo previo realizado para guiar +el desarrollo del software. Esto debería incluir la identificación adecuada del +problema a tratar, así como del contexto habitual de uso o funcionamiento de la +aplicación. Idealmente, la identificación de requisitos se debería hacer +contando con expertos en la materia a tratar.
+ +Capítulo 5 - Descripción de
+la herramienta software desarrollada
En el caso de desarrollos de +software, deberían aportarse detalles del proceso de desarrollo, incluyendo +las fases e hitos del proceso. También deben presentarse diagramas explicativos +de la arquitectura o funcionamiento, así como capturas de pantalla que permitan +al lector entender el funcionamiento del programa.
+ +Capítulo 6 - Evaluación
La evaluación debería cubrir por lo menos una mínima evaluación de la +usabilidad de la herramienta, así como de su aplicabilidad para resolver el +problema propuesto. Estas evaluaciones suelen realizarse con usuarios expertos.
+ +Tipo 3. Comparativa de
+soluciones
Este tipo de trabajos suelen +seguir la estructura típica de un estudio comparativo, parten de plantear la +comparativa a realizar, describen el desarrollo de la misma +y analizan los resultados.
+ +Capítulo 4 - Planteamiento de la comparativa
En este capítulo se debe +indicar el trabajo previo realizado para identificar el problema concreto a +tratar, así como las posibles soluciones alternativas que se van a evaluar. +También se deben identificar los criterios de éxito para la comparativa, las medidas +que se van a tomar, etc.
+ +Capítulo 5 - Desarrollo de la comparativa
En este capítulo se debería +desarrollar con todo detalle la comparativa realizada, con todos los resultados +y mediciones obtenidos. Puede ser útil acompañar las descripciones con +gráficas, tablas y otros instrumentos para plasmar los datos obtenidos.
+ +Capítulo 6 - Discusión y análisis de resultados
Mientras que el capítulo +anterior se centraría en informar de los resultados y comparaciones obtenidos, +en este capítulo se abordará la discusión sobre su posible significado, así +como el análisis de las ventajas y desventajas de las distintas soluciones +evaluadas.
+ +En
+el capítulo de Objetivos y Metodología del Trabajo ya habrás descrito a grandes
+rasgos la metodología experimental que vas a seguir. Pero si tu trabajo se
+centra en describir un piloto, deberás dedicar un capítulo a describir con todo
+detalle las características del piloto. Como mínimo querrás mencionar:
Qué tecnologías se utilizaron
+(incluyendo justificación de por qué se emplearon y descripciones
+detalladas de las mismas).
Cómo se organizó el piloto
Qué personas participaron (con
+datos demográficos)
Qué técnicas de evaluación
+automática se emplearon.
Cómo transcurrió el experimento.
Qué instrumentos de seguimiento y
+evaluación se utilizaron.
+Qué
+tipo de análisis estadísticos se ha empleado (si procede).
.
+
Este último capítulo (en ocasiones,
+dos capítulos complementarios) es habitual en todos los tipos de trabajos y presenta
+el resumen final de tu trabajo y debe servir para informar del alcance y
+relevancia de tu aportación.
Suele estructurarse empezando con un
+resumen del problema tratado, de cómo se ha abordado y de por qué la solución
+sería válida.
Es
+recomendable que incluya también un resumen de las contribuciones del trabajo,
+en el que relaciones las contribuciones y los resultados obtenidos con los
+objetivos que habías planteado para el trabajo, discutiendo hasta qué punto has
+conseguido resolver los objetivos planteados.
Finalmente, se suele dedicar una última sección a hablar de líneas de
+trabajo futuro que podrían aportar valor añadido al TFE realizado. La sección
+debería señalar las perspectivas de futuro que abre el trabajo desarrollado
+para el campo de estudio definido. En el fondo, debes justificar de qué modo
+puede emplearse la aportación que has desarrollado y en qué campos.
Según la normativa APA debe ponerse
+con sangría francesa y debe estar ordenado por orden alfabético según el
+apellido del primer autor.
Toda la bibliografía que aparezca en
+este apartado debe estar citada en el trabajo. La mayor parte de las citas
+deben aparecer en el capítulo 2, que es donde se realiza el estudio del estado
+del arte. Además, se recomienda evitar citas que hagan referencia a Wikipedia y
+que no todas las referencias sean solo enlaces de internet, es decir, que se
+vea alguna variabilidad entre libros, congresos, artículos y enlaces puntuales
+de internet.
Se recomienda encarecidamente
+utilizar el gestor de bibliografía de Word para gestionar la bibliografía.
Ejemplo:
Doran, G. T.
+(1981). There's a S.M.A.R.T. way to write management's goals and objectives. Management Review (AMA FORUM), 70, 35-36.
+
Anexo A. +Código fuente y datos analizados
+ +Es recomendable que el estudiante incluya en +su memoria la URL del repositorio donde tiene alojado el código fuente +desarrollado durante el TFE. El estudiante debe ser el único autor del código y +único propietario del repositorio. En el repositorio no debe haber commit de ningún otro usuario del repositorio.
+ +De igual forma, los datos que se hayan utilizado +para el análisis, siempre que así se considere oportuno, también deberían estar +alojados en el mismo repositorio.
+ +Si el TFE está asociado a una actividad o +proyecto de Empresa, se debe justificar en la memoria que, por temas de +confidencialidad, no se deja disponible ni el código fuente ni los datos +utilizados.
+ +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Sergio
+Jiménez Jiménez
Optimización
+de Hiperparámetros OCR con Ray Tune para Documentos Académicos en Español
13
+ +3.3.1\n", - "\n" - ], - "text/plain": [ - "\u001b[1;36m3.3\u001b[0m.\u001b[1;36m1\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import paddleocr\n", - "\n", - "print(paddleocr.__version__)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "b1541bb6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
c:\\Users\\sji\\Desktop\\MastersThesis\\.venv\\Lib\\site-packages\\paddleocr\n", - "\n" - ], - "text/plain": [ - "c:\\Users\\sji\\Desktop\\MastersThesis\\.venv\\Lib\\site-packages\\paddleocr\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# 1) Locate the installed PaddleOCR package\n", - "pkg_dir = os.path.dirname(paddleocr.__file__)\n", - "print(pkg_dir)" - ] - }, - { - "cell_type": "markdown", - "id": "84c999e2", - "metadata": {}, - "source": [ - "## 2 Helper Functions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9596c7df", - "metadata": {}, - "outputs": [], - "source": [ - "from typing import List, Optional\n", - "from paddle_ocr_tuning import pdf_to_images, pdf_extract_text, evaluate_text, assemble_from_paddle_result\n", - "\n", - "def show_page(img: Image.Image, text: str, scale: float = 1):\n", - " \"\"\"\n", - " Displays a smaller version of the image with text as a footer.\n", - " \"\"\"\n", - " # Compute plot size based on image dimensions (but without resizing the image)\n", - " w, h = img.size\n", - " figsize = (w * scale / 100, h * scale / 100) # convert pixels to inches approx\n", - "\n", - " fig, ax = plt.subplots(figsize=figsize)\n", - " ax.imshow(img)\n", - " ax.axis(\"off\")\n", - "\n", - "\n", - " # Add OCR text below the image (footer)\n", - " # plt.figtext(0.5, 0.02, text.strip(), wrap=True, ha='center', va='bottom', fontsize=10)\n", - " plt.tight_layout()\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "e42cae29", - "metadata": {}, - "source": [ - "## Run AI OCR Benchmark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9b55c154", - "metadata": {}, - "outputs": [], - "source": [ - "results = []\n", - "\n", - "for pdf_file in os.listdir(PDF_FOLDER):\n", - " if not pdf_file.lower().endswith('.pdf'):\n", - " continue\n", - " pdf_path = os.path.join(PDF_FOLDER, pdf_file)\n", - " page_range = range(5, 10)\n", - " \n", - " images = 
pdf_to_images(pdf_path, 300, page_range)\n", - " \n", - " for i, img in enumerate(images):\n", - " # img = preprocess_for_ocr(img)\n", - " page_num = page_range[i]\n", - " ref = pdf_extract_text(pdf_path, page_num=page_num)\n", - " show_page(img, f\"page: {page_num}\", 0.15)\n", - " print(f\"ref: \\n{ref}\")\n", - " \n", - " # Convert PIL image to numpy array\n", - " image_array = np.array(img)\n", - " out = paddleocr_model.predict(\n", - " image_array,\n", - " use_doc_orientation_classify=False,\n", - " use_doc_unwarping=False,\n", - " use_textline_orientation=True\n", - " )\n", - " # PaddleOCR\n", - " paddle_text = assemble_from_paddle_result(out)\n", - " print(f\"paddle_text: \\n{paddle_text}\")\n", - " results.append({'PDF': pdf_file, 'Page': page_num, 'Model': 'PaddleOCR', 'Prediction': paddle_text, **evaluate_text(ref, paddle_text)})\n", - " " - ] - }, - { - "cell_type": "markdown", - "id": "0db6dc74", - "metadata": {}, - "source": [ - "## 5 Save and Analyze Results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "da3155e3", - "metadata": {}, - "outputs": [], - "source": [ - "df_results = pd.DataFrame(results)\n", - "\n", - "# Generate a unique filename with timestamp\n", - "timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", - "filename = f\"ai_ocr_benchmark_finetune_results_{timestamp}.csv\"\n", - "filepath = os.path.join(OUTPUT_FOLDER, filename)\n", - "\n", - "df_results.to_csv(filepath, index=False)\n", - "print(f\"Benchmark results saved as {filename}\")\n", - "\n", - "# Summary by model\n", - "summary = df_results.groupby('Model')[['WER', 'CER']].mean()\n", - "print(summary)\n", - "\n", - "# Plot\n", - "summary.plot(kind='bar', figsize=(8,5), title='AI OCR Benchmark (WER & CER)')\n", - "plt.ylabel('Error Rate')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "3e0f00c0", - "metadata": {}, - "source": [ - "### How to read this chart:\n", - "- CER (Character Error Rate) focus on raw transcription 
quality\n", - "- WER (Word Error Rate) penalizes incorrect tokenization or missing spaces\n", - "- CER and WER are error metrics, which means:\n", - " - Higher values = worse performance\n", - " - Lower values = better accuracy" - ] - }, - { - "cell_type": "markdown", - "id": "830b0e25", - "metadata": {}, - "source": [ - "# Busqueda de hyperparametros\n", - "https://docs.ray.io/en/latest/tune/index.html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3a4bd700", - "metadata": {}, - "outputs": [], - "source": [ - "!python --version\n", - "!pip --version" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "b0cf4bcf", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", - "Collecting rich\n", - " Downloading rich-14.2.0-py3-none-any.whl.metadata (18 kB)\n", - "Requirement already satisfied: ray[tune] in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (2.51.1)\n", - "Requirement already satisfied: click!=8.3.0,>=7.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ray[tune]) (8.2.1)\n", - "Requirement already satisfied: filelock in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ray[tune]) (3.20.0)\n", - "Requirement already satisfied: jsonschema in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ray[tune]) (4.25.1)\n", - "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ray[tune]) (1.1.2)\n", - "Requirement already satisfied: packaging in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ray[tune]) (25.0)\n", - "Requirement already satisfied: protobuf>=3.20.3 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ray[tune]) (6.33.0)\n", - "Requirement already satisfied: pyyaml in 
c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ray[tune]) (6.0.2)\n", - "Requirement already satisfied: requests in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ray[tune]) (2.32.5)\n", - "Requirement already satisfied: pandas in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ray[tune]) (2.3.3)\n", - "Requirement already satisfied: tensorboardX>=1.9 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ray[tune]) (2.6.4)\n", - "Requirement already satisfied: pyarrow>=9.0.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ray[tune]) (22.0.0)\n", - "Requirement already satisfied: fsspec in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from ray[tune]) (2025.10.0)\n", - "Collecting markdown-it-py>=2.2.0 (from rich)\n", - " Downloading markdown_it_py-4.0.0-py3-none-any.whl.metadata (7.3 kB)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from rich) (2.19.2)\n", - "Requirement already satisfied: colorama in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from click!=8.3.0,>=7.0->ray[tune]) (0.4.6)\n", - "Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich)\n", - " Downloading mdurl-0.1.2-py3-none-any.whl.metadata (1.6 kB)\n", - "Requirement already satisfied: numpy in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from tensorboardX>=1.9->ray[tune]) (2.3.4)\n", - "Requirement already satisfied: attrs>=22.2.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema->ray[tune]) (25.4.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema->ray[tune]) (2025.9.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in 
c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema->ray[tune]) (0.37.0)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from jsonschema->ray[tune]) (0.28.0)\n", - "Requirement already satisfied: typing-extensions>=4.4.0 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from referencing>=0.28.4->jsonschema->ray[tune]) (4.15.0)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas->ray[tune]) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas->ray[tune]) (2025.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from pandas->ray[tune]) (2025.2)\n", - "Requirement already satisfied: six>=1.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas->ray[tune]) (1.17.0)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests->ray[tune]) (3.4.4)\n", - "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests->ray[tune]) (3.11)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests->ray[tune]) (2.5.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sji\\desktop\\mastersthesis\\.venv\\lib\\site-packages (from requests->ray[tune]) (2025.10.5)\n", - "Downloading rich-14.2.0-py3-none-any.whl (243 kB)\n", - "Downloading markdown_it_py-4.0.0-py3-none-any.whl (87 kB)\n", - "Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n", - "Installing collected packages: mdurl, markdown-it-py, 
rich\n", - "\n", - " ---------------------------------------- 0/3 [mdurl]\n", - " ---------------------------------------- 0/3 [mdurl]\n", - " ---------------------------------------- 0/3 [mdurl]\n", - " ---------------------------------------- 0/3 [mdurl]\n", - " ---------------------------------------- 0/3 [mdurl]\n", - " ---------------------------------------- 0/3 [mdurl]\n", - " ---------------------------------------- 0/3 [mdurl]\n", - " ---------------------------------------- 0/3 [mdurl]\n", - " ---------------------------------------- 0/3 [mdurl]\n", - " ---------------------------------------- 0/3 [mdurl]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- 
-------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " ------------- -------------------------- 1/3 [markdown-it-py]\n", - " -------------------------- ------------- 2/3 
[rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " -------------------------- ------------- 2/3 [rich]\n", - " ---------------------------------------- 3/3 [rich]\n", - "\n", - "Successfully installed markdown-it-py-4.0.0 mdurl-0.1.2 rich-14.2.0\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], - "source": [ - "# Instalación de Ray y Ray Tune\n", - "%pip install -U \"ray[tune]\" rich" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "f3ca0b9b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2025-11-12 22:30:42,267\tINFO worker.py:1850 -- Calling ray.init() again after it has already been called.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Ray Tune listo (versión: 2.51.1 )\n" - ] - } - ], - "source": [ - "import ray\n", - "from ray import tune\n", - "from ray.tune.schedulers import ASHAScheduler\n", - "\n", - "ray.init(ignore_reinit_error=True)\n", - "print(\"Ray Tune listo (versión:\", ray.__version__, \")\")" - ] - 
}, - { - "cell_type": "code", - "execution_count": 7, - "id": "ae5a10c4", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2025-11-12 22:30:48,318\tINFO worker.py:1850 -- Calling ray.init() again after it has already been called.\n" - ] - } - ], - "source": [ - "# ===============================================================\n", - "# 🔍 RAY TUNE: OPTIMIZACIÓN AUTOMÁTICA DE HIPERPARÁMETROS OCR\n", - "# ===============================================================\n", - "\n", - "from ray import tune, air\n", - "from ray.tune.schedulers import ASHAScheduler\n", - "import pandas as pd\n", - "import time\n", - "import colorama\n", - "from rich import print\n", - "import sys, subprocess \n", - "from rich.console import Console\n", - "\n", - "colorama.just_fix_windows_console()\n", - "ray.init(ignore_reinit_error=True)\n", - "\n", - "# Tell Ray Tune to use a Jupyter-compatible console\n", - "console = Console(force_jupyter=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "96c320e8", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "# --- Configuración base del experimento ---\n", - "search_space = {\n", - " \"dpi\": tune.choice([240, 300, 360]),\n", - " \"textline_orientation\": tune.choice([True, False]),\n", - " \"text_det_box_thresh\": tune.uniform(0.4, 0.7),\n", - " \"text_det_unclip_ratio\": tune.uniform(1.2, 2.0),\n", - " \"text_rec_score_thresh\": tune.choice([0.0, 0.2, 0.4]),\n", - " \"line_tolerance\": tune.choice([0.5, 0.6, 0.7]),\n", - " \"min_box_score\": tune.choice([0, 0.5, 0.6])\n", - "}\n", - "KEYMAP = {\n", - " \"dpi\": \"dpi\",\n", - " \"textline_orientation\": \"textline-orientation\",\n", - " \"text_det_box_thresh\": \"text-det-box-thresh\",\n", - " \"text_det_unclip_ratio\": \"text-det-unclip-ratio\",\n", - " \"text_rec_score_thresh\": \"text-rec-score-thresh\",\n", - " \"line_tolerance\": \"line-tolerance\",\n", - " \"pages_per_pdf\": 
\"pages-per-pdf\",\n", - " \"min_box_score\": \"min-box-score\",\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "accb4e9d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Notebook Python: c:\\Users\\sji\\Desktop\\MastersThesis\\.venv\\Scripts\\python.exe\n", - "\n" - ], - "text/plain": [ - "Notebook Python: c:\\Users\\sji\\Desktop\\MastersThesis\\.venv\\Scripts\\python.exe\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{'CER': 0.019801980198019802, 'WER': 0.09090909090909091, 'TIME': 38.859522104263306, 'PAGES': 1}\n", - "\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\u001b[32m'CER'\u001b[0m: \u001b[1;36m0.019801980198019802\u001b[0m, \u001b[32m'WER'\u001b[0m: \u001b[1;36m0.09090909090909091\u001b[0m, \u001b[32m'TIME'\u001b[0m: \u001b[1;36m38.859522104263306\u001b[0m, \u001b[32m'PAGES'\u001b[0m: \u001b[1;36m1\u001b[0m\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
return code: 0\n",
- "\n"
- ],
- "text/plain": [
- "return code: \u001b[1;36m0\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "args: ['c:\\\\Users\\\\sji\\\\Desktop\\\\MastersThesis\\\\.venv\\\\Scripts\\\\python.exe', \n", - "'c:\\\\Users\\\\sji\\\\Desktop\\\\MastersThesis\\\\paddle_ocr_tuning.py', '--pdf-folder', \n", - "'c:\\\\Users\\\\sji\\\\Desktop\\\\MastersThesis\\\\instructions', '--pages-per-pdf', '1', '--dpi', '360', \n", - "'--textline-orientation', 'True', '--text-det-box-thresh', '0.46611732611383844', '--text-det-unclip-ratio', \n", - "'1.3598680409827462', '--text-rec-score-thresh', '0.0', '--line-tolerance', '0.5', '--min-box-score', '0.6']\n", - "\n" - ], - "text/plain": [ - "args: \u001b[1m[\u001b[0m\u001b[32m'c:\\\\Users\\\\sji\\\\Desktop\\\\MastersThesis\\\\.venv\\\\Scripts\\\\python.exe'\u001b[0m, \n", - "\u001b[32m'c:\\\\Users\\\\sji\\\\Desktop\\\\MastersThesis\\\\paddle_ocr_tuning.py'\u001b[0m, \u001b[32m'--pdf-folder'\u001b[0m, \n", - "\u001b[32m'c:\\\\Users\\\\sji\\\\Desktop\\\\MastersThesis\\\\instructions'\u001b[0m, \u001b[32m'--pages-per-pdf'\u001b[0m, \u001b[32m'1'\u001b[0m, \u001b[32m'--dpi'\u001b[0m, \u001b[32m'360'\u001b[0m, \n", - "\u001b[32m'--textline-orientation'\u001b[0m, \u001b[32m'True'\u001b[0m, \u001b[32m'--text-det-box-thresh'\u001b[0m, \u001b[32m'0.46611732611383844'\u001b[0m, \u001b[32m'--text-det-unclip-ratio'\u001b[0m, \n", - "\u001b[32m'1.3598680409827462'\u001b[0m, \u001b[32m'--text-rec-score-thresh'\u001b[0m, \u001b[32m'0.0'\u001b[0m, \u001b[32m'--line-tolerance'\u001b[0m, \u001b[32m'0.5'\u001b[0m, \u001b[32m'--min-box-score'\u001b[0m, \u001b[32m'0.6'\u001b[0m\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import sys, subprocess\n", - "print(\"Notebook Python:\", sys.executable)\n", - "# test paddle ocr run with params\n", - "args = [sys.executable, \n", - " SCRIPT_ABS, \n", - " \"--pdf-folder\", PDF_FOLDER_ABS, \n", - " \"--pages-per-pdf\", \"1\",\n", - " \"--dpi\",\"360\" ,\n", - " \"--textline-orientation\",\"True\",\n", - " 
\"--text-det-box-thresh\",\"0.46611732611383844\",\n", - " \"--text-det-unclip-ratio\",\"1.3598680409827462\",\n", - " \"--text-rec-score-thresh\",\"0.0\",\n", - " \"--line-tolerance\", \"0.5\",\n", - " \"--min-box-score\",\"0.6\"]\n", - "test_proc = subprocess.run(args, capture_output=True, text=True, cwd=SCRIPT_DIR)\n", - "if test_proc.returncode != 0:\n", - " print(test_proc.stderr)\n", - "last = test_proc.stdout.strip().splitlines()[-1]\n", - "\n", - "metrics = json.loads(last)\n", - "print(metrics)\n", - "\n", - "print(f\"return code: {test_proc.returncode}\")\n", - "print(f\"args: {args}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "8df28468", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\sji\\Desktop\\MastersThesis\\.venv\\Lib\\site-packages\\ray\\tune\\impl\\tuner_internal.py:144: RayDeprecationWarning: The `RunConfig` class should be imported from `ray.tune` when passing it to the Tuner. Please update your imports. See this issue for more context and migration options: https://github.com/ray-project/ray/issues/49454. Disable these warnings by setting the environment variable: RAY_TRAIN_ENABLE_V2_MIGRATION_WARNINGS=0\n", - " _log_deprecation_warning(\n", - "2025-11-12 22:31:01,166\tINFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949\n" - ] - }, - { - "data": { - "text/html": [ - "
| Current time: | 2025-11-12 22:39:26 |
| Running for: | 00:08:25.78 |
| Memory: | 9.9/31.8 GiB |
| Trial name | status | loc | dpi | line_tolerance | min_box_score | text_det_box_thresh | text_det_unclip_rati\n", - "o | text_rec_score_thres\n", - "h | textline_orientation | iter | total time (s) | CER | WER | TIME |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| trainable_paddle_ocr_3632f_00000 | TERMINATED | 127.0.0.1:22388 | 360 | 0.6 | 0.6 | 0.598139 | 1.595 | 0.2 | True | 1 | 500.4 | 0.0684595 | 0.414935 | 473.74 |
| trainable_paddle_ocr_3632f_00001 | TERMINATED | 127.0.0.1:10796 | 300 | 0.6 | 0.5 | 0.418069 | 1.61857 | 0.2 | True | 1 | 465.474 | 0.0563063 | 0.285714 | 438.892 |
| Trial name | CER | PAGES | TIME | TIME_PER_PAGE | WER |
|---|---|---|---|---|---|
| trainable_paddle_ocr_3632f_00000 | 0.0684595 | 2 | 473.74 | 236.768 | 0.414935 |
| trainable_paddle_ocr_3632f_00001 | 0.0563063 | 2 | 438.892 | 219.372 | 0.285714 |
Guardado: raytune_paddle_subproc_results_20251112_223927.csv\n", - "\n" - ], - "text/plain": [ - "Guardado: raytune_paddle_subproc_results_20251112_223927.csv\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Generate a unique filename with timestamp\n", - "timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", - "filename = f\"raytune_paddle_subproc_results_{timestamp}.csv\"\n", - "filepath = os.path.join(OUTPUT_FOLDER, filename)\n", - "\n", - "\n", - "df.to_csv(filename, index=False)\n", - "print(f\"Guardado: {filename}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "3e3a34e4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
| \n", - " | CER | \n", - "WER | \n", - "TIME | \n", - "PAGES | \n", - "TIME_PER_PAGE | \n", - "timestamp | \n", - "training_iteration | \n", - "time_this_iter_s | \n", - "time_total_s | \n", - "pid | \n", - "time_since_restore | \n", - "iterations_since_restore | \n", - "config/dpi | \n", - "config/text_det_box_thresh | \n", - "config/text_det_unclip_ratio | \n", - "config/text_rec_score_thresh | \n", - "config/line_tolerance | \n", - "config/min_box_score | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | \n", - "2.000000 | \n", - "2.000000 | \n", - "2.000000 | \n", - "2.0 | \n", - "2.000000 | \n", - "2.000000e+00 | \n", - "2.0 | \n", - "2.000000 | \n", - "2.000000 | \n", - "2.000000 | \n", - "2.000000 | \n", - "2.0 | \n", - "2.000000 | \n", - "2.000000 | \n", - "2.000000 | \n", - "2.0 | \n", - "2.0 | \n", - "2.000000 | \n", - "
| mean | \n", - "0.062383 | \n", - "0.350325 | \n", - "456.315870 | \n", - "2.0 | \n", - "228.070288 | \n", - "1.762958e+09 | \n", - "1.0 | \n", - "482.937319 | \n", - "482.937319 | \n", - "16592.000000 | \n", - "482.937319 | \n", - "1.0 | \n", - "330.000000 | \n", - "0.508104 | \n", - "1.606787 | \n", - "0.2 | \n", - "0.6 | \n", - "0.550000 | \n", - "
| std | \n", - "0.008594 | \n", - "0.091373 | \n", - "24.641709 | \n", - "0.0 | \n", - "12.300573 | \n", - "2.404163e+01 | \n", - "0.0 | \n", - "24.696451 | \n", - "24.696451 | \n", - "8196.781808 | \n", - "24.696451 | \n", - "0.0 | \n", - "42.426407 | \n", - "0.127329 | \n", - "0.016666 | \n", - "0.0 | \n", - "0.0 | \n", - "0.070711 | \n", - "
| min | \n", - "0.056306 | \n", - "0.285714 | \n", - "438.891550 | \n", - "2.0 | \n", - "219.372469 | \n", - "1.762958e+09 | \n", - "1.0 | \n", - "465.474291 | \n", - "465.474291 | \n", - "10796.000000 | \n", - "465.474291 | \n", - "1.0 | \n", - "300.000000 | \n", - "0.418069 | \n", - "1.595003 | \n", - "0.2 | \n", - "0.6 | \n", - "0.500000 | \n", - "
| 25% | \n", - "0.059345 | \n", - "0.318019 | \n", - "447.603710 | \n", - "2.0 | \n", - "223.721378 | \n", - "1.762958e+09 | \n", - "1.0 | \n", - "474.205805 | \n", - "474.205805 | \n", - "13694.000000 | \n", - "474.205805 | \n", - "1.0 | \n", - "315.000000 | \n", - "0.463086 | \n", - "1.600895 | \n", - "0.2 | \n", - "0.6 | \n", - "0.525000 | \n", - "
| 50% | \n", - "0.062383 | \n", - "0.350325 | \n", - "456.315870 | \n", - "2.0 | \n", - "228.070288 | \n", - "1.762958e+09 | \n", - "1.0 | \n", - "482.937319 | \n", - "482.937319 | \n", - "16592.000000 | \n", - "482.937319 | \n", - "1.0 | \n", - "330.000000 | \n", - "0.508104 | \n", - "1.606787 | \n", - "0.2 | \n", - "0.6 | \n", - "0.550000 | \n", - "
| 75% | \n", - "0.065421 | \n", - "0.382630 | \n", - "465.028030 | \n", - "2.0 | \n", - "232.419197 | \n", - "1.762958e+09 | \n", - "1.0 | \n", - "491.668833 | \n", - "491.668833 | \n", - "19490.000000 | \n", - "491.668833 | \n", - "1.0 | \n", - "345.000000 | \n", - "0.553121 | \n", - "1.612680 | \n", - "0.2 | \n", - "0.6 | \n", - "0.575000 | \n", - "
| max | \n", - "0.068460 | \n", - "0.414935 | \n", - "473.740190 | \n", - "2.0 | \n", - "236.768107 | \n", - "1.762958e+09 | \n", - "1.0 | \n", - "500.400347 | \n", - "500.400347 | \n", - "22388.000000 | \n", - "500.400347 | \n", - "1.0 | \n", - "360.000000 | \n", - "0.598139 | \n", - "1.618572 | \n", - "0.2 | \n", - "0.6 | \n", - "0.600000 | \n", - "
Correlación con CER:\n", - " config/min_box_score 1.0\n", - "CER 1.0\n", - "config/text_det_box_thresh 1.0\n", - "config/dpi 1.0\n", - "config/text_det_unclip_ratio -1.0\n", - "config/text_rec_score_thresh NaN\n", - "config/line_tolerance NaN\n", - "Name: CER, dtype: float64\n", - "\n" - ], - "text/plain": [ - "Correlación con CER:\n", - " config/min_box_score \u001b[1;36m1.0\u001b[0m\n", - "CER \u001b[1;36m1.0\u001b[0m\n", - "config/text_det_box_thresh \u001b[1;36m1.0\u001b[0m\n", - "config/dpi \u001b[1;36m1.0\u001b[0m\n", - "config/text_det_unclip_ratio \u001b[1;36m-1.0\u001b[0m\n", - "config/text_rec_score_thresh NaN\n", - "config/line_tolerance NaN\n", - "Name: CER, dtype: float64\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Correlación con WER:\n", - " config/min_box_score 1.0\n", - "config/dpi 1.0\n", - "config/text_det_box_thresh 1.0\n", - "WER 1.0\n", - "config/text_det_unclip_ratio -1.0\n", - "config/text_rec_score_thresh NaN\n", - "config/line_tolerance NaN\n", - "Name: WER, dtype: float64\n", - "\n" - ], - "text/plain": [ - "Correlación con WER:\n", - " config/min_box_score \u001b[1;36m1.0\u001b[0m\n", - "config/dpi \u001b[1;36m1.0\u001b[0m\n", - "config/text_det_box_thresh \u001b[1;36m1.0\u001b[0m\n", - "WER \u001b[1;36m1.0\u001b[0m\n", - "config/text_det_unclip_ratio \u001b[1;36m-1.0\u001b[0m\n", - "config/text_rec_score_thresh NaN\n", - "config/line_tolerance NaN\n", - "Name: WER, dtype: float64\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "param_cols = [\n", - " \"config/dpi\",\n", - " \"config/text_det_box_thresh\",\n", - " \"config/text_det_unclip_ratio\",\n", - " \"config/text_rec_score_thresh\",\n", - " \"config/line_tolerance\",\n", - " \"config/min_box_score\",\n", - "]\n", - "# Correlación de Pearson con CER y WER\n", - "corr_cer = df[param_cols + [\"CER\"]].corr()[\"CER\"].sort_values(ascending=False)\n", - "corr_wer = df[param_cols + [\"WER\"]].corr()[\"WER\"].sort_values(ascending=False)\n", - "\n", - "print(\"Correlación con CER:\\n\", corr_cer)\n", - "print(\"Correlación con WER:\\n\", corr_wer)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "02fc0a87", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAkoAAAHHCAYAAABA5XcCAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAASQRJREFUeJzt3QncjXX+//GP/ZatspM1+y67DClZImuFaSI/07QIw4xCojITLcr8oqRF2zSMlLIkIoYoa0lZSkqbLdlDcf0f7+/vf505577PdW/d7nOf2+v5eBzuc13fa98+57tdOTzP8wwAAABJ5Ew6CAAAAARKAAAAySBHCQAAIACBEgAAQAACJQAAgAAESgAAAAEIlAAAAAIQKAEAAAQgUAIAAAhAoJQNHTt2zP74xz9aqVKlLEeOHPbnP//ZDd+7d69dd911VrRoUTd88uTJFu/bdL554YUX3PZ/9dVXllVdccUVVqdOHcvO66NjcN9996WYTmmUNp7PtfXr18d6VYCYIlCKs5tW0OeDDz4IpX3wwQdd+ttvv91efvllu+mmm9zwYcOG2TvvvGOjRo1ywzt27Jjh66llz50795zMN9o2RVOxYsXQfsmZM6ddeOGFVrduXfvTn/5kH3744W9ajyeffNKtx7l2rvZjevgP+5Q+CkgQH9544w3r1KmTFStWzPLmzWtlypSxG264wZYtW2bZwcKFC1MVyGakm2++OfDaSEhIsKwq/H6pT4ECBaxp06b20ksvxdX+P5dyn9O5I8M98MADVqlSpSTDq1SpEvpbN7vmzZvbuHHjItJoeLdu3eyvf/3rOX3AK9eqe/fuGTrfoG0K0qBBA/vLX/7i/j569Kht3brVZs+ebc8884wLGB977LF0B0p6uOimeC4F7UcFiH369LF8+fJZZunZs2fE+aXcPQWsPXr0cON8JUuWzLR1Qvro1Z7/8z//44L9hg0b2vDhw10u7Q8//OCCp6uuusref/99a9myZVzvYj2op06dmukPa12Xzz77bJLhuXLlsqws/H6pc0Hb0L9/fzt16pTdcsstcbP/zxUCpTijX4GNGzdONs2+ffusVq1aUYcrdyUeBW1TkLJly9of/vCHiGEPPfSQ/f73v7fHH3/cqlat6h728UY33My+6darV899fAcOHHD7TsMS7+Pf6uTJky6HQzmByHiTJk1yQZKKrvVjIbxY8J577nG5tblzZ+5j4cSJE3bBBRdYPASZOj/z588fmEb7Lj3XxPHjx11OzrnYP7/++qudPXvWXVepvV/qh2DlypXdvfKWdARK2Q13o2xk+fLl7sa3a9cuW7BgQSgr1S+204WuKN8f7jt06JC7cZYrV879IlLugYIKXVzh9P0f//iHK8ZSVnLx4sVd8Z1fh0Hz1AX/4osvhpaRUs6LAqCBAwe63AjNs379+m76lLYpPXV0dIPTg+Diiy+2v//9725/hG+b6mzVrl3brYfW59Zbb7WffvopIov6008/tRUrVkQtbsqM/RhUR0k5XVp3LVfFKIMGDXLrE62uzmeffWZt27Z1N1/dIB9++GE7F1Jajn9sZ86caWPGjHFplPbIkSNuvIpJtV+KFCnihrdp08bldoRTbqH2uY6Ntr1EiRJ29dVX28aNG9O8Pqk5H5OzatUqa9KkiZvu0ksvtaeffjpN+0s5no0aNXLnqXIt9eD67rvvItLoPChYsKAbrtxG/a3zR7nEZ86cSXb+P//8s02YMMFq1Khhjz76aNS6U8qxVLFLOOUqKOdJy9HDXDmJ+/fvj0jz5ptvWufOnd25p+Og7R8/fnySdfLPwQ0bNljr1q3dsRg9enSa5uGfG9dcc41ddNFFbp0UtOua8veR7nMSXqSUlmtddE516dLFVVfQj1Mdl7Qe02j8a1j3kTvuuMOds5dcckmK+yc156buC5q3jq+2UftQ+1LnflroWOs82blzZ8TwlStX2vXXX2/ly5d389W9Tjn0Ord8GbX/sxJylOLM4cO
H3S/6cDoJVUG7Zs2aLhDQiasLz89KVRa7X69HD5F+/fpF/FrRA0g3Xp2sugBWr17t6jEpCza8wrcuUl3kytVSxWr9UtGFo/pRupFoGRquG63qA4ku1CC6uHRj+OKLL+zOO+90RYp6WOhC00N+6NChgdukCzk99GDRjf65555zNw9drKJt17YNGDDAhgwZ4gKzKVOm2KZNm9zDOU+ePG5fDB482M1Dv77Di5tiuR+VvX3//fdbu3btXE7P9u3b7amnnrJ169aF1t2nm5GCDxWZqU7Ka6+9ZnfffbcL2rQ+GSUty9HDUL929bDXQ1l/q6hV6RQ4qLhVOUwzZsywK6+80u0r/2F+2223uXnr/FGO448//ugCFhW1XnbZZWlan9Scj0E++eQTa9++vTsvdTx0TLXeqS2O9M89BVoKZtTwQg9+HT+dg+E5wQocOnToYM2aNXMPxHfffdflFOkcSS6XVPvl4MGDLrBMS66kznkFJNoePYh1Lmv/zJo1K2L9dV0ooNL/On5jx451Qe8jjzwSMT8dI+1zFSErGPT3UWrnsWTJEhfAlC5d2h0TFR3qeM+fP9991/X3/fffu3S6lhJLzbXu07XUt29fN41yVqpXr57i/kp8fxad04ULF44YpiBJ54u2UT+Mkts/aT03da0o90v3DwU0+nGYFjp/v/32W3fcw2mZutfpPNMzZ+3atfbEE0+4tBonGbn/swwPcWHGjBnK/oj6yZcvX0TaChUqeJ07d04yD6UdNGhQxLDx48d7BQoU8Hbs2BExfOTIkV6uXLm83bt3u+/Lli1z0w8ZMiTJfM+ePRv6W/Pq379/qrZp8uTJbp6vvPJKaNjp06e9Fi1aeAULFvSOHDmS4jZFk1Laxx9/3C33zTffdN9Xrlzpvv/zn/+MSLdo0aIkw2vXru21adMmyTwzaz/658GuXbvc93379nl58+b12rdv7505cyaUbsqUKS7d888/Hxqm9dawl156KTTs1KlTXqlSpbxevXp5qbV//343n3HjxkUdn9rlvPfeey5d5cqVvRMnTkTsh6pVq3odOnSI2CdKU6lSJe/qq68ODStSpEiSczq965OW8zHx9nfv3t1LSEjwvv7669Cwzz77zB37lG6zWkaJEiW8OnXqeD///HNo+Pz58920Y8eODQ3TOaFhDzzwQMQ8GjZs6DVq1CjZ5fzjH/9w077xxhteavjnWrt27SKOw7Bhw9x2HTp0KDQs/Pj5br31Vu+CCy7wTp48meRYTJs2LUn61Mzj119/deeArvGffvopIm34OuqciLbf03KtaxkapnGp4R+baB+dy4n3a6tWrdz2hAvaP6k9N3VfULrChQu7e0NqaDt1/9B1rc8nn3zi3XTTTVGfFyeiHKMJEyZ4OXLkiDj3M2L/ZyUUvcUZZWkqUg//vP322+men34F/O53v3O/HPRLyP8od0K/XP/zn/+4dHPmzHE5V9EqU6e3+bMq/OnXoH6x+fRrQr8yVGFYWdPngn6t+sU2/j5Q8Y5y28L3gXIzlPa9997LsvtRuQmnT592uQTh9Xr061e/YFVcmXjbw+si6Jeucme+/PJLy0hpWY4qjYbX+/joo4/s888/d/XJ9Ova35f61a3KxtqXfnGmclpUDKNfsL91fdJ7Pur4qnhGRWHKSfQpN1Q5PylRkauKVZTDEN46SsVQKv5IfAz9nLRwOvdSOoZ+kWahQoUsLZQrEX5ualna5q+//jo0LPz46brS8VI65T5s27YtYn7K4VBuQmKpmYdyHZQDofM9cX3L1Fw/ab3WlXOTmmPo0/FLfH/WZ+LEiUnS6hqNlrMXbf+k9dzs1atXmnLdFy9e7NLro1xW5QRpHRLnBuYPO0a6HrXvVPFfvx10bFKSEffaWKDoLc7o5p5SZe600ANp8+bNgReVbuCismrVHUhrFm5ydKNVperEFXf1gPHHnwu6sYQ/MLQPVKSpugLJ7YOsuB/9fZS4SECBgCpjJt6HKr5M/EBRcKd
1z0hpWU7iVpzal34AFUTHS/NTPSOlU10J3WxVb0VFy9r2tK5Pes9H1ddR0YimTUzHRQ+59BxDUaCkIrNwfr22xNuSUh0Pv+jH/4GQWuHBn78sCV+e6u6pnpmKy/yALPxYhVP9sGgVi1MzD7/OTHr7xUrrtR6thXFyFPjox1FqBM072v5J67mZ1vVWMe7f/vY3FwBv2bLF/a3jm3g9du/e7YoK33rrrSTnW+LjHE1G3GtjgUDpPKdf5oru77rrrqjjq1WrZtmNbgTiN3nXPtCF+89//jNq+tT8MouX/RhUNyW8YntmLydxKyI/t0i/ZtVsOblcQdU3Uq6DmrbrV7GmUQX6119/PaIuVGZtd2ZIb6tHBV1+faq0dN+R0r5THRnVz1Mgpu5LVFdKwZwq1KseWOLGDNFajaV1HumV1ms9uRZuv1XQvDNimWmdhxoP+AGectB0rqgemOrJDR8+3A1XEKV7nOq56ZgojSrSq16m6kql5hhlxL02FgiUznO6ISmHJaVfQUqn4gVdJMnlhqSl+KhChQruF70unvBfSn42u8ZnNG2rHqrKgfB/jWnbVIR1+eWXp3iDCdq+WO1Hfx+p0ml4LoqK41REkdpft1mJX3FdD83UrL8q9arYSh/9IlUlbrVqTGvl9PSej7q567zxc8LC6bikZrl+WlVWTzx9Rl0HrVq1crlB//rXv1xLqozqZkKtF1VEquBULbV8Ov8yeh7+uaEfO8mdG8ldp6m91rOSzL5XqthXgav6c7v11ltdQKQAe8eOHa6lXXiDIBUtZvf9Tx2l85x+ka9Zs8Y9vKP9ylPrB7/MW78g1boquV/luqASN0sPomKSPXv2RLSe0fLUikI5BrpQM5KKR9TyT0GKWq35F7P2gX4tqfVVYlqf8O0J2r5Y7Uc9LJQ9/r//+78R06tVn7K4dcOLNypC0w1VLbr8YtJwftN0HbPE2f36taqiTbWeS6v0no8KOPQrXD2pq2jCp5ZY0c6HxFSUrvWeNm1axHqr7qHmkVHHUE3NlROgeer/aLlpr7zyimvJlBZ+wBU+PwXq6rIio+ehIFjFSmp5l/j6SHz9SOI0abnWs5LMvleKzhEFr88880zgMdLffrcM4bLb/idHKc7o5pm4cqSoQl3iehmpMWLECFferGxWZZ/qIaVKevr1oCbUag6sbFn1P6MgQw9k/XJWU2v9ulFTbY1Tk1XR9PrFoM7s9MDSTU3l30GVRNUviZarfkPUb4mWqSaiuhGmtdJpOGUH66YvetiqKwBVJNTNRl0M6FeSTzcZfVezbFUkVjNvVZTUdmoa3QjUS7a/fWp6rzJ8Fd3pAadcgFjtR+VmqAsCBV6aV9euXV0uhB4wamqe0R1CZgb9YlbPwMoRUvcNqlSqehs6pqrsqZymefPmubo2qnukY6M+ZfTA0D5TtwhqLp9Wv+V81P5ftGiRKwZUzpb/ENP6p1T/S+eaigu1nToXVWHX7x5A66CuMTKKzlPVBdL+0b7UvlMlYV0XCvQUJKlbi7TQvUc5VaorpsrF+gGiysBpKdZM7Tx0buj6u/baa12xrPaZchR1T9R2+YGprh/RvBTE6iGv5vZpudbTQ8fdv+8kpm5JgjqVTMm5vFcG0fWnumCPPfaY65dNRW36AaNuPHQt6jpU45RodeNitf/PmVg3u8Nv7x5AH41PT/cAcvToUW/UqFFelSpVXFPzYsWKeS1btvQeffRR1wTVp6asjzzyiFejRg2Xrnjx4l6nTp28DRs2hNJs27bNa926tZc/f363vJS6Cti7d683YMAAt0zNs27duhHbktI2ReM369VHzVbVVFbN+m+55Rbvww8/DJxu+vTprom11r1QoUJuXe666y7v+++/D6XZs2ePWw+N1/zDuwrIjP2YuHuA8O4ANL88efJ4JUuW9G6//fYkzae1rtoPiWne2mcZ2T1Aapbjdw8we/bsqPPZtGmT17NnT69o0aKuCwxNe8MNN3hLly4NNfEfMWKEV79
+fXc81KWC/n7yySfTvd2pPR+jbf+KFSvc+aPp1OWBmngrTWpvs7NmzXLN/LWtF198sXfjjTd63377bZJ11nYmlpblyGuvveaahGs5uXPn9kqXLu317t3bW758eSiNf66tW7cuYlr/uOl/3/vvv+81b97cna9lypRx180777yTJF3QsUjLPGTVqlWumwj/uNerV8974oknIq6xwYMHu2tL94DE+yY113pa7jkpdQ8Qfs0G7deU9k9qzk2/ewDdX1Irue184YUXIp4v6vJC3UWoSwKth+6pH3/8cZJnUEbs/6wkh/6JdbAGAACQFVFHCQAAIACBEgAAQAACJQAAgAAESgAAAAEIlAAAAAIQKAEAAASgw8l0UieBemO5OvpKz1vfAQBA5lOvSOqwVp35Jn7RcDQESumkIEnvCwMAAPHnm2++cb37p4RAKZ38LuO1o9WVOwAAyPqOHDniMjpS++oXAqV08ovbFCQRKAEAEF9SW22GytwAAAABCJQAAAACECgBAAAEIFACAAAIQKAEAAAQgEAJAAAgAIESAABAAAIlAACAAARKAAAAAeiZGwAAZBlnznq2dtdB23f0pJUolGBNK11suXLG7uXzBEoAACBLWLTlB7t/3mf2w+GToWGliyTYuGtrWcc6pWOyThS9AQCALBEk3f7KxoggSfYcPumGa3wsECgBAICYF7fdP+8z86KM84dpvNJlNgIlAAAQU2t3HUySkxRO4ZHGK11mI1ACAAAxte/oyQxNl5EIlAAAQEyVKJSQoekyEoESAACIqaaVLnat24I6AdBwjVe6zEagBAAAYipXzhyuCwBJHCz53zU+Fv0pESgBAICY61intD31h8usVJHI4jV91/BY9aNEh5MAACBL6FintF1dqxQ9cwMAAESj4rUWlxa1rIKiNwAAgKwaKE2dOtUqVqxoCQkJ1qxZM1u7dm2y6WfPnm01atRw6evWrWsLFy5Mkmbr1q3WtWtXK1KkiBUoUMCaNGliu3fvDo3fs2eP3XTTTVaqVCk3/rLLLrM5c+ack+0DAADxK6aB0qxZs2z48OE2btw427hxo9WvX986dOhg+/bti5p+9erV1rdvXxs4cKBt2rTJunfv7j5btmwJpdm5c6e1atXKBVPLly+3zZs327333usCK1+/fv1s+/bt9tZbb9knn3xiPXv2tBtuuMHNEwAAwJfD87zMf3HK/6ccJOX2TJkyxX0/e/aslStXzgYPHmwjR45Mkr537952/Phxmz9/fmhY8+bNrUGDBjZt2jT3vU+fPpYnTx57+eWXA5dbsGBBe+qpp1yukq9o0aL20EMP2R//+MdUrfuRI0dcjtXhw4etcOHCadpuAAAQG2l9fscsR+n06dO2YcMGa9eu3X9XJmdO933NmjVRp9Hw8PSiHCg/vQKtBQsWWLVq1dzwEiVKuGBs7ty5EdO0bNnS5WYdPHjQTTNz5kw7efKkXXHFFedkWwEAQHyKWaB04MABO3PmjJUsWTJiuL6rDlE0Gp5cehXZHTt2zCZOnGgdO3a0xYsXW48ePVzR2ooVK0LT/Pvf/7ZffvnF5SLly5fPbr31VnvjjTesSpUqget76tQpF4WGfwAAQPaWrfpRUu6QdOvWzYYNG+b+VrGc6japaK5NmzZumOosHTp0yN59910rVqyYy3FSHaWVK1e6CuLRTJgwwe6///5M3BoAAHDe5igpQMmVK5ft3bs3Yri+qzVaNBqeXHrNM3fu3Far1v91g+6rWbNmqNWbKnurTtTzzz9vV111latArsrkjRs3di3wgowaNcqVZ/qfb775Jt3bDgAA4kPMAqW8efNao0aNbOnSpRE5QvreokWLqNNoeHh6WbJkSSi95qnK4WrRFm7Hjh1WoUIF9/eJEydC9aHCKWjzc6SiURGdKn2FfwAAQPYW06I3dQ3Qv39/l5vTtGlTmzx5smvVNmDAgFAz/rJly7piLxk6dKgrPps0aZJ17tzZVcJev369TZ8+PTTPESN
GuNZxrVu3trZt29qiRYts3rx5rqsAUbcBqoukekmPPvqoq6ekojcFXOGt6QAAAMyLsSeeeMIrX768lzdvXq9p06beBx98EBrXpk0br3///hHp//3vf3vVqlVz6WvXru0tWLAgyTyfe+45r0qVKl5CQoJXv359b+7cuRHjd+zY4fXs2dMrUaKEd8EFF3j16tXzXnrppTSt9+HDh9WtgvsfAADEh7Q+v2Paj1I8ox8lAADiT9z0owQAAJDVESgBAAAEIFACAAAIQKAEAAAQgEAJAAAgAIESAABAAAIlAACAAARKAAAAAQiUAAAAAhAoAQAABCBQAgAACECgBAAAEIBACQAAIACBEgAAQAACJQAAgAAESgAAAAEIlAAAAAIQKAEAAAQgUAIAAAhAoAQAABCAQAkAACAAgRIAAEAAAiUAAIAABEoAAAABCJQAAAACECgBAAAEIFACAAAIQKAEAAAQgEAJAAAgAIESAABAAAIlAACAAARKAAAAAQiUAAAAAhAoAQAABCBQAgAACECgBAAAEIBACQAAIACBEgAAQAACJQAAgAAESgAAAAEIlAAAAAIQKAEAAAQgUAIAAAhAoAQAABCAQAkAACCrBkpTp061ihUrWkJCgjVr1szWrl2bbPrZs2dbjRo1XPq6devawoULk6TZunWrde3a1YoUKWIFChSwJk2a2O7duyPSrFmzxq688ko3vnDhwta6dWv7+eefM3z7AABA/IppoDRr1iwbPny4jRs3zjZu3Gj169e3Dh062L59+6KmX716tfXt29cGDhxomzZtsu7du7vPli1bQml27txprVq1csHU8uXLbfPmzXbvvfe6wCo8SOrYsaO1b9/eBWbr1q2zO++803LmjHncCAAAspAcnud5sVq4cpCU2zNlyhT3/ezZs1auXDkbPHiwjRw5Mkn63r172/Hjx23+/PmhYc2bN7cGDRrYtGnT3Pc+ffpYnjx57OWXXw5crqa5+uqrbfz48ele9yNHjrgcq8OHD7scKQAAkPWl9fkdsyyU06dP24YNG6xdu3b/XZmcOd135fhEo+Hh6UU5UH56BVoLFiywatWqueElSpRwwdjcuXND6ZVb9eGHH7pxLVu2tJIlS1qbNm1s1apVya7vqVOn3M4N/wAAgOwtZoHSgQMH7MyZMy5QCafve/bsiTqNhieXXkHQsWPHbOLEia5obfHixdajRw/r2bOnrVixwqX58ssv3f/33Xef3XLLLbZo0SK77LLL7KqrrrLPP/88cH0nTJjgIlD/o5wvAACQvWWrSjnKUZJu3brZsGHDXJGcivC6dOkSKprz09x66602YMAAa9iwoT3++ONWvXp1e/755wPnPWrUKJdN53+++eabTNoqAAAQK7ljteBixYpZrly5bO/evRHD9b1UqVJRp9Hw5NJrnrlz57ZatWpFpKlZs2aoaK106dLu/2hpEreMC5cvXz73AQAA54+Y5SjlzZvXGjVqZEuXLg0NU26Pvrdo0SLqNBoenl6WLFkSSq95qnL49u3bI9Ls2LHDKlSo4P5WVwRlypRJNg0AAEBMc5REXQP079/fGjdubE2bNrXJkye7Vm0qEpN+/fpZ2bJlXf0gGTp0qKt4PWnSJOvcubPNnDnT1q9fb9OnTw/Nc8SIEa51nPpFatu2rauDNG/ePNdVgOTIkcOlUZcE6o5AxXMvvviibdu2zV577bUY7QkAAJAVxTRQUkCzf/9+Gzt2rKuQraBFgY1fYVtFYeF9G6mV2quvvmpjxoyx0aNHW9WqVV2Ltjp16oTSqPK26iMpuBoyZIirezRnzhzXt5Lvz3/+s508edLVYzp48KALmJQzdemll2byHgAAAFlZTPtRimf0owQAQPyJm36UAAAAsjoCJQAAgAAESgAAAAEIlAAAAAIQKAEAAAQgUAIAAAhAoAQAABCAQAkAACAAgRIAAEAAAiUAAIAABEoAAAABCJQAAAACECgBAAAEIFACAAAIQKAEAAAQgEAJAAA
gAIESAABAAAIlAACAAARKAAAAAQiUAAAAAhAoAQAABCBQAgAACECgBAAAEIBACQAAIACBEgAAQAACJQAAgAAESgAAAAEIlAAAAAIQKAEAAAQgUAIAAAhAoAQAABCAQAkAACAAgRIAAEAAAiUAAIAABEoAAAABCJQAAAACECgBAAAEIFACAAAIQKAEAAAQgEAJAAAgAIESAABAAAIlAACAAARKAAAAAQiUAAAAsnKgNHXqVKtYsaIlJCRYs2bNbO3atcmmnz17ttWoUcOlr1u3ri1cuDBJmq1bt1rXrl2tSJEiVqBAAWvSpInt3r07STrP86xTp06WI0cOmzt3boZuFwAAiG8xD5RmzZplw4cPt3HjxtnGjRutfv361qFDB9u3b1/U9KtXr7a+ffvawIEDbdOmTda9e3f32bJlSyjNzp07rVWrVi6YWr58uW3evNnuvfdeF1glNnnyZBckAQAAJJbDU5ZKDCkHSbk9U6ZMcd/Pnj1r5cqVs8GDB9vIkSOTpO/du7cdP37c5s+fHxrWvHlza9CggU2bNs1979Onj+XJk8defvnlZJf90UcfWZcuXWz9+vVWunRpe+ONN1zQlRpHjhxxuVWHDx+2woULp3GrAQBALKT1+R3THKXTp0/bhg0brF27dv9doZw53fc1a9ZEnUbDw9OLcqD89Aq0FixYYNWqVXPDS5Qo4YKxxMVqJ06csN///veu2K9UqVIpruupU6fczg3/AACA7C2mgdKBAwfszJkzVrJkyYjh+r5nz56o02h4culVZHfs2DGbOHGidezY0RYvXmw9evSwnj172ooVK0LTDBs2zFq2bGndunVL1bpOmDDBRaD+R7leAAAge8tt2YxylEQBkIIhUbGc6japaK5Nmzb21ltv2bJly1wdp9QaNWqUq0vlU44SwRIAANlbTHOUihUrZrly5bK9e/dGDNf3oOIwDU8uveaZO3duq1WrVkSamjVrhlq9KUhShe8LL7zQpdVHevXqZVdccUXU5ebLl8+VZYZ/AABA9hbTQClv3rzWqFEjW7p0aUSOkL63aNEi6jQaHp5elixZEkqveapy+Pbt2yPS7NixwypUqOD+ViVxtYRTZW7/I48//rjNmDEjw7cTAADEp5gXvak4q3///ta4cWNr2rSpa66vVm0DBgxw4/v162dly5Z1dYRk6NChrvhs0qRJ1rlzZ5s5c6ZrtTZ9+vTQPEeMGOFax7Vu3dratm1rixYtsnnz5rmuAkS5T9FyrMqXL2+VKlXKtG0HAABZW8wDJQU0+/fvt7Fjx7oK2apPpMDGr7Ct4jK1hPOpAvarr75qY8aMsdGjR1vVqlVdi7Y6deqE0qjytuojKbgaMmSIVa9e3ebMmeP6VgIAAIibfpTiFf0oAQAQf+KqHyUAAICsjEAJAAAgAIESAABAAAIlAACAAARKAAAAAQiUAAAAAhAoAQAABCBQAgAACECgBAAAEIBACQAAIACBEgAAQAACJQAAgAAESgAAAAEIlAAAAAIQKAEAAAQgUAIAAAhAoAQAABCAQAkAACAAgRIAAEAAAiUAAIAABEoAAAABCJQAAAACECgBAAAEIFACAAAIQKAEAAAQgEAJAAAgMwKlkydP2qOPPpqRswQAAIifQGn//v02f/58W7x4sZ05c8YN++WXX+wf//iHVaxY0SZOnHgu1hMAACDT5U5L4lWrVlmXLl3syJEjliNHDmvcuLHNmDHDunfvbrlz57b77rvP+vfvf+7WFgAAIKvmKI0ZM8auueYa27x5sw0fPtzWrVtnPXr0sAcffNA+++wzu+222yx//vznbm0BAAAyUQ7P87zUJi5atKitXLnSatWqZT///LMVLFjQXn/9devWrZudb5SrVqRIETt8+LAVLlw41qsDAADOwfM7TTlKP/30kxUrVsz9rZyjCy64wOrUqZOWWQAAAGTPOkqiIrY9e/a4v5UZtX37djt+/HhEmnr16mXcGgIAAMRD0VvOnDldJe5ok/j
D9b/fGi47o+gNAIDs//xOU47Srl27fsu6AQAAxJU0BUoVKlQ4d2sCAACQxaSpMvfDDz/sWrv53n//fTt16lTo+9GjR+2OO+7I2DUEAACIhzpKuXLlsh9++MFKlCjhvqts76OPPrLKlSu773v37rUyZcpQRwkAAJx/3QMkjqnSEGMBAACc3y/FBQAAyE4IlAAAADKqw8lnn33WvbpEfv31V3vhhRdCvXWrMjcAAMB5WZm7YsWKrkPJlJwP/S3R4SQAAPHnnHY4+dVXX/2WdQMAAMi+dZSWLVtmtWrVctFYYorMateubStXrkzzSkydOtXlViUkJFizZs1s7dq1yaafPXu21ahRw6WvW7euLVy4MEmarVu3WteuXV3UWKBAAWvSpInt3r3bjTt48KANHjzYqlev7l7uW758eRsyZIjbBgAAgHQFSpMnT7ZbbrklalaVApJbb73VHnvssbTM0mbNmmXDhw+3cePG2caNG61+/frWoUMH27dvX9T0q1evtr59+9rAgQNt06ZN1r17d/fZsmVLKM3OnTutVatWLphavny5bd682e69914XWMn333/vPo8++qibTvWsFi1a5OYJAACQrjpKeoWJAoqaNWtGHb9t2zZr3759KOcmNZSDpNyeKVOmuO9nz561cuXKuRyfkSNHJknfu3dvO378uM2fPz80rHnz5tagQQObNm2a+96nTx/LkyePvfzyy6leD+VS/eEPf3Dzzp075RJJ6igBABB/zmmHk+p5WwFIEAUY+/fvT/X8Tp8+bRs2bLB27dr9d4Vy5nTf16xZE3UaDQ9PL8qB8tMr0FqwYIFVq1bNDVcv4grG5s6dm+y6+DssNUESAAA4P6QpUCpbtmxEEVdiKuIqXbp0qud34MAB97qTkiVLRgzX9z179kSdRsOTS68iu2PHjtnEiROtY8eOtnjxYuvRo4f17NnTVqxYEbge48ePtz/96U+B66p32ikKDf8AAIDsLU2B0jXXXOPq+pw8eTLJOL0sV/WMunTpYrGkHCXp1q2bDRs2zBXJqQhP6+UXzYVTwNO5c2dXSf2+++4LnO+ECRNcVp3/UfEgAADI3tIUKI0ZM8a1GFOx1sMPP2xvvvmm+zz00EOuBZnG3XPPPamenzqq1It2VaQXTt9LlSoVdRoNTy695qniMwU+4VSvKnHdKXWQqVynQoUK2RtvvJFsseKoUaNc8Zz/+eabb1K9nQAA4DwIlFTEpVZnderUcYGDirT0GT16tBu2atWqJMViycmbN681atTIli5dGpEjpO8tWrSIOo2Gh6eXJUuWhNJrnqocvn379og0O3bscJXRw3OSVPFc6d96661Qi7gg+fLlc3WYwj8AACB7S3PNZQUb6rfop59+si+++MLUaK5q1ap20UUXpWsF1DVA//79rXHjxta0aVPXBYFang0YMMCN79evn6sbpaIvGTp0qLVp08YmTZrkisxmzpxp69evt+nTp4fmOWLECNc6rnXr1ta2bVvXUm/evHmuq4DwIOnEiRP2yiuvRNQ5Kl68uMvlAgAAUKATc0888YRXvnx5L2/evF7Tpk29Dz74IDSuTZs2Xv/+/SPS//vf//aqVavm0teuXdtbsGBBknk+99xzXpUqVbyEhASvfv363ty5c0Pj3nvvPXWJEPWza9euVK3z4cOHXXr9DwAA4kNan99p6kcJ/0U/SgAAxJ9z2o8SAADA+YRACQAAIACBEgAAQAACJQAAgAAESgAAAAEIlAAAAAIQKAEAAAQgUAIAAAhAoAQAABCAQAkAACAAgRIAAEAAAiUAAIAABEoAAAABCJQAAAACECgBAAAEIFACAAAIQKAEAAAQgEAJAAAgAIESAABAAAIlAACAAARKAAAAAQiUAAAAAhAoAQAABCBQAgAACECgBAAAEIBACQAAIACBEgAAQAACJQAAgAAESgAAAAEIlAAAAAIQKAEAAAQgUAIAAAhAoAQAABCAQAkAACAAgRIAAEAAAiUAAIAABEo
AAAABCJQAAAACECgBAAAEIFACAAAIQKAEAAAQgEAJAAAgAIESAABAAAIlAACAAARKAAAAWTlQmjp1qlWsWNESEhKsWbNmtnbt2mTTz54922rUqOHS161b1xYuXJgkzdatW61r165WpEgRK1CggDVp0sR2794dGn/y5EkbNGiQFS1a1AoWLGi9evWyvXv3npPtAwAA8SnmgdKsWbNs+PDhNm7cONu4caPVr1/fOnToYPv27YuafvXq1da3b18bOHCgbdq0ybp37+4+W7ZsCaXZuXOntWrVygVTy5cvt82bN9u9997rAivfsGHDbN68eS7oWrFihX3//ffWs2fPTNlmAAAQH3J4nufFcgWUg6TcnilTprjvZ8+etXLlytngwYNt5MiRSdL37t3bjh8/bvPnzw8Na968uTVo0MCmTZvmvvfp08fy5MljL7/8ctRlHj582IoXL26vvvqqXXfddW7Ytm3brGbNmrZmzRo3v5QcOXLE5VZpXoULF0739gMAgMyT1ud3THOUTp8+bRs2bLB27dr9d4Vy5nTfFbBEo+Hh6UU5UH56BVoLFiywatWqueElSpRwwdjcuXND6bXMX375JWI+yn0qX7584HJPnTrldm74BwAAZG8xDZQOHDhgZ86csZIlS0YM1/c9e/ZEnUbDk0uvIrtjx47ZxIkTrWPHjrZ48WLr0aOHK1ZTEZs/j7x589qFF16Y6uVOmDDBRaD+R7leAAAge4t5HaWMphwl6datm6uHpCI5FeF16dIlVDSXHqNGjXLZdP7nm2++ycC1BgAAWVHuWC68WLFilitXriStzfS9VKlSUafR8OTSa565c+e2WrVqRaRR/aNVq1aF5qFiv0OHDkXkKiW33Hz58rkPAAA4f8Q0R0nFX40aNbKlS5dG5Ajpe4sWLaJOo+Hh6WXJkiWh9JqnKodv3749Is2OHTusQoUK7m8tU5W9w+ej9Oo+IGi5AADg/BPTHCVR1wD9+/e3xo0bW9OmTW3y5MmuVduAAQPc+H79+lnZsmVdHSEZOnSotWnTxiZNmmSdO3e2mTNn2vr162369OmheY4YMcK1jmvdurW1bdvWFi1a5LoCUFcBojpG6l5Ay7744otdrXe1slOQlJoWbwAA4PwQ80BJAc3+/ftt7NixriK16hQpsPErbCuXRy3hfC1btnTN+seMGWOjR4+2qlWruhZtderUCaVR5W3VR1JwNWTIEKtevbrNmTPH9a3ke/zxx9181dGkWrSphdyTTz6ZyVsPAACyspj3oxSv6EcJAID4E1f9KAEAAGRlBEoAAAABCJQAAAACECgBAAAEIFACAAAIQKAEAAAQgEAJAAAgAIESAABAAAIlAACAAARKAAAAAQiUAAAAAhAoAQAABCBQAgAACECgBAAAEIBACQAAIACBEgAAQAACJQAAgAAESgAAAAEIlAAAAAIQKAEAAAQgUAIAAAhAoAQAABCAQAkAACAAgRIAAEAAAiUAAIAABEoAAAABCJQAAAACECgBAAAEIFACAAAIQKAEAAAQgEAJAAAgAIESAABAAAIlAACAAARKAAAAAQiUAAAAAhAoAQAABCBQAgAACECgBAAAEIBACQAAIACBEgAAQAACJQAAgAAESgAAAAEIlAAAAAIQKAEAAGTlQGnq1KlWsWJFS0hIsGbNmtnatWuTTT979myrUaOGS1+3bl1buHBhxPibb77ZcuTIEfHp2LFjRJodO3ZYt27drFixYla4cGFr1aqVvffee+dk+wAAQHyKeaA0a9YsGz58uI0bN842btxo9evXtw4dOti+ffuipl+9erX17dvXBg4caJs2bbLu3bu7z5YtWyLSKTD64YcfQp9//etfEeO7dOliv/76qy1btsw2bNjglqthe/bsOafbCwAA4kcOz/O8WK6AcpCaNGliU6ZMcd/Pnj1r5cqVs8GDB9vIkSOTpO/du7cdP37c5s+fHxrWvHlza9CggU2bNi2Uo3To0CGbO3du1GUeOHDAihc
vbv/5z3/sd7/7nRt29OhRl7O0ZMkSa9euXYrrfeTIEStSpIgdPnzYTQcAALK+tD6/Y5qjdPr0aZebEx6Y5MyZ031fs2ZN1Gk0PHEgoxyoxOmXL19uJUqUsOrVq9vtt99uP/74Y2hc0aJF3fCXXnrJBV3KWXr66add+kaNGkVd7qlTp9zODf8AAIDsLXcsF66cnTNnzljJkiUjhuv7tm3bok6jorFo6cOLzFTs1rNnT6tUqZLt3LnTRo8ebZ06dXLBVK5cuVydpXfffdcV2RUqVMgFZwqSFi1aZBdddFHU5U6YMMHuv//+DNluAAAQH2IaKJ0rffr0Cf2tyt716tWzSy+91OUyXXXVVabSxkGDBrngaOXKlZY/f3579tln7dprr7V169ZZ6dKlk8xz1KhRri6VTzlKKiIEAADZV0yL3tTiTDk8e/fujRiu76VKlYo6jYanJb1UrlzZLeuLL75w31WBW3WcZs6caZdffrlddtll9uSTT7qA6cUXX4w6j3z58rmyzPAPAADI3mIaKOXNm9fVCVq6dGlomCpz63uLFi2iTqPh4elFFbCD0su3337r6ij5OUUnTpxw/6vILZy+a/kAAAAuNoj1blBx1jPPPONycrZu3eoqXquC9YABA9z4fv36uWIv39ChQ11dokmTJrl6TPfdd5+tX7/e7rzzTjf+2LFjNmLECPvggw/sq6++ckGV+kuqUqWKq/QtCqpUF6l///728ccfuz6VNM2uXbusc+fOMdoTAAAgq4l5HSU199+/f7+NHTvWVchWM38FQn6F7d27d0fk/LRs2dJeffVVGzNmjKukXbVqVdcNQJ06ddx4FeVt3rzZBV7qIqBMmTLWvn17Gz9+vCs+ExXDaRn33HOPXXnllfbLL79Y7dq17c0333T9KQEAAGSJfpTiFf0oAQAQf+KqHyUAAICsjEAJAAAgAIESAABAAAIlAACAAARKAAAAAQiUAAAAAhAoAQAABCBQAgAACECgBAAAEIBACQAAIACBEgAAQAACJQAAgAAESgAAAAEIlAAAAAIQKAEAAAQgUAIAAAhAoAQAABCAQAkAACAAgRIAAEAAAiUAAIAABEoAAAABCJQAAAACECgBAAAEIFACAAAIQKAEAAAQgEAJAAAgAIESAABAAAIlAACAAARKAAAAAQiUAAAAAhAoAQAABCBQAgAACECgBAAAEIBACQAAIACBEgAAQIDcQSOQ+c6c9WztroO27+hJK1EowZpWuthy5czBoQAAIEYIlLKIRVt+sPvnfWY/HD4ZGla6SIKNu7aWdaxTOqbrBgDA+YqitywSJN3+ysaIIEn2HD7phms8AADIfARKWaC4TTlJXpRx/jCNVzoAAJC5CJRiTHWSEuckhVN4pPFKBwAAMheBUoyp4nZGpgMAABmHQCnG1LotI9MBAICMQ6AUY+oCQK3bgjoB0HCNVzoAAJC5CJRiTP0kqQsASRws+d81nv6UAAA4TwOlqVOnWsWKFS0hIcGaNWtma9euTTb97NmzrUaNGi593bp1beHChRHjb775ZsuRI0fEp2PHjknms2DBAre8/Pnz20UXXWTdu3e3WFA/SU/94TIrVSSyeE3fNZx+lAAAOE87nJw1a5YNHz7cpk2b5oKWyZMnW4cOHWz79u1WokSJJOlXr15tffv2tQkTJliXLl3s1VdfdQHOxo0brU6dOqF0CoxmzJgR+p4vX76I+cyZM8duueUWe/DBB+3KK6+0X3/91bZs2WKxomDo6lql6JkbAIAsJIfneTHtoEfBUZMmTWzKlCnu+9mzZ61cuXI2ePBgGzlyZJL0vXv3tuPHj9v8+fNDw5o3b24NGjRwwZafo3To0CGbO3du1GUqKFIO1v33328DBw5M13ofOXLEihQpYocPH7bChQunax4AACBzpfX5HdOit9OnT9uGDRusXbt2/12hnDnd9zVr1kSdRsPD04tyoBKnX758ucuRql69ut1+++32448/hsYp9+m7775zy2r
YsKGVLl3aOnXqlGyO0qlTp9zODf8AAIDsLaaB0oEDB+zMmTNWsmTJiOH6vmfPnqjTaHhK6VXs9tJLL9nSpUvtoYceshUrVrhASMuSL7/80v1/33332ZgxY1zulOooXXHFFXbwYPSOHVXUpwjU/yjXCwAAZG9ZojJ3RuvTp4917drVVfRW/SUFQuvWrXO5TH7xntxzzz3Wq1cva9SokavPpErfqigezahRo1w2nf/55ptvMnWbAADAeRYoFStWzHLlymV79+6NGK7vpUqVijqNhqclvVSuXNkt64svvnDfVdQmtWr9X7N8v7K30u3evTvqPDReZZnhHwAAkL3FNFDKmzevy81REZlPuT363qJFi6jTaHh4elmyZElgevn2229dHSU/QNIyFfioZZ3vl19+sa+++soqVKiQAVsGAACyg5h3D6CuAfr372+NGze2pk2buu4B1KptwIABbny/fv2sbNmyro6QDB061Nq0aWOTJk2yzp0728yZM239+vU2ffp0N/7YsWOuNZuK1JTLtHPnTrvrrrusSpUqrtK3KDfotttus3Hjxrm6RgqOHnnkETfu+uuvj9m+AAAAWUvMAyU199+/f7+NHTvWVchWM/9FixaFKmyrKEyt03wtW7Z0fSepEvbo0aOtatWqrhsAvw8lFeVt3rzZXnzxRddFQJkyZax9+/Y2fvz4iL6UFBjlzp3bbrrpJvv5559dNwXLli1zlboBAACyRD9K8Yp+lAAAiD9x1Y8SAABAVhbzord45WfE0fEkAADxw39up7ZAjUApnY4ePer+p+NJAADi8zmuIriUUEcpndSNwffff2+FChVyHVVmxYhZQZw6xjyf+nw6X7db2Pbz77hzzDnm55MjGXR/V06SgiQ19gpvLBaEHKV00s695JJLLKs7XzvHPF+3W9j28++4c8w55ueTwhlwf09NTpKPytwAAAABCJQAAAACEChlU+pcUz2Ph3eyeT44X7db2Pbz77hzzDnm55N8Mbq/U5kbAAAgADlKAAAAAQiUAAAAAhAoAQAABCBQAgAACECglEVNnTrVKlasaAkJCdasWTNbu3ZtqqabOXOm6ym8e/fuoWG//PKL3X333Va3bl0rUKCA6420X79+rmfxcFqepg3/TJw40eJ52+Xmm29Osl0dO3aMSHPw4EG78cYbXSdmF154oQ0cONCOHTtm8bzdibfZ/zzyyCNxfcxfeOGFJOus6RL3vDt27FgrXbq05c+f39q1a2eff/55XB/zlLY7O1/nqTnm8XKdn4ttj5drfWoa73GHDh2yQYMGuetYLd2qVatmCxcuTNM8T5486eZRtGhRK1iwoPXq1cv27t2bthX3kOXMnDnTy5s3r/f88897n376qXfLLbd4F154obd3795kp9u1a5dXtmxZ73e/+53XrVu30PBDhw557dq182bNmuVt27bNW7Nmjde0aVOvUaNGEdNXqFDBe+CBB7wffvgh9Dl27JgXz9su/fv39zp27BixXQcPHoxIo/H169f3PvjgA2/lypVelSpVvL59+3rxvN3h26uP5p0jRw5v586dcX3MZ8yY4RUuXDhinffs2RORZuLEiV6RIkW8uXPneh9//LHXtWtXr1KlSt7PP/8ct8c8pe3Oztd5ao55PFzn52rb4+Fan5nG7T516pTXuHFj75prrvFWrVrl7nXLly/3PvroozTN87bbbvPKlSvnLV261Fu/fr3XvHlzr2XLlmladwKlLEg3t0GDBoW+nzlzxitTpow3YcKEwGl+/fVXd/CfffZZd8NI/NBMbO3atXptsvf1119HXEiPP/64l922PaX98dlnn7l9sW7dutCwt99+291ovvvuOy+7HHONv/LKKyOGxeMx14NDQVCQs2fPeqVKlfIeeeSRiCAiX7583r/+9a+4PeYpbXd2vs5Ts+3xcJ1n1nHPitd60zRu91NPPeVVrlzZO336dLrnqes+T5483uzZs0Nptm7d6s4D/ZBILYrespjTp0/
bhg0bXFFB+Hvl9H3NmjWB0z3wwANWokQJl5WcGocPH3ZZr8p+DqesWGVRNmzY0GXb/vrrr5Ydtn358uUuTfXq1e3222+3H3/8MTRO89Z+aNy4cWiYlqllf/jhh5YdjrmymhcsWBA1bTwecxWXVKhQwb0gs1u3bvbpp5+Gxu3atcv27NkTMU+910nZ8v484/WYJ7fd2f06T822Z+XrPLOOe1a81k+nY7vfeusta9GihSs2K1mypNWpU8cefPBBO3PmTKrnqfEqkg5PU6NGDStfvnyy+zsxXoqbxRw4cMCdCDoxwun7tm3bok6zatUqe+655+yjjz5K1TJUZqu6DH379o14seCQIUPssssus4svvthWr15to0aNsh9++MEee+wxi+dtVz2Fnj17WqVKlWznzp02evRo69Spk7tQcuXK5R6qurmGy507t9sPGpcdjvmLL75ohQoVcvshXDwecz0En3/+eatXr54LBB599FFr2bKle3joRdX+MYs2T39cPB7zlLY7O1/nqdn2rH6dZ9Zxz4rX+oF0bPeXX35py5Ytc3XKVC/piy++sDvuuMMFPuqdOzXz1HHNmzdvkh8K4feC1CBQinNHjx61m266yZ555hkrVqxYiul1kt1www2usutTTz0VMW748OGhv3VR6gS79dZbbcKECVny1RCp3fY+ffqE/lZFV23bpZde6n59XnXVVZbdj7noRqsbTuJKoPF2zEW/MvXx6aFRs2ZNe/rpp238+PGWXaVlu7PTdZ7abc9u13l6z/fscq2fPXvWBbbTp093gW6jRo3su+++czlhCpQyE4FSFqMHn06KxLXy9b1UqVJJ0uuX01dffWXXXnttxAnm/1ravn27u1mE3zy//vprF6mH/8qMRkUVyprV/PWrJp63PVzlypXdsvQLRTdQzXvfvn0RabTdaiETbbnxtt0rV650w2bNmpXiumT1Yx5Nnjx5XFGCjqf402keai0TPs8GDRqE0sTTMU/Ndvuy23Welm3Pytd5Zmx7Vr3Wi6Vju3Xtals1nU8BonKCVOyWmnnqf6VV67nwXKW07G+hjlIWoyhfkfPSpUsjHoL6Hv6rIry89ZNPPnFFMP6na9eu1rZtW/e3yrTDb55qIv3uu++6cuqUaHqV+SbOro63bU/s22+/dXUX/Ieo5q0LSeXZPj1gtGzdTOJ9u1VEp/nXr18/7o95NMp+1/7wj6eKXnQTDJ/nkSNHXD0Uf57xdsxTs93Z9TpP7bZn9es8M7Y9q17redOx3ZdffrkLBv0fgbJjxw633Zpfauap8Qq2wtMokNy9e3eq97eT6mrfyDRq8qgWOi+88IJrqfGnP/3JNXn0m4TedNNN3siRI1Pd+kOtBtQ8+pJLLnFNK8Obh6oJpqxevdq1iNB4NSl95ZVXvOLFi3v9+vXz4nnbjx496v31r391LRzUvPTdd9/1LrvsMq9q1areyZMnI5oNN2zY0Pvwww9dU1SNz+ym4hm53b7Dhw97F1xwgWtBkli8HvP777/fe+edd9w6b9iwwevTp4+XkJDgmgeHdw+gebz55pve5s2b3b6J1j1APB3zlLY7O1/nKW17vFzn52Lb4+Van5nG7d69e7dXqFAh78477/S2b9/uzZ8/3ytRooT3t7/9LdXz9LsHKF++vLds2TLXPUCLFi3cJy0IlLKoJ554wh1c9RGhJpDq98PXpk0b92BM7UNTNw7FxNE+7733nkujC7BZs2auGaouwpo1a3oPPvhgxE0mHrf9xIkTXvv27d1NQc1E1URWfW0k7ofkxx9/dDfMggULuj5LBgwY4G6+8brdvqefftrLnz+/ayabWLwe8z//+c+htCVLlnT9rGzcuDFJFwH33nuvG68b6VVXXeVutvF8zFPa7ux8nae07fF0nZ+L8z1ervUn0niPU4Cn9dY1rK4C/v73v7tuUVI7T9GPozvuuMO76KKLXCDZo0cP9+MhLXLon7RmowEAAJwPqKMEAAAQgEAJAAAgAIE
SAABAAAIlAACAAARKAAAAAQiUAAAAAhAoAQAABCBQAhBTL7zwQpK3e59vcuTIYXPnzs3UZeodX1quXmXxW1SsWNEmT56c5bYPyCgESkA2cfPNN7sHkj56v1HJkiXt6quvdm8TD39fUmrcd999oRfIZqRoD9XevXu7dzida1dccUVo/+ij/XP99de7l8dm1jITfzQeQNZGoARkIx07drQffvjB5Ra8/fbb7kW5Q4cOtS5durg3hWdF+fPnz7QXst5yyy1u/3z//ff25ptv2jfffGN/+MMfztnyXn/9dbc8fdauXeuG6WW1/jCNTw+9UCGrHk8guyFQArKRfPnyWalSpaxs2bJ22WWX2ejRo11AoKBJRVw+vUX9j3/8oxUvXtwKFy5sV155pX388cdunNLdf//97ruf8+FPm9x0vnnz5lmTJk0sISHBihUrZj169HDDlXui3Jthw4aF5htU9PbUU0/ZpZde6t4QXr16dXv55ZcjxmvaZ5991s37ggsusKpVq9pbb72V4v5RWu0fvYG8efPmduedd9rGjRsj0qxYscKaNm3q9qXSjRw5MhSUvPTSS1awYEH7/PPPQ+nvuOMOq1Gjhp04cSLJ8i6++GK3PH20z6Ro0aKhYRrvO3DgQOD2LF++3G2zjqPeiK51W7VqlcspnDBhglWqVMkFnHpr/GuvvRaa7qeffrIbb7zRLVvjNd8ZM2ZErOOXX37pAmotV9OvWbMmYvycOXOsdu3abpnKEZw0aVKy+1j7pnXr1u7416pVy5YsWZLicQGytPS82A5A1hP0YlypX7++16lTp9D3du3aeddee623bt06b8eOHd5f/vIXr2jRou6loXrBqL7Xrl079PZ5DUtpOtEbvnPlyuWNHTvWvc1bbyvXyzdFafRm+wceeCA0X5kxY4Z7Wafv9ddfdy82nTp1qnuR7aRJk9w89fZvn25dmterr77qff75596QIUPci0799YhGL90cOnRo6LvSalvatm0bGvbtt9+6F2fqJZpbt2713njjDa9YsWLeuHHjQmmuv/56r0mTJt4vv/zitlfrqreSp8R/ae2mTZuSjEtpe/RSW6WpV6+et3jxYu+LL75w4/Qm9Ro1aniLFi1yb4XXvtQLRJcvX+6mGzRokNegQQN3vLT8JUuWeG+99VbE+mh6bYf29XXXXedeKKttE21Xzpw53THTeM1fL17V/z6l15vp5cyZM16dOnXcS4h17FesWOE1bNjQLUf7EohHBErAeRAo9e7d270xXFauXOnenJ74zeGXXnqpewO5KDBQcBUuNdO1aNHCu/HGGwPXMfyh6kscKLVs2dK9+T2cghO9Nd2nB++YMWNC348dO+aGvf3228kGSgpqChQo4IIhpa9WrZoLGHyjR4/2qlev7p09ezY0TAGbghYFAXLw4EEX1Nx+++3ube56o3lqpBQoJbc9fqA0d+7cUBodB22H3rAebuDAgV7fvn3d3woEBwwYkOz6PPvss6Fhn376qRumIFF+//vfe1dffXXEdCNGjPBq1aoV9Zi+8847Xu7cub3vvvsuNF7bQKCEeEbRG3Ae0LPYL+pSUdmxY8dcEZCKkfzPrl27bOfOnYHzSM10akF11VVX/aZ13bp1q11++eURw/Rdw8PVq1cv9HeBAgVcUeC+ffuSnbeKobSO2hYVXVWpUsXat29vR48eDS27RYsWoX3lL1vb/e2337rvF110kT333HOh4kEVzWWE1GxP48aNQ39/8cUXrrhPFfbDj4eKB/3jcfvtt9vMmTNdxfy77rrLVq9enexyVdQo/nKDjoWK186cOZNkXkpfrlw5K1OmTGiY9icQz3LHegUAnHt6gKkei+ihrwei6r0kllwz/dRMp3owmUUt+8IpuEmpdV+RIkVccCT6XwGPtmnWrFmu7lVq/ec//7FcuXK5CtnHjx+3QoUKWWZsjwKo8OMhCxYscHXSwqk+kXTq1MnVC1u4cKGrK6QgdtCgQfboo49GXa4fIKa
1lSSQnZGjBGRzy5Yts08++cR69erlvquS9549eyx37twuWAj/qPK1qBJ14hyD1Eyn3ImlS5cGrku0+SZWs2ZNe//99yOG6bsqBmc0BTvy888/h5atysz/Vxr232UrELrkkkvcd+XKPPTQQ67SunJwVCE8FrQ/FBDt3r07yfFQro5PFbn79+9vr7zyiuuaYfr06aleRtCxqFatWmjfJU6vloQKIH0ffPBBurcRyArIUQKykVOnTrlgRsHI3r17bdGiRa5VlLoH6Nevn0vTrl07VxzSvXt3e/jhh91DT83llTOhVlcq3lHrJhWpqZhKAYIChdRMN27cOJdroSKpPn36uNZiys24++673bI1X+XGaJwe8n6AFW7EiBF2ww03WMOGDd0yFZCoGb2a1f9WKqrS/hHtn/Hjx7vWWSp+81uwKZgYPHiwC4C2b9/utmn48OGWM2dOV0R300032ZAhQ1xujfaNWvhde+21dt1111lm0jH561//6loRKgeoVatWdvjwYRfIqNhOwdHYsWNdKzm1WtO5MX/+fBfMpNZf/vIXt33aT+rvSkHklClT7Mknn4yaXsdL54WW/cgjj9iRI0fsnnvuycCtBmIg1pWkAGRcZW5d0vqoQm3x4sVdK7Xnn38+VBHZd+TIEW/w4MFemTJlXAXncuXKuUrYu3fvDlUU7tWrl3fhhRe6+fmtnFKaTubMmeNaWuXNm9e1GOvZs2do3Jo1a1zLLbXM8m8/iStzy5NPPulVrlzZLUMVrl966aWI8dEqB2se4a2xolXm9vePPhdddJEbFt6aTtRiTK3atP6lSpXy7r777lArMFWMrlu3bkSFdrXKu/jii12Lud9SmTu57fErc//0008RaVTpfPLkya4CuvaVjnmHDh1cazMZP368q8SvlmpaR1X2//LLLwPXR/PXMC3P99prr7nK25p/+fLlvUceeSTZCvpqHdeqVSu3/3Ts1CKPytyIZzn0TywCNAAAgKyOOkoAAAABCJQAAAACECgBAAAEIFACAAAIQKAEAAAQgEAJAAAgAIESAABAAAIlAACAAARKAAAAAQiUAAAAAhAoAQAABCBQAgAAsOj+H/J0DCCfyHwoAAAAAElFTkSuQmCC", - "text/plain": [ - "
| Current time: | 2025-12-07 19:23:17 |
| Running for: | 03:19:21.23 |
| Memory: | 4.4/15.9 GiB |
| Trial name | status | loc | text_det_box_thresh | text_det_thresh | text_det_unclip_rati\n", + "o | text_rec_score_thres\n", + "h | textline_orientation | use_doc_orientation_\n", + "classify | use_doc_unwarping | iter | total time (s) | CER | WER | TIME |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| trainable_paddle_ocr_d5238c33 | TERMINATED | 127.0.0.1:19452 | 0.623029 | 0.0887821 | 0 | 0.229944 | True | True | False | 1 | 374.278 | 0.0135159 | 0.105003 | 353.851 |
| trainable_paddle_ocr_ea8a2f7a | TERMINATED | 127.0.0.1:7472 | 0.671201 | 0.393201 | 0 | 0.168802 | False | False | False | 1 | 374.3 | 0.039052 | 0.132086 | 354.615 |
| trainable_paddle_ocr_ebb12e5b | TERMINATED | 127.0.0.1:21480 | 0.235725 | 0.432878 | 0 | 0.184435 | True | True | True | 1 | 379.544 | 0.0660624 | 0.166192 | 359.097 |
| trainable_paddle_ocr_b3775034 | TERMINATED | 127.0.0.1:23084 | 0.337744 | 0.0641288 | 0 | 0.576405 | False | True | True | 1 | 356.526 | 0.418109 | 0.50371 | 336.661 |
| trainable_paddle_ocr_bf10d370 | TERMINATED | 127.0.0.1:26140 | 0.690232 | 0.671955 | 0 | 0.39649 | True | True | True | 1 | 370.903 | 0.197252 | 0.295353 | 350.147 |
| trainable_paddle_ocr_111e5a9e | TERMINATED | 127.0.0.1:20664 | 0.483266 | 0.044816 | 0 | 0.546416 | False | True | False | 1 | 341.071 | 0.38641 | 0.455836 | 320.966 |
| trainable_paddle_ocr_415d7ba1 | TERMINATED | 127.0.0.1:23848 | 0.523385 | 0.0169971 | 0 | 0.208331 | True | True | True | 1 | 347.299 | 0.516069 | 0.59453 | 326.657 |
| trainable_paddle_ocr_a58d8109 | TERMINATED | 127.0.0.1:25248 | 0.670589 | 0.0402432 | 0 | 0.188585 | True | False | True | 1 | 346.09 | 0.502513 | 0.567716 | 326.916 |
| trainable_paddle_ocr_33bdf2a9 | TERMINATED | 127.0.0.1:24024 | 0.490009 | 0.434737 | 0 | 0.151906 | False | False | True | 1 | 388.151 | 0.0709203 | 0.17391 | 368.571 |
| trainable_paddle_ocr_d9df79f3 | TERMINATED | 127.0.0.1:5368 | 0.626194 | 0.178064 | 0 | 0.385477 | False | True | True | 1 | 384.677 | 0.116825 | 0.22213 | 364.623 |
| trainable_paddle_ocr_80ea65f2 | TERMINATED | 127.0.0.1:14064 | 0.251382 | 0.601112 | 0 | 0.313124 | False | True | True | 1 | 387.679 | 0.0645948 | 0.164937 | 366.607 |
| trainable_paddle_ocr_2e978bfa | TERMINATED | 127.0.0.1:11060 | 0.0777319 | 0.234859 | 0 | 0.0236948 | True | False | False | 1 | 380.281 | 0.0134006 | 0.107419 | 359.597 |
| trainable_paddle_ocr_8518cc40 | TERMINATED | 127.0.0.1:21016 | 0.000241868 | 0.222556 | 0 | 0.00289108 | True | False | False | 1 | 368.546 | 0.0134006 | 0.107419 | 347.929 |
| trainable_paddle_ocr_2c691aaa | TERMINATED | 127.0.0.1:21540 | 0.0303334 | 0.224727 | 0 | 0.0509969 | True | False | False | 1 | 366.346 | 0.0134006 | 0.107419 | 347.145 |
| trainable_paddle_ocr_31e60691 | TERMINATED | 127.0.0.1:17532 | 0.00196041 | 0.259141 | 0 | 0.00350944 | True | False | False | 1 | 368.038 | 0.0130404 | 0.104854 | 347.22 |
| trainable_paddle_ocr_d4d288c6 | TERMINATED | 127.0.0.1:22216 | 0.00339892 | 0.273408 | 0 | 0.0154205 | True | False | False | 1 | 368.904 | 0.0125829 | 0.10328 | 349.232 |
| trainable_paddle_ocr_7645b77c | TERMINATED | 127.0.0.1:2272 | 0.113841 | 0.279242 | 0 | 0.0753151 | True | False | False | 1 | 367.456 | 0.0125829 | 0.10328 | 346.698 |
| trainable_paddle_ocr_3256ae36 | TERMINATED | 127.0.0.1:6604 | 0.129213 | 0.30993 | 0 | 0.11202 | True | False | False | 1 | 366.002 | 0.0124076 | 0.102016 | 346.52 |
| trainable_paddle_ocr_b0dda58b | TERMINATED | 127.0.0.1:9732 | 0.117838 | 0.314952 | 0 | 0.682573 | True | False | False | 1 | 364.828 | 0.0124076 | 0.102016 | 344.029 |
| trainable_paddle_ocr_e9d40333 | TERMINATED | 127.0.0.1:23416 | 0.156939 | 0.530252 | 0 | 0.100194 | True | False | False | 1 | 365.626 | 0.0124298 | 0.102051 | 346.118 |
| trainable_paddle_ocr_aa89fe7a | TERMINATED | 127.0.0.1:16200 | 0.162083 | 0.50397 | 0 | 0.676539 | True | False | False | 1 | 366.753 | 0.0119907 | 0.100476 | 346.54 |
| trainable_paddle_ocr_92c48d07 | TERMINATED | 127.0.0.1:15432 | 0.186443 | 0.333219 | 0 | 0.67753 | True | False | False | 1 | 365.094 | 0.0119685 | 0.100441 | 345.979 |
| trainable_paddle_ocr_187790d7 | TERMINATED | 127.0.0.1:24676 | 0.235252 | 0.337251 | 0 | 0.698732 | True | False | False | 1 | 364.474 | 0.0119685 | 0.100441 | 344.173 |
| trainable_paddle_ocr_442a2439 | TERMINATED | 127.0.0.1:7892 | 0.212276 | 0.509804 | 0 | 0.699247 | True | False | False | 1 | 364.755 | 0.0117601 | 0.0996499 | 345.943 |
| trainable_paddle_ocr_70862adc | TERMINATED | 127.0.0.1:15412 | 0.216306 | 0.396397 | 0 | 0.685918 | True | False | False | 1 | 365.975 | 0.0119685 | 0.100441 | 345.403 |
| trainable_paddle_ocr_e6821f34 | TERMINATED | 127.0.0.1:26088 | 0.240775 | 0.366898 | 0 | 0.573762 | True | False | False | 1 | 365.255 | 0.0124076 | 0.102016 | 345.881 |
| trainable_paddle_ocr_8b680875 | TERMINATED | 127.0.0.1:1720 | 0.319343 | 0.53125 | 0 | 0.591253 | True | False | False | 1 | 367.203 | 0.0121992 | 0.101225 | 347.056 |
| trainable_paddle_ocr_fc54867b | TERMINATED | 127.0.0.1:4888 | 0.304286 | 0.503408 | 0 | 0.502491 | True | False | False | 1 | 368.736 | 0.0124298 | 0.102051 | 349.607 |
| trainable_paddle_ocr_c32d0d5e | TERMINATED | 127.0.0.1:25808 | 0.398489 | 0.153007 | 0 | 0.516768 | True | False | False | 1 | 364.423 | 0.0133855 | 0.109273 | 343.855 |
| trainable_paddle_ocr_4762fbbb | TERMINATED | 127.0.0.1:20760 | 0.40101 | 0.133426 | 0 | 0.618812 | True | False | False | 1 | 363.326 | 0.0135372 | 0.108525 | 344.601 |
| trainable_paddle_ocr_522ac97c | TERMINATED | 127.0.0.1:2372 | 0.402755 | 0.448976 | 0 | 0.642637 | True | False | False | 1 | 364.72 | 0.0117638 | 0.099689 | 344.038 |
| trainable_paddle_ocr_5784f433 | TERMINATED | 127.0.0.1:22900 | 0.192769 | 0.46205 | 0 | 0.632828 | True | False | False | 1 | 362.93 | 0.0116503 | 0.0989016 | 343.513 |
| trainable_paddle_ocr_83af0528 | TERMINATED | 127.0.0.1:9832 | 0.184587 | 0.466314 | 0 | 0.629921 | True | False | False | 1 | 364.585 | 0.0116503 | 0.0989016 | 343.81 |
| trainable_paddle_ocr_12cbaa22 | TERMINATED | 127.0.0.1:5968 | 0.405622 | 0.472779 | 0 | 0.631499 | True | False | False | 1 | 364.247 | 0.0116503 | 0.0989016 | 344.114 |
| trainable_paddle_ocr_a3a87765 | TERMINATED | 127.0.0.1:24372 | 0.28557 | 0.4501 | 0 | 0.635152 | True | False | False | 1 | 369.274 | 0.0117638 | 0.099689 | 348.58 |
| trainable_paddle_ocr_cf2bad0c | TERMINATED | 127.0.0.1:3272 | 0.283661 | 0.589012 | 0 | 0.460291 | False | False | False | 1 | 366.188 | 0.044199 | 0.132047 | 347.034 |
| trainable_paddle_ocr_9a9b91e7 | TERMINATED | 127.0.0.1:2272 | 0.364609 | 0.608959 | 0 | 0.465225 | False | False | False | 1 | 364.017 | 0.044199 | 0.132047 | 343.539 |
| trainable_paddle_ocr_e326d901 | TERMINATED | 127.0.0.1:24932 | 0.373537 | 0.593229 | 0 | 0.463688 | True | False | False | 1 | 365.428 | 0.0121992 | 0.101225 | 345.762 |
| trainable_paddle_ocr_ccb3f19a | TERMINATED | 127.0.0.1:1104 | 0.453777 | 0.686641 | 0 | 0.305928 | True | True | False | 1 | 365.147 | 0.0119903 | 0.0991043 | 344.408 |
| trainable_paddle_ocr_8c12c55f | TERMINATED | 127.0.0.1:19700 | 0.444416 | 0.67104 | 0 | 0.264132 | True | True | False | 1 | 363.297 | 0.0121862 | 0.101228 | 343.939 |
| trainable_paddle_ocr_5a62d5b6 | TERMINATED | 127.0.0.1:26528 | 0.201047 | 0.404141 | 0 | 0.599257 | True | True | True | 1 | 380.333 | 0.0662709 | 0.168515 | 359.467 |
| trainable_paddle_ocr_bb4495b7 | TERMINATED | 127.0.0.1:21772 | 0.576439 | 0.390737 | 0 | 0.541396 | False | False | True | 1 | 375.977 | 0.0707008 | 0.17391 | 356.322 |
| trainable_paddle_ocr_9d90711d | TERMINATED | 127.0.0.1:17592 | 0.541158 | 0.468954 | 0 | 0.635015 | True | False | False | 1 | 365.77 | 0.0115351 | 0.0989016 | 344.718 |
| trainable_paddle_ocr_daaec3f8 | TERMINATED | 127.0.0.1:21292 | 0.521341 | 0.474351 | 0 | 0.644567 | True | False | False | 1 | 363.019 | 0.0115351 | 0.0989016 | 343.697 |
| trainable_paddle_ocr_51fb5915 | TERMINATED | 127.0.0.1:21772 | 0.58105 | 0.485412 | 0 | 0.64636 | True | False | False | 1 | 364.02 | 0.0115351 | 0.0989016 | 343.604 |
| trainable_paddle_ocr_18966a33 | TERMINATED | 127.0.0.1:16900 | 0.51329 | 0.550159 | 0 | 0.648982 | True | False | False | 1 | 363.337 | 0.0116449 | 0.0996499 | 344.261 |
| trainable_paddle_ocr_b67080f9 | TERMINATED | 127.0.0.1:20948 | 0.576074 | 0.553412 | 0 | 0.560972 | True | False | False | 1 | 366.019 | 0.0123145 | 0.102051 | 345.495 |
| trainable_paddle_ocr_2533f368 | TERMINATED | 127.0.0.1:11208 | 0.524608 | 0.557227 | 0 | 0.558307 | True | False | True | 1 | 371.205 | 0.0720912 | 0.179189 | 351.967 |
| trainable_paddle_ocr_451d018d | TERMINATED | 127.0.0.1:3616 | 0.549464 | 0.634019 | 0 | 0.652105 | False | False | True | 1 | 378.827 | 0.0647995 | 0.164937 | 357.17 |
| trainable_paddle_ocr_2256e752 | TERMINATED | 127.0.0.1:25468 | 0.622863 | 0.647804 | 0 | 0.654609 | False | True | False | 1 | 369.88 | 0.0442921 | 0.132838 | 349.417 |
| trainable_paddle_ocr_0a892729 | TERMINATED | 127.0.0.1:26212 | 0.542929 | 0.421733 | 0 | 0.601587 | True | False | False | 1 | 367.237 | 0.0122923 | 0.102016 | 346.072 |
| trainable_paddle_ocr_495075f5 | TERMINATED | 127.0.0.1:23604 | 0.631875 | 0.418675 | 0 | 0.595618 | True | False | False | 1 | 365.536 | 0.0122923 | 0.102016 | 346.425 |
| trainable_paddle_ocr_54c45552 | TERMINATED | 127.0.0.1:25352 | 0.619687 | 0.463823 | 0 | 0.612612 | True | False | False | 1 | 367.947 | 0.0119742 | 0.100476 | 346.941 |
| trainable_paddle_ocr_6b2e9b93 | TERMINATED | 127.0.0.1:25400 | 0.48925 | 0.475185 | 0 | 0.515482 | True | False | False | 1 | 365.989 | 0.0119742 | 0.100476 | 346.414 |
| trainable_paddle_ocr_e9a6b81f | TERMINATED | 127.0.0.1:4036 | 0.492552 | 0.48793 | 0 | 0.648349 | True | False | False | 1 | 367.332 | 0.0115351 | 0.0989016 | 346.259 |
| trainable_paddle_ocr_076c5450 | TERMINATED | 127.0.0.1:4832 | 0.588133 | 0.488422 | 0 | 0.656919 | True | False | False | 1 | 365.188 | 0.0115351 | 0.0989016 | 345.843 |
| trainable_paddle_ocr_4a42a3ea | TERMINATED | 127.0.0.1:14912 | 0.594041 | 0.559036 | 0 | 0.657323 | True | False | False | 1 | 370.997 | 0.0118754 | 0.100476 | 350.244 |
| trainable_paddle_ocr_041795f1 | TERMINATED | 127.0.0.1:22372 | 0.661744 | 0.565009 | 0 | 0.66295 | True | False | False | 1 | 370.946 | 0.0120801 | 0.100476 | 351.5 |
| trainable_paddle_ocr_8abb3f37 | TERMINATED | 127.0.0.1:22012 | 0.463682 | 0.489821 | 0 | 0.394583 | True | False | False | 1 | 364.675 | 0.0123145 | 0.102051 | 343.539 |
| trainable_paddle_ocr_f2cb682e | TERMINATED | 127.0.0.1:5752 | 0.452248 | 0.491795 | 0 | 0.425971 | True | True | False | 1 | 364.908 | 0.0123145 | 0.102051 | 345.592 |
| trainable_paddle_ocr_463fe5e7 | TERMINATED | 127.0.0.1:16524 | 0.520238 | 0.537344 | 0 | 0.534057 | True | True | False | 1 | 370.564 | 0.0123145 | 0.102051 | 349.509 |
| trainable_paddle_ocr_88bbe87d | TERMINATED | 127.0.0.1:15084 | 0.511078 | 0.527459 | 0 | 0.536896 | True | False | False | 1 | 369.55 | 0.0120839 | 0.101225 | 350.144 |
| trainable_paddle_ocr_33ea1cc6 | TERMINATED | 127.0.0.1:17380 | 0.515807 | 0.522992 | 0 | 0.667966 | True | False | False | 1 | 376.746 | 0.0118754 | 0.100476 | 355.524 |
| trainable_paddle_ocr_1243723e | TERMINATED | 127.0.0.1:11232 | 0.557315 | 0.372677 | 0 | 0.676613 | True | False | False | 1 | 375.444 | 0.0118532 | 0.100441 | 355.679 |
| Trial name | CER | PAGES | TIME | TIME_PER_PAGE | WER |
|---|---|---|---|---|---|
| trainable_paddle_ocr_041795f1 | 0.0120801 | 5 | 351.5 | 70.1901 | 0.100476 |
| trainable_paddle_ocr_076c5450 | 0.0115351 | 5 | 345.843 | 69.0678 | 0.0989016 |
| trainable_paddle_ocr_0a892729 | 0.0122923 | 5 | 346.072 | 69.1243 | 0.102016 |
| trainable_paddle_ocr_111e5a9e | 0.38641 | 5 | 320.966 | 64.0952 | 0.455836 |
| trainable_paddle_ocr_1243723e | 0.0118532 | 5 | 355.679 | 71.0243 | 0.100441 |
| trainable_paddle_ocr_12cbaa22 | 0.0116503 | 5 | 344.114 | 68.724 | 0.0989016 |
| trainable_paddle_ocr_187790d7 | 0.0119685 | 5 | 344.173 | 68.7423 | 0.100441 |
| trainable_paddle_ocr_18966a33 | 0.0116449 | 5 | 344.261 | 68.7594 | 0.0996499 |
| trainable_paddle_ocr_2256e752 | 0.0442921 | 5 | 349.417 | 69.7759 | 0.132838 |
| trainable_paddle_ocr_2533f368 | 0.0720912 | 5 | 351.967 | 70.2954 | 0.179189 |
| trainable_paddle_ocr_2c691aaa | 0.0134006 | 5 | 347.145 | 69.3242 | 0.107419 |
| trainable_paddle_ocr_2e978bfa | 0.0134006 | 5 | 359.597 | 71.8043 | 0.107419 |
| trainable_paddle_ocr_31e60691 | 0.0130404 | 5 | 347.22 | 69.3455 | 0.104854 |
| trainable_paddle_ocr_3256ae36 | 0.0124076 | 5 | 346.52 | 69.1998 | 0.102016 |
| trainable_paddle_ocr_33bdf2a9 | 0.0709203 | 5 | 368.571 | 73.625 | 0.17391 |
| trainable_paddle_ocr_33ea1cc6 | 0.0118754 | 5 | 355.524 | 71.0081 | 0.100476 |
| trainable_paddle_ocr_415d7ba1 | 0.516069 | 5 | 326.657 | 65.2351 | 0.59453 |
| trainable_paddle_ocr_442a2439 | 0.0117601 | 5 | 345.943 | 69.0839 | 0.0996499 |
| trainable_paddle_ocr_451d018d | 0.0647995 | 5 | 357.17 | 71.3372 | 0.164937 |
| trainable_paddle_ocr_463fe5e7 | 0.0123145 | 5 | 349.509 | 69.8077 | 0.102051 |
| trainable_paddle_ocr_4762fbbb | 0.0135372 | 5 | 344.601 | 68.8145 | 0.108525 |
| trainable_paddle_ocr_495075f5 | 0.0122923 | 5 | 346.425 | 69.1919 | 0.102016 |
| trainable_paddle_ocr_4a42a3ea | 0.0118754 | 5 | 350.244 | 69.9484 | 0.100476 |
| trainable_paddle_ocr_51fb5915 | 0.0115351 | 5 | 343.604 | 68.6293 | 0.0989016 |
| trainable_paddle_ocr_522ac97c | 0.0117638 | 5 | 344.038 | 68.7183 | 0.099689 |
| trainable_paddle_ocr_54c45552 | 0.0119742 | 5 | 346.941 | 69.2981 | 0.100476 |
| trainable_paddle_ocr_5784f433 | 0.0116503 | 5 | 343.513 | 68.6003 | 0.0989016 |
| trainable_paddle_ocr_5a62d5b6 | 0.0662709 | 5 | 359.467 | 71.7971 | 0.168515 |
| trainable_paddle_ocr_6b2e9b93 | 0.0119742 | 5 | 346.414 | 69.1859 | 0.100476 |
| trainable_paddle_ocr_70862adc | 0.0119685 | 5 | 345.403 | 68.9856 | 0.100441 |
| trainable_paddle_ocr_7645b77c | 0.0125829 | 5 | 346.698 | 69.2407 | 0.10328 |
| trainable_paddle_ocr_80ea65f2 | 0.0645948 | 5 | 366.607 | 73.222 | 0.164937 |
| trainable_paddle_ocr_83af0528 | 0.0116503 | 5 | 343.81 | 68.6691 | 0.0989016 |
| trainable_paddle_ocr_8518cc40 | 0.0134006 | 5 | 347.929 | 69.49 | 0.107419 |
| trainable_paddle_ocr_88bbe87d | 0.0120839 | 5 | 350.144 | 69.9281 | 0.101225 |
| trainable_paddle_ocr_8abb3f37 | 0.0123145 | 5 | 343.539 | 68.6134 | 0.102051 |
| trainable_paddle_ocr_8b680875 | 0.0121992 | 5 | 347.056 | 69.3187 | 0.101225 |
| trainable_paddle_ocr_8c12c55f | 0.0121862 | 5 | 343.939 | 68.6927 | 0.101228 |
| trainable_paddle_ocr_92c48d07 | 0.0119685 | 5 | 345.979 | 69.0932 | 0.100441 |
| trainable_paddle_ocr_9a9b91e7 | 0.044199 | 5 | 343.539 | 68.6156 | 0.132047 |
| trainable_paddle_ocr_9d90711d | 0.0115351 | 5 | 344.718 | 68.8583 | 0.0989016 |
| trainable_paddle_ocr_a3a87765 | 0.0117638 | 5 | 348.58 | 69.6186 | 0.099689 |
| trainable_paddle_ocr_a58d8109 | 0.502513 | 5 | 326.916 | 65.2834 | 0.567716 |
| trainable_paddle_ocr_aa89fe7a | 0.0119907 | 5 | 346.54 | 69.2183 | 0.100476 |
| trainable_paddle_ocr_b0dda58b | 0.0124076 | 5 | 344.029 | 68.7135 | 0.102016 |
| trainable_paddle_ocr_b3775034 | 0.418109 | 5 | 336.661 | 67.2269 | 0.50371 |
| trainable_paddle_ocr_b67080f9 | 0.0123145 | 5 | 345.495 | 69.0121 | 0.102051 |
| trainable_paddle_ocr_bb4495b7 | 0.0707008 | 5 | 356.322 | 71.1644 | 0.17391 |
| trainable_paddle_ocr_bf10d370 | 0.197252 | 5 | 350.147 | 69.9364 | 0.295353 |
| trainable_paddle_ocr_c32d0d5e | 0.0133855 | 5 | 343.855 | 68.6756 | 0.109273 |
| trainable_paddle_ocr_ccb3f19a | 0.0119903 | 5 | 344.408 | 68.7897 | 0.0991043 |
| trainable_paddle_ocr_cf2bad0c | 0.044199 | 5 | 347.034 | 69.311 | 0.132047 |
| trainable_paddle_ocr_d4d288c6 | 0.0125829 | 5 | 349.232 | 69.7463 | 0.10328 |
| trainable_paddle_ocr_d5238c33 | 0.0135159 | 5 | 353.851 | 70.6623 | 0.105003 |
| trainable_paddle_ocr_d9df79f3 | 0.116825 | 5 | 364.623 | 72.8248 | 0.22213 |
| trainable_paddle_ocr_daaec3f8 | 0.0115351 | 5 | 343.697 | 68.6424 | 0.0989016 |
| trainable_paddle_ocr_e326d901 | 0.0121992 | 5 | 345.762 | 69.0578 | 0.101225 |
| trainable_paddle_ocr_e6821f34 | 0.0124076 | 5 | 345.881 | 69.0774 | 0.102016 |
| trainable_paddle_ocr_e9a6b81f | 0.0115351 | 5 | 346.259 | 69.1552 | 0.0989016 |
| trainable_paddle_ocr_e9d40333 | 0.0124298 | 5 | 346.118 | 69.1253 | 0.102051 |
| trainable_paddle_ocr_ea8a2f7a | 0.039052 | 5 | 354.615 | 70.8221 | 0.132086 |
| trainable_paddle_ocr_ebb12e5b | 0.0660624 | 5 | 359.097 | 71.7257 | 0.166192 |
| trainable_paddle_ocr_f2cb682e | 0.0123145 | 5 | 345.592 | 69.0238 | 0.102051 |
| trainable_paddle_ocr_fc54867b | 0.0124298 | 5 | 349.607 | 69.8253 | 0.102051 |
| \n", + " | CER | \n", + "WER | \n", + "TIME | \n", + "PAGES | \n", + "TIME_PER_PAGE | \n", + "timestamp | \n", + "checkpoint_dir_name | \n", + "training_iteration | \n", + "time_this_iter_s | \n", + "time_total_s | \n", + "pid | \n", + "time_since_restore | \n", + "iterations_since_restore | \n", + "config/text_det_thresh | \n", + "config/text_det_box_thresh | \n", + "config/text_det_unclip_ratio | \n", + "config/text_rec_score_thresh | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | \n", + "64.000000 | \n", + "64.000000 | \n", + "64.000000 | \n", + "64.0 | \n", + "64.000000 | \n", + "6.400000e+01 | \n", + "0.0 | \n", + "64.0 | \n", + "64.000000 | \n", + "64.000000 | \n", + "64.000000 | \n", + "64.000000 | \n", + "64.0 | \n", + "64.000000 | \n", + "64.000000 | \n", + "64.0 | \n", + "64.000000 | \n", + "
| mean | \n", + "0.052482 | \n", + "0.142770 | \n", + "347.605870 | \n", + "5.0 | \n", + "69.423734 | \n", + "1.765126e+09 | \n", + "NaN | \n", + "1.0 | \n", + "367.715945 | \n", + "367.715945 | \n", + "16306.750000 | \n", + "367.715945 | \n", + "1.0 | \n", + "0.419091 | \n", + "0.392965 | \n", + "0.0 | \n", + "0.470584 | \n", + "
| std | \n", + "0.110269 | \n", + "0.107515 | \n", + "7.876539 | \n", + "0.0 | \n", + "1.574470 | \n", + "3.473487e+03 | \n", + "NaN | \n", + "0.0 | \n", + "8.011554 | \n", + "8.011554 | \n", + "8179.917114 | \n", + "8.011554 | \n", + "0.0 | \n", + "0.167178 | \n", + "0.195419 | \n", + "0.0 | \n", + "0.219216 | \n", + "
| min | \n", + "0.011535 | \n", + "0.098902 | \n", + "320.966205 | \n", + "5.0 | \n", + "64.095210 | \n", + "1.765120e+09 | \n", + "NaN | \n", + "1.0 | \n", + "341.071264 | \n", + "341.071264 | \n", + "1104.000000 | \n", + "341.071264 | \n", + "1.0 | \n", + "0.016997 | \n", + "0.000242 | \n", + "0.0 | \n", + "0.002891 | \n", + "
| 25% | \n", + "0.011968 | \n", + "0.100441 | \n", + "344.239116 | \n", + "5.0 | \n", + "68.755118 | \n", + "1.765123e+09 | \n", + "NaN | \n", + "1.0 | \n", + "364.708660 | \n", + "364.708660 | \n", + "9272.000000 | \n", + "364.708660 | \n", + "1.0 | \n", + "0.328652 | \n", + "0.230515 | \n", + "0.0 | \n", + "0.311325 | \n", + "
| 50% | \n", + "0.012314 | \n", + "0.102033 | \n", + "346.419682 | \n", + "5.0 | \n", + "69.188875 | \n", + "1.765126e+09 | \n", + "NaN | \n", + "1.0 | \n", + "366.103412 | \n", + "366.103412 | \n", + "18522.000000 | \n", + "366.103412 | \n", + "1.0 | \n", + "0.465068 | \n", + "0.448332 | \n", + "0.0 | \n", + "0.559640 | \n", + "
| 75% | \n", + "0.040339 | \n", + "0.132047 | \n", + "350.144563 | \n", + "5.0 | \n", + "69.930173 | \n", + "1.765129e+09 | \n", + "NaN | \n", + "1.0 | \n", + "370.648662 | \n", + "370.648662 | \n", + "23167.000000 | \n", + "370.648662 | \n", + "1.0 | \n", + "0.530501 | \n", + "0.544563 | \n", + "0.0 | \n", + "0.645015 | \n", + "
| max | \n", + "0.516069 | \n", + "0.594530 | \n", + "368.571180 | \n", + "5.0 | \n", + "73.625040 | \n", + "1.765132e+09 | \n", + "NaN | \n", + "1.0 | \n", + "388.150608 | \n", + "388.150608 | \n", + "26528.000000 | \n", + "388.150608 | \n", + "1.0 | \n", + "0.686641 | \n", + "0.690232 | \n", + "0.0 | \n", + "0.699247 | \n", + "