Files
MastersThesis/thesis_output/presentation.html
sergio 125b16c8f7
Some checks failed
build_docker / essential (push) Successful in 1s
build_docker / build_paddle_ocr (push) Failing after 5m31s
build_docker / build_easyocr (push) Failing after 7m40s
build_docker / build_doctr (push) Has been cancelled
build_docker / build_doctr_gpu (push) Has been cancelled
build_docker / build_raytune (push) Has been cancelled
build_docker / build_paddle_ocr_gpu (push) Has been cancelled
build_docker / build_easyocr_gpu (push) Has been cancelled
presentation
2026-04-19 13:34:48 +02:00

1320 lines
44 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>TFM - Optimización de Hiperparámetros OCR con Ray Tune</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/reveal.js@5.0.4/dist/reveal.min.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/reveal.js@5.0.4/dist/theme/white.min.css">
<script src="https://cdn.jsdelivr.net/npm/chart.js@4"></script>
<style>
/* Colour palette shared by the rules below through var(--unir-*). */
:root {
  --unir-blue: #0098CD;
  --unir-blue-dark: #007AA3;
  --unir-light: #E6F4F9;
  --unir-text: #404040;
  --unir-gray: #E7E6E6;
  --unir-red: #E8654A;
  --unir-orange: #F0A030;
}
/* Deck-wide body text: font stack, base size and colour. */
.reveal {
  color: var(--unir-text);
  font-family: 'Calibri', 'Segoe UI', 'Helvetica Neue', Arial, sans-serif;
  font-size: 28px;
}

/* All three heading levels share one look; only the size differs. */
.reveal h1,
.reveal h2,
.reveal h3 {
  color: var(--unir-blue);
  font-family: 'Calibri Light', 'Calibri', 'Segoe UI', Arial, sans-serif;
  font-weight: 600;
  letter-spacing: -0.02em;
  text-transform: none;
}

.reveal h1 {
  font-size: 1.8em;
}

.reveal h2 {
  font-size: 1.4em;
  margin-bottom: 0.6em;
}

.reveal h3 {
  font-size: 1.1em;
}

/* Slide content is left-aligned with inner padding. */
.reveal .slides section {
  padding: 20px 40px;
  text-align: left;
}
/* Small logo in the top-right corner of every slide except .title-slide. */
.reveal .slides section:not(.title-slide)::after {
  content: '';
  position: absolute;
  top: 15px;
  right: 20px;
  width: 110px;
  height: 30px;
  opacity: 0.8;
  /* background-size must stay after the shorthand, which would otherwise reset it. */
  background: url('../instructions/plantilla_individual_files/image001.png') no-repeat center;
  background-size: contain;
}

/* 4px gradient line along the bottom edge of the same slides. */
.reveal .slides section:not(.title-slide)::before {
  content: '';
  position: absolute;
  left: 0;
  right: 0;
  bottom: 0;
  height: 4px;
  background: linear-gradient(90deg, var(--unir-blue), var(--unir-light));
}
/* Title slide: centred layout, overriding the left-aligned section default. */
.title-slide {
  text-align: center !important;
}

.title-slide h1 {
  font-size: 1.5em !important;
  line-height: 1.3;
  margin-top: 0.2em;
}

/* Uppercase label shown above the title. */
.title-slide .subtitle {
  color: var(--unir-blue);
  font-size: 0.75em;
  font-weight: 600;
  letter-spacing: 0.1em;
  margin-bottom: 0.5em;
  text-transform: uppercase;
}

/* Author / advisor / degree lines. */
.title-slide .meta {
  color: #666;
  font-size: 0.65em;
  line-height: 1.8;
}

.title-slide .meta strong {
  color: var(--unir-text);
}

.title-slide .logo-large {
  width: 220px;
  margin-bottom: 10px;
}

/* Short horizontal rule between title and meta. */
.title-slide .divider {
  width: 120px;
  height: 3px;
  margin: 15px auto;
  background: var(--unir-blue);
}

/* Thank-you slide: centred, with a larger heading. */
.thanks-slide {
  text-align: center !important;
}

.thanks-slide h1 {
  font-size: 2.5em !important;
  margin-bottom: 0.3em;
}

.thanks-slide .questions {
  color: #666;
  font-size: 1.2em;
  margin-bottom: 1em;
}
/* Column layouts: every variant is a top-aligned CSS grid. */
.two-columns,
.two-columns-60-40,
.two-columns-40-60,
.three-columns {
  display: grid;
  align-items: start;
}

/* Track sizes and gutters per variant. */
.two-columns {
  grid-template-columns: 1fr 1fr;
  gap: 30px;
}

.two-columns-60-40 {
  grid-template-columns: 3fr 2fr;
  gap: 30px;
}

.two-columns-40-60 {
  grid-template-columns: 2fr 3fr;
  gap: 30px;
}

.three-columns {
  grid-template-columns: 1fr 1fr 1fr;
  gap: 20px;
}
/* Call-out box: tinted background with a thick left border. */
.highlight-box {
  margin: 15px 0;
  padding: 15px 20px;
  font-size: 0.85em;
  background: var(--unir-light);
  border-left: 5px solid var(--unir-blue);
  border-radius: 0 8px 8px 0;
}

/* Centred variant: the accent moves from the left edge to the top edge. */
.highlight-box.center-box {
  text-align: center;
  border-left: none;
  border-top: 3px solid var(--unir-blue);
  border-radius: 0 0 8px 8px;
}
/* Metric cards: a wrapping, centred flex row of small stat tiles. */
.metric-cards {
  display: flex;
  flex-wrap: wrap;
  justify-content: center;
  gap: 15px;
}

.metric-card {
  min-width: 130px;
  padding: 15px 20px;
  text-align: center;
  background: white;
  border: 2px solid var(--unir-light);
  border-radius: 12px;
  box-shadow: 0 2px 8px rgba(0,0,0,0.06);
  transition: transform 0.2s;
}

/* Slight lift on hover. */
.metric-card:hover {
  transform: translateY(-2px);
}

/* The headline figure inside a card. */
.metric-card .number {
  color: var(--unir-blue);
  font-size: 1.6em;
  font-weight: 700;
  line-height: 1.2;
}

/* Colour modifiers for the figure. */
.metric-card .number.success {
  color: #2EAD4B;
}

.metric-card .number.warning {
  color: var(--unir-orange);
}

.metric-card .number.danger {
  color: var(--unir-red);
}

/* Caption under the figure. */
.metric-card .label {
  margin-top: 4px;
  color: #888;
  font-size: 0.6em;
}
/* Data tables: full-width, collapsed borders, blue header row. */
.data-table {
  width: 100%;
  margin: 10px 0;
  font-size: 0.75em;
  border-collapse: collapse;
}

.data-table thead th {
  padding: 10px 14px;
  color: white;
  font-weight: 600;
  text-align: left;
  background: var(--unir-blue);
}

/* Rounded outer corners on the header row. */
.data-table thead th:first-child {
  border-radius: 8px 0 0 0;
}

.data-table thead th:last-child {
  border-radius: 0 8px 0 0;
}

.data-table tbody td {
  padding: 8px 14px;
  border-bottom: 1px solid var(--unir-gray);
}

/* Zebra striping on even body rows. */
.data-table tbody tr:nth-child(even) {
  background: #FAFCFE;
}

/* Emphasised row; declared after the striping rule so it wins on even rows. */
.data-table tbody tr.highlight {
  background: var(--unir-light);
  font-weight: 600;
}

/* Rounded outer corners on the last body row. */
.data-table tbody tr:last-child td:first-child {
  border-radius: 0 0 0 8px;
}

.data-table tbody tr:last-child td:last-child {
  border-radius: 0 0 8px 0;
}
/* Engine cards: one bordered tile per OCR engine. */
.engine-card {
  padding: 18px;
  text-align: center;
  background: white;
  border: 2px solid var(--unir-gray);
  border-radius: 12px;
  transition: all 0.3s;
}

/* The chosen engine gets the blue border, tint and glow. */
.engine-card.selected {
  background: var(--unir-light);
  border-color: var(--unir-blue);
  box-shadow: 0 4px 16px rgba(0,152,205,0.2);
}

.engine-card h3 {
  margin: 0 0 5px 0;
  font-size: 0.9em;
}

.engine-card .developer {
  margin-bottom: 10px;
  color: #999;
  font-size: 0.55em;
}

/* Architecture pill. */
.engine-card .arch {
  display: inline-block;
  margin-bottom: 8px;
  padding: 4px 8px;
  color: var(--unir-text);
  font-size: 0.6em;
  background: var(--unir-light);
  border-radius: 6px;
}

/* On the tinted selected card the pill switches to white so it stays visible. */
.engine-card.selected .arch {
  background: white;
}

.engine-card ul {
  margin: 0;
  padding-left: 16px;
  font-size: 0.6em;
  text-align: left;
}
/* Agenda rows: numbered circle followed by the entry text. */
.agenda-item {
  display: flex;
  align-items: center;
  gap: 15px;
  margin: 12px 0;
  font-size: 0.85em;
}

/* Fixed-size blue circle with the number centred inside. */
.agenda-number {
  display: flex;
  align-items: center;
  justify-content: center;
  flex-shrink: 0;
  width: 36px;
  height: 36px;
  color: white;
  font-size: 0.9em;
  font-weight: 700;
  background: var(--unir-blue);
  border-radius: 50%;
}

/* Pill-shaped tags. */
.tag {
  display: inline-block;
  margin: 2px;
  padding: 3px 12px;
  color: var(--unir-blue-dark);
  font-size: 0.6em;
  font-weight: 600;
  background: var(--unir-light);
  border-radius: 20px;
}

/* Tag colour variants. */
.tag.bool {
  background: #FFF3E0;
  color: #E65100;
}

.tag.cont {
  background: #E8F5E9;
  color: #2E7D32;
}

.tag.fixed {
  background: var(--unir-gray);
  color: #888;
}
/* Chart wrappers: full column width, height capped at 420px. */
.chart-container {
  position: relative;
  width: 100%;
  max-height: 420px;
}

.chart-container canvas {
  max-height: 420px;
}

/* Default list typography inside the deck. */
.reveal ul,
.reveal ol {
  font-size: 0.8em;
  line-height: 1.6;
}

.reveal li {
  margin-bottom: 6px;
}

/* Denser list variant. */
.compact-list {
  font-size: 0.7em;
}

.compact-list li {
  margin-bottom: 3px;
}
/* Objective checklist: native bullets replaced by a drawn circle. */
.obj-list {
  padding: 0;
  font-size: 0.72em;
  list-style: none;
}

/* Left padding leaves room for the absolutely positioned marker. */
.obj-list li {
  position: relative;
  padding: 6px 0 6px 30px;
}

/* Empty outlined circle. */
.obj-list li::before {
  content: '';
  position: absolute;
  top: 8px;
  left: 0;
  width: 18px;
  height: 18px;
  border: 2px solid var(--unir-blue);
  border-radius: 50%;
}

/* Items marked .done get a filled dot; the inset white shadow draws the ring gap. */
.obj-list li.done::before {
  background: var(--unir-blue);
  box-shadow: inset 0 0 0 3px white;
}
/* Conclusion items: square icon followed by a text block. */
.contribution-item {
  display: flex;
  /* flex-start rather than start: the bare `start` keyword is not honoured
     on flex containers by every browser, flex-start is. */
  align-items: flex-start;
  gap: 10px;
  margin: 8px 0;
  font-size: 0.72em;
}

/* Shared geometry for both icon kinds; they differ only in background. */
.contribution-icon,
.limitation-icon {
  display: flex;
  align-items: center;
  justify-content: center;
  flex-shrink: 0;
  width: 28px;
  height: 28px;
  color: white;
  font-size: 14px;
  border-radius: 6px;
}

.contribution-icon {
  background: var(--unir-blue);
}

.limitation-icon {
  background: var(--unir-orange);
}
/* Small centred italic caption under figures. */
.fig-caption {
  margin-top: 5px;
  color: #999;
  font-size: 0.55em;
  font-style: italic;
  text-align: center;
}

/* Fine-tuning vs HPO comparison: each row is a three-track grid
   (fixed label column plus two equal columns). */
.compare-row {
  display: grid;
  grid-template-columns: 140px 1fr 1fr;
  gap: 0;
  font-size: 0.65em;
}

/* Header row cells. */
.compare-row.header > div {
  padding: 8px 12px;
  color: white;
  font-weight: 600;
  background: var(--unir-blue);
}

/* Every row cell; declared after the header rule, so its padding also applies there. */
.compare-row > div {
  padding: 6px 12px;
  border-bottom: 1px solid var(--unir-gray);
}

.compare-row .label-col {
  font-weight: 600;
  background: #FAFCFE;
}

.compare-row .highlight-col {
  background: var(--unir-light);
}
/* Slide number indicator. */
.reveal .slide-number {
  color: var(--unir-blue);
  font-family: 'Calibri', sans-serif;
  font-size: 14px;
}

/* Progress bar fill. */
.reveal .progress span {
  background: var(--unir-blue);
}

/* Green emphasis for improvement figures. */
.improvement {
  color: #2EAD4B;
  font-size: 0.8em;
  font-weight: 700;
}

/* Inline code and parameter names. */
code,
.param {
  padding: 2px 6px;
  font-family: 'Consolas', 'Courier New', monospace;
  font-size: 0.85em;
  background: var(--unir-light);
  border-radius: 4px;
}

/* Section divider slides: centred with a large, faint section number. */
.section-divider {
  text-align: center !important;
}

.section-divider h2 {
  font-size: 1.8em !important;
}

.section-divider .section-number {
  color: var(--unir-blue);
  font-size: 3em;
  font-weight: 700;
  opacity: 0.2;
}
</style>
</head>
<body>
<div class="reveal">
<div class="slides">
<!-- ====== SLIDE 1: TITLE ====== -->
<!-- Cover slide: logo, degree label, thesis title and author details. -->
<section class="title-slide" data-transition="fade">
  <img class="logo-large" src="../instructions/plantilla_individual_files/image001.png" alt="UNIR">
  <div class="subtitle">Trabajo Fin de Máster</div>
  <h1>Optimización de Hiperparámetros OCR con Ray Tune para Documentos Académicos en Español</h1>
  <div class="divider"></div>
  <div class="meta">
    <strong>Sergio Jiménez Jiménez</strong><br>
    Director: Javier Rodrigo Villazón Terrazas<br>
    Máster Universitario en Inteligencia Artificial<br>
    2025
  </div>
</section>
<!-- ====== SLIDE 2: AGENDA ====== -->
<!-- Five agenda rows, each revealed as its own fragment. -->
<section>
  <h2>Agenda</h2>
  <div class="agenda-item fragment fade-up">
    <div class="agenda-number">1</div>
    <div>Motivación y planteamiento del problema</div>
  </div>
  <div class="agenda-item fragment fade-up">
    <div class="agenda-number">2</div>
    <div>Objetivos y estado del arte</div>
  </div>
  <div class="agenda-item fragment fade-up">
    <div class="agenda-number">3</div>
    <div>Metodología y arquitectura</div>
  </div>
  <div class="agenda-item fragment fade-up">
    <div class="agenda-number">4</div>
    <div>Resultados experimentales</div>
  </div>
  <div class="agenda-item fragment fade-up">
    <div class="agenda-number">5</div>
    <div>Conclusiones y trabajo futuro</div>
  </div>
</section>
<!-- ====== SLIDE 3: MOTIVATION ====== -->
<!-- Left: motivation bullets. Right: table of sample OCR errors in Spanish text. -->
<section>
  <h2>Motivación</h2>
  <div class="two-columns">
    <div>
      <ul>
        <li>La digitalización documental es una <strong>necesidad estratégica</strong> para organizaciones</li>
        <li>OCR como puente entre el mundo físico y digital</li>
        <li>Documentos en español: caracteres especiales ausentes en conjuntos de entrenamiento internacionales</li>
        <li>Modelos preentrenados: <strong>rendimiento subóptimo</strong> fuera de benchmarks estándar</li>
        <li>Fine-tuning requiere infraestructura costosa y datos etiquetados</li>
      </ul>
    </div>
    <div>
      <h3 style="font-size:0.8em; text-align:center; margin-bottom: 10px;">Errores típicos en español</h3>
      <table class="data-table" style="font-size:0.85em;">
        <!-- scope="col" ties each header to its column for assistive technology. -->
        <thead><tr><th scope="col">Original</th><th scope="col">OCR</th><th scope="col">Error</th></tr></thead>
        <tbody>
          <tr><td>más</td><td>mas</td><td>Pérdida de acento</td></tr>
          <tr><td>año</td><td>ano</td><td>Pérdida de eñe</td></tr>
          <tr><td>¿Cómo</td><td>Como</td><td>Signos especiales</td></tr>
          <!-- "titulacióon" is intentionally misspelled: it is the duplicated-character example. -->
          <tr><td>titulación</td><td>titulacióon</td><td>Duplicación</td></tr>
        </tbody>
      </table>
    </div>
  </div>
</section>
<!-- ====== SLIDE 4: PROBLEM STATEMENT ====== -->
<!-- Research question, then a row-by-row comparison of fine-tuning against hyperparameter optimisation. -->
<section>
  <h2>Planteamiento del Problema</h2>
  <div class="highlight-box center-box" style="font-size:0.78em; margin-bottom: 20px;">
    <em>¿Es posible mejorar significativamente el rendimiento de modelos OCR preentrenados para documentos en español mediante la optimización sistemática de hiperparámetros, sin requerir fine-tuning?</em>
  </div>
  <div style="margin-top:10px;">
    <div class="compare-row header">
      <div></div>
      <div>Fine-tuning completo</div>
      <div>Optimización de hiperparámetros</div>
    </div>
    <div class="compare-row">
      <div class="label-col">Datos</div>
      <div>Miles de imágenes etiquetadas</div>
      <div class="highlight-col">Subconjunto de validación</div>
    </div>
    <div class="compare-row">
      <div class="label-col">Hardware</div>
      <!-- ">" is escaped, matching the &lt; / &gt; entities used on other slides. -->
      <div>GPU alta memoria (&gt;16 GB)</div>
      <div class="highlight-col">CPU / GPU consumo</div>
    </div>
    <div class="compare-row">
      <div class="label-col">Tiempo</div>
      <div>Días / semanas</div>
      <div class="highlight-col">Minutos / horas</div>
    </div>
    <div class="compare-row">
      <div class="label-col">Expertise</div>
      <div>Alto (ML avanzado)</div>
      <div class="highlight-col">Bajo-medio</div>
    </div>
    <div class="compare-row">
      <div class="label-col">Riesgo</div>
      <div>Sobreajuste, catastrófico</div>
      <div class="highlight-col">Limitado, reversible</div>
    </div>
  </div>
</section>
<!-- ====== SLIDE 5: OBJECTIVES ====== -->
<!-- General objective in a call-out, then the five specific objectives as fragments. -->
<section>
  <h2>Objetivos</h2>
  <div class="highlight-box" style="margin-bottom: 15px;">
    <strong>Objetivo general:</strong> Optimizar PaddleOCR para documentos académicos en español alcanzando un <strong>CER &lt; 2%</strong> sin fine-tuning del modelo base.
  </div>
  <ul class="obj-list">
    <li class="done fragment fade-up">
      <strong>OE1:</strong> Comparar tres motores OCR open-source (EasyOCR, PaddleOCR, DocTR)
    </li>
    <li class="done fragment fade-up">
      <strong>OE2:</strong> Preparar dataset de evaluación de 45 páginas con ground truth
    </li>
    <li class="done fragment fade-up">
      <strong>OE3:</strong> Identificar hiperparámetros críticos mediante análisis de correlación
    </li>
    <li class="done fragment fade-up">
      <strong>OE4:</strong> Ejecutar 64 trials de optimización con Ray Tune + Optuna
    </li>
    <li class="done fragment fade-up">
      <strong>OE5:</strong> Validar la configuración optimizada frente al baseline
    </li>
  </ul>
</section>
<!-- ====== SLIDE 6: STATE OF THE ART ====== -->
<!-- One card per OCR engine (the middle one carries .selected), followed by the pipeline figure. -->
<section>
  <h2>Estado del Arte: Motores OCR</h2>
  <div class="three-columns" style="margin-bottom: 15px;">

    <div class="engine-card">
      <h3>EasyOCR</h3>
      <div class="developer">JaidedAI</div>
      <div class="arch">CRAFT + CRNN</div>
      <ul>
        <li>80+ idiomas</li>
        <li>Fácil de usar</li>
        <li>Baja configurabilidad</li>
      </ul>
    </div>

    <div class="engine-card selected">
      <h3>PaddleOCR</h3>
      <div class="developer">Baidu / PaddlePaddle</div>
      <div class="arch">DB + SVTR (PP-OCRv5)</div>
      <ul>
        <li>Alta configurabilidad</li>
        <li>Pipeline modular</li>
        <li>Soporte español dedicado</li>
      </ul>
    </div>

    <div class="engine-card">
      <h3>DocTR</h3>
      <div class="developer">Mindee</div>
      <div class="arch">DB/LinkNet + CRNN/SAR</div>
      <ul>
        <li>TF y PyTorch</li>
        <li>Soporte español limitado</li>
        <li>Rápido en inferencia</li>
      </ul>
    </div>

  </div>
  <img src="figures/figura_1.png" alt="Pipeline OCR" style="width: 85%; display: block; margin: 0 auto;">
  <div class="fig-caption">Pipeline de un sistema OCR moderno</div>
</section>
<!-- ====== SLIDE 7: METHODOLOGY ====== -->
<!-- Methodology figure, then a five-column grid with one numbered phase per column. -->
<section>
  <h2>Metodología: 5 Fases</h2>
  <img src="figures/figura_3.png" alt="Metodología" style="width: 90%; display: block; margin: 0 auto 15px;">
  <div class="fig-caption" style="margin-bottom: 15px;">Fases de la metodología experimental</div>
  <div style="display: grid; grid-template-columns: repeat(5, 1fr); gap: 8px; font-size: 0.55em; text-align: center;">
    <div class="fragment fade-up">
      <div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">1</div>
      Preparación del dataset<br>
      <span style="color:#999">PDF → 300 DPI + GT</span>
    </div>
    <div class="fragment fade-up">
      <div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">2</div>
      Benchmark comparativo<br>
      <span style="color:#999">3 motores, CER/WER</span>
    </div>
    <div class="fragment fade-up">
      <div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">3</div>
      Espacio de búsqueda<br>
      <span style="color:#999">7 hiperparámetros</span>
    </div>
    <div class="fragment fade-up">
      <div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">4</div>
      Optimización<br>
      <span style="color:#999">64 trials, TPE</span>
    </div>
    <div class="fragment fade-up">
      <div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">5</div>
      Validación<br>
      <span style="color:#999">45 páginas completas</span>
    </div>
  </div>
</section>
<!-- ====== SLIDE 8: ARCHITECTURE ====== -->
<!-- Left (wider): architecture figure. Right: component list and hardware call-out. -->
<section>
  <h2>Arquitectura: Microservicios Docker</h2>
  <div class="two-columns-60-40">
    <div>
      <img src="figures/figura_6.png" alt="Arquitectura" style="width: 100%; border-radius: 8px;">
      <div class="fig-caption">Arquitectura de microservicios para optimización OCR</div>
    </div>
    <div>
      <ul class="compact-list">
        <li><strong>Contenedor Ray Tune:</strong> Orquestador de trials (Optuna TPE)</li>
        <li><strong>Contenedor OCR:</strong> PaddleOCR con acceso GPU</li>
        <li><strong>Comunicación:</strong> REST API (HTTP POST /evaluate)</li>
        <li><strong>Respuesta:</strong> JSON {CER, WER, TIME}</li>
        <li><strong>Docker Compose:</strong> Despliegue reproducible</li>
      </ul>
      <div class="highlight-box" style="font-size:0.65em; margin-top:15px;">
        <strong>Hardware:</strong><br>
        RTX 3060 Laptop (5.66 GB VRAM)<br>
        AMD Ryzen 7 5800H<br>
        16 GB DDR4 | Ubuntu 24.04
      </div>
    </div>
  </div>
</section>
<!-- ====== SLIDE 9: SEARCH SPACE ====== -->
<!-- Left: the seven hyperparameters with type and range (the fixed one is dimmed).
     Right: optimisation-cycle figure and run settings. -->
<section>
  <h2>Espacio de Búsqueda: 7 Hiperparámetros</h2>
  <div class="two-columns-60-40">
    <div>
      <table class="data-table" style="font-size:0.72em;">
        <!-- scope="col" ties each header to its column for assistive technology. -->
        <thead><tr><th scope="col">Parámetro</th><th scope="col">Tipo</th><th scope="col">Rango</th></tr></thead>
        <tbody>
          <tr><td><code>textline_orientation</code></td><td><span class="tag bool">Booleano</span></td><td>True / False</td></tr>
          <tr><td><code>use_doc_orientation_classify</code></td><td><span class="tag bool">Booleano</span></td><td>True / False</td></tr>
          <tr><td><code>use_doc_unwarping</code></td><td><span class="tag bool">Booleano</span></td><td>True / False</td></tr>
          <tr><td><code>text_det_thresh</code></td><td><span class="tag cont">Continuo</span></td><td>[0.01, 0.50]</td></tr>
          <tr><td><code>text_det_box_thresh</code></td><td><span class="tag cont">Continuo</span></td><td>[0.01, 0.90]</td></tr>
          <tr><td><code>text_rec_score_thresh</code></td><td><span class="tag cont">Continuo</span></td><td>[0.01, 0.99]</td></tr>
          <tr style="opacity:0.5"><td><code>text_det_unclip_ratio</code></td><td><span class="tag fixed">Fijo</span></td><td>0.0</td></tr>
        </tbody>
      </table>
    </div>
    <div>
      <!-- Alt text in Spanish to match the document language (lang="es"). -->
      <img src="figures/figura_2.png" alt="Ciclo de Ray Tune" style="width: 100%; border-radius: 8px;">
      <div class="fig-caption">Ciclo de optimización con Ray Tune y Optuna</div>
      <div class="highlight-box" style="font-size:0.62em; margin-top:10px;">
        <strong>Algoritmo:</strong> TPE (Tree-structured Parzen Estimator)<br>
        <strong>Trials:</strong> 64 | <strong>Concurrencia:</strong> 2 workers<br>
        <strong>Métrica:</strong> Minimizar CER
      </div>
    </div>
  </div>
</section>
<!-- ====== SLIDE 10: BENCHMARK ====== -->
<!-- Left: chart drawn into #chartBenchmark by createBenchmarkChart(). Right: the same figures as a table. -->
<section data-chart="benchmark">
  <h2>Resultados: Benchmark Comparativo</h2>
  <div class="two-columns">
    <div class="chart-container">
      <!-- A canvas is opaque to screen readers; role="img" plus aria-label gives it an accessible name. -->
      <canvas id="chartBenchmark" role="img" aria-label="Gráfico de barras horizontales: CER y WER de EasyOCR, PaddleOCR y DocTR"></canvas>
    </div>
    <div>
      <table class="data-table" style="font-size:0.68em;">
        <thead><tr><th scope="col">Motor</th><th scope="col">CER</th><th scope="col">WER</th><th scope="col">s/pág</th><th scope="col">VRAM</th></tr></thead>
        <tbody>
          <tr><td>EasyOCR</td><td>11.23%</td><td>36.36%</td><td>1.88</td><td>~2 GB</td></tr>
          <tr class="highlight"><td><strong>PaddleOCR</strong></td><td><strong>7.76%</strong></td><td><strong>11.62%</strong></td><td>0.58</td><td>0.06 GB</td></tr>
          <tr><td>DocTR</td><td>12.06%</td><td>42.01%</td><td>0.50</td><td>~1 GB</td></tr>
        </tbody>
      </table>
      <div class="highlight-box" style="font-size:0.65em; margin-top:15px;">
        <strong>PaddleOCR seleccionado:</strong> Mejor CER (7.76%) con el menor consumo de VRAM (0.06 GB) y alta configurabilidad.
      </div>
    </div>
  </div>
</section>
<!-- ====== SLIDE 11: 64 TRIALS ====== -->
<!-- Left: chart drawn into #chartTrials by createTrialsChart(). Right: headline metrics. -->
<section data-chart="trials">
  <h2>Resultados: 64 Trials de Optimización</h2>
  <div class="two-columns">
    <div class="chart-container">
      <!-- A canvas is opaque to screen readers; role="img" plus aria-label gives it an accessible name. -->
      <canvas id="chartTrials" role="img" aria-label="Gráfico de anillo: distribución de los 64 trials por rango de CER"></canvas>
    </div>
    <div>
      <div class="metric-cards" style="flex-direction: column;">
        <div class="metric-card" style="width:100%;">
          <div class="number success">0.79%</div>
          <div class="label">Mejor CER (Trial #1)</div>
        </div>
        <div class="metric-card" style="width:100%;">
          <div class="number">0.87%</div>
          <div class="label">Mediana CER</div>
        </div>
        <div class="metric-card" style="width:100%;">
          <div class="number warning">7.30%</div>
          <div class="label">Peor CER</div>
        </div>
        <div class="metric-card" style="width:100%;">
          <div class="number">67.2%</div>
          <div class="label">Trials con CER &lt; 2%</div>
        </div>
      </div>
      <div class="highlight-box" style="font-size:0.62em; margin-top:12px;">
        <strong>0 fallos</strong> en 64 trials<br>
        Tiempo total: <strong>~5 minutos</strong> (GPU)
      </div>
    </div>
  </div>
</section>
<!-- ====== SLIDE 12: KEY FINDING ====== -->
<!-- Left: chart drawn into #chartTextline by createTextlineChart(). Right: headline reduction and takeaways. -->
<section data-chart="textline">
  <h2>Hallazgo Clave: <code>textline_orientation</code></h2>
  <div class="two-columns">
    <div class="chart-container">
      <!-- A canvas is opaque to screen readers; role="img" plus aria-label gives it an accessible name. -->
      <canvas id="chartTextline" role="img" aria-label="Gráfico de barras: CER medio con textline_orientation en False (4.73%) y en True (1.74%)"></canvas>
    </div>
    <div>
      <div class="metric-card" style="width:100%; margin-bottom: 15px;">
        <div class="number success" style="font-size: 2em;">-63.2%</div>
        <div class="label" style="font-size:0.75em;">Reducción en CER</div>
      </div>
      <ul class="compact-list">
        <li>Un <strong>único parámetro booleano</strong> tiene mayor impacto que todos los umbrales numéricos combinados</li>
        <li><strong>Decisiones arquitecturales</strong> &gt; ajustes numéricos finos</li>
        <li>Crítico para documentos con <strong>layouts complejos</strong> (índices, listas, encabezados)</li>
        <li>52 de 64 trials (81%) lo activaron automáticamente (Optuna aprendió rápido)</li>
      </ul>
    </div>
  </div>
</section>
<!-- ====== SLIDE 13: CORRELATIONS ====== -->
<!-- Two charts side by side (#chartCorrelation, #chartImportance), then an insight call-out. -->
<section data-chart="correlations">
  <h2>Análisis de Hiperparámetros</h2>
  <div class="two-columns">
    <div>
      <h3 style="font-size:0.75em; text-align:center;">Correlación Pearson con CER</h3>
      <div class="chart-container">
        <!-- A canvas is opaque to screen readers; role="img" plus aria-label gives it an accessible name. -->
        <canvas id="chartCorrelation" role="img" aria-label="Gráfico de barras: correlación de Pearson de cada hiperparámetro con el CER"></canvas>
      </div>
    </div>
    <div>
      <h3 style="font-size:0.75em; text-align:center;">Importancia de Hiperparámetros</h3>
      <div class="chart-container">
        <canvas id="chartImportance" role="img" aria-label="Gráfico: importancia de cada hiperparámetro"></canvas>
      </div>
    </div>
  </div>
  <div class="highlight-box" style="font-size:0.62em; margin-top:10px;">
    <strong>Insight:</strong> <code>use_doc_unwarping</code> (+0.88) es perjudicial en PDFs digitales (añade procesamiento innecesario). Los parámetros booleanos (arquitecturales) dominan sobre los umbrales numéricos.
  </div>
</section>
<!-- ====== SLIDE 14: VALIDATION ====== -->
<!-- Left: chart drawn into #chartValidation. Right: baseline vs optimised table and an overfitting note. -->
<section data-chart="validation">
  <h2>Validación: Baseline vs Optimizado</h2>
  <div class="two-columns">
    <div class="chart-container">
      <!-- A canvas is opaque to screen readers; role="img" plus aria-label gives it an accessible name. -->
      <canvas id="chartValidation" role="img" aria-label="Gráfico: comparación entre baseline y configuración optimizada"></canvas>
    </div>
    <div>
      <table class="data-table" style="font-size:0.7em;">
        <thead><tr><th scope="col">Métrica</th><th scope="col">Baseline</th><th scope="col">Optimizado</th><th scope="col">Mejora</th></tr></thead>
        <tbody>
          <tr><td>CER (45 pág)</td><td>8.85%</td><td>7.72%</td><td class="improvement">-12.8%</td></tr>
          <tr><td>WER (45 pág)</td><td>13.05%</td><td>11.40%</td><td class="improvement">-12.6%</td></tr>
          <tr class="highlight"><td>CER (mejor trial, 5 pág)</td><td>7.76%</td><td>0.79%</td><td class="improvement">-89.8%</td></tr>
        </tbody>
      </table>
      <div class="highlight-box" style="font-size:0.62em; margin-top:15px;">
        <strong>Nota:</strong> La diferencia entre el mejor trial (0.79%) y la validación completa (7.72%) evidencia <strong>sobreajuste</strong> al subconjunto de 5 páginas usado en la optimización. Un subconjunto más amplio (15-20 páginas) mejoraría la generalización.
      </div>
    </div>
  </div>
</section>
<!-- ====== SLIDE 15: GPU ACCELERATION ====== -->
<!-- Left: chart drawn into #chartGPU. Right: speed-up factor, per-page timings and total run time. -->
<section data-chart="gpu">
  <h2>Aceleración GPU</h2>
  <div class="two-columns">
    <div class="chart-container">
      <!-- A canvas is opaque to screen readers; role="img" plus aria-label gives it an accessible name. -->
      <canvas id="chartGPU" role="img" aria-label="Gráfico: comparación de rendimiento entre CPU y GPU"></canvas>
    </div>
    <div>
      <div class="metric-cards" style="flex-direction: column; gap: 12px;">
        <div class="metric-card" style="width:100%;">
          <div class="number" style="font-size: 2.2em; color: var(--unir-blue);">82x</div>
          <div class="label" style="font-size:0.8em;">Factor de aceleración</div>
        </div>
        <div class="metric-card" style="width:100%;">
          <div class="number">0.84 s</div>
          <div class="label">GPU: segundos por página</div>
        </div>
        <div class="metric-card" style="width:100%;">
          <div class="number warning">69.4 s</div>
          <div class="label">CPU: segundos por página</div>
        </div>
      </div>
      <div class="highlight-box" style="font-size:0.62em; margin-top:12px;">
        64 trials × 5 páginas:<br>
        <strong>CPU:</strong> ~6.2 horas<br>
        <strong>GPU:</strong> ~5 minutos
      </div>
    </div>
  </div>
</section>
<!-- ====== SLIDE 16: OPTIMAL CONFIG ====== -->
<!-- Left: the chosen configuration as a hand-coloured code listing. Right: key takeaways. -->
<section>
  <h2>Configuración Óptima</h2>
  <div class="two-columns">
    <div>
      <!-- Line breaks and indentation in the listing come from <br> and &nbsp;, not from source whitespace. -->
      <div style="background: #1e1e1e; color: #d4d4d4; border-radius: 10px; padding: 20px; font-family: Consolas, monospace; font-size: 0.6em; line-height: 1.7;">
        <span style="color:#569cd6">config_optimizada</span> = {<br>
        &nbsp;&nbsp;<span style="color:#9cdcfe">"textline_orientation"</span>: <span style="color:#4ec9b0">True</span>, <span style="color:#6a9955">&nbsp;# CRÍTICO</span><br>
        &nbsp;&nbsp;<span style="color:#9cdcfe">"use_doc_orientation_classify"</span>: <span style="color:#4ec9b0">True</span>,<br>
        &nbsp;&nbsp;<span style="color:#9cdcfe">"use_doc_unwarping"</span>: <span style="color:#4ec9b0">False</span>, <span style="color:#6a9955">&nbsp;# Innecesario</span><br>
        &nbsp;&nbsp;<span style="color:#9cdcfe">"text_det_thresh"</span>: <span style="color:#b5cea8">0.0462</span>,<br>
        &nbsp;&nbsp;<span style="color:#9cdcfe">"text_det_box_thresh"</span>: <span style="color:#b5cea8">0.4862</span>,<br>
        &nbsp;&nbsp;<span style="color:#9cdcfe">"text_det_unclip_ratio"</span>: <span style="color:#b5cea8">0.0</span>,<br>
        &nbsp;&nbsp;<span style="color:#9cdcfe">"text_rec_score_thresh"</span>: <span style="color:#b5cea8">0.5658</span>,<br>
        }
      </div>
    </div>
    <div>
      <h3 style="font-size:0.8em;">Insights clave</h3>
      <ul class="compact-list">
        <li class="fragment fade-up"><strong><code>textline_orientation = True</code></strong>: Parámetro más impactante (-63.2% CER)</li>
        <li class="fragment fade-up"><strong><code>use_doc_unwarping = False</code></strong>: Procesamiento innecesario para PDFs digitales</li>
        <li class="fragment fade-up"><strong><code>text_det_thresh</code> bajo</strong>: Captura más regiones de texto, reduce omisiones</li>
        <li class="fragment fade-up"><strong>Parámetros booleanos</strong> dominan sobre umbrales numéricos</li>
      </ul>
      <div class="highlight-box" style="font-size: 0.62em; margin-top:12px;">
        Esta configuración es directamente aplicable a otros documentos académicos en español con layouts similares.
      </div>
    </div>
  </div>
</section>
<!-- ====== SLIDE 17: CONCLUSIONS ====== -->
<!-- Left: four numbered contributions. Right: four limitations. Each item is its own fragment. -->
<section>
  <h2>Conclusiones</h2>
  <div class="two-columns">
    <div>
      <h3 style="font-size:0.78em; margin-bottom:10px;">Contribuciones</h3>
      <div class="contribution-item fragment fade-up">
        <div class="contribution-icon">1</div>
        <div><strong>Metodología reproducible</strong> para optimización de hiperparámetros OCR con código abierto</div>
      </div>
      <div class="contribution-item fragment fade-up">
        <div class="contribution-icon">2</div>
        <div><strong>Análisis sistemático</strong> de hiperparámetros PaddleOCR con correlaciones Pearson</div>
      </div>
      <div class="contribution-item fragment fade-up">
        <div class="contribution-icon">3</div>
        <!-- Both figures are quoted so this agrees with the validation slide: 0.79% is the
             5-page optimisation subset, 7.72% is the full 45-page validation. -->
        <div><strong>Configuración validada</strong> para documentos académicos en español (CER 0.79% en el subconjunto de optimización; 7.72% en las 45 páginas)</div>
      </div>
      <div class="contribution-item fragment fade-up">
        <div class="contribution-icon">4</div>
        <div><strong>Infraestructura dockerizada</strong> reproducible con imágenes públicas</div>
      </div>
    </div>
    <div>
      <h3 style="font-size:0.78em; margin-bottom:10px;">Limitaciones</h3>
      <div class="contribution-item fragment fade-up">
        <div class="limitation-icon">!</div>
        <div>Un único tipo de documento (académico UNIR)</div>
      </div>
      <div class="contribution-item fragment fade-up">
        <div class="limitation-icon">!</div>
        <div>Corpus modesto (45 páginas)</div>
      </div>
      <div class="contribution-item fragment fade-up">
        <div class="limitation-icon">!</div>
        <div>Sobreajuste al subconjunto de optimización (5 páginas)</div>
      </div>
      <div class="contribution-item fragment fade-up">
        <div class="limitation-icon">!</div>
        <div><code>text_det_unclip_ratio</code> no explorado</div>
      </div>
    </div>
  </div>
</section>
<!-- ====== SLIDE 18: FUTURE WORK ====== -->
<!-- Three columns of follow-up work: immediate extensions, research lines, practical applications. -->
<section>
  <h2>Líneas de Trabajo Futuro</h2>
  <div class="three-columns" style="font-size:0.72em;">

    <div>
      <h3 style="font-size:0.95em;">Extensiones inmediatas</h3>
      <ul class="compact-list">
        <li>Validación cruzada en otros tipos de documentos (facturas, formularios, manuscritos)</li>
        <li>Subconjunto de optimización más amplio (15-20 páginas)</li>
        <li>Exploración de <code>text_det_unclip_ratio</code></li>
      </ul>
    </div>

    <div>
      <h3 style="font-size:0.95em;">Líneas de investigación</h3>
      <ul class="compact-list">
        <li>Transfer learning de hiperparámetros entre dominios</li>
        <li>Optimización multi-objetivo (CER + WER + velocidad)</li>
        <li>Comparación rigurosa HPO vs fine-tuning</li>
      </ul>
    </div>

    <div>
      <h3 style="font-size:0.95em;">Aplicaciones prácticas</h3>
      <ul class="compact-list">
        <li>Herramienta de configuración automática por tipo de documento</li>
        <li>Integración en pipelines de producción</li>
        <li>Benchmark público de OCR en español</li>
      </ul>
    </div>

  </div>
</section>
<!-- ====== SLIDE 19: THANK YOU ====== -->
<!-- Closing slide: logo, thanks, prompt for questions, author details. -->
<section class="title-slide thanks-slide" data-transition="fade">
  <img src="../instructions/plantilla_individual_files/image001.png" class="logo-large" style="width:180px; margin-bottom:20px;" alt="UNIR">
  <h1 style="margin-bottom: 0.1em;">Gracias</h1>
  <!-- Spanish questions need the opening "¿" as well as the closing "?". -->
  <div class="questions">¿Preguntas?</div>
  <div class="divider"></div>
  <div class="meta" style="font-size:0.6em;">
    <strong>Sergio Jiménez Jiménez</strong><br>
    Máster Universitario en Inteligencia Artificial<br>
    Universidad Internacional de La Rioja (UNIR) | 2025
  </div>
</section>
</div><!-- .slides -->
</div><!-- .reveal -->
<script src="https://cdn.jsdelivr.net/npm/reveal.js@5.0.4/dist/reveal.min.js"></script>
<script>
// Registry of created Chart objects, keyed by chart name. The create*Chart
// functions check it so that each chart is built only once.
const charts = {};

// Colour constants for the charts (same hex values as the CSS custom properties).
const BLUE = '#0098CD';
const BLUE_DARK = '#007AA3';
const LIGHT = '#E6F4F9';
const RED = '#E8654A';
const ORANGE = '#F0A030';
const GREEN = '#2EAD4B';
const GRAY = '#CCCCCC';

// Options spread into each chart's `options`; a chart may override any key.
const commonOptions = {
  responsive: true,
  maintainAspectRatio: true,
  animation: {
    duration: 1200,
    easing: 'easeOutQuart'
  },
  plugins: {
    legend: { display: false }
  }
};
/**
 * Builds the horizontal grouped bar chart on #chartBenchmark (CER and WER for
 * EasyOCR, PaddleOCR and DocTR) and stores it in `charts.benchmark`.
 *
 * Does nothing when the canvas is not in the DOM or the chart already exists,
 * so it is safe to call more than once.
 */
function createBenchmarkChart() {
  const canvas = document.getElementById('chartBenchmark');
  if (!canvas || charts.benchmark) return;

  // Same fallback stack as the `.reveal` CSS rule. With a bare 'Calibri', a
  // machine without that font draws chart text in the browser's default font.
  const fontFamily = "Calibri, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif";

  charts.benchmark = new Chart(canvas, {
    type: 'bar',
    data: {
      labels: ['EasyOCR', 'PaddleOCR', 'DocTR'],
      datasets: [
        {
          label: 'CER (%)',
          data: [11.23, 7.76, 12.06],
          // The middle entry (PaddleOCR) is blue; the other two are gray.
          backgroundColor: [GRAY, BLUE, GRAY],
          borderColor: [GRAY, BLUE_DARK, GRAY],
          borderWidth: 2,
          borderRadius: 6,
          barPercentage: 0.6
        },
        {
          label: 'WER (%)',
          data: [36.36, 11.62, 42.01],
          // Same colours as the CER bars at 40% alpha.
          backgroundColor: ['rgba(204,204,204,0.4)', 'rgba(0,152,205,0.4)', 'rgba(204,204,204,0.4)'],
          borderColor: [GRAY, BLUE, GRAY],
          borderWidth: 2,
          borderRadius: 6,
          barPercentage: 0.6
        }
      ]
    },
    options: {
      ...commonOptions,
      indexAxis: 'y', // horizontal bars
      // Replaces commonOptions.plugins: with two datasets the legend is shown.
      plugins: {
        legend: { display: true, position: 'top', labels: { font: { family: fontFamily, size: 12 } } }
      },
      scales: {
        x: { title: { display: true, text: 'Error Rate (%)', font: { family: fontFamily } }, grid: { color: '#f0f0f0' } },
        y: { grid: { display: false }, ticks: { font: { family: fontFamily, size: 14, weight: 'bold' } } }
      }
    }
  });
}
// Builds the doughnut chart on the #chartTrials canvas, with one slice per
// CER bucket (< 1%, 1-2%, 2-5%, 5-10%).
// NOTE(review): the slice values are presumably trial counts per bucket;
// confirm against the experiment results.
//
// Does nothing when the canvas is missing or the chart already exists.
// The instance is stored in charts.trials.
function createTrialsChart() {
  const ctx = document.getElementById('chartTrials');
  if (!ctx || charts.trials) return;
  // Same fallback stack as the slide CSS. A bare 'Calibri' makes the canvas
  // text drop to the browser's default font on systems without Calibri.
  const fontFamily = "Calibri, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif";
  charts.trials = new Chart(ctx, {
    type: 'doughnut',
    data: {
      labels: ['CER < 1%', 'CER 1-2%', 'CER 2-5%', 'CER 5-10%'],
      datasets: [{
        data: [15, 28, 10, 11],
        // Darkest blue for the lowest-error bucket, gray for the highest.
        backgroundColor: [BLUE_DARK, BLUE, '#7EC8E3', GRAY],
        borderColor: 'white',
        borderWidth: 3,
        hoverOffset: 8
      }]
    },
    options: {
      ...commonOptions,
      cutout: '55%',
      // Replaces commonOptions.plugins entirely to show a bottom legend.
      plugins: {
        legend: {
          display: true,
          position: 'bottom',
          labels: { font: { family: fontFamily, size: 12 }, padding: 15, usePointStyle: true, pointStyle: 'rectRounded' }
        }
      }
    }
  });
}
// Builds the two-bar chart on the #chartTextline canvas comparing the mean
// CER for textline_orientation = False (gray) and True (blue).
//
// Does nothing when the canvas is missing or the chart already exists.
// The instance is stored in charts.textline.
function createTextlineChart() {
  const ctx = document.getElementById('chartTextline');
  if (!ctx || charts.textline) return;
  // Same fallback stack as the slide CSS. A bare 'Calibri' makes the canvas
  // text drop to the browser's default font on systems without Calibri.
  const fontFamily = "Calibri, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif";
  charts.textline = new Chart(ctx, {
    type: 'bar',
    data: {
      labels: ['False', 'True'],
      datasets: [{
        label: 'CER medio (%)',
        data: [4.73, 1.74],
        backgroundColor: [GRAY, BLUE],
        borderColor: ['#aaa', BLUE_DARK],
        borderWidth: 2,
        borderRadius: 8,
        barPercentage: 0.5
      }]
    },
    options: {
      ...commonOptions,
      scales: {
        y: {
          beginAtZero: true,
          // Fixed ceiling above the tallest bar (4.73).
          max: 6,
          title: { display: true, text: 'CER (%)', font: { family: fontFamily, size: 13 } },
          grid: { color: '#f0f0f0' }
        },
        x: {
          title: { display: true, text: 'textline_orientation', font: { family: fontFamily, size: 13, weight: 'bold' } },
          grid: { display: false },
          ticks: { font: { family: fontFamily, size: 16, weight: 'bold' } }
        }
      },
      // Replaces commonOptions.plugins, so the hidden legend is restated here.
      plugins: {
        legend: { display: false },
        tooltip: {
          callbacks: {
            // `item` is the hovered tooltip item (named to avoid shadowing the canvas `ctx`).
            label: (item) => `CER: ${item.parsed.y}%`
          }
        }
      }
    }
  });
}
// Builds the horizontal bar chart on the #chartCorrelation canvas showing a
// signed correlation value per hyperparameter (axis title: Pearson), on a
// fixed -1..1 axis. Positive values are drawn in red, the rest in blue.
//
// Does nothing when the canvas is missing or the chart already exists.
// The instance is stored in charts.correlation.
function createCorrelationChart() {
  const ctx = document.getElementById('chartCorrelation');
  if (!ctx || charts.correlation) return;
  // Same fallback stack as the slide CSS. A bare 'Calibri' makes the canvas
  // text drop to the browser's default font on systems without Calibri.
  const fontFamily = "Calibri, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif";
  // params[i] is the label for values[i]; listed from most positive to most negative.
  const params = [
    'use_doc_unwarping',
    'text_det_thresh',
    'text_det_box_thresh',
    'text_rec_score_thresh',
    'textline_orientation',
    'use_doc_orient_classify'
  ];
  const values = [0.879, 0.428, 0.311, -0.268, -0.535, -0.712];
  // Fill and border are both chosen from the sign of the value.
  const colors = values.map(v => v > 0 ? RED : BLUE);
  const borderColors = values.map(v => v > 0 ? '#C04030' : BLUE_DARK);
  charts.correlation = new Chart(ctx, {
    type: 'bar',
    data: {
      labels: params,
      datasets: [{
        data: values,
        backgroundColor: colors,
        borderColor: borderColors,
        borderWidth: 1.5,
        borderRadius: 4,
        barPercentage: 0.65
      }]
    },
    options: {
      ...commonOptions,
      // Horizontal bars: parameters on y, correlation on x.
      indexAxis: 'y',
      scales: {
        x: {
          min: -1, max: 1,
          title: { display: true, text: 'Correlación Pearson', font: { family: fontFamily, size: 11 } },
          // Draw the zero grid line darker so it reads as the baseline.
          grid: { color: (gridCtx) => gridCtx.tick.value === 0 ? '#666' : '#f0f0f0' }
        },
        y: {
          grid: { display: false },
          ticks: { font: { family: 'Consolas, monospace', size: 10 } }
        }
      }
    }
  });
}
// Builds the horizontal bar chart on the #chartImportance canvas ranking the
// hyperparameters by absolute correlation on a 0..1 axis. The values are the
// magnitudes of the coefficients used in createCorrelationChart, sorted in
// descending order; bars fade from opaque to translucent blue down the ranking.
//
// Does nothing when the canvas is missing or the chart already exists.
// The instance is stored in charts.importance.
function createImportanceChart() {
  const ctx = document.getElementById('chartImportance');
  if (!ctx || charts.importance) return;
  // Same fallback stack as the slide CSS. A bare 'Calibri' makes the canvas
  // text drop to the browser's default font on systems without Calibri.
  const fontFamily = "Calibri, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif";
  // params[i] is the label for values[i].
  const params = [
    'use_doc_unwarping',
    'use_doc_orient_classify',
    'textline_orientation',
    'text_det_thresh',
    'text_det_box_thresh',
    'text_rec_score_thresh'
  ];
  const values = [0.879, 0.712, 0.535, 0.428, 0.311, 0.268];
  // Opacity drops by 0.12 per rank: 1.0 for the first bar, 0.4 for the sixth.
  const colors = values.map((_, i) => {
    const alpha = 1 - (i * 0.12);
    return `rgba(0, 152, 205, ${alpha})`;
  });
  charts.importance = new Chart(ctx, {
    type: 'bar',
    data: {
      labels: params,
      datasets: [{
        data: values,
        backgroundColor: colors,
        borderColor: BLUE_DARK,
        borderWidth: 1,
        borderRadius: 4,
        barPercentage: 0.65
      }]
    },
    options: {
      ...commonOptions,
      // Horizontal bars: parameters on y, magnitude on x.
      indexAxis: 'y',
      scales: {
        x: {
          beginAtZero: true, max: 1,
          title: { display: true, text: '|Correlación|', font: { family: fontFamily, size: 11 } },
          grid: { color: '#f0f0f0' }
        },
        y: {
          grid: { display: false },
          ticks: { font: { family: 'Consolas, monospace', size: 10 } }
        }
      }
    }
  });
}
// Builds the grouped bar chart on the #chartValidation canvas comparing the
// "Baseline" (gray) and "Optimizado" (blue) error rates for three metrics:
// CER and WER over 45 pages, and the CER of the best trial.
//
// Does nothing when the canvas is missing or the chart already exists.
// The instance is stored in charts.validation.
function createValidationChart() {
  const ctx = document.getElementById('chartValidation');
  if (!ctx || charts.validation) return;
  // Same fallback stack as the slide CSS. A bare 'Calibri' makes the canvas
  // text drop to the browser's default font on systems without Calibri.
  const fontFamily = "Calibri, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif";
  charts.validation = new Chart(ctx, {
    type: 'bar',
    data: {
      labels: ['CER (45 pág)', 'WER (45 pág)', 'CER (mejor trial)'],
      datasets: [
        {
          label: 'Baseline',
          data: [8.85, 13.05, 7.76],
          backgroundColor: 'rgba(204,204,204,0.7)',
          borderColor: '#aaa',
          borderWidth: 2,
          borderRadius: 6,
          barPercentage: 0.7
        },
        {
          label: 'Optimizado',
          data: [7.72, 11.40, 0.79],
          backgroundColor: BLUE,
          borderColor: BLUE_DARK,
          borderWidth: 2,
          borderRadius: 6,
          barPercentage: 0.7
        }
      ]
    },
    options: {
      ...commonOptions,
      // Replaces commonOptions.plugins entirely to turn the legend on.
      plugins: {
        legend: { display: true, position: 'top', labels: { font: { family: fontFamily, size: 13 } } }
      },
      scales: {
        y: {
          beginAtZero: true,
          title: { display: true, text: 'Error Rate (%)', font: { family: fontFamily } },
          grid: { color: '#f0f0f0' }
        },
        x: {
          grid: { display: false },
          ticks: { font: { family: fontFamily, size: 12 } }
        }
      }
    }
  });
}
// Builds the horizontal bar chart on the #chartGPU canvas comparing seconds
// per page for the CPU (orange) and the GPU (blue).
//
// Does nothing when the canvas is missing or the chart already exists.
// The instance is stored in charts.gpu.
function createGPUChart() {
  const ctx = document.getElementById('chartGPU');
  if (!ctx || charts.gpu) return;
  // Same fallback stack as the slide CSS. A bare 'Calibri' makes the canvas
  // text drop to the browser's default font on systems without Calibri.
  const fontFamily = "Calibri, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif";
  charts.gpu = new Chart(ctx, {
    type: 'bar',
    data: {
      labels: ['CPU (Ryzen 7 5800H)', 'GPU (RTX 3060)'],
      datasets: [{
        label: 'Segundos por página',
        data: [69.4, 0.84],
        backgroundColor: [ORANGE, BLUE],
        borderColor: ['#CC8020', BLUE_DARK],
        borderWidth: 2,
        borderRadius: 8,
        barPercentage: 0.5
      }]
    },
    options: {
      ...commonOptions,
      // Horizontal bars: devices on y, seconds on x.
      indexAxis: 'y',
      scales: {
        x: {
          beginAtZero: true,
          title: { display: true, text: 'Segundos por página', font: { family: fontFamily, size: 13 } },
          grid: { color: '#f0f0f0' }
        },
        y: {
          grid: { display: false },
          ticks: { font: { family: fontFamily, size: 14, weight: 'bold' } }
        }
      },
      // Replaces commonOptions.plugins, so the hidden legend is restated here.
      plugins: {
        legend: { display: false },
        tooltip: {
          callbacks: {
            // The value sits on x because the bars are horizontal.
            // `item` is named to avoid shadowing the canvas `ctx`.
            label: (item) => `${item.parsed.x} s/página`
          }
        }
      }
    }
  });
}
// Lookup table from a slide's data-chart value to the function that builds
// the chart(s) for that slide.
const chartCreators = {
  benchmark: createBenchmarkChart,
  trials: createTrialsChart,
  textline: createTextlineChart,
  // The "correlations" key builds two charts with a single call.
  correlations() {
    createCorrelationChart();
    createImportanceChart();
  },
  validation: createValidationChart,
  gpu: createGPUChart
};
// Start the reveal.js deck. See the reveal.js configuration reference for the
// exact semantics of each option.
Reveal.initialize({
  // Layout: nominal 1280x720 slide size with a 6% margin factor, content not
  // vertically centred.
  width: 1280,
  height: 720,
  margin: 0.06,
  center: false,

  // Navigation and chrome.
  hash: true,
  slideNumber: 'c/t',
  controlsTutorial: false,

  // Slide transitions.
  transition: 'slide',
  transitionSpeed: 'default'
});
// Schedules creation of the chart(s) declared on a slide through its
// data-chart attribute.
//   slide   - the slide element that became current; may be undefined
//   delayMs - how long to wait before building, so the canvas is visible and
//             laid out when Chart.js measures it
// Slides without data-chart are ignored. An unknown data-chart value is
// reported on the console instead of failing silently.
function scheduleSlideCharts(slide, delayMs) {
  const chartType = slide && slide.dataset ? slide.dataset.chart : undefined;
  if (!chartType) return;
  // Own properties only, so values like "constructor" or "__proto__" cannot
  // resolve to something inherited from Object.prototype.
  const isKnown = Object.prototype.hasOwnProperty.call(chartCreators, chartType);
  const create = isKnown ? chartCreators[chartType] : undefined;
  if (typeof create !== 'function') {
    console.warn(`No chart creator registered for data-chart="${chartType}"`);
    return;
  }
  setTimeout(() => create(), delayMs);
}
// Build charts lazily when their slide becomes current. The create*Chart
// functions are idempotent, so revisiting a slide is harmless.
Reveal.on('slidechanged', (event) => {
  // Small delay to ensure the canvas is visible before rendering.
  scheduleSlideCharts(event.currentSlide, 100);
});
// Also handle the slide shown on load, with a longer delay than on a
// regular slide change.
Reveal.on('ready', (event) => {
  scheduleSlideCharts(event.currentSlide, 300);
});
</script>
</body>
</html>