1320 lines
44 KiB
HTML
1320 lines
44 KiB
HTML
|
|
<!DOCTYPE html>
|
|||
|
|
<html lang="es">
|
|||
|
|
<head>
|
|||
|
|
<meta charset="utf-8">
|
|||
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|||
|
|
<title>TFM - Optimización de Hiperparámetros OCR con Ray Tune</title>
|
|||
|
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/reveal.js@5.0.4/dist/reveal.min.css">
|
|||
|
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/reveal.js@5.0.4/dist/theme/white.min.css">
|
|||
|
|
<script src="https://cdn.jsdelivr.net/npm/chart.js@4"></script>
|
|||
|
|
<style>
|
|||
|
|
:root {
|
|||
|
|
--unir-blue: #0098CD;
|
|||
|
|
--unir-blue-dark: #007AA3;
|
|||
|
|
--unir-light: #E6F4F9;
|
|||
|
|
--unir-text: #404040;
|
|||
|
|
--unir-gray: #E7E6E6;
|
|||
|
|
--unir-red: #E8654A;
|
|||
|
|
--unir-orange: #F0A030;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
.reveal {
|
|||
|
|
font-family: 'Calibri', 'Segoe UI', 'Helvetica Neue', Arial, sans-serif;
|
|||
|
|
font-size: 28px;
|
|||
|
|
color: var(--unir-text);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
.reveal h1, .reveal h2, .reveal h3 {
|
|||
|
|
font-family: 'Calibri Light', 'Calibri', 'Segoe UI', Arial, sans-serif;
|
|||
|
|
color: var(--unir-blue);
|
|||
|
|
text-transform: none;
|
|||
|
|
letter-spacing: -0.02em;
|
|||
|
|
font-weight: 600;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
.reveal h1 { font-size: 1.8em; }
|
|||
|
|
.reveal h2 { font-size: 1.4em; margin-bottom: 0.6em; }
|
|||
|
|
.reveal h3 { font-size: 1.1em; }
|
|||
|
|
|
|||
|
|
.reveal .slides section {
|
|||
|
|
text-align: left;
|
|||
|
|
padding: 20px 40px;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Corner logo on all slides except title */
|
|||
|
|
.reveal .slides section:not(.title-slide)::after {
|
|||
|
|
content: '';
|
|||
|
|
position: absolute;
|
|||
|
|
top: 15px;
|
|||
|
|
right: 20px;
|
|||
|
|
width: 110px;
|
|||
|
|
height: 30px;
|
|||
|
|
background: url('../instructions/plantilla_individual_files/image001.png') no-repeat center;
|
|||
|
|
background-size: contain;
|
|||
|
|
opacity: 0.8;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Bottom accent line */
|
|||
|
|
.reveal .slides section:not(.title-slide)::before {
|
|||
|
|
content: '';
|
|||
|
|
position: absolute;
|
|||
|
|
bottom: 0;
|
|||
|
|
left: 0;
|
|||
|
|
right: 0;
|
|||
|
|
height: 4px;
|
|||
|
|
background: linear-gradient(90deg, var(--unir-blue), var(--unir-light));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Title slide */
|
|||
|
|
.title-slide {
|
|||
|
|
text-align: center !important;
|
|||
|
|
}
|
|||
|
|
.title-slide h1 {
|
|||
|
|
font-size: 1.5em !important;
|
|||
|
|
line-height: 1.3;
|
|||
|
|
margin-top: 0.2em;
|
|||
|
|
}
|
|||
|
|
.title-slide .subtitle {
|
|||
|
|
color: var(--unir-blue);
|
|||
|
|
font-size: 0.75em;
|
|||
|
|
font-weight: 600;
|
|||
|
|
margin-bottom: 0.5em;
|
|||
|
|
text-transform: uppercase;
|
|||
|
|
letter-spacing: 0.1em;
|
|||
|
|
}
|
|||
|
|
.title-slide .meta {
|
|||
|
|
font-size: 0.65em;
|
|||
|
|
color: #666;
|
|||
|
|
line-height: 1.8;
|
|||
|
|
}
|
|||
|
|
.title-slide .meta strong {
|
|||
|
|
color: var(--unir-text);
|
|||
|
|
}
|
|||
|
|
.title-slide .logo-large {
|
|||
|
|
width: 220px;
|
|||
|
|
margin-bottom: 10px;
|
|||
|
|
}
|
|||
|
|
.title-slide .divider {
|
|||
|
|
width: 120px;
|
|||
|
|
height: 3px;
|
|||
|
|
background: var(--unir-blue);
|
|||
|
|
margin: 15px auto;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Thank you slide */
|
|||
|
|
.thanks-slide {
|
|||
|
|
text-align: center !important;
|
|||
|
|
}
|
|||
|
|
.thanks-slide h1 {
|
|||
|
|
font-size: 2.5em !important;
|
|||
|
|
margin-bottom: 0.3em;
|
|||
|
|
}
|
|||
|
|
.thanks-slide .questions {
|
|||
|
|
font-size: 1.2em;
|
|||
|
|
color: #666;
|
|||
|
|
margin-bottom: 1em;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Two column layout */
|
|||
|
|
.two-columns {
|
|||
|
|
display: grid;
|
|||
|
|
grid-template-columns: 1fr 1fr;
|
|||
|
|
gap: 30px;
|
|||
|
|
align-items: start;
|
|||
|
|
}
|
|||
|
|
.two-columns-60-40 {
|
|||
|
|
display: grid;
|
|||
|
|
grid-template-columns: 3fr 2fr;
|
|||
|
|
gap: 30px;
|
|||
|
|
align-items: start;
|
|||
|
|
}
|
|||
|
|
.two-columns-40-60 {
|
|||
|
|
display: grid;
|
|||
|
|
grid-template-columns: 2fr 3fr;
|
|||
|
|
gap: 30px;
|
|||
|
|
align-items: start;
|
|||
|
|
}
|
|||
|
|
.three-columns {
|
|||
|
|
display: grid;
|
|||
|
|
grid-template-columns: 1fr 1fr 1fr;
|
|||
|
|
gap: 20px;
|
|||
|
|
align-items: start;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Highlight box */
|
|||
|
|
.highlight-box {
|
|||
|
|
background: var(--unir-light);
|
|||
|
|
border-left: 5px solid var(--unir-blue);
|
|||
|
|
padding: 15px 20px;
|
|||
|
|
border-radius: 0 8px 8px 0;
|
|||
|
|
margin: 15px 0;
|
|||
|
|
font-size: 0.85em;
|
|||
|
|
}
|
|||
|
|
.highlight-box.center-box {
|
|||
|
|
border-left: none;
|
|||
|
|
border-top: 3px solid var(--unir-blue);
|
|||
|
|
border-radius: 0 0 8px 8px;
|
|||
|
|
text-align: center;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Metric cards */
|
|||
|
|
.metric-cards {
|
|||
|
|
display: flex;
|
|||
|
|
gap: 15px;
|
|||
|
|
flex-wrap: wrap;
|
|||
|
|
justify-content: center;
|
|||
|
|
}
|
|||
|
|
.metric-card {
|
|||
|
|
background: white;
|
|||
|
|
border: 2px solid var(--unir-light);
|
|||
|
|
border-radius: 12px;
|
|||
|
|
padding: 15px 20px;
|
|||
|
|
text-align: center;
|
|||
|
|
min-width: 130px;
|
|||
|
|
box-shadow: 0 2px 8px rgba(0,0,0,0.06);
|
|||
|
|
transition: transform 0.2s;
|
|||
|
|
}
|
|||
|
|
.metric-card:hover {
|
|||
|
|
transform: translateY(-2px);
|
|||
|
|
}
|
|||
|
|
.metric-card .number {
|
|||
|
|
font-size: 1.6em;
|
|||
|
|
font-weight: 700;
|
|||
|
|
color: var(--unir-blue);
|
|||
|
|
line-height: 1.2;
|
|||
|
|
}
|
|||
|
|
.metric-card .number.success { color: #2EAD4B; }
|
|||
|
|
.metric-card .number.warning { color: var(--unir-orange); }
|
|||
|
|
.metric-card .number.danger { color: var(--unir-red); }
|
|||
|
|
.metric-card .label {
|
|||
|
|
font-size: 0.6em;
|
|||
|
|
color: #888;
|
|||
|
|
margin-top: 4px;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Data table */
|
|||
|
|
.data-table {
|
|||
|
|
width: 100%;
|
|||
|
|
border-collapse: collapse;
|
|||
|
|
font-size: 0.75em;
|
|||
|
|
margin: 10px 0;
|
|||
|
|
}
|
|||
|
|
.data-table thead th {
|
|||
|
|
background: var(--unir-blue);
|
|||
|
|
color: white;
|
|||
|
|
padding: 10px 14px;
|
|||
|
|
text-align: left;
|
|||
|
|
font-weight: 600;
|
|||
|
|
}
|
|||
|
|
.data-table thead th:first-child {
|
|||
|
|
border-radius: 8px 0 0 0;
|
|||
|
|
}
|
|||
|
|
.data-table thead th:last-child {
|
|||
|
|
border-radius: 0 8px 0 0;
|
|||
|
|
}
|
|||
|
|
.data-table tbody td {
|
|||
|
|
padding: 8px 14px;
|
|||
|
|
border-bottom: 1px solid var(--unir-gray);
|
|||
|
|
}
|
|||
|
|
.data-table tbody tr:nth-child(even) {
|
|||
|
|
background: #FAFCFE;
|
|||
|
|
}
|
|||
|
|
.data-table tbody tr.highlight {
|
|||
|
|
background: var(--unir-light);
|
|||
|
|
font-weight: 600;
|
|||
|
|
}
|
|||
|
|
.data-table tbody tr:last-child td:first-child {
|
|||
|
|
border-radius: 0 0 0 8px;
|
|||
|
|
}
|
|||
|
|
.data-table tbody tr:last-child td:last-child {
|
|||
|
|
border-radius: 0 0 8px 0;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Engine cards */
|
|||
|
|
.engine-card {
|
|||
|
|
background: white;
|
|||
|
|
border: 2px solid var(--unir-gray);
|
|||
|
|
border-radius: 12px;
|
|||
|
|
padding: 18px;
|
|||
|
|
text-align: center;
|
|||
|
|
transition: all 0.3s;
|
|||
|
|
}
|
|||
|
|
.engine-card.selected {
|
|||
|
|
border-color: var(--unir-blue);
|
|||
|
|
background: var(--unir-light);
|
|||
|
|
box-shadow: 0 4px 16px rgba(0,152,205,0.2);
|
|||
|
|
}
|
|||
|
|
.engine-card h3 {
|
|||
|
|
margin: 0 0 5px 0;
|
|||
|
|
font-size: 0.9em;
|
|||
|
|
}
|
|||
|
|
.engine-card .developer {
|
|||
|
|
font-size: 0.55em;
|
|||
|
|
color: #999;
|
|||
|
|
margin-bottom: 10px;
|
|||
|
|
}
|
|||
|
|
.engine-card .arch {
|
|||
|
|
font-size: 0.6em;
|
|||
|
|
color: var(--unir-text);
|
|||
|
|
background: var(--unir-light);
|
|||
|
|
border-radius: 6px;
|
|||
|
|
padding: 4px 8px;
|
|||
|
|
display: inline-block;
|
|||
|
|
margin-bottom: 8px;
|
|||
|
|
}
|
|||
|
|
.engine-card.selected .arch {
|
|||
|
|
background: white;
|
|||
|
|
}
|
|||
|
|
.engine-card ul {
|
|||
|
|
text-align: left;
|
|||
|
|
font-size: 0.6em;
|
|||
|
|
margin: 0;
|
|||
|
|
padding-left: 16px;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Agenda items */
|
|||
|
|
.agenda-item {
|
|||
|
|
display: flex;
|
|||
|
|
align-items: center;
|
|||
|
|
gap: 15px;
|
|||
|
|
margin: 12px 0;
|
|||
|
|
font-size: 0.85em;
|
|||
|
|
}
|
|||
|
|
.agenda-number {
|
|||
|
|
width: 36px;
|
|||
|
|
height: 36px;
|
|||
|
|
background: var(--unir-blue);
|
|||
|
|
color: white;
|
|||
|
|
border-radius: 50%;
|
|||
|
|
display: flex;
|
|||
|
|
align-items: center;
|
|||
|
|
justify-content: center;
|
|||
|
|
font-weight: 700;
|
|||
|
|
font-size: 0.9em;
|
|||
|
|
flex-shrink: 0;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Tags */
|
|||
|
|
.tag {
|
|||
|
|
display: inline-block;
|
|||
|
|
background: var(--unir-light);
|
|||
|
|
color: var(--unir-blue-dark);
|
|||
|
|
border-radius: 20px;
|
|||
|
|
padding: 3px 12px;
|
|||
|
|
font-size: 0.6em;
|
|||
|
|
font-weight: 600;
|
|||
|
|
margin: 2px;
|
|||
|
|
}
|
|||
|
|
.tag.bool { background: #FFF3E0; color: #E65100; }
|
|||
|
|
.tag.cont { background: #E8F5E9; color: #2E7D32; }
|
|||
|
|
.tag.fixed { background: var(--unir-gray); color: #888; }
|
|||
|
|
|
|||
|
|
/* Chart containers */
|
|||
|
|
.chart-container {
|
|||
|
|
position: relative;
|
|||
|
|
width: 100%;
|
|||
|
|
max-height: 420px;
|
|||
|
|
}
|
|||
|
|
.chart-container canvas {
|
|||
|
|
max-height: 420px;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Bullet lists */
|
|||
|
|
.reveal ul, .reveal ol {
|
|||
|
|
font-size: 0.8em;
|
|||
|
|
line-height: 1.6;
|
|||
|
|
}
|
|||
|
|
.reveal li {
|
|||
|
|
margin-bottom: 6px;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Compact list */
|
|||
|
|
.compact-list { font-size: 0.7em; }
|
|||
|
|
.compact-list li { margin-bottom: 3px; }
|
|||
|
|
|
|||
|
|
/* Objective check */
|
|||
|
|
.obj-list {
|
|||
|
|
list-style: none;
|
|||
|
|
padding: 0;
|
|||
|
|
font-size: 0.72em;
|
|||
|
|
}
|
|||
|
|
.obj-list li {
|
|||
|
|
padding: 6px 0 6px 30px;
|
|||
|
|
position: relative;
|
|||
|
|
}
|
|||
|
|
.obj-list li::before {
|
|||
|
|
content: '';
|
|||
|
|
position: absolute;
|
|||
|
|
left: 0;
|
|||
|
|
top: 8px;
|
|||
|
|
width: 18px;
|
|||
|
|
height: 18px;
|
|||
|
|
border: 2px solid var(--unir-blue);
|
|||
|
|
border-radius: 50%;
|
|||
|
|
}
|
|||
|
|
.obj-list li.done::before {
|
|||
|
|
background: var(--unir-blue);
|
|||
|
|
box-shadow: inset 0 0 0 3px white;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Conclusion items */
|
|||
|
|
.contribution-item {
|
|||
|
|
display: flex;
|
|||
|
|
align-items: start;
|
|||
|
|
gap: 10px;
|
|||
|
|
margin: 8px 0;
|
|||
|
|
font-size: 0.72em;
|
|||
|
|
}
|
|||
|
|
.contribution-icon {
|
|||
|
|
width: 28px;
|
|||
|
|
height: 28px;
|
|||
|
|
background: var(--unir-blue);
|
|||
|
|
color: white;
|
|||
|
|
border-radius: 6px;
|
|||
|
|
display: flex;
|
|||
|
|
align-items: center;
|
|||
|
|
justify-content: center;
|
|||
|
|
font-size: 14px;
|
|||
|
|
flex-shrink: 0;
|
|||
|
|
}
|
|||
|
|
.limitation-icon {
|
|||
|
|
width: 28px;
|
|||
|
|
height: 28px;
|
|||
|
|
background: var(--unir-orange);
|
|||
|
|
color: white;
|
|||
|
|
border-radius: 6px;
|
|||
|
|
display: flex;
|
|||
|
|
align-items: center;
|
|||
|
|
justify-content: center;
|
|||
|
|
font-size: 14px;
|
|||
|
|
flex-shrink: 0;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Figure caption */
|
|||
|
|
.fig-caption {
|
|||
|
|
text-align: center;
|
|||
|
|
font-size: 0.55em;
|
|||
|
|
color: #999;
|
|||
|
|
font-style: italic;
|
|||
|
|
margin-top: 5px;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Comparison table for fine-tuning vs HPO */
|
|||
|
|
.compare-row {
|
|||
|
|
display: grid;
|
|||
|
|
grid-template-columns: 140px 1fr 1fr;
|
|||
|
|
gap: 0;
|
|||
|
|
font-size: 0.65em;
|
|||
|
|
}
|
|||
|
|
.compare-row.header > div {
|
|||
|
|
background: var(--unir-blue);
|
|||
|
|
color: white;
|
|||
|
|
padding: 8px 12px;
|
|||
|
|
font-weight: 600;
|
|||
|
|
}
|
|||
|
|
.compare-row > div {
|
|||
|
|
padding: 6px 12px;
|
|||
|
|
border-bottom: 1px solid var(--unir-gray);
|
|||
|
|
}
|
|||
|
|
.compare-row .label-col {
|
|||
|
|
font-weight: 600;
|
|||
|
|
background: #FAFCFE;
|
|||
|
|
}
|
|||
|
|
.compare-row .highlight-col {
|
|||
|
|
background: var(--unir-light);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Slide number */
|
|||
|
|
.reveal .slide-number {
|
|||
|
|
color: var(--unir-blue);
|
|||
|
|
font-size: 14px;
|
|||
|
|
font-family: 'Calibri', sans-serif;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Progress bar */
|
|||
|
|
.reveal .progress span {
|
|||
|
|
background: var(--unir-blue);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Improvements arrow */
|
|||
|
|
.improvement {
|
|||
|
|
color: #2EAD4B;
|
|||
|
|
font-weight: 700;
|
|||
|
|
font-size: 0.8em;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Code/param names */
|
|||
|
|
code, .param {
|
|||
|
|
font-family: 'Consolas', 'Courier New', monospace;
|
|||
|
|
background: var(--unir-light);
|
|||
|
|
padding: 2px 6px;
|
|||
|
|
border-radius: 4px;
|
|||
|
|
font-size: 0.85em;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Section divider */
|
|||
|
|
.section-divider {
|
|||
|
|
text-align: center !important;
|
|||
|
|
}
|
|||
|
|
.section-divider h2 {
|
|||
|
|
font-size: 1.8em !important;
|
|||
|
|
}
|
|||
|
|
.section-divider .section-number {
|
|||
|
|
font-size: 3em;
|
|||
|
|
color: var(--unir-blue);
|
|||
|
|
opacity: 0.2;
|
|||
|
|
font-weight: 700;
|
|||
|
|
}
|
|||
|
|
</style>
|
|||
|
|
</head>
|
|||
|
|
<body>
|
|||
|
|
<div class="reveal">
|
|||
|
|
<div class="slides">
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 1: TITLE ====== -->
|
|||
|
|
<section class="title-slide" data-transition="fade">
|
|||
|
|
<img src="../instructions/plantilla_individual_files/image001.png" class="logo-large" alt="UNIR">
|
|||
|
|
<div class="subtitle">Trabajo Fin de Máster</div>
|
|||
|
|
<h1>Optimización de Hiperparámetros OCR con Ray Tune para Documentos Académicos en Español</h1>
|
|||
|
|
<div class="divider"></div>
|
|||
|
|
<div class="meta">
|
|||
|
|
<strong>Sergio Jiménez Jiménez</strong><br>
|
|||
|
|
Director: Javier Rodrigo Villazón Terrazas<br>
|
|||
|
|
Máster Universitario en Inteligencia Artificial<br>
|
|||
|
|
2025
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 2: AGENDA ====== -->
|
|||
|
|
<section>
|
|||
|
|
<h2>Agenda</h2>
|
|||
|
|
<div class="agenda-item fragment fade-up"><div class="agenda-number">1</div><div>Motivación y planteamiento del problema</div></div>
|
|||
|
|
<div class="agenda-item fragment fade-up"><div class="agenda-number">2</div><div>Objetivos y estado del arte</div></div>
|
|||
|
|
<div class="agenda-item fragment fade-up"><div class="agenda-number">3</div><div>Metodología y arquitectura</div></div>
|
|||
|
|
<div class="agenda-item fragment fade-up"><div class="agenda-number">4</div><div>Resultados experimentales</div></div>
|
|||
|
|
<div class="agenda-item fragment fade-up"><div class="agenda-number">5</div><div>Conclusiones y trabajo futuro</div></div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 3: MOTIVATION ====== -->
|
|||
|
|
<section>
|
|||
|
|
<h2>Motivación</h2>
|
|||
|
|
<div class="two-columns">
|
|||
|
|
<div>
|
|||
|
|
<ul>
|
|||
|
|
<li>La digitalización documental es una <strong>necesidad estratégica</strong> para organizaciones</li>
|
|||
|
|
<li>OCR como puente entre el mundo físico y digital</li>
|
|||
|
|
<li>Documentos en español: caracteres especiales ausentes en conjuntos de entrenamiento internacionales</li>
|
|||
|
|
<li>Modelos preentrenados: <strong>rendimiento subóptimo</strong> fuera de benchmarks estándar</li>
|
|||
|
|
<li>Fine-tuning requiere infraestructura costosa y datos etiquetados</li>
|
|||
|
|
</ul>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<h3 style="font-size:0.8em; text-align:center; margin-bottom: 10px;">Errores típicos en español</h3>
|
|||
|
|
<table class="data-table" style="font-size:0.85em;">
|
|||
|
|
<thead><tr><th>Original</th><th>OCR</th><th>Error</th></tr></thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr><td>más</td><td>mas</td><td>Pérdida de acento</td></tr>
|
|||
|
|
<tr><td>año</td><td>ano</td><td>Pérdida de eñe</td></tr>
|
|||
|
|
<tr><td>¿Cómo</td><td>Como</td><td>Signos especiales</td></tr>
|
|||
|
|
<tr><td>titulación</td><td>titulacióon</td><td>Duplicación</td></tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 4: PROBLEM STATEMENT ====== -->
|
|||
|
|
<section>
|
|||
|
|
<h2>Planteamiento del Problema</h2>
|
|||
|
|
<div class="highlight-box center-box" style="font-size:0.78em; margin-bottom: 20px;">
|
|||
|
|
<em>¿Es posible mejorar significativamente el rendimiento de modelos OCR preentrenados para documentos en español mediante la optimización sistemática de hiperparámetros, sin requerir fine-tuning?</em>
|
|||
|
|
</div>
|
|||
|
|
<div style="margin-top:10px;">
|
|||
|
|
<div class="compare-row header">
|
|||
|
|
<div></div><div>Fine-tuning completo</div><div>Optimización de hiperparámetros</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="compare-row">
|
|||
|
|
<div class="label-col">Datos</div><div>Miles de imágenes etiquetadas</div><div class="highlight-col">Subconjunto de validación</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="compare-row">
|
|||
|
|
<div class="label-col">Hardware</div><div>GPU alta memoria (&gt;16 GB)</div><div class="highlight-col">CPU / GPU de consumo</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="compare-row">
|
|||
|
|
<div class="label-col">Tiempo</div><div>Días / semanas</div><div class="highlight-col">Minutos / horas</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="compare-row">
|
|||
|
|
<div class="label-col">Expertise</div><div>Alto (ML avanzado)</div><div class="highlight-col">Bajo-medio</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="compare-row">
|
|||
|
|
<div class="label-col">Riesgo</div><div>Sobreajuste, olvido catastrófico</div><div class="highlight-col">Limitado, reversible</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 5: OBJECTIVES ====== -->
|
|||
|
|
<section>
|
|||
|
|
<h2>Objetivos</h2>
|
|||
|
|
<div class="highlight-box" style="margin-bottom: 15px;">
|
|||
|
|
<strong>Objetivo general:</strong> Optimizar PaddleOCR para documentos académicos en español alcanzando un <strong>CER &lt; 2%</strong> sin fine-tuning del modelo base.
|
|||
|
|
</div>
|
|||
|
|
<ul class="obj-list">
|
|||
|
|
<li class="done fragment fade-up"><strong>OE1:</strong> Comparar tres motores OCR open-source (EasyOCR, PaddleOCR, DocTR)</li>
|
|||
|
|
<li class="done fragment fade-up"><strong>OE2:</strong> Preparar dataset de evaluación de 45 páginas con ground truth</li>
|
|||
|
|
<li class="done fragment fade-up"><strong>OE3:</strong> Identificar hiperparámetros críticos mediante análisis de correlación</li>
|
|||
|
|
<li class="done fragment fade-up"><strong>OE4:</strong> Ejecutar 64 trials de optimización con Ray Tune + Optuna</li>
|
|||
|
|
<li class="done fragment fade-up"><strong>OE5:</strong> Validar la configuración optimizada frente al baseline</li>
|
|||
|
|
</ul>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 6: STATE OF THE ART ====== -->
|
|||
|
|
<section>
|
|||
|
|
<h2>Estado del Arte: Motores OCR</h2>
|
|||
|
|
<div class="three-columns" style="margin-bottom: 15px;">
|
|||
|
|
<div class="engine-card">
|
|||
|
|
<h3>EasyOCR</h3>
|
|||
|
|
<div class="developer">JaidedAI</div>
|
|||
|
|
<div class="arch">CRAFT + CRNN</div>
|
|||
|
|
<ul>
|
|||
|
|
<li>80+ idiomas</li>
|
|||
|
|
<li>Fácil de usar</li>
|
|||
|
|
<li>Baja configurabilidad</li>
|
|||
|
|
</ul>
|
|||
|
|
</div>
|
|||
|
|
<div class="engine-card selected">
|
|||
|
|
<h3>PaddleOCR</h3>
|
|||
|
|
<div class="developer">Baidu / PaddlePaddle</div>
|
|||
|
|
<div class="arch">DB + SVTR (PP-OCRv5)</div>
|
|||
|
|
<ul>
|
|||
|
|
<li>Alta configurabilidad</li>
|
|||
|
|
<li>Pipeline modular</li>
|
|||
|
|
<li>Soporte español dedicado</li>
|
|||
|
|
</ul>
|
|||
|
|
</div>
|
|||
|
|
<div class="engine-card">
|
|||
|
|
<h3>DocTR</h3>
|
|||
|
|
<div class="developer">Mindee</div>
|
|||
|
|
<div class="arch">DB/LinkNet + CRNN/SAR</div>
|
|||
|
|
<ul>
|
|||
|
|
<li>TF y PyTorch</li>
|
|||
|
|
<li>Soporte español limitado</li>
|
|||
|
|
<li>Rápido en inferencia</li>
|
|||
|
|
</ul>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
<img src="figures/figura_1.png" alt="Pipeline OCR" style="width: 85%; display: block; margin: 0 auto;">
|
|||
|
|
<div class="fig-caption">Pipeline de un sistema OCR moderno</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 7: METHODOLOGY ====== -->
|
|||
|
|
<section>
|
|||
|
|
<h2>Metodología: 5 Fases</h2>
|
|||
|
|
<img src="figures/figura_3.png" alt="Metodología" style="width: 90%; display: block; margin: 0 auto 15px;">
|
|||
|
|
<div class="fig-caption" style="margin-bottom: 15px;">Fases de la metodología experimental</div>
|
|||
|
|
<div style="display: grid; grid-template-columns: repeat(5, 1fr); gap: 8px; font-size: 0.55em; text-align: center;">
|
|||
|
|
<div class="fragment fade-up"><div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">1</div>Preparación del dataset<br><span style="color:#999">PDF → 300 DPI + GT</span></div>
|
|||
|
|
<div class="fragment fade-up"><div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">2</div>Benchmark comparativo<br><span style="color:#999">3 motores, CER/WER</span></div>
|
|||
|
|
<div class="fragment fade-up"><div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">3</div>Espacio de búsqueda<br><span style="color:#999">7 hiperparámetros</span></div>
|
|||
|
|
<div class="fragment fade-up"><div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">4</div>Optimización<br><span style="color:#999">64 trials, TPE</span></div>
|
|||
|
|
<div class="fragment fade-up"><div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">5</div>Validación<br><span style="color:#999">45 páginas completas</span></div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 8: ARCHITECTURE ====== -->
|
|||
|
|
<section>
|
|||
|
|
<h2>Arquitectura: Microservicios Docker</h2>
|
|||
|
|
<div class="two-columns-60-40">
|
|||
|
|
<div>
|
|||
|
|
<img src="figures/figura_6.png" alt="Arquitectura" style="width: 100%; border-radius: 8px;">
|
|||
|
|
<div class="fig-caption">Arquitectura de microservicios para optimización OCR</div>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<ul class="compact-list">
|
|||
|
|
<li><strong>Contenedor Ray Tune:</strong> Orquestador de trials (Optuna TPE)</li>
|
|||
|
|
<li><strong>Contenedor OCR:</strong> PaddleOCR con acceso GPU</li>
|
|||
|
|
<li><strong>Comunicación:</strong> REST API (HTTP POST /evaluate)</li>
|
|||
|
|
<li><strong>Respuesta:</strong> JSON {CER, WER, TIME}</li>
|
|||
|
|
<li><strong>Docker Compose:</strong> Despliegue reproducible</li>
|
|||
|
|
</ul>
|
|||
|
|
<div class="highlight-box" style="font-size:0.65em; margin-top:15px;">
|
|||
|
|
<strong>Hardware:</strong><br>
|
|||
|
|
RTX 3060 Laptop (5.66 GB VRAM)<br>
|
|||
|
|
AMD Ryzen 7 5800H<br>
|
|||
|
|
16 GB DDR4 | Ubuntu 24.04
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 9: SEARCH SPACE ====== -->
|
|||
|
|
<section>
|
|||
|
|
<h2>Espacio de Búsqueda: 7 Hiperparámetros</h2>
|
|||
|
|
<div class="two-columns-60-40">
|
|||
|
|
<div>
|
|||
|
|
<table class="data-table" style="font-size:0.72em;">
|
|||
|
|
<thead><tr><th>Parámetro</th><th>Tipo</th><th>Rango</th></tr></thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr><td><code>textline_orientation</code></td><td><span class="tag bool">Booleano</span></td><td>True / False</td></tr>
|
|||
|
|
<tr><td><code>use_doc_orientation_classify</code></td><td><span class="tag bool">Booleano</span></td><td>True / False</td></tr>
|
|||
|
|
<tr><td><code>use_doc_unwarping</code></td><td><span class="tag bool">Booleano</span></td><td>True / False</td></tr>
|
|||
|
|
<tr><td><code>text_det_thresh</code></td><td><span class="tag cont">Continuo</span></td><td>[0.01, 0.50]</td></tr>
|
|||
|
|
<tr><td><code>text_det_box_thresh</code></td><td><span class="tag cont">Continuo</span></td><td>[0.01, 0.90]</td></tr>
|
|||
|
|
<tr><td><code>text_rec_score_thresh</code></td><td><span class="tag cont">Continuo</span></td><td>[0.01, 0.99]</td></tr>
|
|||
|
|
<tr style="opacity:0.5"><td><code>text_det_unclip_ratio</code></td><td><span class="tag fixed">Fijo</span></td><td>0.0</td></tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<img src="figures/figura_2.png" alt="Ray Tune Cycle" style="width: 100%; border-radius: 8px;">
|
|||
|
|
<div class="fig-caption">Ciclo de optimización con Ray Tune y Optuna</div>
|
|||
|
|
<div class="highlight-box" style="font-size:0.62em; margin-top:10px;">
|
|||
|
|
<strong>Algoritmo:</strong> TPE (Tree-structured Parzen Estimator)<br>
|
|||
|
|
<strong>Trials:</strong> 64 | <strong>Concurrencia:</strong> 2 workers<br>
|
|||
|
|
<strong>Métrica:</strong> Minimizar CER
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 10: BENCHMARK ====== -->
|
|||
|
|
<section data-chart="benchmark">
|
|||
|
|
<h2>Resultados: Benchmark Comparativo</h2>
|
|||
|
|
<div class="two-columns">
|
|||
|
|
<div class="chart-container">
|
|||
|
|
<canvas id="chartBenchmark"></canvas>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<table class="data-table" style="font-size:0.68em;">
|
|||
|
|
<thead><tr><th>Motor</th><th>CER</th><th>WER</th><th>s/pág</th><th>VRAM</th></tr></thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr><td>EasyOCR</td><td>11.23%</td><td>36.36%</td><td>1.88</td><td>~2 GB</td></tr>
|
|||
|
|
<tr class="highlight"><td><strong>PaddleOCR</strong></td><td><strong>7.76%</strong></td><td><strong>11.62%</strong></td><td>0.58</td><td>0.06 GB</td></tr>
|
|||
|
|
<tr><td>DocTR</td><td>12.06%</td><td>42.01%</td><td>0.50</td><td>~1 GB</td></tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
<div class="highlight-box" style="font-size:0.65em; margin-top:15px;">
|
|||
|
|
<strong>PaddleOCR seleccionado:</strong> Mejor CER (7.76%) con el menor consumo de VRAM (0.06 GB) y alta configurabilidad.
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 11: 64 TRIALS ====== -->
|
|||
|
|
<section data-chart="trials">
|
|||
|
|
<h2>Resultados: 64 Trials de Optimización</h2>
|
|||
|
|
<div class="two-columns">
|
|||
|
|
<div class="chart-container">
|
|||
|
|
<canvas id="chartTrials"></canvas>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<div class="metric-cards" style="flex-direction: column;">
|
|||
|
|
<div class="metric-card" style="width:100%;">
|
|||
|
|
<div class="number success">0.79%</div>
|
|||
|
|
<div class="label">Mejor CER (Trial #1)</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="metric-card" style="width:100%;">
|
|||
|
|
<div class="number">0.87%</div>
|
|||
|
|
<div class="label">Mediana CER</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="metric-card" style="width:100%;">
|
|||
|
|
<div class="number warning">7.30%</div>
|
|||
|
|
<div class="label">Peor CER</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="metric-card" style="width:100%;">
|
|||
|
|
<div class="number">67.2%</div>
|
|||
|
|
<div class="label">Trials con CER &lt; 2%</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="highlight-box" style="font-size:0.62em; margin-top:12px;">
|
|||
|
|
<strong>0 fallos</strong> en 64 trials<br>
|
|||
|
|
Tiempo total: <strong>~5 minutos</strong> (GPU)
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 12: KEY FINDING ====== -->
|
|||
|
|
<section data-chart="textline">
|
|||
|
|
<h2>Hallazgo Clave: <code>textline_orientation</code></h2>
|
|||
|
|
<div class="two-columns">
|
|||
|
|
<div class="chart-container">
|
|||
|
|
<canvas id="chartTextline"></canvas>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<div class="metric-card" style="width:100%; margin-bottom: 15px;">
|
|||
|
|
<div class="number success" style="font-size: 2em;">-63.2%</div>
|
|||
|
|
<div class="label" style="font-size:0.75em;">Reducción en CER</div>
|
|||
|
|
</div>
|
|||
|
|
<ul class="compact-list">
|
|||
|
|
<li>Un <strong>único parámetro booleano</strong> tiene mayor impacto que todos los umbrales numéricos combinados</li>
|
|||
|
|
<li><strong>Decisiones arquitecturales</strong> &gt; ajustes numéricos finos</li>
|
|||
|
|
<li>Crítico para documentos con <strong>layouts complejos</strong> (índices, listas, encabezados)</li>
|
|||
|
|
<li>52 de 64 trials (81%) lo activaron automáticamente (Optuna aprendió rápido)</li>
|
|||
|
|
</ul>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 13: CORRELATIONS ====== -->
|
|||
|
|
<section data-chart="correlations">
|
|||
|
|
<h2>Análisis de Hiperparámetros</h2>
|
|||
|
|
<div class="two-columns">
|
|||
|
|
<div>
|
|||
|
|
<h3 style="font-size:0.75em; text-align:center;">Correlación Pearson con CER</h3>
|
|||
|
|
<div class="chart-container">
|
|||
|
|
<canvas id="chartCorrelation"></canvas>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<h3 style="font-size:0.75em; text-align:center;">Importancia de Hiperparámetros</h3>
|
|||
|
|
<div class="chart-container">
|
|||
|
|
<canvas id="chartImportance"></canvas>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="highlight-box" style="font-size:0.62em; margin-top:10px;">
|
|||
|
|
<strong>Insight:</strong> <code>use_doc_unwarping</code> (+0.88) es perjudicial en PDFs digitales (añade procesamiento innecesario). Los parámetros booleanos (arquitecturales) dominan sobre los umbrales numéricos.
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 14: VALIDATION ====== -->
|
|||
|
|
<section data-chart="validation">
|
|||
|
|
<h2>Validación: Baseline vs Optimizado</h2>
|
|||
|
|
<div class="two-columns">
|
|||
|
|
<div class="chart-container">
|
|||
|
|
<canvas id="chartValidation"></canvas>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<table class="data-table" style="font-size:0.7em;">
|
|||
|
|
<thead><tr><th>Métrica</th><th>Baseline</th><th>Optimizado</th><th>Mejora</th></tr></thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr><td>CER (45 pág)</td><td>8.85%</td><td>7.72%</td><td class="improvement">-12.8%</td></tr>
|
|||
|
|
<tr><td>WER (45 pág)</td><td>13.05%</td><td>11.40%</td><td class="improvement">-12.6%</td></tr>
|
|||
|
|
<tr class="highlight"><td>CER (mejor trial, 5 pág)</td><td>7.76%</td><td>0.79%</td><td class="improvement">-89.8%</td></tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
<div class="highlight-box" style="font-size:0.62em; margin-top:15px;">
|
|||
|
|
<strong>Nota:</strong> La diferencia entre el mejor trial (0.79%) y la validación completa (7.72%) evidencia <strong>sobreajuste</strong> al subconjunto de 5 páginas usado en la optimización. Un subconjunto más amplio (15-20 páginas) mejoraría la generalización.
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 15: GPU ACCELERATION ====== -->
|
|||
|
|
<section data-chart="gpu">
|
|||
|
|
<h2>Aceleración GPU</h2>
|
|||
|
|
<div class="two-columns">
|
|||
|
|
<div class="chart-container">
|
|||
|
|
<canvas id="chartGPU"></canvas>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<div class="metric-cards" style="flex-direction: column; gap: 12px;">
|
|||
|
|
<div class="metric-card" style="width:100%;">
|
|||
|
|
<div class="number" style="font-size: 2.2em; color: var(--unir-blue);">82x</div>
|
|||
|
|
<div class="label" style="font-size:0.8em;">Factor de aceleración</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="metric-card" style="width:100%;">
|
|||
|
|
<div class="number">0.84 s</div>
|
|||
|
|
<div class="label">GPU: segundos por página</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="metric-card" style="width:100%;">
|
|||
|
|
<div class="number warning">69.4 s</div>
|
|||
|
|
<div class="label">CPU: segundos por página</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="highlight-box" style="font-size:0.62em; margin-top:12px;">
|
|||
|
|
64 trials × 5 páginas:<br>
|
|||
|
|
<strong>CPU:</strong> ~6.2 horas<br>
|
|||
|
|
<strong>GPU:</strong> ~5 minutos
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 16: OPTIMAL CONFIG ====== -->
|
|||
|
|
<section>
|
|||
|
|
<h2>Configuración Óptima</h2>
|
|||
|
|
<div class="two-columns">
|
|||
|
|
<div>
|
|||
|
|
<div style="background: #1e1e1e; color: #d4d4d4; border-radius: 10px; padding: 20px; font-family: Consolas, monospace; font-size: 0.6em; line-height: 1.7;">
|
|||
|
|
<span style="color:#569cd6">config_optimizada</span> = {<br>
|
|||
|
|
<span style="color:#9cdcfe">"textline_orientation"</span>: <span style="color:#4ec9b0">True</span>, <span style="color:#6a9955"> # CRÍTICO</span><br>
|
|||
|
|
<span style="color:#9cdcfe">"use_doc_orientation_classify"</span>: <span style="color:#4ec9b0">True</span>,<br>
|
|||
|
|
<span style="color:#9cdcfe">"use_doc_unwarping"</span>: <span style="color:#4ec9b0">False</span>, <span style="color:#6a9955"> # Innecesario</span><br>
|
|||
|
|
<span style="color:#9cdcfe">"text_det_thresh"</span>: <span style="color:#b5cea8">0.0462</span>,<br>
|
|||
|
|
<span style="color:#9cdcfe">"text_det_box_thresh"</span>: <span style="color:#b5cea8">0.4862</span>,<br>
|
|||
|
|
<span style="color:#9cdcfe">"text_det_unclip_ratio"</span>: <span style="color:#b5cea8">0.0</span>,<br>
|
|||
|
|
<span style="color:#9cdcfe">"text_rec_score_thresh"</span>: <span style="color:#b5cea8">0.5658</span>,<br>
|
|||
|
|
}
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<h3 style="font-size:0.8em;">Insights clave</h3>
|
|||
|
|
<ul class="compact-list">
|
|||
|
|
<li class="fragment fade-up"><strong><code>textline_orientation = True</code></strong>: Parámetro más impactante (-63.2% CER)</li>
|
|||
|
|
<li class="fragment fade-up"><strong><code>use_doc_unwarping = False</code></strong>: Procesamiento innecesario para PDFs digitales</li>
|
|||
|
|
<li class="fragment fade-up"><strong><code>text_det_thresh</code> bajo</strong>: Captura más regiones de texto, reduce omisiones</li>
|
|||
|
|
<li class="fragment fade-up"><strong>Parámetros booleanos</strong> dominan sobre umbrales numéricos</li>
|
|||
|
|
</ul>
|
|||
|
|
<div class="highlight-box" style="font-size: 0.62em; margin-top:12px;">
|
|||
|
|
Esta configuración es directamente aplicable a otros documentos académicos en español con layouts similares.
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 17: CONCLUSIONS ====== -->
|
|||
|
|
<section>
|
|||
|
|
<h2>Conclusiones</h2>
|
|||
|
|
<div class="two-columns">
|
|||
|
|
<div>
|
|||
|
|
<h3 style="font-size:0.78em; margin-bottom:10px;">Contribuciones</h3>
|
|||
|
|
<div class="contribution-item fragment fade-up">
|
|||
|
|
<div class="contribution-icon">1</div>
|
|||
|
|
<div><strong>Metodología reproducible</strong> para optimización de hiperparámetros OCR con código abierto</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="contribution-item fragment fade-up">
|
|||
|
|
<div class="contribution-icon">2</div>
|
|||
|
|
<div><strong>Análisis sistemático</strong> de hiperparámetros PaddleOCR con correlaciones Pearson</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="contribution-item fragment fade-up">
|
|||
|
|
<div class="contribution-icon">3</div>
|
|||
|
|
<div><strong>Configuración validada</strong> para documentos académicos en español (CER 0.79% en el mejor trial; 7.72% sobre 45 páginas)</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="contribution-item fragment fade-up">
|
|||
|
|
<div class="contribution-icon">4</div>
|
|||
|
|
<div><strong>Infraestructura dockerizada</strong> reproducible con imágenes públicas</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<h3 style="font-size:0.78em; margin-bottom:10px;">Limitaciones</h3>
|
|||
|
|
<div class="contribution-item fragment fade-up">
|
|||
|
|
<div class="limitation-icon">!</div>
|
|||
|
|
<div>Un único tipo de documento (académico UNIR)</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="contribution-item fragment fade-up">
|
|||
|
|
<div class="limitation-icon">!</div>
|
|||
|
|
<div>Corpus modesto (45 páginas)</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="contribution-item fragment fade-up">
|
|||
|
|
<div class="limitation-icon">!</div>
|
|||
|
|
<div>Sobreajuste al subconjunto de optimización (5 páginas)</div>
|
|||
|
|
</div>
|
|||
|
|
<div class="contribution-item fragment fade-up">
|
|||
|
|
<div class="limitation-icon">!</div>
|
|||
|
|
<div><code>text_det_unclip_ratio</code> no explorado</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 18: FUTURE WORK ====== -->
|
|||
|
|
<section>
|
|||
|
|
<h2>Líneas de Trabajo Futuro</h2>
|
|||
|
|
<div class="three-columns" style="font-size:0.72em;">
|
|||
|
|
<div>
|
|||
|
|
<h3 style="font-size:0.95em;">Extensiones inmediatas</h3>
|
|||
|
|
<ul class="compact-list">
|
|||
|
|
<li>Validación cruzada en otros tipos de documentos (facturas, formularios, manuscritos)</li>
|
|||
|
|
<li>Subconjunto de optimización más amplio (15-20 páginas)</li>
|
|||
|
|
<li>Exploración de <code>text_det_unclip_ratio</code></li>
|
|||
|
|
</ul>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<h3 style="font-size:0.95em;">Líneas de investigación</h3>
|
|||
|
|
<ul class="compact-list">
|
|||
|
|
<li>Transfer learning de hiperparámetros entre dominios</li>
|
|||
|
|
<li>Optimización multi-objetivo (CER + WER + velocidad)</li>
|
|||
|
|
<li>Comparación rigurosa HPO vs fine-tuning</li>
|
|||
|
|
</ul>
|
|||
|
|
</div>
|
|||
|
|
<div>
|
|||
|
|
<h3 style="font-size:0.95em;">Aplicaciones prácticas</h3>
|
|||
|
|
<ul class="compact-list">
|
|||
|
|
<li>Herramienta de configuración automática por tipo de documento</li>
|
|||
|
|
<li>Integración en pipelines de producción</li>
|
|||
|
|
<li>Benchmark público de OCR en español</li>
|
|||
|
|
</ul>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
<!-- ====== SLIDE 19: THANK YOU ====== -->
|
|||
|
|
<section class="title-slide thanks-slide" data-transition="fade">
|
|||
|
|
<img src="../instructions/plantilla_individual_files/image001.png" class="logo-large" style="width:180px; margin-bottom:20px;" alt="UNIR">
|
|||
|
|
<h1 style="margin-bottom: 0.1em;">Gracias</h1>
|
|||
|
|
<div class="questions">¿Preguntas?</div>
|
|||
|
|
<div class="divider"></div>
|
|||
|
|
<div class="meta" style="font-size:0.6em;">
|
|||
|
|
<strong>Sergio Jiménez Jiménez</strong><br>
|
|||
|
|
Máster Universitario en Inteligencia Artificial<br>
|
|||
|
|
Universidad Internacional de La Rioja (UNIR) | 2025
|
|||
|
|
</div>
|
|||
|
|
</section>
|
|||
|
|
|
|||
|
|
</div><!-- .slides -->
|
|||
|
|
</div><!-- .reveal -->
|
|||
|
|
|
|||
|
|
<script src="https://cdn.jsdelivr.net/npm/reveal.js@5.0.4/dist/reveal.min.js"></script>
|
|||
|
|
<script>
|
|||
|
|
// Registry of created Chart.js instances, keyed by chart name. Each
// create*Chart function stores its chart here and returns early when its
// entry is already set.
const charts = {};

// UNIR colour palette (hex strings) used by the chart datasets.
const {
  BLUE,
  BLUE_DARK,
  LIGHT,
  RED,
  ORANGE,
  GREEN,
  GRAY,
} = {
  BLUE: '#0098CD',
  BLUE_DARK: '#007AA3',
  LIGHT: '#E6F4F9',
  RED: '#E8654A',
  ORANGE: '#F0A030',
  GREEN: '#2EAD4B',
  GRAY: '#CCCCCC',
};

// Baseline options that every chart spreads into its own `options` object.
// Charts that need a legend replace the `plugins` entry after the spread.
const commonOptions = {
  responsive: true,
  maintainAspectRatio: true,
  animation: {
    duration: 1200,
    easing: 'easeOutQuart',
  },
  plugins: {
    legend: { display: false },
  },
};
|
|||
|
|
|
|||
|
|
/**
 * Renders the OCR engine comparison (EasyOCR, PaddleOCR, DocTR) as a
 * horizontal grouped bar chart with one CER series and one WER series on
 * the `#chartBenchmark` canvas.
 *
 * Does nothing when the canvas is missing or `charts.benchmark` is already
 * set, so it is safe to call more than once.
 */
function createBenchmarkChart() {
  const canvas = document.getElementById('chartBenchmark');
  if (charts.benchmark || !canvas) return;

  const engines = ['EasyOCR', 'PaddleOCR', 'DocTR'];

  // Bar geometry shared by both series.
  const barShape = { borderWidth: 2, borderRadius: 6, barPercentage: 0.6 };

  // The middle entry (PaddleOCR) is drawn in blue, the other two in gray.
  const cerSeries = {
    label: 'CER (%)',
    data: [11.23, 7.76, 12.06],
    backgroundColor: [GRAY, BLUE, GRAY],
    borderColor: [GRAY, BLUE_DARK, GRAY],
    ...barShape,
  };
  const werSeries = {
    label: 'WER (%)',
    data: [36.36, 11.62, 42.01],
    backgroundColor: ['rgba(204,204,204,0.4)', 'rgba(0,152,205,0.4)', 'rgba(204,204,204,0.4)'],
    borderColor: [GRAY, BLUE, GRAY],
    ...barShape,
  };

  const legend = {
    display: true,
    position: 'top',
    labels: { font: { family: 'Calibri', size: 12 } },
  };
  const scales = {
    x: {
      title: { display: true, text: 'Error Rate (%)', font: { family: 'Calibri' } },
      grid: { color: '#f0f0f0' },
    },
    y: {
      grid: { display: false },
      ticks: { font: { family: 'Calibri', size: 14, weight: 'bold' } },
    },
  };

  charts.benchmark = new Chart(canvas, {
    type: 'bar',
    data: { labels: engines, datasets: [cerSeries, werSeries] },
    options: {
      ...commonOptions,
      indexAxis: 'y', // horizontal bars
      plugins: { legend }, // replaces the hidden-legend default
      scales,
    },
  });
}
|
|||
|
|
|
|||
|
|
/**
 * Renders the distribution of trials across four CER ranges as a doughnut
 * chart on the `#chartTrials` canvas.
 *
 * Does nothing when the canvas is missing or `charts.trials` is already set.
 */
function createTrialsChart() {
  const canvas = document.getElementById('chartTrials');
  if (charts.trials || !canvas) return;

  // Number of trials per CER range, in the same order as the labels.
  const buckets = {
    labels: ['CER < 1%', 'CER 1-2%', 'CER 2-5%', 'CER 5-10%'],
    datasets: [
      {
        data: [15, 28, 10, 11],
        backgroundColor: [BLUE_DARK, BLUE, '#7EC8E3', GRAY],
        borderColor: 'white',
        borderWidth: 3,
        hoverOffset: 8,
      },
    ],
  };

  const legend = {
    display: true,
    position: 'bottom',
    labels: {
      font: { family: 'Calibri', size: 12 },
      padding: 15,
      usePointStyle: true,
      pointStyle: 'rectRounded',
    },
  };

  charts.trials = new Chart(canvas, {
    type: 'doughnut',
    data: buckets,
    options: {
      ...commonOptions,
      cutout: '55%',
      plugins: { legend }, // replaces the hidden-legend default
    },
  });
}
|
|||
|
|
|
|||
|
|
/**
 * Renders the mean CER for `textline_orientation` False vs True as a
 * two-bar vertical chart on the `#chartTextline` canvas.
 *
 * Does nothing when the canvas is missing or `charts.textline` is already
 * set.
 */
function createTextlineChart() {
  const canvas = document.getElementById('chartTextline');
  if (charts.textline || !canvas) return;

  const meanCer = {
    label: 'CER medio (%)',
    data: [4.73, 1.74],
    backgroundColor: [GRAY, BLUE],
    borderColor: ['#aaa', BLUE_DARK],
    borderWidth: 2,
    borderRadius: 8,
    barPercentage: 0.5,
  };

  const yAxis = {
    beginAtZero: true,
    max: 6,
    title: { display: true, text: 'CER (%)', font: { family: 'Calibri', size: 13 } },
    grid: { color: '#f0f0f0' },
  };
  const xAxis = {
    title: {
      display: true,
      text: 'textline_orientation',
      font: { family: 'Calibri', size: 13, weight: 'bold' },
    },
    grid: { display: false },
    ticks: { font: { family: 'Calibri', size: 16, weight: 'bold' } },
  };

  // Tooltip text built from the hovered bar's y value.
  const formatTooltip = (item) => `CER: ${item.parsed.y}%`;

  charts.textline = new Chart(canvas, {
    type: 'bar',
    data: { labels: ['False', 'True'], datasets: [meanCer] },
    options: {
      ...commonOptions,
      scales: { y: yAxis, x: xAxis },
      plugins: {
        legend: { display: false },
        tooltip: { callbacks: { label: formatTooltip } },
      },
    },
  });
}
|
|||
|
|
|
|||
|
|
/**
 * Renders the signed Pearson correlation value of each hyperparameter as a
 * horizontal bar chart on the `#chartCorrelation` canvas. Positive values
 * are drawn in red, the rest in blue.
 *
 * Does nothing when the canvas is missing or `charts.correlation` is
 * already set.
 */
function createCorrelationChart() {
  const canvas = document.getElementById('chartCorrelation');
  if (charts.correlation || !canvas) return;

  // [parameter label, correlation] pairs, largest value first.
  const rows = [
    ['use_doc_unwarping', 0.879],
    ['text_det_thresh', 0.428],
    ['text_det_box_thresh', 0.311],
    ['text_rec_score_thresh', -0.268],
    ['textline_orientation', -0.535],
    ['use_doc_orient_classify', -0.712],
  ];
  const params = rows.map(([name]) => name);
  const values = rows.map(([, r]) => r);

  const fills = values.map((r) => (r > 0 ? RED : BLUE));
  // Each border is a darker shade of the matching fill colour.
  const borders = fills.map((fill) => (fill === RED ? '#C04030' : BLUE_DARK));

  // Draw the grid line at zero darker than the others.
  const gridColor = (line) => (line.tick.value === 0 ? '#666' : '#f0f0f0');

  charts.correlation = new Chart(canvas, {
    type: 'bar',
    data: {
      labels: params,
      datasets: [
        {
          data: values,
          backgroundColor: fills,
          borderColor: borders,
          borderWidth: 1.5,
          borderRadius: 4,
          barPercentage: 0.65,
        },
      ],
    },
    options: {
      ...commonOptions,
      indexAxis: 'y', // horizontal bars
      scales: {
        x: {
          min: -1,
          max: 1,
          title: { display: true, text: 'Correlación Pearson', font: { family: 'Calibri', size: 11 } },
          grid: { color: gridColor },
        },
        y: {
          grid: { display: false },
          ticks: { font: { family: 'Consolas, monospace', size: 10 } },
        },
      },
    },
  });
}
|
|||
|
|
|
|||
|
|
/**
 * Renders the absolute correlation of each hyperparameter, sorted from
 * largest to smallest, as a horizontal bar chart on the `#chartImportance`
 * canvas. Bars get progressively more transparent down the ranking.
 *
 * Does nothing when the canvas is missing or `charts.importance` is already
 * set.
 */
function createImportanceChart() {
  const canvas = document.getElementById('chartImportance');
  if (charts.importance || !canvas) return;

  // [parameter label, |correlation|] pairs in descending order.
  const ranking = [
    ['use_doc_unwarping', 0.879],
    ['use_doc_orient_classify', 0.712],
    ['textline_orientation', 0.535],
    ['text_det_thresh', 0.428],
    ['text_det_box_thresh', 0.311],
    ['text_rec_score_thresh', 0.268],
  ];
  const params = ranking.map(([name]) => name);
  const values = ranking.map(([, magnitude]) => magnitude);

  // Opacity drops by 0.12 per rank, starting fully opaque at rank 0.
  const fills = [];
  for (let rank = 0; rank < values.length; rank += 1) {
    const alpha = 1 - (rank * 0.12);
    fills.push(`rgba(0, 152, 205, ${alpha})`);
  }

  charts.importance = new Chart(canvas, {
    type: 'bar',
    data: {
      labels: params,
      datasets: [
        {
          data: values,
          backgroundColor: fills,
          borderColor: BLUE_DARK,
          borderWidth: 1,
          borderRadius: 4,
          barPercentage: 0.65,
        },
      ],
    },
    options: {
      ...commonOptions,
      indexAxis: 'y', // horizontal bars
      scales: {
        x: {
          beginAtZero: true,
          max: 1,
          title: { display: true, text: '|Correlación|', font: { family: 'Calibri', size: 11 } },
          grid: { color: '#f0f0f0' },
        },
        y: {
          grid: { display: false },
          ticks: { font: { family: 'Consolas, monospace', size: 10 } },
        },
      },
    },
  });
}
|
|||
|
|
|
|||
|
|
/**
 * Renders the baseline vs optimised comparison (CER and WER over 45 pages,
 * plus the best-trial CER) as a grouped vertical bar chart on the
 * `#chartValidation` canvas.
 *
 * Does nothing when the canvas is missing or `charts.validation` is already
 * set.
 */
function createValidationChart() {
  const canvas = document.getElementById('chartValidation');
  if (charts.validation || !canvas) return;

  const metrics = ['CER (45 pág)', 'WER (45 pág)', 'CER (mejor trial)'];

  // Bar geometry shared by both series.
  const barShape = { borderWidth: 2, borderRadius: 6, barPercentage: 0.7 };

  const baseline = {
    label: 'Baseline',
    data: [8.85, 13.05, 7.76],
    backgroundColor: 'rgba(204,204,204,0.7)',
    borderColor: '#aaa',
    ...barShape,
  };
  const optimised = {
    label: 'Optimizado',
    data: [7.72, 11.40, 0.79],
    backgroundColor: BLUE,
    borderColor: BLUE_DARK,
    ...barShape,
  };

  const legend = {
    display: true,
    position: 'top',
    labels: { font: { family: 'Calibri', size: 13 } },
  };

  charts.validation = new Chart(canvas, {
    type: 'bar',
    data: { labels: metrics, datasets: [baseline, optimised] },
    options: {
      ...commonOptions,
      plugins: { legend }, // replaces the hidden-legend default
      scales: {
        y: {
          beginAtZero: true,
          title: { display: true, text: 'Error Rate (%)', font: { family: 'Calibri' } },
          grid: { color: '#f0f0f0' },
        },
        x: {
          grid: { display: false },
          ticks: { font: { family: 'Calibri', size: 12 } },
        },
      },
    },
  });
}
|
|||
|
|
|
|||
|
|
/**
 * Renders the seconds-per-page comparison between CPU and GPU as a
 * horizontal two-bar chart on the `#chartGPU` canvas.
 *
 * Does nothing when the canvas is missing or `charts.gpu` is already set.
 */
function createGPUChart() {
  const canvas = document.getElementById('chartGPU');
  if (charts.gpu || !canvas) return;

  const secondsPerPage = {
    label: 'Segundos por página',
    data: [69.4, 0.84],
    backgroundColor: [ORANGE, BLUE],
    borderColor: ['#CC8020', BLUE_DARK],
    borderWidth: 2,
    borderRadius: 8,
    barPercentage: 0.5,
  };

  const xAxis = {
    beginAtZero: true,
    title: { display: true, text: 'Segundos por página', font: { family: 'Calibri', size: 13 } },
    grid: { color: '#f0f0f0' },
  };
  const yAxis = {
    grid: { display: false },
    ticks: { font: { family: 'Calibri', size: 14, weight: 'bold' } },
  };

  // Bars are horizontal, so the measured value is on the x axis.
  const formatTooltip = (item) => `${item.parsed.x} s/página`;

  charts.gpu = new Chart(canvas, {
    type: 'bar',
    data: {
      labels: ['CPU (Ryzen 7 5800H)', 'GPU (RTX 3060)'],
      datasets: [secondsPerPage],
    },
    options: {
      ...commonOptions,
      indexAxis: 'y', // horizontal bars
      scales: { x: xAxis, y: yAxis },
      plugins: {
        legend: { display: false },
        tooltip: { callbacks: { label: formatTooltip } },
      },
    },
  });
}
|
|||
|
|
|
|||
|
|
// Maps a slide's `data-chart` value to the function that builds its chart(s).
// The `correlations` entry builds two charts: the signed correlation chart
// and the absolute-value importance chart.
const chartCreators = {
  benchmark: createBenchmarkChart,
  trials: createTrialsChart,
  textline: createTextlineChart,
  correlations() {
    createCorrelationChart();
    createImportanceChart();
  },
  validation: createValidationChart,
  gpu: createGPUChart,
};
|
|||
|
|
|
|||
|
|
// Deck configuration handed to Reveal.js: 1280x720 slides that are not
// vertically centred, 'c/t' slide numbers and the 'slide' transition.
const deckConfig = {
  hash: true,
  slideNumber: 'c/t',
  transition: 'slide',
  transitionSpeed: 'default',
  width: 1280,
  height: 720,
  margin: 0.06,
  center: false,
  controlsTutorial: false,
};

// Start the presentation.
Reveal.initialize(deckConfig);
|
|||
|
|
|
|||
|
|
/**
 * Schedules creation of the chart named by `slide`'s `data-chart`
 * attribute after `delayMs` milliseconds. Slides without the attribute, or
 * naming a chart with no entry in `chartCreators`, are ignored.
 */
const scheduleSlideChart = (slide, delayMs) => {
  const chartType = slide.dataset.chart;
  if (!chartType || !chartCreators[chartType]) return;
  setTimeout(() => chartCreators[chartType](), delayMs);
};

// Create charts on slide change. The small delay ensures the canvas is
// visible before rendering.
Reveal.on('slidechanged', (event) => {
  scheduleSlideChart(event.currentSlide, 100);
});

// Also check the initial slide once the deck is ready.
Reveal.on('ready', (event) => {
  scheduleSlideChart(event.currentSlide, 300);
});
|
|||
|
|
</script>
|
|||
|
|
</body>
|
|||
|
|
</html>
|