<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>TFM - Optimización de Hiperparámetros OCR con Ray Tune</title>
<!-- reveal.js 5.0.4: core stylesheet plus the "white" theme, both served from jsDelivr -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/reveal.js@5.0.4/dist/reveal.min.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/reveal.js@5.0.4/dist/theme/white.min.css">
<!-- NOTE(review): "chart.js@4" is a floating major-version range, unlike the pinned reveal.js
     URLs above; consider pinning an exact version (and adding SRI) for reproducible builds. -->
<script src="https://cdn.jsdelivr.net/npm/chart.js@4"></script>
< style >
/* Brand palette exposed as custom properties; consumed via var(--unir-*) */
:root {
  --unir-blue: #0098CD;
  --unir-blue-dark: #007AA3;
  --unir-blue-deeper: #005F73;
  --unir-light: #E6F4F9;
  --unir-text: #2D3B45;
  --unir-text-light: #555;
  --unir-gray: #E7E6E6;
  --unir-red: #E8654A;
  --unir-orange: #E8832A;
  --unir-green: #2EAD4B;
}
/* ===== BASE ===== */
/* Deck-wide font, size and text colour */
.reveal {
  font-family: 'Calibri', 'Segoe UI', 'Helvetica Neue', Arial, sans-serif;
  font-size: 28px;
  color: var(--unir-text);
}
/* Shared heading look: brand blue, bold, no forced upper-casing */
.reveal h1, .reveal h2, .reveal h3 {
  font-family: 'Calibri', 'Segoe UI', Arial, sans-serif;
  color: var(--unir-blue);
  text-transform: none;
  letter-spacing: -0.01em;
  font-weight: 700;
}
.reveal h1 { font-size: 1.8em; }
/* Slide titles carry a blue accent bar on their left edge */
.reveal h2 {
  font-size: 1.3em;
  margin-bottom: 0.5em;
  border-left: 5px solid var(--unir-blue);
  padding-left: 15px;
  line-height: 1.3;
}
.reveal h3 { font-size: 1em; color: var(--unir-text); }
/* Slides are left-aligned and unpadded; inner wrappers supply the padding */
.reveal .slides section {
  text-align: left;
  padding: 0;
}
/* Content slides with footer use flex column */
.reveal .slides section:not(.title-slide):not(.index-slide):not(.thanks-slide) {
  display: flex !important;
  flex-direction: column !important;
  height: 100% !important;
}
/* Body takes the space left over by the fixed-height footer and clips any overflow */
.slide-body {
  flex: 1 1 0;
  min-height: 0;
  overflow: hidden;
  padding: 20px 50px 5px 50px;
}
/* Scale images/charts to fit available space */
.slide-body img {
  max-height: 100%;
  object-fit: contain;
}
/* ===== SLIDE FOOTER — white bar at bottom ===== */
.slide-footer {
  flex-shrink: 0;
  height: 80px;
  background: white;
  display: flex;
  justify-content: space-between;
  align-items: center;
  padding: 0 50px;
  width: 100%;
  /* width: 100% must include the 50px side padding */
  box-sizing: border-box;
}
.slide-footer .footer-logo {
  height: 42px;
}
/* Page indicator: grey label/number with a blue rule on its left */
.slide-footer .footer-page {
  font-size: 18px;
  color: #b0b0b0;
  border-left: 4px solid var(--unir-blue);
  padding-left: 12px;
  line-height: 1.2;
  text-align: right;
}
.slide-footer .footer-page .pag-label {
  font-weight: 400;
}
.slide-footer .footer-page .pag-number {
  font-weight: 700;
  font-size: 1.4em;
}
/* ===== TITLE SLIDE ===== */
/* Full-height blue column: centred content on top, white logo bar at the bottom */
.title-slide {
  background: var(--unir-blue) !important;
  text-align: center !important;
  padding: 0 !important;
  display: flex !important;
  flex-direction: column !important;
  height: 100% !important;
}
/* position: relative makes this the containing block for the corner brackets */
.title-slide .title-content {
  flex: 1;
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
  padding: 40px 80px 0;
  position: relative;
}
/* Undo the accent bar that .reveal h2-style headings use; the title is plain white text */
.title-slide h1 {
  color: white !important;
  font-size: 2.2em !important;
  font-weight: 700;
  line-height: 1.2;
  margin: 0 auto;
  max-width: 80%;
  border-left: none;
  padding-left: 0;
}
.title-slide .meta {
  color: rgba(255,255,255,0.85);
  font-size: 0.7em;
  line-height: 1.6;
  margin-top: 20px;
}
.title-slide .meta strong {
  color: white;
}
.title-slide .title-footer {
  height: 80px;
  background: white;
  display: flex;
  align-items: center;
  padding: 0 50px;
  width: 100%;
  /* Keep the 50px side padding inside the 100% width, matching .slide-footer */
  box-sizing: border-box;
  flex-shrink: 0;
}
.title-slide .title-footer img {
  height: 42px;
}
/* Corner brackets */
/* Shared geometry; each slide type sets its own offsets and border colours */
.corner-tl,
.corner-br {
  position: absolute;
  width: 60px;
  height: 60px;
}
.title-slide .corner-tl {
  top: 15%;
  left: 22%;
  border-top: 4px solid rgba(255,255,255,0.5);
  border-left: 4px solid rgba(255,255,255,0.5);
}
.title-slide .corner-br {
  bottom: 22%;
  right: 22%;
  border-bottom: 4px solid rgba(255,255,255,0.5);
  border-right: 4px solid rgba(255,255,255,0.5);
}
/* ===== INDEX / AGENDA SLIDE ===== */
/* Two-column grid: blue heading panel (42%) and the numbered entries (58%) */
.index-slide {
  padding: 0 !important;
  display: grid !important;
  grid-template-columns: 42% 58%;
  height: 100% !important;
}
.index-slide .index-left {
  background: var(--unir-blue);
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
  padding: 40px;
}
/* Large white heading without the default left accent bar */
.index-slide .index-left h2 {
  color: white !important;
  font-size: 3em !important;
  font-weight: 700;
  border-left: none;
  padding-left: 0;
  text-align: center;
  margin: 0;
  letter-spacing: 0.02em;
}
.index-slide .index-right {
  display: flex;
  flex-direction: column;
  justify-content: center;
  padding: 40px 50px;
}
/* One agenda entry: grey number followed by bold text on a shared baseline */
.index-item {
  display: flex;
  align-items: baseline;
  gap: 12px;
  margin: 16px 0;
  font-size: 0.85em;
}
.index-item .index-num {
  font-size: 1.1em;
  font-weight: 700;
  color: #bbb;
  min-width: 30px;
}
.index-item .index-text {
  font-weight: 700;
  color: var(--unir-text);
  font-size: 1.05em;
}
/* ===== THANKS SLIDE ===== */
/* White full-height column: centred content on top, logo bar at the bottom */
.thanks-slide {
  text-align: center !important;
  padding: 0 !important;
  background: white !important;
  display: flex !important;
  flex-direction: column !important;
  height: 100% !important;
}
/* position: relative makes this the containing block for the corner brackets */
.thanks-slide .thanks-content {
  flex: 1;
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
  position: relative;
}
.thanks-slide h1 {
  color: var(--unir-blue) !important;
  font-size: 3em !important;
  font-weight: 700;
  line-height: 1.1;
  text-transform: lowercase;
  border-left: none;
  padding-left: 0;
}
/* Blue, semi-transparent brackets framing the closing message */
.thanks-slide .corner-tl {
  top: 30%;
  left: 28%;
  border-top: 3px solid var(--unir-blue);
  border-left: 3px solid var(--unir-blue);
  opacity: 0.4;
}
.thanks-slide .corner-br {
  bottom: 30%;
  right: 28%;
  border-bottom: 3px solid var(--unir-blue);
  border-right: 3px solid var(--unir-blue);
  opacity: 0.4;
}
.thanks-slide .title-footer {
  height: 80px;
  background: white;
  display: flex;
  align-items: center;
  padding: 0 50px;
  width: 100%;
  /* Keep the 50px side padding inside the 100% width, matching .slide-footer */
  box-sizing: border-box;
  flex-shrink: 0;
}
.thanks-slide .title-footer img {
  height: 42px;
}
/* ===== TWO COLUMN LAYOUTS ===== */
/* Equal-width pair; overflow is clipped and children may shrink below their content width */
.two-columns {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 30px;
  align-items: start;
  overflow: hidden;
}
.two-columns > * { min-width: 0; }
/* 60/40 split */
.two-columns-60-40 {
  display: grid;
  grid-template-columns: 3fr 2fr;
  gap: 30px;
  align-items: start;
}
/* 40/60 split */
.two-columns-40-60 {
  display: grid;
  grid-template-columns: 2fr 3fr;
  gap: 30px;
  align-items: start;
}
/* Three equal columns with a tighter gap */
.three-columns {
  display: grid;
  grid-template-columns: 1fr 1fr 1fr;
  gap: 20px;
  align-items: start;
}
/* ===== HIGHLIGHT BOX ===== */
/* Pale-blue call-out with a blue accent on the left edge */
.highlight-box {
  background: var(--unir-light);
  border-left: 5px solid var(--unir-blue);
  padding: 15px 20px;
  border-radius: 0;
  margin: 15px 0;
  font-size: 0.85em;
}
/* Centred variant: the accent moves from the left edge to the top */
.highlight-box.center-box {
  border-left: none;
  border-top: 3px solid var(--unir-blue);
  text-align: center;
}
/* ===== METRIC CARDS ===== */
/* Wrapping, centred row of cards */
.metric-cards {
  display: flex;
  gap: 15px;
  flex-wrap: wrap;
  justify-content: center;
}
.metric-card {
  background: white;
  border: 2px solid var(--unir-gray);
  border-radius: 4px;
  padding: 15px 20px;
  text-align: center;
  min-width: 130px;
  box-shadow: 0 2px 6px rgba(0,0,0,0.04);
}
/* Headline figure; blue by default, recoloured by the state modifiers below */
.metric-card .number {
  font-size: 1.6em;
  font-weight: 700;
  color: var(--unir-blue);
  line-height: 1.2;
}
.metric-card .number.success { color: var(--unir-green); }
.metric-card .number.warning { color: var(--unir-orange); }
.metric-card .number.danger { color: var(--unir-red); }
.metric-card .label {
  font-size: 0.6em;
  color: #888;
  margin-top: 4px;
}
/* ===== DATA TABLE ===== */
.data-table {
  width: 100%;
  border-collapse: collapse;
  font-size: 0.75em;
  margin: 10px 0;
}
.data-table thead th {
  background: var(--unir-blue);
  color: white;
  padding: 10px 14px;
  text-align: left;
  font-weight: 600;
}
.data-table tbody td {
  padding: 8px 14px;
  border-bottom: 1px solid var(--unir-gray);
}
/* Zebra striping on even rows */
.data-table tbody tr:nth-child(even) {
  background: #F5F9FC;
}
/* Declared after the striping rule (same specificity) so highlighted rows win */
.data-table tbody tr.highlight {
  background: var(--unir-light);
  font-weight: 600;
}
/* ===== ENGINE CARDS ===== */
.engine-card {
  background: white;
  border: 2px solid var(--unir-gray);
  border-radius: 4px;
  padding: 18px;
  text-align: center;
}
/* The chosen engine is emphasised with a blue border, tint and soft glow */
.engine-card.selected {
  border-color: var(--unir-blue);
  background: var(--unir-light);
  box-shadow: 0 2px 12px rgba(0,152,205,0.15);
}
.engine-card h3 {
  margin: 0 0 5px 0;
  font-size: 0.9em;
}
.engine-card .developer {
  font-size: 0.55em;
  color: #999;
  margin-bottom: 10px;
}
/* Architecture pill */
.engine-card .arch {
  font-size: 0.6em;
  color: var(--unir-text);
  background: var(--unir-light);
  border-radius: 4px;
  padding: 4px 8px;
  display: inline-block;
  margin-bottom: 8px;
}
/* On the tinted selected card the pill switches to white so it stays visible */
.engine-card.selected .arch { background: white; }
/* Prefixed with .reveal so this outranks the later ".reveal ul, .reveal ol" rule;
   at equal specificity that later rule would override the 0.6em size. */
.reveal .engine-card ul {
  text-align: left;
  font-size: 0.6em;
  margin: 0;
  padding-left: 16px;
}
/* ===== AGENDA ITEMS (for methodology etc) ===== */
.agenda-item {
  display: flex;
  align-items: center;
  gap: 15px;
  margin: 12px 0;
  font-size: 0.85em;
}
/* Circular numbered badge */
.agenda-number {
  width: 36px;
  height: 36px;
  background: var(--unir-blue);
  color: white;
  border-radius: 50%;
  display: flex;
  align-items: center;
  justify-content: center;
  font-weight: 700;
  font-size: 0.9em;
  flex-shrink: 0;
}
/* ===== TAGS ===== */
.tag {
  display: inline-block;
  background: var(--unir-light);
  color: var(--unir-blue-dark);
  border-radius: 4px;
  padding: 3px 12px;
  font-size: 0.6em;
  font-weight: 600;
  margin: 2px;
}
/* Colour variants: bool = orange, cont = green, fixed = grey */
.tag.bool { background: #FFF3E0; color: #E65100; }
.tag.cont { background: #E8F5E9; color: #2E7D32; }
.tag.fixed { background: var(--unir-gray); color: #888; }
/* ===== CHART CONTAINERS ===== */
/* Fixed-height box; the canvas is forced to fill it */
.chart-container {
  position: relative;
  width: 100%;
  height: 400px;
  max-width: 100%;
  overflow: hidden;
}
.chart-container canvas {
  width: 100% !important;
  height: 100% !important;
}
/* ===== LISTS ===== */
.reveal ul, .reveal ol {
  font-size: 0.8em;
  line-height: 1.6;
}
.reveal li { margin-bottom: 6px; }
/* Prefixed with .reveal so the 0.7em size outranks ".reveal ul, .reveal ol" above;
   a bare class selector has lower specificity and would be overridden. */
.reveal .compact-list { font-size: 0.7em; }
.compact-list li { margin-bottom: 3px; }
/* ===== OBJECTIVE LIST ===== */
/* Same specificity fix as .compact-list, so the 0.72em size takes effect */
.reveal .obj-list {
  list-style: none !important;
  padding: 0 !important;
  font-size: 0.72em;
}
.obj-list li {
  padding: 6px 0 6px 30px;
  position: relative;
}
/* Custom bullet: hollow blue ring drawn in the left padding */
.obj-list li::before {
  content: '';
  position: absolute;
  left: 0;
  top: 8px;
  width: 18px;
  height: 18px;
  border: 2px solid var(--unir-blue);
  border-radius: 50%;
}
/* Completed items: filled ring with an inner white gap */
.obj-list li.done::before {
  background: var(--unir-blue);
  box-shadow: inset 0 0 0 3px white;
}
/* ===== CONTRIBUTION / LIMITATION ITEMS ===== */
.contribution-item {
  display: flex;
  /* flex-start rather than start: the flex-specific keyword has wider browser support */
  align-items: flex-start;
  gap: 10px;
  margin: 8px 0;
  font-size: 0.72em;
}
/* Both icon badges share one geometry; only the background colour differs */
.contribution-icon,
.limitation-icon {
  width: 28px;
  height: 28px;
  color: white;
  border-radius: 4px;
  display: flex;
  align-items: center;
  justify-content: center;
  font-size: 14px;
  flex-shrink: 0;
  font-weight: 700;
}
.contribution-icon { background: var(--unir-blue); }
.limitation-icon { background: var(--unir-orange); }
/* ===== FIGURE CAPTION ===== */
.fig-caption {
  text-align: center;
  font-size: 0.55em;
  color: #999;
  font-style: italic;
  margin-top: 5px;
}
/* ===== COMPARISON TABLE ===== */
/* Each row is its own three-column grid: fixed label column plus two equal value columns */
.compare-row {
  display: grid;
  grid-template-columns: 140px 1fr 1fr;
  gap: 0;
  font-size: 0.65em;
}
.compare-row.header > div {
  background: var(--unir-blue);
  color: white;
  padding: 8px 12px;
  font-weight: 600;
}
/* Lower specificity than the header rule above, so header cells keep their own padding */
.compare-row > div {
  padding: 6px 12px;
  border-bottom: 1px solid var(--unir-gray);
}
.compare-row .label-col {
  font-weight: 600;
  background: #FAFCFE;
}
.compare-row .highlight-col {
  background: var(--unir-light);
}
/* ===== SLIDE NUMBER ===== */
/* The built-in slide number is hidden; the progress bar is tinted brand blue */
.reveal .slide-number { display: none; }
.reveal .progress span { background: var(--unir-blue); }
/* ===== MISC ===== */
.improvement {
  color: var(--unir-green);
  font-weight: 700;
  font-size: 0.8em;
}
/* Inline code and parameter names: monospace on a pale-blue chip */
code, .param {
  font-family: 'Consolas', 'Courier New', monospace;
  background: var(--unir-light);
  padding: 2px 6px;
  border-radius: 3px;
  font-size: 0.85em;
}
.section-divider {
  text-align: center !important;
}
/* Divider headings are larger and drop the left accent bar */
.section-divider h2 {
  font-size: 1.8em !important;
  border-left: none;
  padding-left: 0;
}
/* Oversized, faded section number */
.section-divider .section-number {
  font-size: 3em;
  color: var(--unir-blue);
  opacity: 0.2;
  font-weight: 700;
}
/* ===== PRINT PDF ===== */
/* One 1280x720 page per slide, no page margins */
@page {
  size: 1280px 720px;
  margin: 0;
}
@media print {
  /* Ask the browser to print background colours instead of stripping them */
  * { -webkit-print-color-adjust: exact !important; print-color-adjust: exact !important; }
  /* Every slide is pinned to exactly one page; anything larger is clipped */
  .reveal .slides section {
    position: relative !important;
    /* Legacy page-break-* kept as a fallback next to the standard break-* properties */
    page-break-after: always;
    break-after: page;
    page-break-inside: avoid;
    break-inside: avoid;
    width: 1280px !important;
    min-height: 720px !important;
    height: 720px !important;
    max-height: 720px !important;
    overflow: hidden !important;
    box-sizing: border-box !important;
  }
  .slide-body {
    overflow: hidden !important;
    max-height: 620px !important;
  }
  .slide-body .two-columns,
  .slide-body .two-columns-60-40,
  .slide-body .two-columns-40-60,
  .slide-body .three-columns {
    overflow: hidden !important;
  }
  /* Footers are pinned to the bottom edge of the page */
  .slide-footer {
    position: absolute !important;
    bottom: 0 !important;
    left: 0 !important;
    right: 0 !important;
    height: 80px !important;
    display: flex !important;
    background: white !important;
  }
  .title-slide .title-footer,
  .thanks-slide .title-footer {
    position: absolute !important;
    bottom: 0 !important;
    left: 0 !important;
    right: 0 !important;
    height: 80px !important;
    display: flex !important;
    background: white !important;
  }
  /* Title/thanks content fills the area above the 80px footer and stays centred */
  .thanks-slide .thanks-content,
  .title-slide .title-content {
    position: absolute !important;
    top: 0 !important;
    left: 0 !important;
    right: 0 !important;
    bottom: 80px !important;
    display: flex !important;
    flex-direction: column !important;
    justify-content: center !important;
    align-items: center !important;
  }
  /* Show fragments in print */
  .fragment { opacity: 1 !important; visibility: visible !important; }
}
< / style >
< / head >
< body >
< div class = "reveal" >
< div class = "slides" >
<!-- ====== SLIDE 1: TITLE ====== -->
<section class="title-slide" data-transition="fade">
  <div class="title-content">
    <!-- Decorative corner brackets; hidden from assistive technology -->
    <div class="corner-tl" aria-hidden="true"></div>
    <div class="corner-br" aria-hidden="true"></div>
    <h1>Optimización de Hiperparámetros OCR con Ray Tune para Documentos Académicos en Español</h1>
    <div class="meta">
      <strong>Sergio Jiménez Jiménez</strong><br>
      Director: Javier Rodrigo Villazón Terrazas<br>
      Máster Universitario en Inteligencia Artificial<br>
      2025
    </div>
  </div>
  <div class="title-footer">
    <img src="figures/unir_logo.png" alt="UNIR">
  </div>
</section>
<!-- ====== SLIDE 2: INDEX ====== -->
<!-- Left panel holds the heading; right panel lists the five numbered agenda entries -->
<section class="index-slide" data-transition="fade">
  <div class="index-left">
    <h2>ÍNDICE</h2>
  </div>
  <div class="index-right">
    <div class="index-item"><span class="index-num">01</span><span class="index-text">Motivación y planteamiento del problema</span></div>
    <div class="index-item"><span class="index-num">02</span><span class="index-text">Objetivos y estado del arte</span></div>
    <div class="index-item"><span class="index-num">03</span><span class="index-text">Metodología y arquitectura</span></div>
    <div class="index-item"><span class="index-num">04</span><span class="index-text">Resultados experimentales</span></div>
    <div class="index-item"><span class="index-num">05</span><span class="index-text">Conclusiones y trabajo futuro</span></div>
  </div>
</section>
<!-- ====== SLIDE 3: MOTIVATION ====== -->
<section>
  <div class="slide-body">
    <h2>Motivación</h2>
    <div class="two-columns">
      <div>
        <ul>
          <li>La digitalización documental es una <strong>necesidad estratégica</strong> para organizaciones</li>
          <li>OCR como puente entre el mundo físico y digital</li>
          <li>Documentos en español: caracteres especiales ausentes en conjuntos de entrenamiento internacionales</li>
          <li>Modelos preentrenados: <strong>rendimiento subóptimo</strong> fuera de benchmarks estándar</li>
          <li>Fine-tuning requiere infraestructura costosa y datos etiquetados</li>
        </ul>
      </div>
      <div>
        <h3 style="font-size:0.8em; text-align:center; margin-bottom: 10px;">Errores típicos en español</h3>
        <!-- The "OCR" column shows deliberately wrong output (e.g. "titulacióon"); do not correct it -->
        <table class="data-table" style="font-size:0.85em;">
          <thead><tr><th scope="col">Original</th><th scope="col">OCR</th><th scope="col">Error</th></tr></thead>
          <tbody>
            <tr><td>más</td><td>mas</td><td>Pérdida de acento</td></tr>
            <tr><td>año</td><td>ano</td><td>Pérdida de eñe</td></tr>
            <tr><td>¿Cómo</td><td>Como</td><td>Signos especiales</td></tr>
            <tr><td>titulación</td><td>titulacióon</td><td>Duplicación</td></tr>
          </tbody>
        </table>
      </div>
    </div>
  </div>
  <div class="slide-footer">
    <img src="figures/unir_logo.png" class="footer-logo" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">03</span></div>
  </div>
</section>
<!-- ====== SLIDE 4: PROBLEM STATEMENT ====== -->
<section>
  <div class="slide-body">
    <h2>Planteamiento del Problema</h2>
    <div class="highlight-box center-box" style="font-size:0.78em; margin-bottom: 20px;">
      <em>¿Es posible mejorar significativamente el rendimiento de modelos OCR preentrenados para documentos en español mediante la optimización sistemática de hiperparámetros, sin requerir fine-tuning?</em>
    </div>
    <!-- Comparison grid: label column, fine-tuning column, highlighted HPO column -->
    <div style="margin-top:10px;">
      <div class="compare-row header">
        <div></div><div>Fine-tuning completo</div><div>Optimización de hiperparámetros</div>
      </div>
      <div class="compare-row">
        <div class="label-col">Datos</div><div>Miles de imágenes etiquetadas</div><div class="highlight-col">Subconjunto de validación</div>
      </div>
      <div class="compare-row">
        <div class="label-col">Hardware</div><div>GPU alta memoria (&gt;16 GB)</div><div class="highlight-col">CPU / GPU consumo</div>
      </div>
      <div class="compare-row">
        <div class="label-col">Tiempo</div><div>Días / semanas</div><div class="highlight-col">Minutos / horas</div>
      </div>
      <div class="compare-row">
        <div class="label-col">Expertise</div><div>Alto (ML avanzado)</div><div class="highlight-col">Bajo-medio</div>
      </div>
      <div class="compare-row">
        <div class="label-col">Riesgo</div><div>Sobreajuste, catastrófico</div><div class="highlight-col">Limitado, reversible</div>
      </div>
    </div>
  </div>
  <div class="slide-footer">
    <img src="figures/unir_logo.png" class="footer-logo" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">04</span></div>
  </div>
</section>
<!-- ====== SLIDE 5: OBJECTIVES ====== -->
<section>
  <div class="slide-body">
    <h2>Objetivos</h2>
    <!-- "<" is written as &lt; so the parser never treats it as the start of a tag -->
    <div class="highlight-box" style="margin-bottom: 15px;">
      <strong>Objetivo general:</strong> Optimizar PaddleOCR para documentos académicos en español alcanzando un <strong>CER &lt; 2%</strong> sin fine-tuning del modelo base.
    </div>
    <!-- Each objective is a fragment with the fade-up effect and carries the "done" marker -->
    <ul class="obj-list">
      <li class="done fragment fade-up"><strong>OE1:</strong> Comparar tres motores OCR open-source (EasyOCR, PaddleOCR, DocTR)</li>
      <li class="done fragment fade-up"><strong>OE2:</strong> Preparar dataset de evaluación de 45 páginas con ground truth</li>
      <li class="done fragment fade-up"><strong>OE3:</strong> Identificar hiperparámetros críticos mediante análisis de correlación</li>
      <li class="done fragment fade-up"><strong>OE4:</strong> Ejecutar 64 trials de optimización con Ray Tune + Optuna</li>
      <li class="done fragment fade-up"><strong>OE5:</strong> Validar la configuración optimizada frente al baseline</li>
    </ul>
  </div>
  <div class="slide-footer">
    <img src="figures/unir_logo.png" class="footer-logo" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">05</span></div>
  </div>
</section>
<!-- ====== SLIDE 6: STATE OF THE ART ====== -->
<section>
  <div class="slide-body">
    <h2>Estado del Arte: Motores OCR</h2>
    <!-- Three engine cards; the middle one carries the "selected" modifier -->
    <div class="three-columns" style="margin-bottom: 15px;">
      <div class="engine-card">
        <h3>EasyOCR</h3>
        <div class="developer">JaidedAI</div>
        <div class="arch">CRAFT + CRNN</div>
        <ul>
          <li>80+ idiomas</li>
          <li>Fácil de usar</li>
          <li>Baja configurabilidad</li>
        </ul>
      </div>
      <div class="engine-card selected">
        <h3>PaddleOCR</h3>
        <div class="developer">Baidu / PaddlePaddle</div>
        <div class="arch">DB + SVTR (PP-OCRv5)</div>
        <ul>
          <li>Alta configurabilidad</li>
          <li>Pipeline modular</li>
          <li>Soporte español dedicado</li>
        </ul>
      </div>
      <div class="engine-card">
        <h3>DocTR</h3>
        <div class="developer">Mindee</div>
        <div class="arch">DB/LinkNet + CRNN/SAR</div>
        <ul>
          <li>TF y PyTorch</li>
          <li>Soporte español limitado</li>
          <li>Rápido en inferencia</li>
        </ul>
      </div>
    </div>
    <img src="figures/figura_1.png" alt="Pipeline OCR" style="width: 85%; display: block; margin: 0 auto;">
    <div class="fig-caption">Pipeline de un sistema OCR moderno</div>
  </div>
  <div class="slide-footer">
    <img src="figures/unir_logo.png" class="footer-logo" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">06</span></div>
  </div>
</section>
<!-- ====== SLIDE 7: METHODOLOGY ====== -->
<section>
  <div class="slide-body">
    <h2>Metodología: 5 Fases</h2>
    <img src="figures/figura_3.png" alt="Metodología" style="width: 90%; display: block; margin: 0 auto 15px;">
    <div class="fig-caption" style="margin-bottom: 15px;">Fases de la metodología experimental</div>
    <!-- Five-column strip, one fragment per phase: numbered badge, phase name, grey detail line -->
    <div style="display: grid; grid-template-columns: repeat(5, 1fr); gap: 8px; font-size: 0.55em; text-align: center;">
      <div class="fragment fade-up"><div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">1</div>Preparación del dataset<br><span style="color:#999">PDF → 300 DPI + GT</span></div>
      <div class="fragment fade-up"><div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">2</div>Benchmark comparativo<br><span style="color:#999">3 motores, CER/WER</span></div>
      <div class="fragment fade-up"><div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">3</div>Espacio de búsqueda<br><span style="color:#999">7 hiperparámetros</span></div>
      <div class="fragment fade-up"><div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">4</div>Optimización<br><span style="color:#999">64 trials, TPE</span></div>
      <div class="fragment fade-up"><div class="agenda-number" style="width:28px;height:28px;font-size:0.75em;margin:0 auto 5px;">5</div>Validación<br><span style="color:#999">45 páginas completas</span></div>
    </div>
  </div>
  <div class="slide-footer">
    <img src="figures/unir_logo.png" class="footer-logo" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">07</span></div>
  </div>
</section>
<!-- ====== SLIDE 8: ARCHITECTURE ====== -->
<section>
  <div class="slide-body">
    <h2>Arquitectura: Microservicios Docker</h2>
    <!-- 60/40 split: architecture figure on the left, component list and hardware box on the right -->
    <div class="two-columns-60-40">
      <div>
        <img src="figures/figura_6.png" alt="Arquitectura" style="width: 100%; border-radius: 4px;">
        <div class="fig-caption">Arquitectura de microservicios para optimización OCR</div>
      </div>
      <div>
        <ul class="compact-list">
          <li><strong>Contenedor Ray Tune:</strong> Orquestador de trials (Optuna TPE)</li>
          <li><strong>Contenedor OCR:</strong> PaddleOCR con acceso GPU</li>
          <li><strong>Comunicación:</strong> REST API (HTTP POST /evaluate)</li>
          <li><strong>Respuesta:</strong> JSON {CER, WER, TIME}</li>
          <li><strong>Docker Compose:</strong> Despliegue reproducible</li>
        </ul>
        <div class="highlight-box" style="font-size:0.65em; margin-top:15px;">
          <strong>Hardware:</strong><br>
          RTX 3060 Laptop (5.66 GB VRAM)<br>
          AMD Ryzen 7 5800H<br>
          16 GB DDR4 | Ubuntu 24.04
        </div>
      </div>
    </div>
  </div>
  <div class="slide-footer">
    <img src="figures/unir_logo.png" class="footer-logo" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">08</span></div>
  </div>
</section>
<!-- ====== SLIDE 9: SEARCH SPACE ====== -->
<section>
  <div class="slide-body">
    <h2>Espacio de Búsqueda: 7 Hiperparámetros</h2>
    <div class="two-columns-60-40">
      <div>
        <!-- Parameter table; the last row (fixed value) is dimmed via inline opacity -->
        <table class="data-table" style="font-size:0.72em;">
          <thead><tr><th scope="col">Parámetro</th><th scope="col">Tipo</th><th scope="col">Rango</th></tr></thead>
          <tbody>
            <tr><td><code>textline_orientation</code></td><td><span class="tag bool">Booleano</span></td><td>True / False</td></tr>
            <tr><td><code>use_doc_orientation_classify</code></td><td><span class="tag bool">Booleano</span></td><td>True / False</td></tr>
            <tr><td><code>use_doc_unwarping</code></td><td><span class="tag bool">Booleano</span></td><td>True / False</td></tr>
            <tr><td><code>text_det_thresh</code></td><td><span class="tag cont">Continuo</span></td><td>[0.01, 0.50]</td></tr>
            <tr><td><code>text_det_box_thresh</code></td><td><span class="tag cont">Continuo</span></td><td>[0.01, 0.90]</td></tr>
            <tr><td><code>text_rec_score_thresh</code></td><td><span class="tag cont">Continuo</span></td><td>[0.01, 0.99]</td></tr>
            <tr style="opacity:0.5"><td><code>text_det_unclip_ratio</code></td><td><span class="tag fixed">Fijo</span></td><td>0.0</td></tr>
          </tbody>
        </table>
      </div>
      <div>
        <img src="figures/figura_2.png" alt="Ray Tune Cycle" style="width: 100%; border-radius: 4px;">
        <div class="fig-caption">Ciclo de optimización con Ray Tune y Optuna</div>
        <div class="highlight-box" style="font-size:0.62em; margin-top:10px;">
          <strong>Algoritmo:</strong> TPE (Tree-structured Parzen Estimator)<br>
          <strong>Trials:</strong> 64 | <strong>Concurrencia:</strong> 2 workers<br>
          <strong>Métrica:</strong> Minimizar CER
        </div>
      </div>
    </div>
  </div>
  <div class="slide-footer">
    <img src="figures/unir_logo.png" class="footer-logo" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">09</span></div>
  </div>
</section>
<!-- ====== SLIDE 10: BENCHMARK ====== -->
<section data-chart="benchmark">
  <div class="slide-body">
    <h2>Resultados: Benchmark Comparativo</h2>
    <div class="two-columns-60-40">
      <!-- Canvas for the benchmark chart; nothing is drawn by this markup itself -->
      <div class="chart-container">
        <canvas id="chartBenchmark"></canvas>
      </div>
      <div>
        <table class="data-table" style="font-size:0.55em;">
          <thead><tr><th scope="col">Motor</th><th scope="col">Base</th><th scope="col">HPO</th><th scope="col">Mejora</th></tr></thead>
          <tbody>
            <tr><td style="color:#E8832A"><strong>EasyOCR</strong></td><td>11.23%</td><td>5.84%</td><td>-48%</td></tr>
            <tr class="highlight"><td style="color:#0098CD"><strong>PaddleOCR</strong></td><td><strong>7.76%</strong></td><td><strong>0.79%</strong></td><td><strong>-90%</strong></td></tr>
            <tr><td style="color:#2EAD4B"><strong>DocTR</strong></td><td>12.06%</td><td>7.43%</td><td>-38%</td></tr>
          </tbody>
        </table>
        <!-- "<" is written as &lt; so the parser never treats it as the start of a tag -->
        <div class="highlight-box" style="font-size:0.55em; margin-top:8px; padding:10px 15px;">
          Solo <strong>PaddleOCR</strong> alcanza CER &lt; 2% (43/64 trials). Mejora del <strong>89.8%</strong>.
        </div>
      </div>
    </div>
  </div>
  <div class="slide-footer">
    <img src="figures/unir_logo.png" class="footer-logo" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">10</span></div>
  </div>
</section>
<!-- ====== SLIDE 11: 64 TRIALS ====== -->
<section data-chart="trials">
  <!-- Slide 11: distribution of the 64 trials (left) and summary metric cards (right). -->
  <div class="slide-body">
    <h2>Resultados: 64 Trials de Optimización</h2>
    <div class="two-columns">
      <div class="chart-container">
        <!-- Drawn by createTrialsChart(). -->
        <canvas id="chartTrials" role="img" aria-label="Gráfico de anillo con la distribución de los 64 trials por rango de CER"></canvas>
      </div>
      <div>
        <div class="metric-cards" style="flex-direction: column; gap: 6px;">
          <div class="metric-card" style="width:100%; padding:8px 15px;">
            <div class="number success" style="font-size:1.4em;">0.79%</div>
            <div class="label">Mejor CER (Trial #1)</div>
          </div>
          <div class="metric-card" style="width:100%; padding:8px 15px;">
            <div class="number" style="font-size:1.4em;">0.87%</div>
            <div class="label">Mediana CER</div>
          </div>
          <div class="metric-card" style="width:100%; padding:8px 15px;">
            <div class="number warning" style="font-size:1.4em;">7.30%</div>
            <div class="label">Peor CER</div>
          </div>
          <div class="metric-card" style="width:100%; padding:8px 15px;">
            <div class="number" style="font-size:1.4em;">67.2%</div>
            <div class="label">Trials con CER &lt; 2%</div>
          </div>
        </div>
        <div class="highlight-box" style="font-size:0.55em; margin-top:6px; padding:8px 12px;">
          <strong>0 fallos</strong> en 64 trials | Tiempo total: <strong>~5 min</strong> (GPU)
        </div>
      </div>
    </div>
  </div>
  <div class="slide-footer">
    <img class="footer-logo" src="figures/unir_logo.png" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">11</span></div>
  </div>
</section>
<!-- ====== SLIDE 12: KEY FINDING ====== -->
<section data-chart="textline">
  <!-- Slide 12: mean CER by textline_orientation (left) and the takeaways (right). -->
  <div class="slide-body">
    <h2>Hallazgo Clave: <code>textline_orientation</code></h2>
    <div class="two-columns">
      <div class="chart-container">
        <!-- Drawn by createTextlineChart(). -->
        <canvas id="chartTextline" role="img" aria-label="Gráfico de barras del CER medio según textline_orientation: 4.73% con False y 1.74% con True"></canvas>
      </div>
      <div>
        <div class="metric-card" style="width:100%; margin-bottom: 15px;">
          <div class="number success" style="font-size: 2em;">-63.2%</div>
          <div class="label" style="font-size:0.75em;">Reducción en CER</div>
        </div>
        <ul class="compact-list">
          <li>Un <strong>único parámetro booleano</strong> tiene mayor impacto que todos los umbrales numéricos combinados</li>
          <li><strong>Decisiones arquitecturales</strong> &gt; ajustes numéricos finos</li>
          <li>Crítico para documentos con <strong>layouts complejos</strong> (índices, listas, encabezados)</li>
          <li>52 de 64 trials (81%) lo activaron automáticamente (Optuna aprendió rápido)</li>
        </ul>
      </div>
    </div>
  </div>
  <div class="slide-footer">
    <img class="footer-logo" src="figures/unir_logo.png" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">12</span></div>
  </div>
</section>
<!-- ====== SLIDE 13: CORRELATIONS ====== -->
<section data-chart="correlations">
  <!-- Slide 13: signed Pearson correlation (left) and absolute-value ranking (right). -->
  <div class="slide-body">
    <h2>Análisis de Hiperparámetros</h2>
    <div class="two-columns">
      <div>
        <h3 style="font-size:0.75em; text-align:center;">Correlación Pearson con CER</h3>
        <div class="chart-container">
          <!-- Drawn by createCorrelationChart(). -->
          <canvas id="chartCorrelation" role="img" aria-label="Gráfico de barras horizontales con la correlación de Pearson de cada hiperparámetro con el CER"></canvas>
        </div>
      </div>
      <div>
        <h3 style="font-size:0.75em; text-align:center;">Importancia de Hiperparámetros</h3>
        <div class="chart-container">
          <!-- Drawn by createImportanceChart(). -->
          <canvas id="chartImportance" role="img" aria-label="Gráfico de barras horizontales con la importancia de cada hiperparámetro, medida como correlación absoluta"></canvas>
        </div>
      </div>
    </div>
    <div class="highlight-box" style="font-size:0.62em; margin-top:10px;">
      <strong>Insight:</strong> <code>use_doc_unwarping</code> (+0.88) es perjudicial en PDFs digitales (añade procesamiento innecesario). Los parámetros booleanos (arquitecturales) dominan sobre los umbrales numéricos.
    </div>
  </div>
  <div class="slide-footer">
    <img class="footer-logo" src="figures/unir_logo.png" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">13</span></div>
  </div>
</section>
<!-- ====== SLIDE 14: VALIDATION ====== -->
<section data-chart="validation">
  <!-- Slide 14: baseline vs optimized chart (left) and the same figures as a table (right). -->
  <div class="slide-body">
    <h2>Validación: Baseline vs Optimizado</h2>
    <div class="two-columns">
      <div class="chart-container">
        <!-- Drawn by createValidationChart(). -->
        <canvas id="chartValidation" role="img" aria-label="Gráfico de barras que compara CER y WER entre la configuración baseline y la optimizada"></canvas>
      </div>
      <div>
        <table class="data-table" style="font-size:0.7em;">
          <thead>
            <tr><th scope="col">Métrica</th><th scope="col">Baseline</th><th scope="col">Optimizado</th><th scope="col">Mejora</th></tr>
          </thead>
          <tbody>
            <tr><td>CER (45 pág)</td><td>8.85%</td><td>7.72%</td><td class="improvement">-12.8%</td></tr>
            <tr><td>WER (45 pág)</td><td>13.05%</td><td>11.40%</td><td class="improvement">-12.6%</td></tr>
            <tr class="highlight"><td>CER (mejor trial, 5 pág)</td><td>7.76%</td><td>0.79%</td><td class="improvement">-89.8%</td></tr>
          </tbody>
        </table>
        <div class="highlight-box" style="font-size:0.62em; margin-top:15px;">
          <strong>Nota:</strong> La diferencia entre el mejor trial (0.79%) y la validación completa (7.72%) evidencia <strong>sobreajuste</strong> al subconjunto de 5 páginas usado en la optimización. Un subconjunto más amplio (15-20 páginas) mejoraría la generalización.
        </div>
      </div>
    </div>
  </div>
  <div class="slide-footer">
    <img class="footer-logo" src="figures/unir_logo.png" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">14</span></div>
  </div>
</section>
<!-- ====== SLIDE 15: GPU ACCELERATION ====== -->
<section data-chart="gpu">
  <!-- Slide 15: seconds-per-page chart (left) and speed-up metric cards (right). -->
  <div class="slide-body">
    <h2>Aceleración GPU</h2>
    <div class="two-columns">
      <div class="chart-container">
        <!-- Drawn by createGPUChart(). -->
        <canvas id="chartGPU" role="img" aria-label="Gráfico de barras horizontales con los segundos por página: 69.4 s en CPU y 0.84 s en GPU"></canvas>
      </div>
      <div>
        <div class="metric-cards" style="flex-direction: column; gap: 12px;">
          <div class="metric-card" style="width:100%;">
            <div class="number" style="font-size: 2.2em; color: var(--unir-blue);">82x</div>
            <div class="label" style="font-size:0.8em;">Factor de aceleración</div>
          </div>
          <div class="metric-card" style="width:100%;">
            <div class="number">0.84 s</div>
            <div class="label">GPU: segundos por página</div>
          </div>
          <div class="metric-card" style="width:100%;">
            <div class="number warning">69.4 s</div>
            <div class="label">CPU: segundos por página</div>
          </div>
        </div>
        <div class="highlight-box" style="font-size:0.62em; margin-top:12px;">
          64 trials × 5 páginas:<br>
          <strong>CPU:</strong> ~6.2 horas<br>
          <strong>GPU:</strong> ~5 minutos
        </div>
      </div>
    </div>
  </div>
  <div class="slide-footer">
    <img class="footer-logo" src="figures/unir_logo.png" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">15</span></div>
  </div>
</section>
<!-- ====== SLIDE 16: OPTIMAL CONFIG ====== -->
<section>
  <!-- Slide 16: syntax-coloured listing of the winning configuration (left) and its reading (right). -->
  <div class="slide-body">
    <h2>Configuración Óptima</h2>
    <div class="two-columns">
      <div>
        <!-- Hand-coloured code listing; each entry ends in <br> because the box does not preserve newlines. -->
        <div style="background: #1e1e1e; color: #d4d4d4; border-radius: 4px; padding: 20px; font-family: Consolas, monospace; font-size: 0.6em; line-height: 1.7;">
          <span style="color:#569cd6">config_optimizada</span> = {<br>
          <span style="color:#9cdcfe">"textline_orientation"</span>: <span style="color:#4ec9b0">True</span>, <span style="color:#6a9955"># CRÍTICO</span><br>
          <span style="color:#9cdcfe">"use_doc_orientation_classify"</span>: <span style="color:#4ec9b0">True</span>,<br>
          <span style="color:#9cdcfe">"use_doc_unwarping"</span>: <span style="color:#4ec9b0">False</span>, <span style="color:#6a9955"># Innecesario</span><br>
          <span style="color:#9cdcfe">"text_det_thresh"</span>: <span style="color:#b5cea8">0.0462</span>,<br>
          <span style="color:#9cdcfe">"text_det_box_thresh"</span>: <span style="color:#b5cea8">0.4862</span>,<br>
          <span style="color:#9cdcfe">"text_det_unclip_ratio"</span>: <span style="color:#b5cea8">0.0</span>,<br>
          <span style="color:#9cdcfe">"text_rec_score_thresh"</span>: <span style="color:#b5cea8">0.5658</span>,<br>
          }
        </div>
      </div>
      <div>
        <h3 style="font-size:0.8em;">Insights clave</h3>
        <ul class="compact-list">
          <li class="fragment fade-up"><strong><code>textline_orientation = True</code></strong>: Parámetro más impactante (-63.2% CER)</li>
          <li class="fragment fade-up"><strong><code>use_doc_unwarping = False</code></strong>: Procesamiento innecesario para PDFs digitales</li>
          <li class="fragment fade-up"><strong><code>text_det_thresh</code> bajo</strong>: Captura más regiones de texto, reduce omisiones</li>
          <li class="fragment fade-up"><strong>Parámetros booleanos</strong> dominan sobre umbrales numéricos</li>
        </ul>
        <div class="highlight-box" style="font-size: 0.62em; margin-top:12px;">
          Esta configuración es directamente aplicable a otros documentos académicos en español con layouts similares.
        </div>
      </div>
    </div>
  </div>
  <div class="slide-footer">
    <img class="footer-logo" src="figures/unir_logo.png" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">16</span></div>
  </div>
</section>
<!-- ====== SLIDE 17: CONCLUSIONS ====== -->
<section>
  <!-- Slide 17: contributions (left column) and limitations (right column), revealed one fragment at a time. -->
  <div class="slide-body">
    <h2>Conclusiones</h2>
    <div class="two-columns">
      <div>
        <h3 style="font-size:0.78em; margin-bottom:10px;">Contribuciones</h3>
        <div class="contribution-item fragment fade-up">
          <div class="contribution-icon">1</div>
          <div><strong>Metodología reproducible</strong> para optimización de hiperparámetros OCR con código abierto</div>
        </div>
        <div class="contribution-item fragment fade-up">
          <div class="contribution-icon">2</div>
          <div><strong>Análisis sistemático</strong> de hiperparámetros PaddleOCR con correlaciones Pearson</div>
        </div>
        <div class="contribution-item fragment fade-up">
          <div class="contribution-icon">3</div>
          <div><strong>Configuración validada</strong> para documentos académicos en español (CER 0.79%)</div>
        </div>
        <div class="contribution-item fragment fade-up">
          <div class="contribution-icon">4</div>
          <div><strong>Infraestructura dockerizada</strong> reproducible con imágenes públicas</div>
        </div>
      </div>
      <div>
        <h3 style="font-size:0.78em; margin-bottom:10px;">Limitaciones</h3>
        <div class="contribution-item fragment fade-up">
          <div class="limitation-icon">!</div>
          <div>Un único tipo de documento (académico UNIR)</div>
        </div>
        <div class="contribution-item fragment fade-up">
          <div class="limitation-icon">!</div>
          <div>Corpus modesto (45 páginas)</div>
        </div>
        <div class="contribution-item fragment fade-up">
          <div class="limitation-icon">!</div>
          <div>Sobreajuste al subconjunto de optimización (5 páginas)</div>
        </div>
        <div class="contribution-item fragment fade-up">
          <div class="limitation-icon">!</div>
          <div><code>text_det_unclip_ratio</code> no explorado</div>
        </div>
      </div>
    </div>
  </div>
  <div class="slide-footer">
    <img class="footer-logo" src="figures/unir_logo.png" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">17</span></div>
  </div>
</section>
<!-- ====== SLIDE 18: FUTURE WORK ====== -->
<section>
  <!-- Slide 18: future work grouped into three columns. -->
  <div class="slide-body">
    <h2>Líneas de Trabajo Futuro</h2>
    <div class="three-columns" style="font-size:0.72em;">
      <div>
        <h3 style="font-size:0.95em;">Extensiones inmediatas</h3>
        <ul class="compact-list">
          <li>Validación cruzada en otros tipos de documentos (facturas, formularios, manuscritos)</li>
          <li>Subconjunto de optimización más amplio (15-20 páginas)</li>
          <li>Exploración de <code>text_det_unclip_ratio</code></li>
        </ul>
      </div>
      <div>
        <h3 style="font-size:0.95em;">Líneas de investigación</h3>
        <ul class="compact-list">
          <li>Transfer learning de hiperparámetros entre dominios</li>
          <li>Optimización multi-objetivo (CER + WER + velocidad)</li>
          <li>Comparación rigurosa HPO vs fine-tuning</li>
        </ul>
      </div>
      <div>
        <h3 style="font-size:0.95em;">Aplicaciones prácticas</h3>
        <ul class="compact-list">
          <li>Herramienta de configuración automática por tipo de documento</li>
          <li>Integración en pipelines de producción</li>
          <li>Benchmark público de OCR en español</li>
        </ul>
      </div>
    </div>
  </div>
  <div class="slide-footer">
    <img class="footer-logo" src="figures/unir_logo.png" alt="UNIR">
    <div class="footer-page"><span class="pag-label">pág.</span><br><span class="pag-number">18</span></div>
  </div>
</section>
<!-- ====== SLIDE 19: THANK YOU ====== -->
2026-04-21 20:46:05 +02:00
<section class="thanks-slide" data-transition="fade">
  <!-- Slide 19: closing slide; the two corner divs are empty decorative hooks for CSS. -->
  <div class="thanks-content">
    <div class="corner-tl"></div>
    <div class="corner-br"></div>
    <h1>muchas<br>gracias.</h1>
  </div>
  <div class="title-footer">
    <img src="figures/unir_logo.png" alt="UNIR">
  </div>
</section>
< / div > <!-- .slides -->
< / div > <!-- .reveal -->
< script src = "https://cdn.jsdelivr.net/npm/reveal.js@5.0.4/dist/reveal.min.js" > < / script >
< script >
// Registry of Chart.js instances that have already been built, keyed by chart
// name. Every create*Chart() function checks it so a chart is created once.
const charts = {};

// UNIR colour palette shared by the charts.
const BLUE = '#0098CD';
const BLUE_DARK = '#007AA3';
const LIGHT = '#E6F4F9';
const RED = '#E8654A';
const ORANGE = '#E8832A';
const GREEN = '#2EAD4B';
const GRAY = '#CCCCCC';

// True when the page URL query string contains "print-pdf".
const isPrint = window.location.search.includes('print-pdf');

// Base options spread into each chart's `options`. Animation is switched off
// when isPrint is set; the legend is hidden unless a chart overrides `plugins`.
const commonOptions = {
  responsive: true,
  maintainAspectRatio: false,
  animation: isPrint ? false : { duration: 1200, easing: 'easeOutQuart' },
  plugins: { legend: { display: false } }
};
/**
 * Builds the horizontal grouped bar chart on #chartBenchmark with the CER and
 * WER of the three OCR engines. Does nothing if the canvas is missing or the
 * chart was already created.
 */
function createBenchmarkChart() {
  const canvas = document.getElementById('chartBenchmark');
  if (!canvas || charts.benchmark) return;

  charts.benchmark = new Chart(canvas, {
    type: 'bar',
    data: {
      labels: ['EasyOCR', 'PaddleOCR', 'DocTR'],
      datasets: [
        {
          label: 'CER (%)',
          data: [11.23, 7.76, 12.06],
          backgroundColor: [ORANGE, BLUE, GREEN],
          borderColor: ['#CC6B1A', BLUE_DARK, '#1E8A3C'],
          borderWidth: 2,
          borderRadius: 4,
          barPercentage: 0.6
        },
        {
          // Same hue per engine as the CER bars, at 35% opacity.
          label: 'WER (%)',
          data: [36.36, 11.62, 42.01],
          backgroundColor: ['rgba(232,131,42,0.35)', 'rgba(0,152,205,0.35)', 'rgba(46,173,75,0.35)'],
          borderColor: [ORANGE, BLUE, GREEN],
          borderWidth: 2,
          borderRadius: 4,
          barPercentage: 0.6
        }
      ]
    },
    options: {
      ...commonOptions,
      indexAxis: 'y',
      // Replaces commonOptions.plugins so the legend is shown for the two series.
      plugins: {
        legend: { display: true, position: 'top', labels: { font: { family: 'Calibri', size: 12 } } }
      },
      scales: {
        x: { title: { display: true, text: 'Error Rate (%)', font: { family: 'Calibri' } }, grid: { color: '#f0f0f0' } },
        y: { grid: { display: false }, ticks: { font: { family: 'Calibri', size: 14, weight: 'bold' } } }
      }
    }
  });
}
/**
 * Builds the doughnut chart on #chartTrials showing how many of the 64 trials
 * fall into each CER range. Does nothing if the canvas is missing or the chart
 * was already created.
 */
function createTrialsChart() {
  const canvas = document.getElementById('chartTrials');
  if (!canvas || charts.trials) return;

  charts.trials = new Chart(canvas, {
    type: 'doughnut',
    data: {
      // Legend labels, one per bucket; stray padding spaces removed.
      labels: ['CER < 1%', 'CER 1-2%', 'CER 2-5%', 'CER 5-10%'],
      datasets: [{
        // Trial counts per bucket (15 + 28 + 10 + 11 = 64).
        data: [15, 28, 10, 11],
        backgroundColor: [BLUE_DARK, BLUE, '#7EC8E3', GRAY],
        borderColor: 'white',
        borderWidth: 3,
        hoverOffset: 8
      }]
    },
    options: {
      ...commonOptions,
      cutout: '55%',
      // Replaces commonOptions.plugins so the bucket legend is shown below the ring.
      plugins: {
        legend: {
          display: true,
          position: 'bottom',
          labels: { font: { family: 'Calibri', size: 12 }, padding: 15, usePointStyle: true, pointStyle: 'rectRounded' }
        }
      }
    }
  });
}
/**
 * Builds the two-bar chart on #chartTextline comparing mean CER with
 * textline_orientation set to False and True. Does nothing if the canvas is
 * missing or the chart was already created.
 */
function createTextlineChart() {
  const canvas = document.getElementById('chartTextline');
  if (!canvas || charts.textline) return;

  charts.textline = new Chart(canvas, {
    type: 'bar',
    data: {
      labels: ['False', 'True'],
      datasets: [{
        label: 'CER medio (%)',
        data: [4.73, 1.74],
        backgroundColor: [GRAY, BLUE],
        borderColor: ['#aaa', BLUE_DARK],
        borderWidth: 2,
        borderRadius: 6,
        barPercentage: 0.5
      }]
    },
    options: {
      ...commonOptions,
      scales: {
        y: {
          beginAtZero: true,
          max: 6,
          title: { display: true, text: 'CER (%)', font: { family: 'Calibri', size: 13 } },
          grid: { color: '#f0f0f0' }
        },
        x: {
          title: { display: true, text: 'textline_orientation', font: { family: 'Calibri', size: 13, weight: 'bold' } },
          grid: { display: false },
          ticks: { font: { family: 'Calibri', size: 16, weight: 'bold' } }
        }
      },
      plugins: {
        legend: { display: false },
        tooltip: {
          callbacks: {
            // Tooltip text is the bar's y value formatted as a CER percentage.
            label: (item) => `CER: ${item.parsed.y}%`
          }
        }
      }
    }
  });
}
/**
 * Builds the horizontal bar chart on #chartCorrelation with one signed value
 * per hyperparameter on a fixed [-1, 1] axis. Does nothing if the canvas is
 * missing or the chart was already created.
 */
function createCorrelationChart() {
  const canvas = document.getElementById('chartCorrelation');
  if (!canvas || charts.correlation) return;

  // [axis label, correlation value] pairs, in display order (top to bottom).
  const rows = [
    ['use_doc_unwarping', 0.879],
    ['text_det_thresh', 0.428],
    ['text_det_box_thresh', 0.311],
    ['text_rec_score_thresh', -0.268],
    ['textline_orientation', -0.535],
    ['use_doc_orient_classify', -0.712]
  ];
  const names = rows.map(([name]) => name);
  const coefficients = rows.map(([, r]) => r);

  // Positive values are drawn in red, all others in blue, each with a darker outline.
  const fills = coefficients.map((r) => (r > 0 ? RED : BLUE));
  const outlines = coefficients.map((r) => (r > 0 ? '#C04030' : BLUE_DARK));

  // Darkens only the grid line at x = 0 so the sign boundary stands out.
  const gridLineColor = (gridCtx) => (gridCtx.tick.value === 0 ? '#666' : '#f0f0f0');

  const xScale = {
    min: -1,
    max: 1,
    title: { display: true, text: 'Correlación Pearson', font: { family: 'Calibri', size: 11 } },
    grid: { color: gridLineColor }
  };
  const yScale = {
    grid: { display: false },
    ticks: { font: { family: 'Consolas, monospace', size: 10 } }
  };

  charts.correlation = new Chart(canvas, {
    type: 'bar',
    data: {
      labels: names,
      datasets: [{
        data: coefficients,
        backgroundColor: fills,
        borderColor: outlines,
        borderWidth: 1.5,
        borderRadius: 4,
        barPercentage: 0.65
      }]
    },
    options: {
      ...commonOptions,
      indexAxis: 'y',
      scales: { x: xScale, y: yScale }
    }
  });
}
/**
 * Builds the horizontal bar chart on #chartImportance: one bar per
 * hyperparameter on a 0..1 axis, listed in descending order. Does nothing if
 * the canvas is missing or the chart was already created.
 */
function createImportanceChart() {
  const canvas = document.getElementById('chartImportance');
  if (!canvas || charts.importance) return;

  // [axis label, value] pairs, largest value first.
  const ranking = [
    ['use_doc_unwarping', 0.879],
    ['use_doc_orient_classify', 0.712],
    ['textline_orientation', 0.535],
    ['text_det_thresh', 0.428],
    ['text_det_box_thresh', 0.311],
    ['text_rec_score_thresh', 0.268]
  ];
  const names = ranking.map((entry) => entry[0]);
  const magnitudes = ranking.map((entry) => entry[1]);

  // Same blue for every bar, losing 0.12 of opacity per rank position.
  const fills = Array.from(magnitudes, (_value, rank) => {
    const alpha = 1 - (rank * 0.12);
    return `rgba(0, 152, 205, ${alpha})`;
  });

  const scales = {
    x: {
      beginAtZero: true,
      max: 1,
      title: { display: true, text: '|Correlación|', font: { family: 'Calibri', size: 11 } },
      grid: { color: '#f0f0f0' }
    },
    y: {
      grid: { display: false },
      ticks: { font: { family: 'Consolas, monospace', size: 10 } }
    }
  };

  charts.importance = new Chart(canvas, {
    type: 'bar',
    data: {
      labels: names,
      datasets: [{
        data: magnitudes,
        backgroundColor: fills,
        borderColor: BLUE_DARK,
        borderWidth: 1,
        borderRadius: 4,
        barPercentage: 0.65
      }]
    },
    options: {
      ...commonOptions,
      indexAxis: 'y',
      scales
    }
  });
}
/**
 * Builds the grouped bar chart on #chartValidation comparing the "Baseline"
 * and "Optimizado" series across three metrics. Does nothing if the canvas is
 * missing or the chart was already created.
 */
function createValidationChart() {
  const canvas = document.getElementById('chartValidation');
  if (!canvas || charts.validation) return;

  charts.validation = new Chart(canvas, {
    type: 'bar',
    data: {
      labels: ['CER (45 pág)', 'WER (45 pág)', 'CER (mejor trial)'],
      datasets: [
        {
          label: 'Baseline',
          data: [8.85, 13.05, 7.76],
          backgroundColor: 'rgba(204,204,204,0.7)',
          borderColor: '#aaa',
          borderWidth: 2,
          borderRadius: 4,
          barPercentage: 0.7
        },
        {
          label: 'Optimizado',
          data: [7.72, 11.40, 0.79],
          backgroundColor: BLUE,
          borderColor: BLUE_DARK,
          borderWidth: 2,
          borderRadius: 4,
          barPercentage: 0.7
        }
      ]
    },
    options: {
      ...commonOptions,
      // Replaces commonOptions.plugins so the two series are named in a legend.
      plugins: {
        legend: { display: true, position: 'top', labels: { font: { family: 'Calibri', size: 13 } } }
      },
      scales: {
        y: {
          beginAtZero: true,
          title: { display: true, text: 'Error Rate (%)', font: { family: 'Calibri' } },
          grid: { color: '#f0f0f0' }
        },
        x: {
          grid: { display: false },
          ticks: { font: { family: 'Calibri', size: 12 } }
        }
      }
    }
  });
}
/**
 * Builds the horizontal bar chart on #chartGPU with seconds per page for the
 * CPU and the GPU. Does nothing if the canvas is missing or the chart was
 * already created.
 */
function createGPUChart() {
  const canvas = document.getElementById('chartGPU');
  if (!canvas || charts.gpu) return;

  charts.gpu = new Chart(canvas, {
    type: 'bar',
    data: {
      labels: ['CPU (Ryzen 7 5800H)', 'GPU (RTX 3060)'],
      datasets: [{
        label: 'Segundos por página',
        data: [69.4, 0.84],
        backgroundColor: [ORANGE, BLUE],
        borderColor: ['#CC8020', BLUE_DARK],
        borderWidth: 2,
        borderRadius: 6,
        barPercentage: 0.5
      }]
    },
    options: {
      ...commonOptions,
      indexAxis: 'y',
      scales: {
        x: {
          beginAtZero: true,
          title: { display: true, text: 'Segundos por página', font: { family: 'Calibri', size: 13 } },
          grid: { color: '#f0f0f0' }
        },
        y: {
          grid: { display: false },
          ticks: { font: { family: 'Calibri', size: 14, weight: 'bold' } }
        }
      },
      plugins: {
        legend: { display: false },
        tooltip: {
          callbacks: {
            // Bars are horizontal (indexAxis 'y'), so the value is read from parsed.x.
            label: (item) => `${item.parsed.x} s/página`
          }
        }
      }
    }
  });
}
// Lookup from a slide's data-chart value to the function that draws its
// chart(s). The "correlations" slide holds two canvases, so it draws both.
const chartCreators = {
  benchmark: createBenchmarkChart,
  trials: createTrialsChart,
  textline: createTextlineChart,
  correlations() {
    createCorrelationChart();
    createImportanceChart();
  },
  validation: createValidationChart,
  gpu: createGPUChart
};
// Start reveal.js on a fixed 1280x720 stage with no margin and no vertical
// centering. The built-in slide number is off (each slide's footer carries its
// own "pág." number) and fragments are not split into separate PDF pages.
Reveal.initialize({
  hash: true,
  slideNumber: false,
  transition: 'slide',
  transitionSpeed: 'default',
  width: 1280,
  height: 720,
  margin: 0,
  center: false,
  controlsTutorial: false,
  pdfSeparateFragments: false
});
// Schedules, after `delayMs` milliseconds, the creator registered for the
// slide's data-chart attribute. Slides without a known data-chart are ignored.
function scheduleChartFor(slide, delayMs) {
  const chartType = slide.dataset.chart;
  if (chartType && chartCreators[chartType]) {
    setTimeout(() => chartCreators[chartType](), delayMs);
  }
}

// Runs every registered creator immediately. Safe to call repeatedly: each
// create*Chart() returns early when its chart already exists.
function createAllCharts() {
  Object.values(chartCreators).forEach((create) => create());
}

// Draw a slide's chart shortly after the deck navigates to it.
Reveal.on('slidechanged', (event) => {
  scheduleChartFor(event.currentSlide, 100);
});

// On start-up: in print-pdf mode draw every chart at once; otherwise draw only
// the chart of the slide the deck opened on.
Reveal.on('ready', (event) => {
  if (isPrint) {
    createAllCharts();
  } else {
    scheduleChartFor(event.currentSlide, 300);
  }
});

// Fallback for print-pdf mode: try again once the window has fully loaded.
if (isPrint) {
  window.addEventListener('load', createAllCharts);
}
< / script >
< / body >
< / html >