Leyenda

2026-02-04 19:56:30 +01:00
parent d746a3c73f
commit 868f748a8d
4 changed files with 617 additions and 357 deletions
--- a/apply_content.py
+++ b/apply_content.py
@@ -4,23 +4,25 @@
 import re
 import os
 import shutil
-from bs4 import BeautifulSoup, NavigableString
+from bs4 import BeautifulSoup, NavigableString
-from latex2mathml.converter import convert as latex_to_mathml
+from latex2mathml.converter import convert as latex_to_mathml
-from PIL import Image
+from PIL import Image
-
+
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-TEMPLATE_INPUT = os.path.join(BASE_DIR, 'instructions/plantilla_individual.htm')
+TEMPLATE_INPUT = os.path.join(BASE_DIR, 'instructions/plantilla_individual.htm')
-TEMPLATE_OUTPUT = os.path.join(BASE_DIR, 'thesis_output/plantilla_individual.htm')
+TEMPLATE_OUTPUT = os.path.join(BASE_DIR, 'thesis_output/plantilla_individual.htm')
-DOCS_DIR = os.path.join(BASE_DIR, 'docs')
+DOCS_DIR = os.path.join(BASE_DIR, 'docs')
-
+
-# Accept Fuente/Source lines with or without markdown bold
+# Accept Fuente/Source lines with or without markdown bold
-SOURCE_LINE_RE = re.compile(r'^\s*(?:\*{1,2})?(Fuente|Source):(?:\*{1,2})?\s*(.*)$', re.IGNORECASE)
+SOURCE_LINE_RE = re.compile(r'^\s*(?:\*{1,2})?(Fuente|Source):(?:\*{1,2})?\s*(.*)$', re.IGNORECASE)
-
+# Accept Leyenda lines with or without markdown bold
-# Global counters for tables and figures
+LEYENDA_LINE_RE = re.compile(r'^\s*(?:\*{1,2})?Leyenda:(?:\*{1,2})?\s*(.*)$', re.IGNORECASE)
-table_counter = 0
+
-figure_counter = 0
+# Global counters for tables and figures
-anexo_table_counter = 0
+table_counter = 0
-anexo_figure_counter = 0
+figure_counter = 0
 anexo_table_counter = 0
 anexo_figure_counter = 0
 # Global sequential counter for figure filenames (figura_1.png, figura_2.png, etc.)
 global_figure_index = 0
@@ -48,7 +50,7 @@ def md_to_html_para(text):
    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2">\1</a>', text)
    return text
-def convert_latex_formulas(text):
+def convert_latex_formulas(text):
    """Convert LaTeX formulas to MathML for Word compatibility."""
    # Block formulas $$...$$
    def convert_block(match):
@@ -69,22 +71,33 @@ def convert_latex_formulas(text):
        except:
            return match.group(0)
-    text = re.sub(r'\$([^$]+)\$', convert_inline, text)
+    text = re.sub(r'\$([^$]+)\$', convert_inline, text)
-    return text
+    return text
-
+
-def extract_source_from_line(line):
+def extract_source_from_line(line):
-    """Return source text if line is a Fuente/Source line, otherwise None."""
+    """Return source text if line is a Fuente/Source line, otherwise None."""
-    match = SOURCE_LINE_RE.match(line.strip())
+    match = SOURCE_LINE_RE.match(line.strip())
-    if not match:
+    if not match:
-        return None
+        return None
-    return match.group(2).strip()
+    return match.group(2).strip()
-
+
-def is_source_line(line):
+def is_source_line(line):
-    """Check whether a line starts with Fuente:/Source: (optionally bold)."""
+    """Check whether a line starts with Fuente:/Source: (optionally bold)."""
-    return SOURCE_LINE_RE.match(line.strip()) is not None
+    return SOURCE_LINE_RE.match(line.strip()) is not None
-
+
-def extract_table_title(lines, current_index):
+def extract_leyenda_from_line(line):
-    """Look for table title in preceding lines (e.g., **Tabla 1.** *Title*)."""
+    """Return leyenda text if line is a Leyenda line, otherwise None."""
    match = LEYENDA_LINE_RE.match(line.strip())
    if not match:
        return None
    return match.group(1).strip()
 def is_leyenda_line(line):
    """Check whether a line starts with Leyenda: (optionally bold)."""
    return LEYENDA_LINE_RE.match(line.strip()) is not None
 def extract_table_title(lines, current_index):
    """Look for table title in preceding lines (e.g., **Tabla 1.** *Title*)."""
    # Check previous non-empty lines for table title
    for i in range(current_index - 1, max(0, current_index - 5), -1):
        line = lines[i].strip()
@@ -172,8 +185,11 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
            bookmark_id = f"_Ref_Fig{fig_num}"
            # mso-pagination:keep-with-next ensures caption stays with figure image (correct MSO property)
            # For Anexo figures, use static text (no SEQ field) to prevent Word from overwriting A1, A2...
            # Add TC field so Anexo figures appear in Table of Figures index
            # Use \f c to match the TOC field identifier in the template
            if is_anexo:
-                html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center;mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura {fig_num}.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')
+                tc_field = f'''<!--[if supportFields]><span style='mso-element:field-begin'></span> TC "Figura {fig_num}. {fig_title}" \\f c \\l 1 <span style='mso-element:field-end'></span><![endif]-->'''
                html_blocks.append(f'''<a name="{bookmark_id}"></a>{tc_field}<p class=MsoCaption style="text-align:center;mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura {fig_num}.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')
            else:
                html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center;mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Figura \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{fig_num}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')
@@ -204,19 +220,27 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
            # Check if next non-empty line has custom Fuente
            custom_source = None
            fig_leyenda = None
            lookahead = i + 1
            while lookahead < len(lines) and not lines[lookahead].strip():
                lookahead += 1
-            if lookahead < len(lines):
+            if lookahead < len(lines):
-                next_line = lines[lookahead].strip()
+                next_line = lines[lookahead].strip()
-                if is_source_line(next_line):
+                if is_source_line(next_line):
-                    # Extract custom source, removing markdown formatting
+                    # Extract custom source, removing markdown formatting
-                    custom_source = extract_source_from_line(next_line)
+                    custom_source = extract_source_from_line(next_line)
-                    # Ensure it ends with a period
+                    # Ensure it ends with a period
-                    if custom_source and not custom_source.endswith('.'):
+                    if custom_source and not custom_source.endswith('.'):
-                        custom_source += '.'
+                        custom_source += '.'
-                    # Skip this line by advancing i past it
+                    # Skip this line by advancing i past it
-                    i = lookahead
+                    i = lookahead
                    # Check for Leyenda after source
                    leyenda_idx = i + 1
                    while leyenda_idx < len(lines) and not lines[leyenda_idx].strip():
                        leyenda_idx += 1
                    if leyenda_idx < len(lines) and is_leyenda_line(lines[leyenda_idx]):
                        fig_leyenda = extract_leyenda_from_line(lines[leyenda_idx])
                        i = leyenda_idx
            if custom_source:
                source_html = md_to_html_para(custom_source)
@@ -224,6 +248,13 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
            else:
                html_blocks.append(f'''<p class=Piedefoto-tabla style="margin-left:0cm;text-align:center"><span lang=ES>Fuente: Elaboración propia.</span></p>''')
            # Add leyenda if present (same style as Fuente, new line)
            if fig_leyenda:
                leyenda_html = md_to_html_para(fig_leyenda)
                if not fig_leyenda.endswith('.'):
                    leyenda_html += '.'
                html_blocks.append(f'''<p class=Piedefoto-tabla style="margin-left:0cm;text-align:center"><span lang=ES>Leyenda: {leyenda_html}</span></p>''')
            html_blocks.append('<p class=MsoNormal><span lang=ES><o:p>&nbsp;</o:p></span></p>')
            i += 1
            continue
@@ -249,7 +280,7 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
        if line.startswith('####'):
            text = line.lstrip('#').strip()
            # Apply consistent styling like h2/h3, disable numbering for h4
-            html_blocks.append(f'<h4 style="mso-list:none"><span lang=ES style="text-transform:none">{text}</span></h4>')
+            html_blocks.append(f'<h4 style="mso-list:none"><b><span lang=ES style="text-transform:none">{text}</span></b></h4>')
            i += 1
            continue
        elif line.startswith('###'):
@@ -314,11 +345,19 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
            # Look ahead for source (skip blank lines first)
            source_idx = i
-            while source_idx < len(lines) and not lines[source_idx].strip():
+            table_leyenda = None
-                source_idx += 1
+            while source_idx < len(lines) and not lines[source_idx].strip():
-            if source_idx < len(lines) and is_source_line(lines[source_idx]):
+                source_idx += 1
-                table_source = extract_source_from_line(lines[source_idx])
+            if source_idx < len(lines) and is_source_line(lines[source_idx]):
-                i = source_idx + 1
+                table_source = extract_source_from_line(lines[source_idx])
                i = source_idx + 1
                # Check for Leyenda after source (skip blank lines)
                leyenda_idx = i
                while leyenda_idx < len(lines) and not lines[leyenda_idx].strip():
                    leyenda_idx += 1
                if leyenda_idx < len(lines) and is_leyenda_line(lines[leyenda_idx]):
                    table_leyenda = extract_leyenda_from_line(lines[leyenda_idx])
                    i = leyenda_idx + 1
            # Add table title with MsoCaption class and proper Word SEQ field for cross-reference
            # Format: "Tabla X." in bold, title in italic (per UNIR guidelines)
@@ -334,8 +373,11 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
                clean_title = "Tabla de datos."
            # mso-pagination:keep-with-next ensures caption stays with table (correct MSO property)
            # For Anexo tables, use static text (no SEQ field) to prevent Word from overwriting A1, A2...
            # Add TC field so Anexo tables appear in Table of Tables index
            # Use \f t identifier - template TOC field will be modified to include this
            if is_anexo:
-                html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla {table_num}.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')
+                tc_field = f'''<!--[if supportFields]><span style='mso-element:field-begin'></span> TC "Tabla {table_num}. {clean_title}" \\f t \\l 1 <span style='mso-element:field-end'></span><![endif]-->'''
                html_blocks.append(f'''<a name="{bookmark_id}"></a>{tc_field}<p class=MsoCaption style="mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla {table_num}.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')
            else:
                html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Tabla \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{table_num}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')
@@ -363,6 +405,14 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
            if not table_source.endswith('.'):
                source_html += '.'
            html_blocks.append(f'<p class=Piedefoto-tabla style="margin-left:0cm"><span lang=ES>Fuente: {source_html}</span></p>')
            # Add leyenda if present (same style as Fuente, new line)
            if table_leyenda:
                leyenda_html = md_to_html_para(table_leyenda)
                if not table_leyenda.endswith('.'):
                    leyenda_html += '.'
                html_blocks.append(f'<p class=Piedefoto-tabla style="margin-left:0cm"><span lang=ES>Leyenda: {leyenda_html}</span></p>')
            html_blocks.append('<p class=MsoNormal><span lang=ES><o:p>&nbsp;</o:p></span></p>')
            continue
@@ -376,24 +426,63 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
            html_blocks.append(f'<p class=MsoQuote><i><span lang=ES>{md_to_html_para(quote_text)}</span></i></p>')
            continue
-        # Bullet list
+        # Bullet list (handle blank lines between items)
        if re.match(r'^[\-\*\+]\s', line):
-            while i < len(lines) and re.match(r'^[\-\*\+]\s', lines[i]):
+            # Collect all bullet items first
-                item_text = lines[i][2:].strip()
+            bullet_items = []
-                item_text = convert_latex_formulas(item_text)
+            while i < len(lines):
-                html_blocks.append(f'<p class=MsoListParagraphCxSpMiddle style="margin-left:36pt;text-indent:-18pt"><span lang=ES style="font-family:Symbol">·</span><span lang=ES style="font-size:7pt">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span lang=ES>{md_to_html_para(item_text)}</span></p>')
+                # Skip blank lines
-                i += 1
+                while i < len(lines) and not lines[i].strip():
                    i += 1
                # Check if next non-blank line is a bullet item
                if i < len(lines) and re.match(r'^[\-\*\+]\s', lines[i]):
                    item_text = lines[i][2:].strip()
                    item_text = convert_latex_formulas(item_text)
                    bullet_items.append(md_to_html_para(item_text))
                    i += 1
                else:
                    break
            # Output with proper First/Middle/Last classes
            for idx, item in enumerate(bullet_items):
                if len(bullet_items) == 1:
                    cls = 'MsoListParagraph'
                elif idx == 0:
                    cls = 'MsoListParagraphCxSpFirst'
                elif idx == len(bullet_items) - 1:
                    cls = 'MsoListParagraphCxSpLast'
                else:
                    cls = 'MsoListParagraphCxSpMiddle'
                html_blocks.append(f'<p class={cls} style="margin-left:36pt;text-indent:-18pt"><span lang=ES style="font-family:Symbol">·</span><span lang=ES style="font-size:7pt">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span lang=ES>{item}</span></p>')
            continue
-        # Numbered list
+        # Numbered list (handle blank lines between items)
        if re.match(r'^\d+\.\s', line):
-            num = 1
+            # Collect all numbered items first
-            while i < len(lines) and re.match(r'^\d+\.\s', lines[i]):
+            numbered_items = []
-                item_text = re.sub(r'^\d+\.\s*', '', lines[i]).strip()
+            while i < len(lines):
-                item_text = convert_latex_formulas(item_text)
+                # Skip blank lines
-                html_blocks.append(f'<p class=MsoListParagraphCxSpMiddle style="margin-left:36pt;text-indent:-18pt"><span lang=ES>{num}.<span style="font-size:7pt">&nbsp;&nbsp;&nbsp;</span>{md_to_html_para(item_text)}</span></p>')
+                while i < len(lines) and not lines[i].strip():
-                num += 1
+                    i += 1
-                i += 1
+                # Check if next non-blank line is a numbered item
                if i < len(lines) and re.match(r'^\d+\.\s', lines[i]):
                    item_text = re.sub(r'^\d+\.\s*', '', lines[i]).strip()
                    item_text = convert_latex_formulas(item_text)
                    numbered_items.append(md_to_html_para(item_text))
                    i += 1
                else:
                    break
            # Output with proper First/Middle/Last classes
            for idx, item in enumerate(numbered_items):
                num = idx + 1
                if len(numbered_items) == 1:
                    cls = 'MsoListParagraph'
                elif idx == 0:
                    cls = 'MsoListParagraphCxSpFirst'
                elif idx == len(numbered_items) - 1:
                    cls = 'MsoListParagraphCxSpLast'
                else:
                    cls = 'MsoListParagraphCxSpMiddle'
                html_blocks.append(f'<p class={cls} style="margin-left:36pt;text-indent:-18pt"><span lang=ES>{num}.<span style="font-size:7pt">&nbsp;&nbsp;&nbsp;</span>{item}</span></p>')
            continue
        # Skip lines that are just table/figure titles (they'll be handled with the table/figure)
@@ -403,9 +492,12 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
        if line.strip().startswith('**Figura') or line.strip().startswith('*Figura'):
            i += 1
            continue
-        if is_source_line(line):
+        if is_source_line(line):
-            i += 1
+            i += 1
-            continue
+            continue
        if is_leyenda_line(line):
            i += 1
            continue
        # Regular paragraph
        para_lines = [line]
@@ -523,6 +615,17 @@ def main():
    print("Reading template...")
    html_content = read_file(TEMPLATE_INPUT)
    # Modify the Table of Tables TOC field to include TC entries with \f t identifier
    # Original: TOC \h \z \t "Tablas;1" \c "Tabla"
    # Modified: TOC \f t \h \z \t "Tablas;1" \c "Tabla"
    # Use regex to handle whitespace/HTML variations in the TOC field
    html_content = re.sub(
        r'(TOC\s+)(\\h\s+\\z\s+\\t\s*\n?\s*&quot;Tablas;1&quot;)',
        r'\1\\f t \2',
        html_content
    )
    soup = BeautifulSoup(html_content, 'html.parser')
    print("Reading docs content...")
@@ -671,10 +774,10 @@ def main():
            # Also remove surrounding caption and source
            prev_sib = table.find_previous_sibling()
            next_sib = table.find_next_sibling()
-            if prev_sib and 'Tabla 1. Ejemplo' in prev_sib.get_text():
+            if prev_sib and 'Tabla 1. Ejemplo' in prev_sib.get_text():
-                prev_sib.decompose()
+                prev_sib.decompose()
-            if next_sib and SOURCE_LINE_RE.search(next_sib.get_text().strip()):
+            if next_sib and SOURCE_LINE_RE.search(next_sib.get_text().strip()):
-                next_sib.decompose()
+                next_sib.decompose()
            table.decompose()
            print("    ✓ Removed template table example")
            break
--- a/docs/00_resumen.md
+++ b/docs/00_resumen.md
@@ -6,7 +6,7 @@ Se realizó un estudio comparativo de tres soluciones OCR de código abierto: Ea
 Los resultados demuestran que la optimización de hiperparámetros logró mejoras significativas: el mejor trial individual alcanzó un CER de 0.79% (precisión del 99.21%), cumpliendo el objetivo de CER < 2%. Al validar la configuración optimizada sobre el dataset completo de 45 páginas, se obtuvo una mejora del 12.8% en CER (de 8.85% a 7.72%). El hallazgo más relevante fue que el parámetro `textline_orientation` (clasificación de orientación de línea de texto) tiene un impacto crítico en el rendimiento. Adicionalmente, se identificó que el umbral de detección (`text_det_thresh`) presenta una correlación positiva moderada (0.43) con el error, lo que indica que valores más bajos tienden a mejorar el rendimiento.
-**Fuente:** [`docs/metrics/metrics_paddle.md`](https://seryus.ddns.net/unir/MastersThesis/src/branch/main/docs/metrics/metrics_paddle.md), [`src/results/correlations/paddle_correlations.csv`](https://seryus.ddns.net/unir/MastersThesis/src/branch/main/src/results/correlations/paddle_correlations.csv).
+**Fuente:** [`metrics_paddle.md`](https://seryus.ddns.net/unir/MastersThesis/src/branch/main/docs/metrics/metrics_paddle.md), [`paddle_correlations.csv`](https://seryus.ddns.net/unir/MastersThesis/src/branch/main/src/results/correlations/paddle_correlations.csv).
 Este trabajo demuestra que la optimización de hiperparámetros es una alternativa viable al fine-tuning, especialmente útil cuando se dispone de modelos preentrenados para el idioma objetivo. La infraestructura dockerizada desarrollada permite reproducir los experimentos y facilita la evaluación sistemática de configuraciones OCR.
@@ -22,7 +22,7 @@ A comparative study of three open-source OCR solutions was conducted with EasyOC
 Results demonstrate that hyperparameter optimization achieved significant improvements. The best individual trial reached a CER of 0.79% (99.21% accuracy), meeting the CER < 2% objective. When validating the optimized configuration on the full 45-page dataset, a 12.8% CER improvement was obtained (from 8.85% to 7.72%). The most relevant finding was that the `textline_orientation` parameter (text line orientation classification) has a critical impact on performance. Additionally, the detection threshold (`text_det_thresh`) showed a moderate positive correlation (0.43) with error, indicating that lower values tend to improve performance.
-Sources: [`docs/metrics/metrics_paddle.md`](https://seryus.ddns.net/unir/MastersThesis/src/branch/main/docs/metrics/metrics_paddle.md), [`src/results/correlations/paddle_correlations.csv`](https://seryus.ddns.net/unir/MastersThesis/src/branch/main/src/results/correlations/paddle_correlations.csv).
+Sources: [`metrics_paddle.md`](https://seryus.ddns.net/unir/MastersThesis/src/branch/main/docs/metrics/metrics_paddle.md), [`paddle_correlations.csv`](https://seryus.ddns.net/unir/MastersThesis/src/branch/main/src/results/correlations/paddle_correlations.csv).
 This work demonstrates that hyperparameter optimization is a viable alternative to fine-tuning, especially useful when pre-trained models for the target language are available. The dockerized infrastructure developed enables experiment reproducibility and facilitates systematic evaluation of OCR configurations.
--- a/docs/compliance.md
+++ b/docs/compliance.md
@@ -0,0 +1,157 @@
 # UNIR Style Compliance Checklist
 This document lists the UNIR TFE style requirements to verify before final submission.
 ## Page Layout
 | Requirement | Specification | Check |
 |-------------|---------------|-------|
 | Page size | A4 | ☐ |
 | Left margin | 3.0 cm | ☐ |
 | Right margin | 2.0 cm | ☐ |
 | Top margin | 2.5 cm | ☐ |
 | Bottom margin | 2.5 cm | ☐ |
 | Header | Student name + TFE title | ☐ |
 | Footer | Page number | ☐ |
 ## Typography
 | Element | Specification | Check |
 |---------|---------------|-------|
 | Body text | Calibri 12pt, justified, 1.5 line spacing | ☐ |
 | Título 1 (H1) | Calibri Light 18pt, blue, numbered (1., 2., ...) | ☐ |
 | Título 2 (H2) | Calibri Light 14pt, blue, numbered (1.1, 1.2, ...) | ☐ |
 | Título 3 (H3) | Calibri Light 12pt, numbered (1.1.1, 1.1.2, ...) | ☐ |
 | Título 4 (H4) | Calibri 12pt, bold, unnumbered | ☐ |
 | Footnotes | Calibri 10pt, justified, single spacing | ☐ |
 | Code blocks | Consolas 10pt | ☐ |
 ## Document Structure
 | Section | Requirements | Check |
 |---------|--------------|-------|
 | Portada | Title, Author, Type, Director, Date | ☐ |
 | Resumen | 150-300 words in Spanish + Palabras clave (3-5) | ☐ |
 | Abstract | 150-300 words in English + Keywords (3-5) | ☐ |
 | Índice de contenidos | Auto-generated, new page | ☐ |
 | Índice de figuras | Auto-generated, new page | ☐ |
 | Índice de tablas | Auto-generated, new page | ☐ |
 | Cap. 1 Introducción | 1.1 Motivación, 1.2 Planteamiento, 1.3 Estructura | ☐ |
 | Cap. 2 Contexto | 2.1 Contexto, 2.2 Estado del arte, 2.3 Conclusiones | ☐ |
 | Cap. 3 Objetivos | 3.1 Objetivo general, 3.2 Específicos, 3.3 Metodología | ☐ |
 | Cap. 4 Desarrollo | Structure depends on work type | ☐ |
 | Cap. 5 Conclusiones | 5.1 Conclusiones, 5.2 Trabajo futuro | ☐ |
 | Referencias | APA format, alphabetical order | ☐ |
 | Anexos | Code repository URL, supplementary data | ☐ |
 ## Tables
 | Requirement | Specification | Check |
 |-------------|---------------|-------|
 | Title position | Above the table | ☐ |
 | Title format | **Tabla N.** *Descriptive title in italics.* | ☐ |
 | Numbering | Sequential (1, 2, 3...), Anexo uses A1, A2... | ☐ |
 | Border style | APA: horizontal lines only (top, header bottom, table bottom) | ☐ |
 | Source position | Below the table, centered | ☐ |
 | Source format | `Fuente: Author, Year.` or `Fuente: Elaboración propia.` | ☐ |
 | Leyenda (if needed) | Below Fuente, same style (Piedefoto-tabla) | ☐ |
 | In TOT index | All tables appear in Índice de tablas | ☐ |
 ## Figures
 | Requirement | Specification | Check |
 |-------------|---------------|-------|
 | Title position | Above the figure | ☐ |
 | Title format | **Figura N.** *Descriptive title in italics.* | ☐ |
 | Numbering | Sequential (1, 2, 3...), Anexo uses A1, A2... | ☐ |
 | Alignment | Centered | ☐ |
 | Source position | Below the figure, centered | ☐ |
 | Source format | `Fuente: Author, Year.` or `Fuente: Elaboración propia.` | ☐ |
 | Leyenda (if needed) | Below Fuente, same style (Piedefoto-tabla) | ☐ |
 | In TOF index | All figures appear in Índice de figuras | ☐ |
 ## Lists
 | Requirement | Specification | Check |
 |-------------|---------------|-------|
 | Bullet lists | Indented 36pt, bullet symbol (·) | ☐ |
 | Numbered lists | Indented 36pt, sequential numbers (1, 2, 3...) | ☐ |
 | Spacing | Proper First/Middle/Last paragraph spacing | ☐ |
 ## Citations and References
 | Requirement | Specification | Check |
 |-------------|---------------|-------|
 | Citation format | APA 7th edition | ☐ |
 | Single author | (Author, Year) or Author (Year) | ☐ |
 | Two authors | (Author1 & Author2, Year) | ☐ |
 | Three+ authors | (Author1 et al., Year) | ☐ |
 | Reference list | Alphabetical by first author surname | ☐ |
 | Hanging indent | 36pt left margin, -36pt text indent | ☐ |
 | DOI/URL | Include when available | ☐ |
 | No Wikipedia | Wikipedia citations not allowed | ☐ |
 | Source variety | Books, journals, conferences (not just URLs) | ☐ |
 ## SMART Objectives
 All objectives must be SMART:
 | Criterion | Requirement | Check |
 |-----------|-------------|-------|
 | **S**pecific | Clearly defined, unambiguous | ☐ |
 | **M**easurable | Quantifiable success metric (e.g., CER < 2%) | ☐ |
 | **A**ttainable | Feasible with available resources | ☐ |
 | **R**elevant | Demonstrable impact | ☐ |
 | **T**ime-bound | Achievable within timeframe | ☐ |
 ## Writing Style
 | Requirement | Check |
 |-------------|-------|
 | Each chapter starts with introductory paragraph | ☐ |
 | Each paragraph has at least 3 sentences | ☐ |
 | No two consecutive headings without text between them | ☐ |
 | No superfluous phrases or repetition | ☐ |
 | All concepts defined with pertinent citations | ☐ |
 | Spelling checked (Word spell checker) | ☐ |
 | Logical flow between paragraphs | ☐ |
 ## Final Checks
 | Requirement | Check |
 |-------------|-------|
 | All cited references appear in reference list | ☐ |
 | All references in list are cited in text | ☐ |
 | All figures/tables have numbers and titles | ☐ |
 | Update all indices (Ctrl+A, F9 in Word) | ☐ |
 | Page count: 50-90 pages (excl. cover, indices, annexes) | ☐ |
 | Final format: PDF for deposit | ☐ |
 ## Automated Checks (apply_content.py)
 The following are automatically handled by the generation scripts:
 - ✓ Table/Figure sequential numbering
 - ✓ Anexo items use A1, A2... prefix
 - ✓ TC fields for Anexo items (appear in indices)
 - ✓ Piedefoto-tabla style for Fuente/Leyenda
 - ✓ MsoCaption style for titles
 - ✓ APA table borders (horizontal only)
 - ✓ MsoBibliography style for references
 - ✓ MsoQuote style for blockquotes
 - ✓ List paragraph classes (First/Middle/Last)
 - ✓ Bold H4 headings (unnumbered)
 ## Color Palette (UNIR Theme)
 | Color | Hex | Usage |
 |-------|-----|-------|
 | Primary Blue | `#0098CD` | Headings, diagram borders |
 | Light Blue BG | `#E6F4F9` | Diagram backgrounds |
 | Dark Gray | `#404040` | Body text |
 | Accent Blue | `#5B9BD5` | Table headers |
 | Light Accent | `#9CC2E5` | Table borders |
 ---
 **Reference:** UNIR TFE Guidelines (`instructions/instrucciones.pdf`, `instructions/plantilla_individual.pdf`)
--- a/thesis_output/plantilla_individual.htm
+++ b/thesis_output/plantilla_individual.htm