deliberable_16_12_2025
609 apply_content.py Normal file
@@ -0,0 +1,609 @@
#!/usr/bin/env python3
"""Replace template content with thesis content from docs/ folder using BeautifulSoup."""

import re
import os
from bs4 import BeautifulSoup, NavigableString

BASE_DIR = '/Users/sergio/Desktop/MastersThesis'
TEMPLATE = os.path.join(BASE_DIR, 'thesis_output/plantilla_individual.htm')
DOCS_DIR = os.path.join(BASE_DIR, 'docs')

# Global counters for tables and figures
table_counter = 0
figure_counter = 0

def read_file(path):
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except UnicodeDecodeError:
        with open(path, 'r', encoding='latin-1') as f:
            return f.read()

def write_file(path, content):
    with open(path, 'w', encoding='utf-8') as f:
        f.write(content)

def md_to_html_para(text):
    """Convert markdown inline formatting to HTML."""
    # Bold
    text = re.sub(r'\*\*([^*]+)\*\*', r'<b>\1</b>', text)
    # Italic
    text = re.sub(r'\*([^*]+)\*', r'<i>\1</i>', text)
    # Inline code
    text = re.sub(r'`([^`]+)`', r'<span style="font-family:Consolas;font-size:10pt">\1</span>', text)
    return text

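# A quick illustrative check of the rules above (my example input, not text
# from the thesis): the bold rule runs first, so '**' is never consumed by
# the single-'*' italic rule.
#   md_to_html_para('**bold**, *italic*, `code`')
#   -> '<b>bold</b>, <i>italic</i>, '
#      '<span style="font-family:Consolas;font-size:10pt">code</span>'
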
def extract_table_title(lines, current_index):
    """Look for table title in preceding lines (e.g., **Tabla 1.** *Title*)."""
    # Check previous non-empty lines for table title
    for i in range(current_index - 1, max(0, current_index - 5), -1):
        line = lines[i].strip()
        if line.startswith('**Tabla') or line.startswith('*Tabla'):
            return line
        if line and not line.startswith('|'):
            break
    return None

def extract_figure_title_from_mermaid(lines, current_index):
    """Extract title from mermaid diagram or preceding text."""
    # Look for title in mermaid content
    for i in range(current_index + 1, min(len(lines), current_index + 20)):
        line = lines[i].strip()
        if line.startswith('```'):
            break
        if 'title' in line.lower():
            # Extract title from: title "Some Title"
            match = re.search(r'title\s+["\']([^"\']+)["\']', line)
            if match:
                return match.group(1)

    # Check preceding lines for figure reference
    for i in range(current_index - 1, max(0, current_index - 3), -1):
        line = lines[i].strip()
        if line.startswith('**Figura') or 'Figura' in line:
            return line

    return None

def parse_md_to_html_blocks(md_content):
    """Convert markdown content to HTML blocks with template styles."""
    global table_counter, figure_counter

    html_blocks = []
    lines = md_content.split('\n')
    i = 0

    while i < len(lines):
        line = lines[i]

        # Skip empty lines
        if not line.strip():
            i += 1
            continue

        # Mermaid diagram - convert to figure with actual image
        if line.strip().startswith('```mermaid'):
            figure_counter += 1
            mermaid_lines = []
            i += 1
            while i < len(lines) and not lines[i].strip() == '```':
                mermaid_lines.append(lines[i])
                i += 1

            # Try to extract title from mermaid content (YAML format: title: "...")
            mermaid_content = '\n'.join(mermaid_lines)
            # Match YAML format: title: "Title" or title: 'Title'
            title_match = re.search(r'title:\s*["\']([^"\']+)["\']', mermaid_content)
            if not title_match:
                # Fallback to non-YAML format: title "Title"
                title_match = re.search(r'title\s+["\']?([^"\'\n]+)["\']?', mermaid_content)
            if title_match:
                fig_title = title_match.group(1).strip()
            else:
                fig_title = f"Diagrama {figure_counter}"

            # Check if the generated PNG exists
            fig_file = f'figures/figura_{figure_counter}.png'
            fig_path = os.path.join(BASE_DIR, 'thesis_output', fig_file)

            # Create figure with MsoCaption class and proper Word SEQ field for cross-reference
            # Format: "Figura X." in bold, title in italic (per UNIR guidelines)
            # Word TOC looks for text with Caption style - anchor must be outside main caption text
            bookmark_id = f"_Ref_Fig{figure_counter}"
            html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Figura \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{figure_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')

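            # Background note (mine, not from the original commit): when Word
            # opens this HTML, the field-begin/field-separator/field-end spans
            # inside the conditional comments become a real { SEQ Figura \* ARABIC }
            # field, so caption numbers renumber automatically when fields are
            # updated (Ctrl+A, F9); the interpolated {figure_counter} digit is
            # only the cached display value.
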
            if os.path.exists(fig_path):
                # Use Word-compatible width in cm (A4 text area is ~16cm wide, use ~12cm max)
                html_blocks.append(f'''<p class=MsoNormal style="text-align:center"><span lang=ES><img style="width:12cm;max-width:100%" src="{fig_file}" alt="{fig_title}"/></span></p>''')
            else:
                # Fallback to placeholder
                html_blocks.append('''<p class=MsoNormal style="text-align:center;border:1px dashed #999;padding:20px;margin:10px 40px;background:#f9f9f9"><span lang=ES style="color:#666">[Insertar diagrama Mermaid aquí]</span></p>''')

            html_blocks.append('''<p class=Piedefoto-tabla style="margin-left:0cm;text-align:center"><span lang=ES>Fuente: Elaboración propia.</span></p>''')
            html_blocks.append('<p class=MsoNormal><span lang=ES><o:p> </o:p></span></p>')
            i += 1
            continue

        # Code block (non-mermaid)
        if line.strip().startswith('```'):
            code_lang = line.strip()[3:]
            code_lines = []
            i += 1
            while i < len(lines) and not lines[i].strip().startswith('```'):
                code_lines.append(lines[i])
                i += 1
            code = '\n'.join(code_lines)
            # Escape HTML special characters in code ('&' first, so the '&'
            # introduced by '&lt;'/'&gt;' is not escaped twice)
            code = code.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
            html_blocks.append(f'<p class=MsoNormal style="margin-left:1cm"><span style="font-family:Consolas;font-size:9pt"><pre>{code}</pre></span></p>')
            i += 1
            continue

        # Headers - ## becomes h2, ### becomes h3, #### becomes h4 (h1 is skipped below)
        if line.startswith('####'):
            text = line.lstrip('#').strip()
            html_blocks.append(f'<h4><span lang=ES>{text}</span></h4>')
            i += 1
            continue
        elif line.startswith('###'):
            text = line.lstrip('#').strip()
            html_blocks.append(f'<h3 style="mso-list:l22 level3 lfo18"><span lang=ES style="text-transform:none">{text}</span></h3>')
            i += 1
            continue
        elif line.startswith('##'):
            text = line.lstrip('#').strip()
            html_blocks.append(f'<h2 style="mso-list:l22 level2 lfo18"><span lang=ES style="text-transform:none">{text}</span></h2>')
            i += 1
            continue
        elif line.startswith('#'):
            # Skip h1 - we keep the original chapter heading from the template
            i += 1
            continue

        # Table - check for table title pattern first
        if '|' in line and i + 1 < len(lines) and '---' in lines[i + 1]:
            table_counter += 1

            # Check if previous line has table title (e.g., **Tabla 1.** *Title*)
            table_title = None
            table_source = "Elaboración propia"

            # Look back for table title
            for j in range(i - 1, max(0, i - 5), -1):
                prev_line = lines[j].strip()
                if prev_line.startswith('**Tabla') or prev_line.startswith('*Tabla'):
                    # Extract title text
                    table_title = re.sub(r'\*+', '', prev_line).strip()
                    break
                elif prev_line and not prev_line.startswith('|'):
                    break

            # Parse table
            table_lines = []
            while i < len(lines) and '|' in lines[i]:
                if '---' not in lines[i]:
                    table_lines.append(lines[i])
                i += 1

            # Look ahead for source
            if i < len(lines) and 'Fuente:' in lines[i]:
                table_source = lines[i].replace('*', '').replace('Fuente:', '').strip()
                i += 1

            # Add table title with MsoCaption class and proper Word SEQ field for cross-reference
            # Format: "Tabla X." in bold, title in italic (per UNIR guidelines)
            # Word TOC looks for text with Caption style - anchor must be outside main caption text
            bookmark_id = f"_Ref_Tab{table_counter}"
            if table_title:
                clean_title = table_title.replace(f"Tabla {table_counter}.", "").strip()
            else:
                clean_title = "Tabla de datos."
            html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Tabla \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{table_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')

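            # How a markdown row is split below (illustrative): for
            # '| Celda A | Celda B |', split('|')[1:-1] drops the empty strings
            # produced by the outer pipes and yields [' Celda A ', ' Celda B '],
            # which strip() then cleans up.
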
            # Build table HTML with APA style (horizontal lines only, no vertical)
            table_html = '<table class=MsoTableGrid border=0 cellspacing=0 cellpadding=0 style="border-collapse:collapse;border:none">'
            for j, tline in enumerate(table_lines):
                cells = [c.strip() for c in tline.split('|')[1:-1]]
                table_html += '<tr>'
                for cell in cells:
                    if j == 0:
                        # Header row: top and bottom border, bold text
                        table_html += f'<td style="border-top:solid windowtext 1.0pt;border-bottom:solid windowtext 1.0pt;border-left:none;border-right:none;padding:5px"><p class=MsoNormal style="margin:0"><b><span lang=ES>{md_to_html_para(cell)}</span></b></p></td>'
                    elif j == len(table_lines) - 1:
                        # Last row: bottom border only
                        table_html += f'<td style="border-top:none;border-bottom:solid windowtext 1.0pt;border-left:none;border-right:none;padding:5px"><p class=MsoNormal style="margin:0"><span lang=ES>{md_to_html_para(cell)}</span></p></td>'
                    else:
                        # Middle rows: no borders
                        table_html += f'<td style="border:none;padding:5px"><p class=MsoNormal style="margin:0"><span lang=ES>{md_to_html_para(cell)}</span></p></td>'
                table_html += '</tr>'
            table_html += '</table>'
            html_blocks.append(table_html)

            # Add source with proper template format
            html_blocks.append(f'<p class=Piedefoto-tabla style="margin-left:0cm"><span lang=ES>Fuente: {table_source}.</span></p>')
            html_blocks.append('<p class=MsoNormal><span lang=ES><o:p> </o:p></span></p>')
            continue

        # Blockquote
        if line.startswith('>'):
            quote_text = line[1:].strip()
            i += 1
            while i < len(lines) and lines[i].startswith('>'):
                quote_text += ' ' + lines[i][1:].strip()
                i += 1
            html_blocks.append(f'<p class=MsoNormal style="margin-left:2cm;margin-right:1cm"><i><span lang=ES>{md_to_html_para(quote_text)}</span></i></p>')
            continue

        # Bullet list
        if re.match(r'^[\-\*\+]\s', line):
            while i < len(lines) and re.match(r'^[\-\*\+]\s', lines[i]):
                item_text = lines[i][2:].strip()
                html_blocks.append(f'<p class=MsoListParagraphCxSpMiddle style="margin-left:36pt;text-indent:-18pt"><span lang=ES style="font-family:Symbol">·</span><span lang=ES style="font-size:7pt"> </span><span lang=ES>{md_to_html_para(item_text)}</span></p>')
                i += 1
            continue

        # Numbered list
        if re.match(r'^\d+\.\s', line):
            num = 1
            while i < len(lines) and re.match(r'^\d+\.\s', lines[i]):
                item_text = re.sub(r'^\d+\.\s*', '', lines[i]).strip()
                html_blocks.append(f'<p class=MsoListParagraphCxSpMiddle style="margin-left:36pt;text-indent:-18pt"><span lang=ES>{num}.<span style="font-size:7pt"> </span>{md_to_html_para(item_text)}</span></p>')
                num += 1
                i += 1
            continue

        # Skip lines that are just table/figure titles (they'll be handled with the table/figure)
        if line.strip().startswith('**Tabla') or line.strip().startswith('*Tabla'):
            i += 1
            continue
        if line.strip().startswith('**Figura') or line.strip().startswith('*Figura'):
            i += 1
            continue
        if line.strip().startswith('*Fuente:') or line.strip().startswith('Fuente:'):
            i += 1
            continue

        # Regular paragraph
        para_lines = [line]
        i += 1
        while i < len(lines) and lines[i].strip() and not lines[i].startswith('#') and not lines[i].startswith('```') and not lines[i].startswith('>') and not re.match(r'^[\-\*\+]\s', lines[i]) and not re.match(r'^\d+\.\s', lines[i]) and '|' not in lines[i]:
            para_lines.append(lines[i])
            i += 1

        para_text = ' '.join(para_lines)
        html_blocks.append(f'<p class=MsoNormal><span lang=ES>{md_to_html_para(para_text)}</span></p>')

    return '\n\n'.join(html_blocks)

def extract_section_content(md_content):
    """Extract content from markdown, skipping the first # header."""
    md_content = re.sub(r'^#\s+[^\n]+\n+', '', md_content, count=1)
    return parse_md_to_html_blocks(md_content)

def find_section_element(soup, keyword):
    """Find element containing keyword (h1 or special paragraph classes)."""
    # First try h1
    for h1 in soup.find_all('h1'):
        text = h1.get_text()
        if keyword.lower() in text.lower():
            return h1

    # Try special paragraph classes for unnumbered sections
    for p in soup.find_all('p', class_=['Ttulo1sinnumerar', 'Anexo', 'MsoNormal']):
        text = p.get_text()
        if keyword.lower() in text.lower():
            classes = p.get('class', [])
            if 'Ttulo1sinnumerar' in classes or 'Anexo' in classes:
                return p
            if re.match(r'^\d+\.?\s', text.strip()):
                return p
    return None

def remove_elements_between(start_elem, end_elem):
    """Remove all elements between start and end (exclusive)."""
    current = start_elem.next_sibling
    elements_to_remove = []
    while current and current != end_elem:
        elements_to_remove.append(current)
        current = current.next_sibling
    for elem in elements_to_remove:
        if hasattr(elem, 'decompose'):
            elem.decompose()
        elif isinstance(elem, NavigableString):
            elem.extract()

def format_references(refs_content):
    """Format references with proper MsoBibliography style."""
    refs_content = refs_content.replace('# Referencias bibliográficas {.unnumbered}', '').strip()
    refs_html = ''

    for line in refs_content.split('\n\n'):
        line = line.strip()
        if not line:
            continue

        # Apply markdown formatting
        formatted = md_to_html_para(line)

        # Use MsoBibliography style with hanging indent (36pt indent, -36pt text-indent)
        refs_html += f'''<p class=MsoBibliography style="margin-left:36.0pt;text-indent:-36.0pt"><span lang=ES>{formatted}</span></p>\n'''

    return refs_html

def extract_resumen_parts(resumen_content):
    """Extract Spanish resumen and English abstract from 00_resumen.md"""
    parts = resumen_content.split('---')

    spanish_part = parts[0] if len(parts) > 0 else ''
    english_part = parts[1] if len(parts) > 1 else ''

    # Extract Spanish content
    spanish_text = ''
    spanish_keywords = ''
    if '**Palabras clave:**' in spanish_part:
        text_part, kw_part = spanish_part.split('**Palabras clave:**')
        spanish_text = text_part.replace('# Resumen', '').strip()
        spanish_keywords = kw_part.strip()
    else:
        spanish_text = spanish_part.replace('# Resumen', '').strip()

    # Extract English content
    english_text = ''
    english_keywords = ''
    if '**Keywords:**' in english_part:
        text_part, kw_part = english_part.split('**Keywords:**')
        english_text = text_part.replace('# Abstract', '').strip()
        english_keywords = kw_part.strip()
    else:
        english_text = english_part.replace('# Abstract', '').strip()

    return spanish_text, spanish_keywords, english_text, english_keywords

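# Assumed shape of docs/00_resumen.md, inferred from the parsing above (an
# illustration, not the actual file contents):
#
#   # Resumen
#   Texto del resumen...
#   **Palabras clave:** palabra1, palabra2
#   ---
#   # Abstract
#   Abstract text...
#   **Keywords:** keyword1, keyword2
#
# Note that split('---') would also split on any other horizontal rule in the
# file, so the separator is assumed to appear exactly once.
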
def main():
    global table_counter, figure_counter

    print("Reading template...")
    html_content = read_file(TEMPLATE)
    soup = BeautifulSoup(html_content, 'html.parser')

    print("Reading docs content...")
    docs = {
        'resumen': read_file(os.path.join(DOCS_DIR, '00_resumen.md')),
        'intro': read_file(os.path.join(DOCS_DIR, '01_introduccion.md')),
        'contexto': read_file(os.path.join(DOCS_DIR, '02_contexto_estado_arte.md')),
        'objetivos': read_file(os.path.join(DOCS_DIR, '03_objetivos_metodologia.md')),
        'desarrollo': read_file(os.path.join(DOCS_DIR, '04_desarrollo_especifico.md')),
        'conclusiones': read_file(os.path.join(DOCS_DIR, '05_conclusiones_trabajo_futuro.md')),
        'referencias': read_file(os.path.join(DOCS_DIR, '06_referencias_bibliograficas.md')),
        'anexo': read_file(os.path.join(DOCS_DIR, '07_anexo_a.md')),
    }

    # Extract resumen and abstract
    spanish_text, spanish_kw, english_text, english_kw = extract_resumen_parts(docs['resumen'])

    # Replace title
    print("Replacing title...")
    for elem in soup.find_all(string=re.compile(r'Título del TFE', re.IGNORECASE)):
        elem.replace_with(elem.replace('Título del TFE', 'Optimización de Hiperparámetros OCR con Ray Tune para Documentos Académicos en Español'))

    # Replace Resumen section
    print("Replacing Resumen...")
    resumen_title = soup.find('p', class_='Ttulondices', string=re.compile(r'Resumen'))
    if resumen_title:
        # Find and replace content after Resumen title until Abstract
        current = resumen_title.find_next_sibling()
        elements_to_remove = []
        while current:
            text = current.get_text() if hasattr(current, 'get_text') else str(current)
            if 'Abstract' in text and current.name == 'p' and 'Ttulondices' in str(current.get('class', [])):
                break
            elements_to_remove.append(current)
            current = current.find_next_sibling()

        for elem in elements_to_remove:
            if hasattr(elem, 'decompose'):
                elem.decompose()

        # Insert new resumen content
        resumen_html = f'''<p class=MsoNormal><span lang=ES>{spanish_text}</span></p>
<p class=MsoNormal><span lang=ES><o:p> </o:p></span></p>
<p class=MsoNormal><b><span lang=ES>Palabras clave:</span></b><span lang=ES> {spanish_kw}</span></p>
<p class=MsoNormal><span lang=ES><o:p> </o:p></span></p>'''
        resumen_soup = BeautifulSoup(resumen_html, 'html.parser')
        insert_point = resumen_title
        for new_elem in reversed(list(resumen_soup.children)):
            insert_point.insert_after(new_elem)
        print(" ✓ Replaced Resumen")

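    # A note on the insertion idiom used here and below (my observation, not a
    # comment from the original): every new element is inserted directly after
    # the same anchor, so iterating the parsed children in reversed() order is
    # what preserves their original document order.
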
    # Replace Abstract section
    print("Replacing Abstract...")
    abstract_title = soup.find('p', class_='Ttulondices', string=re.compile(r'Abstract'))
    if abstract_title:
        # Find and replace content after Abstract title until next major section
        current = abstract_title.find_next_sibling()
        elements_to_remove = []
        while current:
            # Stop at page break or next title
            if current.name == 'span' and 'page-break' in str(current):
                break
            text = current.get_text() if hasattr(current, 'get_text') else str(current)
            if current.name == 'p' and ('Ttulondices' in str(current.get('class', [])) or 'MsoToc' in str(current.get('class', []))):
                break
            elements_to_remove.append(current)
            current = current.find_next_sibling()

        for elem in elements_to_remove:
            if hasattr(elem, 'decompose'):
                elem.decompose()

        # Insert new abstract content
        abstract_html = f'''<p class=MsoNormal><span lang=EN-US>{english_text}</span></p>
<p class=MsoNormal><span lang=EN-US><o:p> </o:p></span></p>
<p class=MsoNormal><b><span lang=EN-US>Keywords:</span></b><span lang=EN-US> {english_kw}</span></p>
<p class=MsoNormal><span lang=EN-US><o:p> </o:p></span></p>'''
        abstract_soup = BeautifulSoup(abstract_html, 'html.parser')
        insert_point = abstract_title
        for new_elem in reversed(list(abstract_soup.children)):
            insert_point.insert_after(new_elem)
        print(" ✓ Replaced Abstract")

# Remove "Importante" callout boxes (template instructions)
|
||||
print("Removing template instructions...")
|
||||
for div in soup.find_all('div'):
|
||||
text = div.get_text()
|
||||
if 'Importante:' in text and 'extensión mínima' in text:
|
||||
div.decompose()
|
||||
print(" ✓ Removed 'Importante' box")
|
||||
|
||||
# Remove "Ejemplo de nota al pie" footnote
|
||||
for elem in soup.find_all(string=re.compile(r'Ejemplo de nota al pie')):
|
||||
parent = elem.parent
|
||||
if parent:
|
||||
# Find the footnote container and remove it
|
||||
while parent and parent.name != 'p':
|
||||
parent = parent.parent
|
||||
if parent:
|
||||
parent.decompose()
|
||||
print(" ✓ Removed footnote example")
|
||||
|
||||
    # Clear old figure/table index entries (they need to be regenerated in Word)
    print("Clearing old index entries...")

    # Remove ALL content from MsoTof paragraphs that reference template examples
    # The indices will be regenerated when user opens in Word and presses Ctrl+A, F9
    for p in soup.find_all('p', class_='MsoTof'):
        text = p.get_text()
        # Check for figure index entries with template examples
        if 'Figura' in text and 'Ejemplo' in text:
            # Remove all <a> tags (the actual index entry links)
            for a in p.find_all('a'):
                a.decompose()
            # Also remove any remaining text content that shows the example
            for span in p.find_all('span', style=lambda x: x and 'mso-no-proof' in str(x)):
                if 'Ejemplo' in span.get_text():
                    span.decompose()
            print(" ✓ Cleared figure index example entry")
        # Check for table index entries with template examples
        if 'Tabla' in text and 'Ejemplo' in text:
            for a in p.find_all('a'):
                a.decompose()
            for span in p.find_all('span', style=lambda x: x and 'mso-no-proof' in str(x)):
                if 'Ejemplo' in span.get_text():
                    span.decompose()
            print(" ✓ Cleared table index example entry")

    # Remove old figure index entries that reference template examples
    for p in soup.find_all('p', class_='MsoToc3'):
        text = p.get_text()
        if 'Figura 1. Ejemplo' in text or 'Tabla 1. Ejemplo' in text:
            p.decompose()
            print(" ✓ Removed template index entry")

    # Also clear the specific figure/table from template
    for p in soup.find_all('p', class_='Imagencentrada'):
        p.decompose()
        print(" ✓ Removed template figure placeholder")

    # Remove template table example
    for table in soup.find_all('table', class_='MsoTableGrid'):
        # Check if this is the template example table
        text = table.get_text()
        if 'Celda 1' in text or 'Encabezado 1' in text:
            # Also remove surrounding caption and source
            prev_sib = table.find_previous_sibling()
            next_sib = table.find_next_sibling()
            if prev_sib and 'Tabla 1. Ejemplo' in prev_sib.get_text():
                prev_sib.decompose()
            if next_sib and 'Fuente:' in next_sib.get_text():
                next_sib.decompose()
            table.decompose()
            print(" ✓ Removed template table example")
            break

    # Define chapters with their keywords and next chapter keywords
    chapters = [
        ('Introducción', 'intro', 'Contexto'),
        ('Contexto', 'contexto', 'Objetivos'),
        ('Objetivos', 'objetivos', 'Desarrollo'),
        ('Desarrollo', 'desarrollo', 'Conclusiones'),
        ('Conclusiones', 'conclusiones', 'Referencias'),
    ]

    print("Replacing chapter contents...")
    for chapter_keyword, doc_key, next_keyword in chapters:
        print(f" Processing: {chapter_keyword}")

        # Uncomment to restart table/figure numbering in each chapter
        # (left commented out, so numbering is global across the thesis):
        # table_counter = 0
        # figure_counter = 0

        start_elem = find_section_element(soup, chapter_keyword)
        end_elem = find_section_element(soup, next_keyword)

        if start_elem and end_elem:
            remove_elements_between(start_elem, end_elem)
            new_content_html = extract_section_content(docs[doc_key])
            new_soup = BeautifulSoup(new_content_html, 'html.parser')
            insert_point = start_elem
            for new_elem in reversed(list(new_soup.children)):
                insert_point.insert_after(new_elem)
            print(" ✓ Replaced content")
        else:
            if not start_elem:
                print(f" Warning: Could not find start element for {chapter_keyword}")
            if not end_elem:
                print(f" Warning: Could not find end element for {next_keyword}")

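    # Worked example of the boundary logic above (illustrative): for the tuple
    # ('Introducción', 'intro', 'Contexto'), everything between the heading
    # matching 'Introducción' and the heading matching 'Contexto' is removed,
    # and the rendered content of docs/01_introduccion.md fills the gap.
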
    # Handle Referencias
    print(" Processing: Referencias bibliográficas")
    refs_start = find_section_element(soup, 'Referencias')
    anexo_elem = find_section_element(soup, 'Anexo')

    if refs_start and anexo_elem:
        remove_elements_between(refs_start, anexo_elem)
        refs_html = format_references(docs['referencias'])
        refs_soup = BeautifulSoup(refs_html, 'html.parser')
        insert_point = refs_start
        for new_elem in reversed(list(refs_soup.children)):
            insert_point.insert_after(new_elem)
        print(" ✓ Replaced content")

    # Handle Anexo (last section)
    print(" Processing: Anexo")
    if anexo_elem:
        body = soup.find('body')
        if body:
            current = anexo_elem.next_sibling
            while current:
                next_elem = current.next_sibling
                if hasattr(current, 'decompose'):
                    current.decompose()
                elif isinstance(current, NavigableString):
                    current.extract()
                current = next_elem

            anexo_content = extract_section_content(docs['anexo'])
            anexo_soup = BeautifulSoup(anexo_content, 'html.parser')
            insert_point = anexo_elem
            for new_elem in reversed(list(anexo_soup.children)):
                insert_point.insert_after(new_elem)
            print(" ✓ Replaced content")

print(f"\nSummary: {table_counter} tables, {figure_counter} figures processed")
|
||||
|
||||
print("Saving modified template...")
|
||||
output_html = str(soup)
|
||||
write_file(TEMPLATE, output_html)
|
||||
|
||||
print(f"✓ Done! Modified: {TEMPLATE}")
|
||||
print("\nTo convert to DOCX:")
|
||||
print("1. Open the .htm file in Microsoft Word")
|
||||
print("2. Replace [Insertar diagrama Mermaid aquí] placeholders with actual diagrams")
|
||||
print("3. Update indices: Select all (Ctrl+A) then press F9 to update fields")
|
||||
print(" - This will regenerate: Índice de contenidos, Índice de figuras, Índice de tablas")
|
||||
print("4. Save as .docx")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
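
# Usage sketch (assuming the hardcoded BASE_DIR paths above exist):
#   python3 apply_content.py
# Note that write_file(TEMPLATE, output_html) rewrites the template .htm in
# place, so keep a pristine copy if you need to re-run from a clean state.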