links regeneration

2026-02-04 17:52:58 +01:00
parent d384f1e4d3
commit 38ba85d834
11 changed files with 227 additions and 240 deletions
--- a/apply_content.py
+++ b/apply_content.py
@@ -16,6 +16,10 @@ DOCS_DIR = os.path.join(BASE_DIR, 'docs')
 # Global counters for tables and figures
 table_counter = 0
 figure_counter = 0
+anexo_table_counter = 0
+anexo_figure_counter = 0
+# Global sequential counter for figure filenames (figura_1.png, figura_2.png, etc.)
+global_figure_index = 0

 def read_file(path):
    try:
@@ -99,7 +103,7 @@ def extract_figure_title_from_mermaid(lines, current_index):

 def parse_md_to_html_blocks(md_content, is_anexo=False):
    """Convert markdown content to HTML blocks with template styles."""
-    global table_counter, figure_counter
+    global table_counter, figure_counter, anexo_table_counter, anexo_figure_counter, global_figure_index

    html_blocks = []
    lines = md_content.split('\n')
@@ -115,7 +119,17 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):

        # Mermaid diagram - convert to figure with actual image
        if line.strip().startswith('```mermaid'):
-            figure_counter += 1
+            # Always increment global index for sequential filenames
+            global_figure_index += 1
+
+            # Display number: Anexo figures use anexo_figure_counter with an "A" prefix; all other figures use figure_counter
+            if is_anexo:
+                anexo_figure_counter += 1
+                fig_num = f"A{anexo_figure_counter}"  # Display number: A1, A2, A3...
+            else:
+                figure_counter += 1
+                fig_num = str(figure_counter)  # Display number: 1, 2, 3...
+
            mermaid_lines = []
            i += 1
            while i < len(lines) and not lines[i].strip() == '```':
@@ -132,18 +146,22 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
            if title_match:
                fig_title = title_match.group(1).strip()
            else:
-                fig_title = f"Diagrama {figure_counter}"
+                fig_title = f"Diagrama {fig_num}"

-            # Check if the generated PNG exists
-            fig_file = f'figures/figura_{figure_counter}.png'
+            # Use global sequential index for filename (figura_1.png, figura_2.png, etc.)
+            fig_file = f'figures/figura_{global_figure_index}.png'
            fig_path = os.path.join(BASE_DIR, 'thesis_output', fig_file)

            # Create figure with MsoCaption class and proper Word SEQ field for cross-reference
            # Format: "Figura X." in bold, title in italic (per UNIR guidelines)
            # Word TOC looks for text with Caption style - anchor must be outside main caption text
-            bookmark_id = f"_Ref_Fig{figure_counter}"
+            bookmark_id = f"_Ref_Fig{fig_num}"
            # mso-pagination:keep-with-next ensures caption stays with figure image (correct MSO property)
-            html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center;mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Figura \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{figure_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')
+            # For Anexo figures, use static text (no SEQ field) to prevent Word from overwriting A1, A2...
+            if is_anexo:
+                html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center;mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura {fig_num}.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')
+            else:
+                html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center;mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Figura \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{fig_num}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')

            if os.path.exists(fig_path):
                # Read actual image dimensions and scale to fit page width
@@ -216,7 +234,8 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
        # Headers - ## becomes h2, ### becomes h3
        if line.startswith('####'):
            text = line.lstrip('#').strip()
-            html_blocks.append(f'<h4><span lang=ES>{text}</span></h4>')
+            # Apply consistent styling like h2/h3, disable numbering for h4
+            html_blocks.append(f'<h4 style="mso-list:none"><span lang=ES style="text-transform:none">{text}</span></h4>')
            i += 1
            continue
        elif line.startswith('###'):
@@ -246,7 +265,13 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):

        # Table - check for table title pattern first
        if '|' in line and i + 1 < len(lines) and '---' in lines[i + 1]:
-            table_counter += 1
+            # Display number: Anexo tables use anexo_table_counter with an "A" prefix; all other tables use table_counter
+            if is_anexo:
+                anexo_table_counter += 1
+                table_num = f"A{anexo_table_counter}"
+            else:
+                table_counter += 1
+                table_num = str(table_counter)

            # Check if previous line has table title (e.g., **Tabla 1.** *Title*)
            table_title = None
@@ -281,7 +306,7 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
            # Add table title with MsoCaption class and proper Word SEQ field for cross-reference
            # Format: "Tabla X." in bold, title in italic (per UNIR guidelines)
            # Word TOC looks for text with Caption style - anchor must be outside main caption text
-            bookmark_id = f"_Ref_Tab{table_counter}"
+            bookmark_id = f"_Ref_Tab{table_num}"
            if table_title:
                # Remove any "Tabla X." or "Tabla AX." pattern from the title
                clean_title = re.sub(r'^Tabla\s+[A-Z]?\d+\.\s*', '', table_title).strip()
@@ -291,7 +316,11 @@ def parse_md_to_html_blocks(md_content, is_anexo=False):
            else:
                clean_title = "Tabla de datos."
            # mso-pagination:keep-with-next ensures caption stays with table (correct MSO property)
-            html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Tabla \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{table_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')
+            # For Anexo tables, use static text (no SEQ field) to prevent Word from overwriting A1, A2...
+            if is_anexo:
+                html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla {table_num}.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')
+            else:
+                html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Tabla \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{table_num}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')

            # Build table HTML with APA style (horizontal lines only, no vertical)
            table_html = '<div align="center"><table class=MsoTableGrid border=1 cellspacing=0 cellpadding=0 align="center" style="border-collapse:collapse;margin-left:auto;margin-right:auto;mso-table-style-name:\'Plain Table 1\'">'
@@ -445,25 +474,25 @@ def extract_resumen_parts(resumen_content):
    spanish_keywords = ''
    if '**Palabras clave:**' in spanish_part:
        text_part, kw_part = spanish_part.split('**Palabras clave:**')
-        spanish_text = text_part.replace('# Resumen', '').strip()
-        spanish_keywords = kw_part.strip()
+        spanish_text = md_to_html_para(text_part.replace('# Resumen', '').strip())
+        spanish_keywords = md_to_html_para(kw_part.strip())
    else:
-        spanish_text = spanish_part.replace('# Resumen', '').strip()
+        spanish_text = md_to_html_para(spanish_part.replace('# Resumen', '').strip())

    # Extract English content
    english_text = ''
    english_keywords = ''
    if '**Keywords:**' in english_part:
        text_part, kw_part = english_part.split('**Keywords:**')
-        english_text = text_part.replace('# Abstract', '').strip()
-        english_keywords = kw_part.strip()
+        english_text = md_to_html_para(text_part.replace('# Abstract', '').strip())
+        english_keywords = md_to_html_para(kw_part.strip())
    else:
-        english_text = english_part.replace('# Abstract', '').strip()
+        english_text = md_to_html_para(english_part.replace('# Abstract', '').strip())

    return spanish_text, spanish_keywords, english_text, english_keywords

 def main():
-    global table_counter, figure_counter
+    global table_counter, figure_counter, anexo_table_counter, anexo_figure_counter

    print("Reading template...")
    html_content = read_file(TEMPLATE_INPUT)
@@ -692,7 +721,7 @@ def main():
                insert_point.insert_after(new_elem)
            print(f"    ✓ Replaced content")

-    print(f"\nSummary: {table_counter} tables, {figure_counter} figures processed")
+    print(f"\nSummary: {table_counter} tables + {anexo_table_counter} Anexo tables, {figure_counter} figures + {anexo_figure_counter} Anexo figures processed")

    print("Saving modified template...")
    output_html = str(soup)