correlations

2026-01-24 16:48:47 +01:00
parent 4c299cc00f
commit d384f1e4d3
7 changed files with 369 additions and 11 deletions
--- a/apply_content.py
+++ b/apply_content.py
@@ -97,7 +97,7 @@ def extract_figure_title_from_mermaid(lines, current_index):

    return None

-def parse_md_to_html_blocks(md_content):
+def parse_md_to_html_blocks(md_content, is_anexo=False):
    """Convert markdown content to HTML blocks with template styles."""
    global table_counter, figure_counter

@@ -142,7 +142,8 @@ def parse_md_to_html_blocks(md_content):
            # Format: "Figura X." in bold, title in italic (per UNIR guidelines)
            # Word TOC looks for text with Caption style - anchor must be outside main caption text
            bookmark_id = f"_Ref_Fig{figure_counter}"
-            html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Figura \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{figure_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')
+            # mso-pagination:keep-with-next ensures caption stays with figure image (correct MSO property)
+            html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center;mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Figura \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{figure_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')

            if os.path.exists(fig_path):
                # Read actual image dimensions and scale to fit page width
@@ -162,10 +163,12 @@ def parse_md_to_html_blocks(md_content):
                w_pt = new_w * 0.75
                h_pt = new_h * 0.75

-                html_blocks.append(f'''<p class=MsoNormal style="text-align:center"><span lang=ES><img width="{new_w}" height="{new_h}" style="width:{w_pt}pt;height:{h_pt}pt;display:block;margin:0 auto" src="{fig_file}" alt="{fig_title}"/></span></p>''')
+                # mso-pagination:keep-with-next ensures image stays with source line
+                html_blocks.append(f'''<p class=MsoNormal style="text-align:center;mso-pagination:keep-with-next"><span lang=ES><img width="{new_w}" height="{new_h}" style="width:{w_pt}pt;height:{h_pt}pt;display:block;margin:0 auto" src="{fig_file}" alt="{fig_title}"/></span></p>''')
            else:
                # Fallback to placeholder
-                html_blocks.append(f'''<p class=MsoNormal style="text-align:center;border:1px dashed #999;padding:20px;margin:10px 40px;background:#f9f9f9"><span lang=ES style="color:#666">[Insertar diagrama Mermaid aquí]</span></p>''')
+                # mso-pagination:keep-with-next ensures placeholder stays with source line
+                html_blocks.append(f'''<p class=MsoNormal style="text-align:center;mso-pagination:keep-with-next;border:1px dashed #999;padding:20px;margin:10px 40px;background:#f9f9f9"><span lang=ES style="color:#666">[Insertar diagrama Mermaid aquí]</span></p>''')

            # Check if next non-empty line has custom Fuente
            custom_source = None
@@ -218,12 +221,22 @@ def parse_md_to_html_blocks(md_content):
            continue
        elif line.startswith('###'):
            text = line.lstrip('#').strip()
-            html_blocks.append(f'<h3 style="mso-list:l22 level3 lfo18"><span lang=ES style="text-transform:none">{text}</span></h3>')
+            # Disable auto-numbering for Anexo content or A.x headings
+            if is_anexo or re.match(r'^A\.\d+', text):
+                # mso-list:none explicitly disables inherited list numbering from template CSS
+                html_blocks.append(f'<h3 style="mso-list:none"><span lang=ES style="text-transform:none">{text}</span></h3>')
+            else:
+                html_blocks.append(f'<h3 style="mso-list:l22 level3 lfo18"><span lang=ES style="text-transform:none">{text}</span></h3>')
            i += 1
            continue
        elif line.startswith('##'):
            text = line.lstrip('#').strip()
-            html_blocks.append(f'<h2 style="mso-list:l22 level2 lfo18"><span lang=ES style="text-transform:none">{text}</span></h2>')
+            # Disable auto-numbering for Anexo content or A.x headings
+            if is_anexo or re.match(r'^A\.\d+', text):
+                # mso-list:none explicitly disables inherited list numbering from template CSS
+                html_blocks.append(f'<h2 style="mso-list:none"><span lang=ES style="text-transform:none">{text}</span></h2>')
+            else:
+                html_blocks.append(f'<h2 style="mso-list:l22 level2 lfo18"><span lang=ES style="text-transform:none">{text}</span></h2>')
            i += 1
            continue
        elif line.startswith('#'):
@@ -277,10 +290,10 @@ def parse_md_to_html_blocks(md_content):
                clean_title = alt_title
            else:
                clean_title = "Tabla de datos."
-            html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Tabla \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{table_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')
+            # mso-pagination:keep-with-next ensures caption stays with table (correct MSO property)
+            html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Tabla \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{table_counter}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')

            # Build table HTML with APA style (horizontal lines only, no vertical)
-            # Wrap in centered div for Word compatibility
            table_html = '<div align="center"><table class=MsoTableGrid border=1 cellspacing=0 cellpadding=0 align="center" style="border-collapse:collapse;margin-left:auto;margin-right:auto;mso-table-style-name:\'Plain Table 1\'">'
            for j, tline in enumerate(table_lines):
                cells = [c.strip() for c in tline.split('|')[1:-1]]
@@ -365,10 +378,10 @@ def parse_md_to_html_blocks(md_content):

    return '\n\n'.join(html_blocks)

-def extract_section_content(md_content):
+def extract_section_content(md_content, is_anexo=False):
    """Extract content from markdown, skipping the first # header."""
    md_content = re.sub(r'^#\s+[^\n]+\n+', '', md_content, count=1)
-    return parse_md_to_html_blocks(md_content)
+    return parse_md_to_html_blocks(md_content, is_anexo=is_anexo)

 def find_section_element(soup, keyword):
    """Find element containing keyword (h1 or special paragraph classes)."""
@@ -672,7 +685,7 @@ def main():
                    current.extract()
                current = next_elem

-            anexo_content = extract_section_content(docs['anexo'])
+            anexo_content = extract_section_content(docs['anexo'], is_anexo=True)
            anexo_soup = BeautifulSoup(anexo_content, 'html.parser')
            insert_point = anexo_elem
            for new_elem in reversed(list(anexo_soup.children)):