formula

2026-01-19 17:14:55 +01:00
parent 506f447d46
commit b1539fd79f
2 changed files with 56 additions and 16 deletions
--- a/apply_content.py
+++ b/apply_content.py
@@ -4,6 +4,7 @@
 import re
 import os
 from bs4 import BeautifulSoup, NavigableString
+from latex2mathml.converter import convert as latex_to_mathml

 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 TEMPLATE_INPUT = os.path.join(BASE_DIR, 'instructions/plantilla_individual.htm')
@@ -38,6 +39,30 @@ def md_to_html_para(text):
    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2">\1</a>', text)
    return text

+def convert_latex_formulas(text):
+    """Convert LaTeX formulas in *text* to MathML for Word compatibility.
+
+    ``$$...$$`` spans are replaced by a centered ``MsoNormal`` paragraph
+    containing block-display MathML; ``$...$`` spans are replaced by
+    inline MathML. A formula whose conversion raises is left in the text
+    exactly as it was written.
+
+    Returns the text with every convertible formula substituted.
+    """
+    def render(latex, display):
+        # Best effort: a formula that cannot be converted must not abort the
+        # whole document. Catch Exception rather than using a bare except so
+        # KeyboardInterrupt and SystemExit are not swallowed.
+        try:
+            return latex_to_mathml(latex, display=display)
+        except Exception:
+            return None
+
+    def convert_block(match):
+        mathml = render(match.group(1), "block")
+        if mathml is None:
+            return match.group(0)  # Keep original if conversion fails
+        return f'<p class=MsoNormal style="text-align:center">{mathml}</p>'
+
+    def convert_inline(match):
+        mathml = render(match.group(1), "inline")
+        return match.group(0) if mathml is None else mathml
+
+    # Block formulas first, so the inline pattern does not consume the inner
+    # ``$...$`` of a ``$$...$$`` pair.
+    text = re.sub(r'\$\$([^$]+)\$\$', convert_block, text)
+    return re.sub(r'\$([^$]+)\$', convert_inline, text)
+
 def extract_table_title(lines, current_index):
    """Look for table title in preceding lines (e.g., **Tabla 1.** *Title*)."""
    # Check previous non-empty lines for table title
@@ -251,6 +276,7 @@ def parse_md_to_html_blocks(md_content):
        if re.match(r'^[\-\*\+]\s', line):
            while i < len(lines) and re.match(r'^[\-\*\+]\s', lines[i]):
                item_text = lines[i][2:].strip()
+                item_text = convert_latex_formulas(item_text)
                html_blocks.append(f'<p class=MsoListParagraphCxSpMiddle style="margin-left:36pt;text-indent:-18pt"><span lang=ES style="font-family:Symbol">·</span><span lang=ES style="font-size:7pt">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span lang=ES>{md_to_html_para(item_text)}</span></p>')
                i += 1
            continue
@@ -260,6 +286,7 @@ def parse_md_to_html_blocks(md_content):
            num = 1
            while i < len(lines) and re.match(r'^\d+\.\s', lines[i]):
                item_text = re.sub(r'^\d+\.\s*', '', lines[i]).strip()
+                item_text = convert_latex_formulas(item_text)
                html_blocks.append(f'<p class=MsoListParagraphCxSpMiddle style="margin-left:36pt;text-indent:-18pt"><span lang=ES>{num}.<span style="font-size:7pt">&nbsp;&nbsp;&nbsp;</span>{md_to_html_para(item_text)}</span></p>')
                num += 1
                i += 1
@@ -284,7 +311,12 @@ def parse_md_to_html_blocks(md_content):
            i += 1

        para_text = ' '.join(para_lines)
-        html_blocks.append(f'<p class=MsoNormal><span lang=ES>{md_to_html_para(para_text)}</span></p>')
+        para_text = convert_latex_formulas(para_text)
+        # Check if paragraph contains MathML (already wrapped)
+        if '<math' in para_text:
+            html_blocks.append(para_text)
+        else:
+            html_blocks.append(f'<p class=MsoNormal><span lang=ES>{md_to_html_para(para_text)}</span></p>')

    return '\n\n'.join(html_blocks)