diff --git a/apply_content.py b/apply_content.py index 0f28fcf..24c79d3 100644 --- a/apply_content.py +++ b/apply_content.py @@ -4,6 +4,7 @@ import re import os from bs4 import BeautifulSoup, NavigableString +from latex2mathml.converter import convert as latex_to_mathml BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEMPLATE_INPUT = os.path.join(BASE_DIR, 'instructions/plantilla_individual.htm') @@ -38,6 +39,30 @@ def md_to_html_para(text): text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'\1', text) return text +def convert_latex_formulas(text): + """Convert LaTeX formulas to MathML for Word compatibility.""" + # Block formulas $$...$$ + def convert_block(match): + latex = match.group(1) + try: + mathml = latex_to_mathml(latex, display="block") + return f'
{mathml}
' + except: + return match.group(0) # Keep original if conversion fails + + text = re.sub(r'\$\$([^$]+)\$\$', convert_block, text) + + # Inline formulas $...$ + def convert_inline(match): + latex = match.group(1) + try: + return latex_to_mathml(latex, display="inline") + except: + return match.group(0) + + text = re.sub(r'\$([^$]+)\$', convert_inline, text) + return text + def extract_table_title(lines, current_index): """Look for table title in preceding lines (e.g., **Tabla 1.** *Title*).""" # Check previous non-empty lines for table title @@ -251,6 +276,7 @@ def parse_md_to_html_blocks(md_content): if re.match(r'^[\-\*\+]\s', line): while i < len(lines) and re.match(r'^[\-\*\+]\s', lines[i]): item_text = lines[i][2:].strip() + item_text = convert_latex_formulas(item_text) html_blocks.append(f'· {md_to_html_para(item_text)}
') i += 1 continue @@ -260,6 +286,7 @@ def parse_md_to_html_blocks(md_content): num = 1 while i < len(lines) and re.match(r'^\d+\.\s', lines[i]): item_text = re.sub(r'^\d+\.\s*', '', lines[i]).strip() + item_text = convert_latex_formulas(item_text) html_blocks.append(f'{num}. {md_to_html_para(item_text)}
') num += 1 i += 1 @@ -284,7 +311,12 @@ def parse_md_to_html_blocks(md_content): i += 1 para_text = ' '.join(para_lines) - html_blocks.append(f'{md_to_html_para(para_text)}
') + para_text = convert_latex_formulas(para_text) + # Check if paragraph contains MathML (already wrapped) + if '