formula
Some checks failed
build_docker / essential (pull_request) Successful in 0s
build_docker / build_cpu (pull_request) Has been cancelled
build_docker / build_gpu (pull_request) Has been cancelled
build_docker / build_easyocr (pull_request) Has been cancelled
build_docker / build_easyocr_gpu (pull_request) Has been cancelled
build_docker / build_doctr (pull_request) Has been cancelled
build_docker / build_doctr_gpu (pull_request) Has been cancelled
build_docker / build_raytune (pull_request) Has been cancelled

This commit is contained in:
2026-01-19 17:14:55 +01:00
parent 506f447d46
commit b1539fd79f
2 changed files with 56 additions and 16 deletions

View File

@@ -4,6 +4,7 @@
import re
import os
from bs4 import BeautifulSoup, NavigableString
from latex2mathml.converter import convert as latex_to_mathml
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
TEMPLATE_INPUT = os.path.join(BASE_DIR, 'instructions/plantilla_individual.htm')
@@ -38,6 +39,30 @@ def md_to_html_para(text):
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2">\1</a>', text)
return text
def convert_latex_formulas(text):
    """Convert LaTeX formulas to MathML for Word compatibility.

    Two substitution passes are applied in order:

    1. Block formulas (``$$...$$``) are converted with ``display="block"``
       and wrapped in a centered ``MsoNormal`` paragraph.
    2. Inline formulas (``$...$``) are converted with ``display="inline"``
       and inserted without any wrapper.

    The inline pass runs over the output of the block pass. In either pass,
    when the converter raises, the matched text is returned unchanged so a
    single bad formula does not abort the whole document.

    Args:
        text: Text that may contain ``$``-delimited LaTeX formulas.

    Returns:
        The text with every successfully converted formula replaced by
        MathML markup.
    """

    # Block formulas $$...$$
    def convert_block(match):
        latex = match.group(1)
        try:
            mathml = latex_to_mathml(latex, display="block")
        except Exception:
            # Best effort: keep the original markup if conversion fails.
            # ``Exception`` (not a bare ``except``) so that Ctrl-C and
            # interpreter shutdown are not silently swallowed.
            return match.group(0)
        return f'<p class=MsoNormal style="text-align:center">{mathml}</p>'

    text = re.sub(r'\$\$([^$]+)\$\$', convert_block, text)

    # Inline formulas $...$
    def convert_inline(match):
        latex = match.group(1)
        try:
            return latex_to_mathml(latex, display="inline")
        except Exception:
            # Same best-effort fallback as for block formulas.
            return match.group(0)

    text = re.sub(r'\$([^$]+)\$', convert_inline, text)
    return text
def extract_table_title(lines, current_index):
"""Look for table title in preceding lines (e.g., **Tabla 1.** *Title*)."""
# Check previous non-empty lines for table title
@@ -251,6 +276,7 @@ def parse_md_to_html_blocks(md_content):
if re.match(r'^[\-\*\+]\s', line):
while i < len(lines) and re.match(r'^[\-\*\+]\s', lines[i]):
item_text = lines[i][2:].strip()
item_text = convert_latex_formulas(item_text)
html_blocks.append(f'<p class=MsoListParagraphCxSpMiddle style="margin-left:36pt;text-indent:-18pt"><span lang=ES style="font-family:Symbol">·</span><span lang=ES style="font-size:7pt">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span lang=ES>{md_to_html_para(item_text)}</span></p>')
i += 1
continue
@@ -260,6 +286,7 @@ def parse_md_to_html_blocks(md_content):
num = 1
while i < len(lines) and re.match(r'^\d+\.\s', lines[i]):
item_text = re.sub(r'^\d+\.\s*', '', lines[i]).strip()
item_text = convert_latex_formulas(item_text)
html_blocks.append(f'<p class=MsoListParagraphCxSpMiddle style="margin-left:36pt;text-indent:-18pt"><span lang=ES>{num}.<span style="font-size:7pt">&nbsp;&nbsp;&nbsp;</span>{md_to_html_para(item_text)}</span></p>')
num += 1
i += 1
@@ -284,7 +311,12 @@ def parse_md_to_html_blocks(md_content):
i += 1
para_text = ' '.join(para_lines)
html_blocks.append(f'<p class=MsoNormal><span lang=ES>{md_to_html_para(para_text)}</span></p>')
para_text = convert_latex_formulas(para_text)
# Check if paragraph contains MathML (already wrapped)
if '<math' in para_text:
html_blocks.append(para_text)
else:
html_blocks.append(f'<p class=MsoNormal><span lang=ES>{md_to_html_para(para_text)}</span></p>')
return '\n\n'.join(html_blocks)