formula
Some checks failed
build_docker / essential (pull_request) Successful in 0s
build_docker / build_cpu (pull_request) Has been cancelled
build_docker / build_gpu (pull_request) Has been cancelled
build_docker / build_easyocr (pull_request) Has been cancelled
build_docker / build_easyocr_gpu (pull_request) Has been cancelled
build_docker / build_doctr (pull_request) Has been cancelled
build_docker / build_doctr_gpu (pull_request) Has been cancelled
build_docker / build_raytune (pull_request) Has been cancelled
Some checks failed
build_docker / essential (pull_request) Successful in 0s
build_docker / build_cpu (pull_request) Has been cancelled
build_docker / build_gpu (pull_request) Has been cancelled
build_docker / build_easyocr (pull_request) Has been cancelled
build_docker / build_easyocr_gpu (pull_request) Has been cancelled
build_docker / build_doctr (pull_request) Has been cancelled
build_docker / build_doctr_gpu (pull_request) Has been cancelled
build_docker / build_raytune (pull_request) Has been cancelled
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
import re
|
||||
import os
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
from latex2mathml.converter import convert as latex_to_mathml
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
TEMPLATE_INPUT = os.path.join(BASE_DIR, 'instructions/plantilla_individual.htm')
|
||||
@@ -38,6 +39,30 @@ def md_to_html_para(text):
|
||||
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2">\1</a>', text)
|
||||
return text
|
||||
|
||||
def convert_latex_formulas(text):
    """Convert LaTeX formulas to MathML for Word compatibility.

    Block formulas (``$$...$$``) are handled first and wrapped in a centred
    ``MsoNormal`` paragraph; the remaining inline formulas (``$...$``) are
    then replaced by the bare MathML markup. A formula whose conversion
    raises is left in the text exactly as it was written.

    Args:
        text: Text that may contain LaTeX formulas delimited by ``$``.

    Returns:
        The text with every convertible formula replaced by MathML.
    """

    def convert_block(match):
        """Return the centred MathML paragraph for one ``$$...$$`` match."""
        try:
            mathml = latex_to_mathml(match.group(1), display="block")
        except Exception:
            # Best effort: keep the original formula if conversion fails.
            # Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still propagate.
            return match.group(0)
        return f'<p class=MsoNormal style="text-align:center">{mathml}</p>'

    def convert_inline(match):
        """Return the inline MathML for one ``$...$`` match."""
        try:
            return latex_to_mathml(match.group(1), display="inline")
        except Exception:
            # Same best-effort fallback as for block formulas.
            return match.group(0)

    # Block formulas must be replaced before inline ones; otherwise the
    # inline pattern would match inside the ``$$`` delimiters.
    text = re.sub(r'\$\$([^$]+)\$\$', convert_block, text)
    text = re.sub(r'\$([^$]+)\$', convert_inline, text)
    return text
|
||||
|
||||
def extract_table_title(lines, current_index):
|
||||
"""Look for table title in preceding lines (e.g., **Tabla 1.** *Title*)."""
|
||||
# Check previous non-empty lines for table title
|
||||
@@ -251,6 +276,7 @@ def parse_md_to_html_blocks(md_content):
|
||||
if re.match(r'^[\-\*\+]\s', line):
|
||||
while i < len(lines) and re.match(r'^[\-\*\+]\s', lines[i]):
|
||||
item_text = lines[i][2:].strip()
|
||||
item_text = convert_latex_formulas(item_text)
|
||||
html_blocks.append(f'<p class=MsoListParagraphCxSpMiddle style="margin-left:36pt;text-indent:-18pt"><span lang=ES style="font-family:Symbol">·</span><span lang=ES style="font-size:7pt"> </span><span lang=ES>{md_to_html_para(item_text)}</span></p>')
|
||||
i += 1
|
||||
continue
|
||||
@@ -260,6 +286,7 @@ def parse_md_to_html_blocks(md_content):
|
||||
num = 1
|
||||
while i < len(lines) and re.match(r'^\d+\.\s', lines[i]):
|
||||
item_text = re.sub(r'^\d+\.\s*', '', lines[i]).strip()
|
||||
item_text = convert_latex_formulas(item_text)
|
||||
html_blocks.append(f'<p class=MsoListParagraphCxSpMiddle style="margin-left:36pt;text-indent:-18pt"><span lang=ES>{num}.<span style="font-size:7pt"> </span>{md_to_html_para(item_text)}</span></p>')
|
||||
num += 1
|
||||
i += 1
|
||||
@@ -284,7 +311,12 @@ def parse_md_to_html_blocks(md_content):
|
||||
i += 1
|
||||
|
||||
para_text = ' '.join(para_lines)
|
||||
html_blocks.append(f'<p class=MsoNormal><span lang=ES>{md_to_html_para(para_text)}</span></p>')
|
||||
para_text = convert_latex_formulas(para_text)
|
||||
# Check if paragraph contains MathML (already wrapped)
|
||||
if '<math' in para_text:
|
||||
html_blocks.append(para_text)
|
||||
else:
|
||||
html_blocks.append(f'<p class=MsoNormal><span lang=ES>{md_to_html_para(para_text)}</span></p>')
|
||||
|
||||
return '\n\n'.join(html_blocks)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user