LaTeX equations
Some checks failed
build_docker / essential (push) Successful in 1s
build_docker / build_paddle_ocr (push) Has started running
build_docker / build_doctr_gpu (push) Has been cancelled
build_docker / build_easyocr (push) Has been cancelled
build_docker / build_easyocr_gpu (push) Has been cancelled
build_docker / build_doctr (push) Has been cancelled
build_docker / build_raytune (push) Has been cancelled
build_docker / build_paddle_ocr_gpu (push) Has been cancelled

This commit is contained in:
2026-02-04 21:07:27 +01:00
parent e9c937a042
commit b91e31e173
9 changed files with 157 additions and 108 deletions

View File

@@ -403,7 +403,7 @@ def handle_bullet_list(lines, i):
def handle_numbered_list(lines, i):
"""Handle numbered list (1., 2., etc).
"""Handle numbered list (1., 2., etc) with nested bullet sub-lists.
Args:
lines: List of markdown lines
@@ -413,6 +413,8 @@ def handle_numbered_list(lines, i):
Tuple of (html_blocks, new_index)
"""
html_blocks = []
# Each item is a tuple: (main_text, nested_bullets)
# where nested_bullets is a list of bullet point strings
numbered_items = []
while i < len(lines):
@@ -423,8 +425,8 @@ def handle_numbered_list(lines, i):
if i < len(lines) and re.match(r'^\d+\.\s', lines[i]):
item_text = re.sub(r'^\d+\.\s*', '', lines[i]).strip()
i += 1
# Collect any nested/indented content
nested_lines = []
# Collect any nested/indented content (bullet points)
nested_bullets = []
while i < len(lines):
current = lines[i]
# Stop conditions
@@ -436,34 +438,53 @@ def handle_numbered_list(lines, i):
break
if current.startswith('**Tabla') or current.startswith('**Figura'):
break
if current.strip() and not current.startswith(' ') and not current.startswith('\t') and not current.startswith('-'):
if nested_lines or not current.strip():
break
if current.strip():
cleaned = current.strip()
if cleaned.startswith('- '):
cleaned = cleaned[2:]
nested_lines.append(cleaned)
# Check for non-indented, non-bullet content (end of nested)
stripped = current.strip()
if stripped and not current.startswith(' ') and not current.startswith('\t') and not stripped.startswith('-'):
break
# Collect indented bullet points
if stripped.startswith('- '):
bullet_text = stripped[2:].strip()
nested_bullets.append(bullet_text)
i += 1
# Combine item with nested content
if nested_lines:
item_text = item_text + '<br/>' + '<br/>'.join(nested_lines)
item_text = convert_latex_formulas(item_text)
numbered_items.append(md_to_html_para(item_text))
numbered_items.append((md_to_html_para(item_text), nested_bullets))
else:
break
# Output with proper First/Middle/Last classes
for idx, item in enumerate(numbered_items):
# Output numbered items with nested bullet lists
for idx, (item_text, nested_bullets) in enumerate(numbered_items):
num = idx + 1
if len(numbered_items) == 1:
cls = 'MsoListParagraph'
elif idx == 0:
cls = 'MsoListParagraphCxSpFirst'
elif idx == len(numbered_items) - 1:
elif idx == len(numbered_items) - 1 and not nested_bullets:
cls = 'MsoListParagraphCxSpLast'
else:
cls = 'MsoListParagraphCxSpMiddle'
html_blocks.append(f'<p class={cls} style="margin-left:36pt;text-indent:-18pt"><span lang=ES>{num}.<span style="font-size:7pt">&nbsp;&nbsp;&nbsp;</span>{item}</span></p>')
# Main numbered item
html_blocks.append(f'<p class={cls} style="margin-left:36pt;text-indent:-18pt"><span lang=ES>{num}.<span style="font-size:7pt">&nbsp;&nbsp;&nbsp;</span>{item_text}</span></p>')
# Nested bullet sub-list (indented further)
if nested_bullets:
for bullet_idx, bullet_text in enumerate(nested_bullets):
bullet_text = convert_latex_formulas(bullet_text)
bullet_html = md_to_html_para(bullet_text)
# Determine class for sub-list items
if bullet_idx == 0:
sub_cls = 'MsoListParagraphCxSpFirst'
elif bullet_idx == len(nested_bullets) - 1:
# If this is the last bullet of the last numbered item, use Last
if idx == len(numbered_items) - 1:
sub_cls = 'MsoListParagraphCxSpLast'
else:
sub_cls = 'MsoListParagraphCxSpLast'
else:
sub_cls = 'MsoListParagraphCxSpMiddle'
# Nested bullets at 54pt margin (36pt + 18pt)
html_blocks.append(f'<p class={sub_cls} style="margin-left:54pt;text-indent:-18pt"><span lang=ES style="font-family:Symbol">·</span><span lang=ES style="font-size:7pt">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span lang=ES>{bullet_html}</span></p>')
return html_blocks, i