Cross references
Some checks failed
build_docker / essential (push) Successful in 1s
build_docker / build_paddle_ocr (push) Successful in 5m6s
build_docker / build_raytune (push) Has been cancelled
build_docker / build_easyocr_gpu (push) Has been cancelled
build_docker / build_doctr (push) Has been cancelled
build_docker / build_doctr_gpu (push) Has been cancelled
build_docker / build_paddle_ocr_gpu (push) Has started running
build_docker / build_easyocr (push) Has been cancelled
Some checks failed
build_docker / essential (push) Successful in 1s
build_docker / build_paddle_ocr (push) Successful in 5m6s
build_docker / build_raytune (push) Has been cancelled
build_docker / build_easyocr_gpu (push) Has been cancelled
build_docker / build_doctr (push) Has been cancelled
build_docker / build_doctr_gpu (push) Has been cancelled
build_docker / build_paddle_ocr_gpu (push) Has started running
build_docker / build_easyocr (push) Has been cancelled
This commit is contained in:
469
content_handlers.py
Normal file
469
content_handlers.py
Normal file
@@ -0,0 +1,469 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Content block handlers for markdown to HTML conversion."""
|
||||
|
||||
import os
|
||||
import re
|
||||
from PIL import Image
|
||||
|
||||
from markdown_utils import (
|
||||
md_to_html_para,
|
||||
convert_latex_formulas,
|
||||
is_source_line,
|
||||
extract_source_from_line,
|
||||
is_leyenda_line,
|
||||
extract_leyenda_from_line,
|
||||
)
|
||||
|
||||
# Absolute path of the directory that contains this module. Relative
# resources (e.g. the 'thesis_output' folder used for figures) are resolved
# against it rather than against the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def handle_mermaid_diagram(lines, i, counters, is_anexo):
    """Convert a fenced ``mermaid`` block into a captioned figure.

    The diagram is not rendered here. The function emits a Word-style caption,
    then an ``<img>`` pointing at ``figures/figura_<n>.png`` when that file
    exists under ``BASE_DIR/thesis_output`` (or a dashed placeholder paragraph
    when it does not), then a "Fuente" line, an optional "Leyenda" line and a
    trailing spacer paragraph.

    Args:
        lines: List of markdown lines.
        i: Index of the opening ```` ```mermaid ```` line.
        counters: Dict of running counters. ``'global_figure'`` is always
            incremented (it names the image file); ``'anexo_figure'`` or
            ``'figure'`` is incremented depending on ``is_anexo`` (it is the
            number shown in the caption).
        is_anexo: True while processing the Anexo section; captions are then
            numbered ``A1``, ``A2``, ... and carry a TC field instead of a
            SEQ field.

    Returns:
        Tuple ``(html_blocks, new_index)`` where ``new_index`` is the line
        after the last line consumed (closing fence, Fuente or Leyenda).
    """
    html_blocks = []

    # The file name always follows the global sequence, independent of the
    # number displayed in the caption.
    counters['global_figure'] += 1

    if is_anexo:
        counters['anexo_figure'] += 1
        fig_num = f"A{counters['anexo_figure']}"
    else:
        counters['figure'] += 1
        fig_num = str(counters['figure'])

    # Collect the diagram source up to (not including) the closing fence.
    mermaid_lines = []
    i += 1
    while i < len(lines) and lines[i].strip() != '```':
        mermaid_lines.append(lines[i])
        i += 1

    # Try to extract a title from the mermaid content: quoted form first
    # (title: "Something"), then the unquoted form (title: Something).
    mermaid_content = '\n'.join(mermaid_lines)
    title_match = re.search(r'title:\s*["\']([^"\']+)["\']', mermaid_content)
    if not title_match:
        title_match = re.search(r'title:\s*([^"\'\n]+)', mermaid_content)
    if title_match:
        fig_title = title_match.group(1).strip()
    else:
        fig_title = f"Diagrama {fig_num}"

    fig_file = f'figures/figura_{counters["global_figure"]}.png'
    fig_path = os.path.join(BASE_DIR, 'thesis_output', fig_file)

    # Caption with MsoCaption class. Anexo figures get a literal number plus
    # a TC field; regular figures wrap the number in a Word SEQ field.
    bookmark_id = f"_Ref_Fig{fig_num}"

    if is_anexo:
        tc_field = f'''<!--[if supportFields]><span style='mso-element:field-begin'></span> TC "Figura {fig_num}. {fig_title}" \\f c \\l 1 <span style='mso-element:field-end'></span><![endif]-->'''
        html_blocks.append(f'''<a name="{bookmark_id}"></a>{tc_field}<p class=MsoCaption style="text-align:center;mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura {fig_num}.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')
    else:
        html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="text-align:center;mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Figura <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Figura \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{fig_num}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{fig_title}</span></i></p>''')

    if os.path.exists(fig_path):
        # Only the dimensions are needed. The context manager closes the
        # underlying file right away instead of leaving one open handle per
        # figure until garbage collection.
        with Image.open(fig_path) as img:
            orig_w, orig_h = img.size

        # Scale to fit max width of 566px (15cm at 96dpi) while preserving
        # the aspect ratio; smaller images keep their size.
        max_width = 566
        if orig_w > max_width:
            scale = max_width / orig_w
            new_w = max_width
            new_h = int(orig_h * scale)
        else:
            new_w, new_h = orig_w, orig_h

        # Convert to pt (1px at 96dpi = 0.75pt)
        w_pt = new_w * 0.75
        h_pt = new_h * 0.75

        html_blocks.append(f'''<p class=MsoNormal style="text-align:center;mso-pagination:keep-with-next"><span lang=ES><img width="{new_w}" height="{new_h}" style="width:{w_pt}pt;height:{h_pt}pt;display:block;margin:0 auto" src="{fig_file}" alt="{fig_title}"/></span></p>''')
    else:
        # No rendered image on disk: emit a visible placeholder.
        html_blocks.append(f'''<p class=MsoNormal style="text-align:center;mso-pagination:keep-with-next;border:1px dashed #999;padding:20px;margin:10px 40px;background:#f9f9f9"><span lang=ES style="color:#666">[Insertar diagrama Mermaid aquí]</span></p>''')

    # Check whether the next non-empty line after the closing fence is a
    # custom Fuente line; a Leyenda line is only looked for after a Fuente.
    custom_source = None
    fig_leyenda = None
    lookahead = i + 1
    while lookahead < len(lines) and not lines[lookahead].strip():
        lookahead += 1
    if lookahead < len(lines):
        next_line = lines[lookahead].strip()
        if is_source_line(next_line):
            custom_source = extract_source_from_line(next_line)
            if custom_source and not custom_source.endswith('.'):
                custom_source += '.'
            i = lookahead
            leyenda_idx = i + 1
            while leyenda_idx < len(lines) and not lines[leyenda_idx].strip():
                leyenda_idx += 1
            if leyenda_idx < len(lines) and is_leyenda_line(lines[leyenda_idx]):
                fig_leyenda = extract_leyenda_from_line(lines[leyenda_idx])
                i = leyenda_idx

    if custom_source:
        source_html = md_to_html_para(custom_source)
        html_blocks.append(f'''<p class=Piedefoto-tabla style="margin-left:0cm;text-align:center"><span lang=ES>Fuente: {source_html}</span></p>''')
    else:
        html_blocks.append(f'''<p class=Piedefoto-tabla style="margin-left:0cm;text-align:center"><span lang=ES>Fuente: Elaboración propia.</span></p>''')

    if fig_leyenda:
        leyenda_html = md_to_html_para(fig_leyenda)
        if not fig_leyenda.endswith('.'):
            leyenda_html += '.'
        html_blocks.append(f'''<p class=Piedefoto-tabla style="margin-left:0cm;text-align:center"><span lang=ES>Leyenda: {leyenda_html}</span></p>''')

    # Spacer paragraph after the figure.
    html_blocks.append('<p class=MsoNormal><span lang=ES><o:p> </o:p></span></p>')

    # Step past the last consumed line (closing fence, Fuente or Leyenda).
    i += 1

    return html_blocks, i
|
||||
|
||||
|
||||
def handle_code_block(lines, i):
    """Render a non-mermaid fenced code block as a styled ``<pre>`` box.

    Args:
        lines: List of markdown lines.
        i: Index of the opening fence line (starting with ```).

    Returns:
        Tuple ``(html_blocks, new_index)``. ``html_blocks`` holds a single
        ``<div><pre>...</pre></div>`` string; ``new_index`` points just past
        the closing fence (or past the end of ``lines`` when the block is
        unterminated).
    """
    # Function-scope import so the escaping fix does not depend on the
    # module's import block.
    from html import escape

    # Skip the opening fence; its language tag is not used in the output.
    i += 1
    code_lines = []
    while i < len(lines) and not lines[i].strip().startswith('```'):
        code_lines.append(lines[i])
        i += 1

    # Escape &, < and > so the code is shown literally rather than being
    # parsed as markup. Quotes are left alone because the text is element
    # content, not an attribute value.
    code = escape('\n'.join(code_lines), quote=False)

    html_blocks = [f'''<div style="background:#E6F4F9;border-top:solid #0098CD .5pt;border-bottom:solid #0098CD .5pt;padding:8pt 12pt;margin:6pt 0">
<pre style="font-family:Consolas,monospace;font-size:9pt;color:#333333;margin:0;white-space:pre-wrap;word-wrap:break-word">{code}</pre>
</div>''']

    # Step past the closing fence.
    return html_blocks, i + 1
|
||||
|
||||
|
||||
def handle_header(line, is_anexo):
    """Turn a markdown heading line into Word-flavoured heading HTML.

    Args:
        line: The heading line, starting with one or more ``#``.
        is_anexo: True while processing the Anexo section; h2/h3 headings
            then opt out of Word's automatic list numbering.

    Returns:
        The HTML string for ``##``, ``###`` and ``####`` (or deeper)
        headings; None for a single-``#`` heading (h1 is kept from the
        original document) and for lines that are not headings.
    """
    # h1 and non-heading lines produce nothing.
    if not line.startswith('##'):
        return None

    title = line.lstrip('#').strip()

    # Four or more hashes: plain bold h4, never numbered or bookmarked.
    if line.startswith('####'):
        return f'<h4 style="mso-list:none"><b><span lang=ES style="text-transform:none">{title}</span></b></h4>'

    level = 3 if line.startswith('###') else 2

    # A leading section number such as "3.1.2" becomes a bookmark anchor
    # named _Ref_Sec3_1_2 placed in front of the heading.
    anchor = ''
    numbered = re.match(r'^([\d\.]+)\s+', title)
    if numbered:
        section = numbered.group(1).rstrip('.').replace('.', '_')
        anchor = f'<a name="_Ref_Sec{section}"></a>'

    # Anexo content and "A.x" headings must not be auto-numbered by Word.
    if is_anexo or re.match(r'^A\.\d+', title):
        list_style = 'none'
    else:
        list_style = f'l22 level{level} lfo18'

    return f'{anchor}<h{level} style="mso-list:{list_style}"><span lang=ES style="text-transform:none">{title}</span></h{level}>'
|
||||
|
||||
|
||||
def _is_table_separator(row):
    """Return True when *row* is a markdown table delimiter row.

    A delimiter row contains ``---`` and every one of its cells consists only
    of hyphens with optional alignment colons (e.g. ``| --- | :---: |``).
    Requiring all cells to match keeps data rows that merely contain
    ``---`` in one cell.
    """
    if '---' not in row:
        return False
    cells = [cell.strip() for cell in row.strip().strip('|').split('|')]
    return all(re.fullmatch(r':?-+:?', cell) for cell in cells)


def handle_table(lines, i, counters, is_anexo):
    """Convert a markdown pipe table into a captioned, APA-styled HTML table.

    Emits, in order: a caption paragraph (with bookmark anchor), the table
    itself, a "Fuente" line, an optional "Leyenda" line and a spacer
    paragraph.

    Args:
        lines: List of markdown lines.
        i: Index of the first table row.
        counters: Dict of running counters; ``'anexo_table'`` or ``'table'``
            is incremented depending on ``is_anexo``.
        is_anexo: True while processing the Anexo section; tables are then
            numbered ``A1``, ``A2``, ... and carry a TC field instead of a
            SEQ field.

    Returns:
        Tuple ``(html_blocks, new_index)`` where ``new_index`` is the first
        line after the table and any Fuente/Leyenda lines that were consumed.
    """
    default_source = "Elaboración propia"
    html_blocks = []

    # Use Anexo-specific counter with "A" prefix, or global counter
    if is_anexo:
        counters['anexo_table'] += 1
        table_num = f"A{counters['anexo_table']}"
    else:
        counters['table'] += 1
        table_num = str(counters['table'])

    table_title = None
    alt_title = None
    table_source = default_source

    # Look back up to four lines for a title. The stop bound is -1 rather
    # than 0 so that the very first line of the document is inspected too.
    for j in range(i - 1, max(-1, i - 5), -1):
        prev_line = lines[j].strip()
        if prev_line.startswith('**Tabla') or prev_line.startswith('*Tabla'):
            table_title = re.sub(r'\*+', '', prev_line).strip()
            break
        elif prev_line.startswith('**') and prev_line.endswith(':**'):
            # A bold "Something:" label is remembered as a fallback title,
            # but the search continues in case a real "Tabla" title precedes it.
            alt_title = re.sub(r'\*+', '', prev_line).rstrip(':').strip()
        elif prev_line and not prev_line.startswith('|'):
            break

    # Collect the table rows, dropping only true delimiter rows.
    table_lines = []
    while i < len(lines) and '|' in lines[i]:
        if not _is_table_separator(lines[i]):
            table_lines.append(lines[i])
        i += 1

    # Look ahead (past blank lines) for a Fuente line, then for a Leyenda
    # line after it.
    source_idx = i
    table_leyenda = None
    while source_idx < len(lines) and not lines[source_idx].strip():
        source_idx += 1
    if source_idx < len(lines) and is_source_line(lines[source_idx]):
        # Fall back to the default when nothing usable is extracted, matching
        # how handle_mermaid_diagram treats an empty source.
        table_source = extract_source_from_line(lines[source_idx]) or default_source
        i = source_idx + 1
        leyenda_idx = i
        while leyenda_idx < len(lines) and not lines[leyenda_idx].strip():
            leyenda_idx += 1
        if leyenda_idx < len(lines) and is_leyenda_line(lines[leyenda_idx]):
            table_leyenda = extract_leyenda_from_line(lines[leyenda_idx])
            i = leyenda_idx + 1

    # Caption text: explicit "Tabla N. ..." title with its number stripped,
    # else the bold-label fallback, else a generic caption.
    bookmark_id = f"_Ref_Tab{table_num}"
    if table_title:
        clean_title = re.sub(r'^Tabla\s+[A-Z]?\d+\.\s*', '', table_title).strip()
    elif alt_title:
        clean_title = alt_title
    else:
        clean_title = "Tabla de datos."

    if is_anexo:
        tc_field = f'''<!--[if supportFields]><span style='mso-element:field-begin'></span> TC "Tabla {table_num}. {clean_title}" \\f t \\l 1 <span style='mso-element:field-end'></span><![endif]-->'''
        html_blocks.append(f'''<a name="{bookmark_id}"></a>{tc_field}<p class=MsoCaption style="mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla {table_num}.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')
    else:
        html_blocks.append(f'''<a name="{bookmark_id}"></a><p class=MsoCaption style="mso-pagination:keep-with-next"><b><span lang=ES style="font-size:12.0pt;line-height:150%">Tabla <!--[if supportFields]><span style='mso-element:field-begin'></span> SEQ Tabla \\* ARABIC <span style='mso-element:field-separator'></span><![endif]-->{table_num}<!--[if supportFields]><span style='mso-element:field-end'></span><![endif]-->.</span></b><span lang=ES style="font-size:12.0pt;line-height:150%"> </span><i><span lang=ES style="font-size:12.0pt;line-height:150%">{clean_title}</span></i></p>''')

    # Build the table with APA-style rules: a line above and below the
    # header row and below the last row, no vertical or inner borders.
    last_row = len(table_lines) - 1
    parts = ['<div align="center"><table class=MsoTableGrid border=1 cellspacing=0 cellpadding=0 align="center" style="border-collapse:collapse;margin-left:auto;margin-right:auto;mso-table-style-name:\'Plain Table 1\'">']
    for j, tline in enumerate(table_lines):
        # Rows are expected to start and end with '|'; the outer empty
        # fragments produced by split() are discarded.
        cells = [c.strip() for c in tline.split('|')[1:-1]]
        if j == 0:
            # Header row
            cell_open = '<td style="border-top:solid windowtext 1.0pt;border-bottom:solid windowtext 1.0pt;border-left:none;border-right:none;padding:5px"><p class=MsoNormal style="margin:0;text-align:center"><b><span lang=ES>'
            cell_close = '</span></b></p></td>'
        elif j == last_row:
            # Last row
            cell_open = '<td style="border-top:none;border-bottom:solid windowtext 1.0pt;border-left:none;border-right:none;padding:5px"><p class=MsoNormal style="margin:0;text-align:center"><span lang=ES>'
            cell_close = '</span></p></td>'
        else:
            # Middle rows
            cell_open = '<td style="border:none;padding:5px"><p class=MsoNormal style="margin:0;text-align:center"><span lang=ES>'
            cell_close = '</span></p></td>'
        parts.append('<tr>')
        parts.extend(f'{cell_open}{md_to_html_para(cell)}{cell_close}' for cell in cells)
        parts.append('</tr>')
    parts.append('</table></div>')
    html_blocks.append(''.join(parts))

    # Source line, always terminated with a period.
    source_html = md_to_html_para(table_source)
    if not table_source.endswith('.'):
        source_html += '.'
    html_blocks.append(f'<p class=Piedefoto-tabla style="margin-left:0cm"><span lang=ES>Fuente: {source_html}</span></p>')

    # Optional legend line, always terminated with a period.
    if table_leyenda:
        leyenda_html = md_to_html_para(table_leyenda)
        if not table_leyenda.endswith('.'):
            leyenda_html += '.'
        html_blocks.append(f'<p class=Piedefoto-tabla style="margin-left:0cm"><span lang=ES>Leyenda: {leyenda_html}</span></p>')

    # Spacer paragraph after the table.
    html_blocks.append('<p class=MsoNormal><span lang=ES><o:p> </o:p></span></p>')

    return html_blocks, i
|
||||
|
||||
|
||||
def handle_blockquote(lines, i):
    """Render a run of ``>`` lines as a quote or as a Nota/Note callout.

    Consecutive lines starting with ``>`` are merged into one text, joined
    by single spaces. If the merged text starts with ``**Nota:**`` or
    ``**Note:**`` it becomes a bordered callout box; otherwise an italic
    ``MsoQuote`` paragraph.

    Args:
        lines: List of markdown lines.
        i: Index of the first ``>`` line.

    Returns:
        Tuple ``(html_blocks, new_index)`` with ``new_index`` pointing at the
        first line that is not part of the quote.
    """
    # The first line is taken unconditionally; following lines only while
    # they start with '>'.
    fragments = [lines[i][1:].strip()]
    i += 1
    while i < len(lines) and lines[i].startswith('>'):
        fragments.append(lines[i][1:].strip())
        i += 1
    quote_text = ' '.join(fragments)

    # Detect a callout label; the Spanish form is tested first.
    label = None
    for candidate in ('Nota:', 'Note:'):
        if quote_text.startswith(f'**{candidate}**'):
            label = candidate
            break

    if label is None:
        # Regular blockquote
        quote_html = md_to_html_para(quote_text)
        return [f'<p class=MsoQuote><i><span lang=ES>{quote_html}</span></i></p>'], i

    # Drop the "**Label**" prefix (label plus four asterisks).
    content = quote_text[len(label) + 4:].strip()
    content_html = md_to_html_para(content)

    # UNIR callout box style
    callout = f'''<div style='mso-element:para-border-div;border-top:solid #0098CD 1.0pt;border-left:none;border-bottom:solid #0098CD 1.0pt;border-right:none;mso-border-top-alt:solid #0098CD .5pt;mso-border-bottom-alt:solid #0098CD .5pt;padding:4.0pt 0cm 4.0pt 0cm;background:#E6F4F9'>
<p class=MsoNormal style='background:#E6F4F9;border:none;padding:0cm;margin:0cm'><b><span lang=ES>{label}</span></b><span lang=ES> {content_html}</span></p>
</div>'''
    return [callout], i
|
||||
|
||||
|
||||
def handle_bullet_list(lines, i):
    """Render consecutive ``-``/``*``/``+`` items as Word list paragraphs.

    Blank lines between items are skipped, so a loosely spaced list is
    treated as one list. Each item goes through ``convert_latex_formulas``
    and then ``md_to_html_para``.

    Args:
        lines: List of markdown lines.
        i: Index of the first bullet line.

    Returns:
        Tuple ``(html_blocks, new_index)``; ``new_index`` is the first
        non-blank line that is not a bullet item (or ``len(lines)``).
    """
    total = len(lines)
    rendered = []

    while True:
        # Skip blank lines before looking for the next item.
        while i < total and not lines[i].strip():
            i += 1
        if i >= total or not re.match(r'^[\-\*\+]\s', lines[i]):
            break
        # Drop the marker and the whitespace that follows it.
        raw_item = lines[i][2:].strip()
        rendered.append(md_to_html_para(convert_latex_formulas(raw_item)))
        i += 1

    # Word expects First/Middle/Last variants for multi-item lists and the
    # plain class for a single-item list.
    count = len(rendered)
    html_blocks = []
    for pos, body in enumerate(rendered):
        if count == 1:
            suffix = ''
        elif pos == 0:
            suffix = 'CxSpFirst'
        elif pos == count - 1:
            suffix = 'CxSpLast'
        else:
            suffix = 'CxSpMiddle'
        cls = 'MsoListParagraph' + suffix
        html_blocks.append(f'<p class={cls} style="margin-left:36pt;text-indent:-18pt"><span lang=ES style="font-family:Symbol">·</span><span lang=ES style="font-size:7pt"> </span><span lang=ES>{body}</span></p>')

    return html_blocks, i
|
||||
|
||||
|
||||
def handle_numbered_list(lines, i):
    """Render consecutive ``1.``-style items as Word list paragraphs.

    Lines following an item are folded into it, joined with ``<br/>``, until
    one of these is reached: another numbered item, a heading, a code fence,
    a ``**Tabla``/``**Figura`` caption, or an unindented non-dash line once
    at least one continuation line has been collected. Items are renumbered
    1..n in output order regardless of the numbers in the source.

    Args:
        lines: List of markdown lines.
        i: Index of the first numbered item.

    Returns:
        Tuple ``(html_blocks, new_index)``; ``new_index`` is the first line
        not consumed by the list.
    """
    total = len(lines)
    block_starts = ('#', '```', '**Tabla', '**Figura')
    entries = []

    while i < total:
        # Skip blank lines before looking for the next item.
        while i < total and not lines[i].strip():
            i += 1
        if i >= total or not re.match(r'^\d+\.\s', lines[i]):
            break

        head = re.sub(r'^\d+\.\s*', '', lines[i]).strip()
        i += 1

        # Gather continuation / nested lines belonging to this item.
        extras = []
        while i < total:
            raw = lines[i]
            if re.match(r'^\d+\.\s', raw) or raw.startswith(block_starts):
                break
            text = raw.strip()
            if text:
                # An unindented, non-dash line ends the item only once some
                # continuation has been collected; directly after the item
                # it is absorbed as the first continuation line.
                if extras and not raw.startswith((' ', '\t', '-')):
                    break
                extras.append(text[2:] if text.startswith('- ') else text)
            # Blank lines inside an item are skipped without ending it.
            i += 1

        combined = '<br/>'.join([head] + extras)
        entries.append(md_to_html_para(convert_latex_formulas(combined)))

    # Word expects First/Middle/Last variants for multi-item lists and the
    # plain class for a single-item list.
    count = len(entries)
    html_blocks = []
    for num, body in enumerate(entries, start=1):
        if count == 1:
            cls = 'MsoListParagraph'
        elif num == 1:
            cls = 'MsoListParagraphCxSpFirst'
        elif num == count:
            cls = 'MsoListParagraphCxSpLast'
        else:
            cls = 'MsoListParagraphCxSpMiddle'
        html_blocks.append(f'<p class={cls} style="margin-left:36pt;text-indent:-18pt"><span lang=ES>{num}.<span style="font-size:7pt"> </span>{body}</span></p>')

    return html_blocks, i
|
||||
Reference in New Issue
Block a user