#!/usr/bin/env python3 """Content block handlers for markdown to HTML conversion.""" import os import re from PIL import Image from markdown_utils import ( md_to_html_para, convert_latex_formulas, is_source_line, extract_source_from_line, is_leyenda_line, extract_leyenda_from_line, ) # Base directory for resolving paths BASE_DIR = os.path.dirname(os.path.abspath(__file__)) def handle_mermaid_diagram(lines, i, counters, is_anexo): """Handle mermaid diagram block, converting to figure with image. Args: lines: List of markdown lines i: Current line index (pointing to ```mermaid) counters: Dict with 'table', 'figure', 'anexo_table', 'anexo_figure', 'global_figure' is_anexo: Boolean indicating if processing Anexo section Returns: Tuple of (html_blocks, new_index) """ html_blocks = [] # Always increment global index for sequential filenames counters['global_figure'] += 1 # Use Anexo-specific counter with "A" prefix for display, or global counter if is_anexo: counters['anexo_figure'] += 1 fig_num = f"A{counters['anexo_figure']}" else: counters['figure'] += 1 fig_num = str(counters['figure']) mermaid_lines = [] i += 1 while i < len(lines) and not lines[i].strip() == '```': mermaid_lines.append(lines[i]) i += 1 # Try to extract title from mermaid content (YAML format) mermaid_content = '\n'.join(mermaid_lines) # Match title with quotes: title: "Something" or title: 'Something' title_match = re.search(r'title:\s*["\']([^"\']+)["\']', mermaid_content) if not title_match: # Match title without quotes: title: Something title_match = re.search(r'title:\s*([^"\'\n]+)', mermaid_content) if title_match: fig_title = title_match.group(1).strip() else: fig_title = f"Diagrama {fig_num}" # Use global sequential index for filename fig_file = f'figures/figura_{counters["global_figure"]}.png' fig_path = os.path.join(BASE_DIR, 'thesis_output', fig_file) # Create figure with MsoCaption class and proper Word SEQ field bookmark_id = f"_Ref_Fig{fig_num}" if is_anexo: tc_field = f'''''' html_blocks.append(f'''{tc_field}
Figura {fig_num}. {fig_title}
''') else: html_blocks.append(f'''Figura {fig_num}. {fig_title}
''') if os.path.exists(fig_path): # Read actual image dimensions and scale to fit page width img = Image.open(fig_path) orig_w, orig_h = img.size # Scale to fit max width of 566px (15cm at 96dpi) while preserving aspect ratio max_width = 566 if orig_w > max_width: scale = max_width / orig_w new_w = max_width new_h = int(orig_h * scale) else: new_w, new_h = orig_w, orig_h # Convert to pt (1px at 96dpi = 0.75pt) w_pt = new_w * 0.75 h_pt = new_h * 0.75 html_blocks.append(f'''[Insertar diagrama Mermaid aquí]
''') # Check if next non-empty line has custom Fuente custom_source = None fig_leyenda = None lookahead = i + 1 while lookahead < len(lines) and not lines[lookahead].strip(): lookahead += 1 if lookahead < len(lines): next_line = lines[lookahead].strip() if is_source_line(next_line): custom_source = extract_source_from_line(next_line) if custom_source and not custom_source.endswith('.'): custom_source += '.' i = lookahead # Check for Leyenda after source leyenda_idx = i + 1 while leyenda_idx < len(lines) and not lines[leyenda_idx].strip(): leyenda_idx += 1 if leyenda_idx < len(lines) and is_leyenda_line(lines[leyenda_idx]): fig_leyenda = extract_leyenda_from_line(lines[leyenda_idx]) i = leyenda_idx if custom_source: source_html = md_to_html_para(custom_source) html_blocks.append(f'''Fuente: {source_html}
''') else: html_blocks.append(f'''Fuente: Elaboración propia.
''') if fig_leyenda: leyenda_html = md_to_html_para(fig_leyenda) if not fig_leyenda.endswith('.'): leyenda_html += '.' html_blocks.append(f'''Leyenda: {leyenda_html}
''') html_blocks.append('{code}
Tabla {table_num}. {clean_title}
''') else: html_blocks.append(f'''Tabla {table_num}. {clean_title}
''') # Build table HTML with APA style table_html = '{md_to_html_para(cell)} | '
elif j == len(table_lines) - 1:
# Last row
table_html += f'{md_to_html_para(cell)} | '
else:
# Middle rows
table_html += f'{md_to_html_para(cell)} | '
table_html += '
Fuente: {source_html}
') # Add leyenda if present if table_leyenda: leyenda_html = md_to_html_para(table_leyenda) if not table_leyenda.endswith('.'): leyenda_html += '.' html_blocks.append(f'Leyenda: {leyenda_html}
') html_blocks.append('{label} {md_to_html_para(content)}
{md_to_html_para(quote_text)}
') return html_blocks, i def handle_bullet_list(lines, i): """Handle bullet list (-, *, +). Args: lines: List of markdown lines i: Current line index (pointing to first bullet) Returns: Tuple of (html_blocks, new_index) """ html_blocks = [] bullet_items = [] while i < len(lines): # Skip blank lines while i < len(lines) and not lines[i].strip(): i += 1 # Check if next non-blank line is a bullet item if i < len(lines) and re.match(r'^[\-\*\+]\s', lines[i]): item_text = lines[i][2:].strip() item_text = convert_latex_formulas(item_text) bullet_items.append(md_to_html_para(item_text)) i += 1 else: break # Output with proper First/Middle/Last classes for idx, item in enumerate(bullet_items): if len(bullet_items) == 1: cls = 'MsoListParagraph' elif idx == 0: cls = 'MsoListParagraphCxSpFirst' elif idx == len(bullet_items) - 1: cls = 'MsoListParagraphCxSpLast' else: cls = 'MsoListParagraphCxSpMiddle' html_blocks.append(f'· {item}
') return html_blocks, i def handle_numbered_list(lines, i): """Handle numbered list (1., 2., etc) with nested bullet sub-lists. Args: lines: List of markdown lines i: Current line index (pointing to first numbered item) Returns: Tuple of (html_blocks, new_index) """ html_blocks = [] # Each item is a tuple: (main_text, nested_bullets) # where nested_bullets is a list of bullet point strings numbered_items = [] while i < len(lines): # Skip blank lines while i < len(lines) and not lines[i].strip(): i += 1 # Check if next non-blank line is a numbered item if i < len(lines) and re.match(r'^\d+\.\s', lines[i]): item_text = re.sub(r'^\d+\.\s*', '', lines[i]).strip() i += 1 # Collect any nested/indented content (bullet points) nested_bullets = [] while i < len(lines): current = lines[i] # Stop conditions if re.match(r'^\d+\.\s', current): break if current.startswith('#'): break if current.startswith('```'): break if current.startswith('**Tabla') or current.startswith('**Figura'): break # Check for non-indented, non-bullet content (end of nested) stripped = current.strip() if stripped and not current.startswith(' ') and not current.startswith('\t') and not stripped.startswith('-'): break # Collect indented bullet points if stripped.startswith('- '): bullet_text = stripped[2:].strip() nested_bullets.append(bullet_text) i += 1 item_text = convert_latex_formulas(item_text) numbered_items.append((md_to_html_para(item_text), nested_bullets)) else: break # Output numbered items with nested bullet lists for idx, (item_text, nested_bullets) in enumerate(numbered_items): num = idx + 1 if len(numbered_items) == 1: cls = 'MsoListParagraph' elif idx == 0: cls = 'MsoListParagraphCxSpFirst' elif idx == len(numbered_items) - 1 and not nested_bullets: cls = 'MsoListParagraphCxSpLast' else: cls = 'MsoListParagraphCxSpMiddle' # Main numbered item html_blocks.append(f'{num}. {item_text}
') # Nested bullet sub-list (indented further) if nested_bullets: for bullet_idx, bullet_text in enumerate(nested_bullets): bullet_text = convert_latex_formulas(bullet_text) bullet_html = md_to_html_para(bullet_text) # Determine class for sub-list items if bullet_idx == 0: sub_cls = 'MsoListParagraphCxSpFirst' elif bullet_idx == len(nested_bullets) - 1: # If this is the last bullet of the last numbered item, use Last if idx == len(numbered_items) - 1: sub_cls = 'MsoListParagraphCxSpLast' else: sub_cls = 'MsoListParagraphCxSpLast' else: sub_cls = 'MsoListParagraphCxSpMiddle' # Nested bullets at 54pt margin (36pt + 18pt) html_blocks.append(f'· {bullet_html}
') return html_blocks, i