#!/usr/bin/env python3
"""Extract Mermaid diagrams from markdown files and convert to PNG images."""

import json
import os
import re
import subprocess
from typing import Any, Dict, List, Optional, Sequence

BASE_DIR = '/Users/sergio/Desktop/MastersThesis'
DOCS_DIR = os.path.join(BASE_DIR, 'docs')
OUTPUT_DIR = os.path.join(BASE_DIR, 'thesis_output/figures')
MMDC = os.path.join(BASE_DIR, 'node_modules/.bin/mmdc')

# Markdown files scanned for diagrams. Figures are numbered in this order,
# so reordering the tuple renumbers the generated figures.
MD_FILES = (
    '02_contexto_estado_arte.md',
    '03_objetivos_metodologia.md',
    '04_desarrollo_especifico.md',
    '07_anexo_a.md',
)

# Compiled once at import time instead of on every loop iteration.
_MERMAID_BLOCK_RE = re.compile(r'```mermaid\n(.*?)```', re.DOTALL)
_QUOTED_TITLE_RE = re.compile(r'title:\s*["\']([^"\']+)["\']')
_INLINE_TITLE_RE = re.compile(r'title\s+["\']?([^"\'"\n]+)["\']?')


def _find_title(mermaid_code: str) -> Optional[str]:
    """Return the title declared inside *mermaid_code*, or None if absent.

    A quoted ``title: "..."`` entry is tried first, then an inline
    ``title ...`` statement.
    """
    match = (_QUOTED_TITLE_RE.search(mermaid_code)
             or _INLINE_TITLE_RE.search(mermaid_code))
    return match.group(1).strip() if match else None


def extract_mermaid_diagrams(
    docs_dir: Optional[str] = None,
    md_files: Optional[Sequence[str]] = None,
) -> List[Dict[str, Any]]:
    """Extract all mermaid diagrams from markdown files.

    Args:
        docs_dir: Directory containing the markdown files. Defaults to
            ``DOCS_DIR``.
        md_files: File names (relative to *docs_dir*) to scan, in order.
            Defaults to ``MD_FILES``.

    Returns:
        One dict per fenced ``mermaid`` block, in document order, with keys
        ``source`` (file name), ``code`` (stripped diagram text), ``title``
        and ``index`` (1-based, counted across all files). Diagrams without
        a recognisable title are named ``"Diagrama <index>"``.
    """
    docs_dir = DOCS_DIR if docs_dir is None else docs_dir
    md_files = MD_FILES if md_files is None else md_files

    diagrams: List[Dict[str, Any]] = []
    for md_file in md_files:
        filepath = os.path.join(docs_dir, md_file)
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()
        except FileNotFoundError:
            # Missing chapters are skipped, not treated as errors.
            continue

        for mermaid_code in _MERMAID_BLOCK_RE.findall(content):
            index = len(diagrams) + 1
            title = _find_title(mermaid_code) or f"Diagrama {index}"
            diagrams.append({
                'source': md_file,
                'code': mermaid_code.strip(),
                'title': title,
                'index': index,
            })

    return diagrams


def convert_to_png(
    diagrams: Sequence[Dict[str, Any]],
    output_dir: Optional[str] = None,
    mmdc: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """Convert mermaid diagrams to PNG using mmdc.

    Each diagram is written to a temporary ``temp_<index>.mmd`` file inside
    the output directory, rendered to ``figura_<index>.png`` and the
    temporary file is removed again. Failures are reported on stdout and do
    not stop the remaining diagrams from being processed.

    Args:
        diagrams: Dicts as produced by ``extract_mermaid_diagrams``; the
            keys ``index``, ``code`` and ``title`` are read.
        output_dir: Directory for the PNG files (created if needed).
            Defaults to ``OUTPUT_DIR``.
        mmdc: Path of the ``mmdc`` executable. Defaults to ``MMDC``.

    Returns:
        One dict (``file``, ``title``, ``index``) per successfully rendered
        diagram.
    """
    output_dir = OUTPUT_DIR if output_dir is None else output_dir
    mmdc = MMDC if mmdc is None else mmdc

    os.makedirs(output_dir, exist_ok=True)

    generated: List[Dict[str, Any]] = []
    for diagram in diagrams:
        index = diagram['index']
        png_name = f'figura_{index}.png'
        temp_file = os.path.join(output_dir, f'temp_{index}.mmd')
        output_file = os.path.join(output_dir, png_name)

        try:
            with open(temp_file, 'w', encoding='utf-8') as f:
                f.write(diagram['code'])

            # Convert using mmdc with moderate size for page fit
            result = subprocess.run(
                [mmdc, '-i', temp_file, '-o', output_file,
                 '-b', 'white', '-w', '800', '-s', '1.5'],
                capture_output=True,
                text=True,
                timeout=60,
            )

            # The exit status is checked as well as the file: a PNG left
            # over from an earlier run must not make a failed render look
            # successful.
            if result.returncode == 0 and os.path.exists(output_file):
                print(f"✓ Generated: {png_name} - {diagram['title']}")
                generated.append({
                    'file': png_name,
                    'title': diagram['title'],
                    'index': index,
                })
            else:
                print(f"✗ Failed: {png_name} - {result.stderr}")
        except subprocess.TimeoutExpired:
            print(f"✗ Timeout: {png_name}")
        except Exception as e:
            # Best effort per diagram: report and continue with the next.
            print(f"✗ Error: {png_name} - {e}")
        finally:
            # Clean up temp file even when the run is interrupted.
            if os.path.exists(temp_file):
                os.remove(temp_file)

    return generated


def main() -> None:
    """Extract every diagram, render the PNGs and write the JSON manifest."""
    print("Extracting Mermaid diagrams from markdown files...")
    diagrams = extract_mermaid_diagrams()
    print(f"Found {len(diagrams)} diagrams\n")

    print("Converting to PNG images...")
    generated = convert_to_png(diagrams)

    print(f"\n✓ Generated {len(generated)} figures in {OUTPUT_DIR}")

    # Save manifest for apply_content.py to use
    manifest_file = os.path.join(OUTPUT_DIR, 'figures_manifest.json')
    with open(manifest_file, 'w', encoding='utf-8') as f:
        json.dump(generated, f, indent=2, ensure_ascii=False)
    print(f"✓ Saved manifest to {manifest_file}")


if __name__ == '__main__':
    main()