Files
MastersThesis/generate_mermaid_figures.py

114 lines
3.9 KiB
Python
Raw Normal View History

2025-12-15 23:54:41 +01:00
#!/usr/bin/env python3
"""Extract Mermaid diagrams from markdown files and convert to PNG images."""
import os
import re
import subprocess
import json
# Root of the thesis project; every other path below is derived from it.
# NOTE(review): this is an absolute path on one specific machine — confirm or
# adjust before running the script anywhere else.
BASE_DIR = '/Users/sergio/Desktop/MastersThesis'
# Directory containing the markdown chapters that are scanned for Mermaid blocks.
DOCS_DIR = os.path.join(BASE_DIR, 'docs')
# Destination directory for the rendered PNG figures and the JSON manifest.
OUTPUT_DIR = os.path.join(BASE_DIR, 'thesis_output/figures')
# Path to the `mmdc` executable inside the project's node_modules.
MMDC = os.path.join(BASE_DIR, 'node_modules/.bin/mmdc')
# Fenced ```mermaid blocks; DOTALL lets the lazy body span several lines.
_MERMAID_BLOCK_RE = re.compile(r'```mermaid\n(.*?)```', re.DOTALL)

# Title patterns, tried in order of decreasing specificity.
_TITLE_PATTERNS = (
    # Quoted value after "title:" (YAML front matter), e.g. title: "My title"
    re.compile(r'title:\s*["\']([^"\']+)["\']'),
    # Unquoted front-matter value on its own line, e.g. title: My title
    re.compile(r'^[ \t]*title:[ \t]*(\S.*?)[ \t]*$', re.MULTILINE),
    # Inline directive, e.g. "pie title My title" or "title My title"
    re.compile(r'title\s+["\']?([^"\'"\n]+)["\']?'),
)


def _extract_title(mermaid_code, fallback):
    """Return the title declared inside *mermaid_code*, or *fallback* if none."""
    for pattern in _TITLE_PATTERNS:
        match = pattern.search(mermaid_code)
        if match:
            title = match.group(1).strip()
            # A whitespace-only capture is not a usable title; keep looking.
            if title:
                return title
    return fallback


def extract_mermaid_diagrams():
    """Extract all mermaid diagrams from the known markdown files.

    Each file listed below is looked up inside ``DOCS_DIR``; files that do
    not exist are skipped silently. Diagrams are numbered consecutively
    across all files, starting at 1.

    Returns:
        A list of dicts, one per diagram, with the keys ``'source'``
        (markdown file name), ``'code'`` (stripped Mermaid source),
        ``'title'`` (declared title, or ``"Diagrama N"`` when the diagram
        declares none) and ``'index'`` (1-based running number).
    """
    diagrams = []
    md_files = [
        '02_contexto_estado_arte.md',
        '03_objetivos_metodologia.md',
        '04_desarrollo_especifico.md',
        '07_anexo_a.md',
    ]
    for md_file in md_files:
        filepath = os.path.join(DOCS_DIR, md_file)
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()
        except FileNotFoundError:
            # Chapters are optional: a missing file simply contributes nothing.
            continue

        for mermaid_code in _MERMAID_BLOCK_RE.findall(content):
            index = len(diagrams) + 1
            diagrams.append({
                'source': md_file,
                'code': mermaid_code.strip(),
                'title': _extract_title(mermaid_code, f"Diagrama {index}"),
                'index': index,
            })
    return diagrams
def convert_to_png(diagrams):
    """Convert mermaid diagrams to PNG using mmdc.

    For every diagram the Mermaid source is written to a temporary
    ``temp_<index>.mmd`` file inside ``OUTPUT_DIR`` and rendered to
    ``figura_<index>.png`` by the ``MMDC`` executable. A failure on one
    diagram is reported on stdout and does not stop the remaining ones.

    Args:
        diagrams: Iterable of dicts carrying at least the keys ``'index'``,
            ``'code'`` and ``'title'``.

    Returns:
        A list of dicts (``'file'``, ``'title'``, ``'index'``) describing the
        figures that were rendered successfully, in input order.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    generated = []
    for diagram in diagrams:
        index = diagram['index']
        png_name = f'figura_{index}.png'
        temp_file = os.path.join(OUTPUT_DIR, f'temp_{index}.mmd')
        output_file = os.path.join(OUTPUT_DIR, png_name)
        try:
            # Write mermaid code to temp file
            with open(temp_file, 'w', encoding='utf-8') as f:
                f.write(diagram['code'])

            # Convert using mmdc with moderate size for page fit
            try:
                result = subprocess.run(
                    [MMDC, '-i', temp_file, '-o', output_file,
                     '-b', 'white', '-w', '800', '-s', '1.5'],
                    capture_output=True,
                    text=True,
                    timeout=60
                )
                # The exit status must be checked too: a PNG left over from a
                # previous run would otherwise make a failed render look fine.
                if result.returncode == 0 and os.path.exists(output_file):
                    print(f"✓ Generated: {png_name} - {diagram['title']}")
                    generated.append({
                        'file': png_name,
                        'title': diagram['title'],
                        'index': index
                    })
                else:
                    print(f"✗ Failed: {png_name} - {result.stderr}")
            except subprocess.TimeoutExpired:
                print(f"✗ Timeout: {png_name}")
            except Exception as e:
                # Deliberate per-diagram boundary: report and move on.
                print(f"✗ Error: {png_name} - {e}")
        finally:
            # Clean up temp file, even if writing or rendering was interrupted
            if os.path.exists(temp_file):
                os.remove(temp_file)
    return generated
def main():
    """Run the pipeline: extract diagrams, render them, write the manifest.

    Progress is reported on stdout. The manifest is a JSON list of the
    successfully rendered figures, stored as ``figures_manifest.json``
    inside ``OUTPUT_DIR``.
    """
    print("Extracting Mermaid diagrams from markdown files...")
    found = extract_mermaid_diagrams()
    print(f"Found {len(found)} diagrams\n")

    print("Converting to PNG images...")
    figures = convert_to_png(found)
    print(f"\n✓ Generated {len(figures)} figures in {OUTPUT_DIR}")

    # The manifest is consumed later by apply_content.py.
    manifest_path = os.path.join(OUTPUT_DIR, 'figures_manifest.json')
    manifest_text = json.dumps(figures, indent=2, ensure_ascii=False)
    with open(manifest_path, 'w', encoding='utf-8') as handle:
        handle.write(manifest_text)
    print(f"✓ Saved manifest to {manifest_path}")


if __name__ == '__main__':
    main()