Files
MastersThesis/generate_mermaid_figures.py
sergio f5d1032e41
Some checks failed
build_docker / build_cpu (pull_request) Successful in 5m16s
build_docker / build_easyocr_gpu (pull_request) Has been cancelled
build_docker / build_doctr (pull_request) Has been cancelled
build_docker / build_doctr_gpu (pull_request) Has been cancelled
build_docker / build_raytune (pull_request) Has been cancelled
build_docker / build_gpu (pull_request) Has been cancelled
build_docker / build_easyocr (pull_request) Has been cancelled
build_docker / essential (pull_request) Successful in 1s
headers
2026-01-20 11:49:51 +01:00

116 lines
4.0 KiB
Python

#!/usr/bin/env python3
"""Extract Mermaid diagrams from markdown files and convert to PNG images."""
import os
import re
import subprocess
import json
# Absolute directory that contains this script; every other path hangs off it.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))

# Markdown sources that are scanned for Mermaid blocks.
DOCS_DIR = os.path.join(BASE_DIR, 'docs')

# Destination for the rendered PNG figures and the figures manifest.
OUTPUT_DIR = os.path.join(BASE_DIR, 'thesis_output/figures')

# Mermaid CLI executable, resolved from the local node_modules install.
MMDC = os.path.join(BASE_DIR, 'node_modules/.bin/mmdc')
# Markdown chapters scanned when the caller does not supply its own list.
_DEFAULT_MD_FILES = (
    '02_contexto_estado_arte.md',
    '03_objetivos_metodologia.md',
    '04_desarrollo_especifico.md',
    '07_anexo_a.md',
)

# Body of a ```mermaid fenced block; non-greedy so every fence is its own match.
_MERMAID_BLOCK_RE = re.compile(r'```mermaid\n(.*?)```', re.DOTALL)

# A ``title: value`` entry. The value is either wrapped in a matching pair of
# quotes (group 2) or runs to the end of the line (group 3). ``\b`` keeps
# ``subtitle:`` from matching and ``[ \t]*`` keeps the value on the same line.
_MERMAID_TITLE_RE = re.compile(r'\btitle:[ \t]*(?:(["\'])(.*?)\1|([^\n]+))')


def _extract_title(mermaid_code, fallback):
    """Return the title declared in *mermaid_code*, or *fallback* if none."""
    match = _MERMAID_TITLE_RE.search(mermaid_code)
    if match is None:
        return fallback
    quoted, bare = match.group(2), match.group(3)
    title = (quoted if quoted is not None else bare).strip()
    # An empty declaration (``title: ""``) is treated like a missing one.
    return title or fallback


def extract_mermaid_diagrams(md_files=None, docs_dir=None):
    """Extract all mermaid diagrams from markdown files.

    Args:
        md_files: Iterable of markdown file names, relative to *docs_dir*.
            Defaults to the built-in list of thesis chapters.
        docs_dir: Directory containing the markdown files. Defaults to
            ``DOCS_DIR``.

    Returns:
        A list of dicts, one per diagram in file order, with the keys
        ``'source'`` (markdown file name), ``'code'`` (stripped Mermaid
        source), ``'title'`` (declared title, or ``"Diagrama <index>"`` when
        none is declared) and ``'index'`` (1-based, running across all files).
        Files that do not exist are skipped silently.
    """
    if md_files is None:
        md_files = _DEFAULT_MD_FILES
    if docs_dir is None:
        docs_dir = DOCS_DIR

    diagrams = []
    for md_file in md_files:
        filepath = os.path.join(docs_dir, md_file)
        if not os.path.isfile(filepath):
            # Chapters are optional: a missing file is not an error.
            continue
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        for mermaid_code in _MERMAID_BLOCK_RE.findall(content):
            index = len(diagrams) + 1
            diagrams.append({
                'source': md_file,
                'code': mermaid_code.strip(),
                'title': _extract_title(mermaid_code, f"Diagrama {index}"),
                'index': index,
            })
    return diagrams
def _run_mmdc(mmdc, source_file, output_file):
    """Run mmdc once; return None on success or a ``(label, detail)`` failure."""
    try:
        # High resolution (width 1600, scale 3) on a white background.
        result = subprocess.run(
            [mmdc, '-i', source_file, '-o', output_file,
             '-b', 'white', '-w', '1600', '-s', '3'],
            capture_output=True,
            text=True,
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        return ('Timeout', None)
    except (OSError, ValueError) as e:
        # OSError: executable missing or not runnable.
        # ValueError: includes undecodable process output (text=True).
        return ('Error', e)

    # The exit status must be checked as well as the file: a PNG left over
    # from an earlier run would otherwise make a failed render look fine.
    if result.returncode == 0 and os.path.exists(output_file):
        return None
    return ('Failed', result.stderr)


def convert_to_png(diagrams, output_dir=None, mmdc=None):
    """Convert mermaid diagrams to PNG using mmdc.

    Args:
        diagrams: Iterable of dicts carrying at least the keys ``'code'``,
            ``'title'`` and ``'index'``.
        output_dir: Directory that receives ``figura_<index>.png``; created
            if needed. Defaults to ``OUTPUT_DIR``.
        mmdc: Path of the mmdc executable. Defaults to ``MMDC``.

    Returns:
        A list of dicts with the keys ``'file'``, ``'title'`` and ``'index'``,
        one per diagram that was rendered successfully. Failures are reported
        on stdout and omitted from the result.
    """
    if output_dir is None:
        output_dir = OUTPUT_DIR
    if mmdc is None:
        mmdc = MMDC

    os.makedirs(output_dir, exist_ok=True)
    generated = []
    for diagram in diagrams:
        index = diagram['index']
        png_name = f'figura_{index}.png'
        temp_file = os.path.join(output_dir, f'temp_{index}.mmd')
        output_file = os.path.join(output_dir, png_name)

        try:
            # mmdc reads its input from a file, so stage the code on disk.
            with open(temp_file, 'w', encoding='utf-8') as f:
                f.write(diagram['code'])
            failure = _run_mmdc(mmdc, temp_file, output_file)
        finally:
            # Remove the staging file even if the run is interrupted.
            if os.path.exists(temp_file):
                os.remove(temp_file)

        if failure is None:
            print(f"✓ Generated: {png_name} - {diagram['title']}")
            generated.append({
                'file': png_name,
                'title': diagram['title'],
                'index': index,
            })
        else:
            label, detail = failure
            suffix = '' if detail is None else f' - {detail}'
            print(f"✗ {label}: {png_name}{suffix}")
    return generated
def main():
    """Run the pipeline: extract the diagrams, render them, write the manifest."""
    print("Extracting Mermaid diagrams from markdown files...")
    found = extract_mermaid_diagrams()
    print(f"Found {len(found)} diagrams\n")

    print("Converting to PNG images...")
    figures = convert_to_png(found)
    print(f"\n✓ Generated {len(figures)} figures in {OUTPUT_DIR}")

    # The manifest lists the rendered figures for apply_content.py to use.
    manifest_path = os.path.join(OUTPUT_DIR, 'figures_manifest.json')
    with open(manifest_path, 'w', encoding='utf-8') as manifest:
        manifest.write(json.dumps(figures, indent=2, ensure_ascii=False))
    print(f"✓ Saved manifest to {manifest_path}")
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()