#!/usr/bin/env python3
"""
Program to convert a single markdown file to multiple markdown
files used by [mdbook](https://rust-lang.github.io/mdBook/) program.
Test this program simply with uv:
: uv run https://forge.sekun.eu/utils/doc/tip/prog/mdsplit.py
"""
# /// script
# dependencies = [
# "python-slugify"
# ]
# ///
import argparse
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from slugify import slugify
# Command-line interface: the markdown file to split, the mdbook root
# directory, and the deepest heading level that starts a new chapter file.
parser = argparse.ArgumentParser(
    description="Convert a single markdown file to mdbook"
)
parser.add_argument("mdfile", help="The markdown file", type=Path)
parser.add_argument("mdbook", help="mdbook root directory", type=Path)
parser.add_argument(
    "-d",
    "--max-depth",
    help="Max depth for headings",
    type=int,
    default=1,
    dest="depth",
)
args = parser.parse_args()

# Invalid arguments are reported through argparse so the user gets the usage
# line and a short message instead of a Python traceback.
if not args.mdfile.is_file():
    parser.error("`mdfile` must be an existing markdown file")
if not args.mdbook.is_dir():
    parser.error(
        "`mdbook` must be a root mdbook directory initialized with `mdbook init`"
    )
if args.depth < 1:
    parser.error("`depth` must be >= 1")
@dataclass
class ExtraTitle:
    """Heading metadata attached to a line classified as a title."""

    # Heading level (1 for `#`, 2 for `##`, ...).
    level: int
    # Text following the heading marker; consumers strip it before use.
    title: str
class DType(Enum):
    """Classification of a single line of the markdown input."""

    CODE = 1  # a fence line or a line inside a fenced code block
    BODY = 2  # any other non-heading line
    TITLE = 3  # a heading line
@dataclass
class Content:
    """One line of the input file together with its classification."""

    # Raw text of the line, written unchanged to the output files.
    content: str
    # Kind of line (code, body or title).
    dtype: DType
    # Heading details; only set for lines classified as titles.
    extra: ExtraTitle | None = None
@dataclass
class Config:
    """Mutable parser state shared by the line handlers."""

    # True while the parser is between an opening and a closing ``` fence.
    inside_code: bool
# Chapters go to the `src` directory of the mdbook root given on the command
# line (the layout created by `mdbook init`), rather than to a path that only
# works when the root happens to be `./book`.
OUTPUT_DIR: Path = args.mdbook / "src"
# Headings deeper than this level stay inside their parent's chapter file.
MAX_LEVEL: int = args.depth

# Read the whole input up front; lines keep their trailing newline.
with open(args.mdfile, "r") as f:
    print(args.mdfile)
    mdlines: list[str] = f.readlines()

# Classified lines, filled by the parsing handlers, and the parser state.
parsed_lines: list[Content] = []
config: Config = Config(inside_code=False)
def if_parse_begin_code(
    line: str,
    config: Config,
    parsed_lines: list[Content],
):
    """Handle *line* if it opens a fenced code block.

    A line starting with three backticks switches the parser into code mode
    and is recorded as a CODE line.

    Returns True when the line was consumed, False otherwise.
    """
    opens_fence = line.startswith("```")
    if not opens_fence:
        return False

    config.inside_code = True
    fence = Content(content=line, dtype=DType.CODE)
    parsed_lines.append(fence)
    return True
def if_parse_title(line: str, config: Config, parsed_lines: list[Content]):
    """Handle *line* if it is an ATX heading (`# Title`).

    A heading is one to six `#` characters followed by whitespace or the end
    of the line. Lines such as `#hashtag` are therefore not headings and are
    left for the next handler.

    Args:
        line: Raw input line, including its trailing newline if any.
        config: Parser state; unused here, accepted so that all handlers
            share the same signature.
        parsed_lines: List that receives the TITLE entry.

    Returns:
        True when the line was recorded as a title, False otherwise.
    """
    # Count only the leading '#' run, so the level does not depend on what
    # follows it (newline, tab, missing space, ...).
    level = len(line) - len(line.lstrip("#"))
    rest = line[level:]
    if not 1 <= level <= 6 or (rest and not rest[0].isspace()):
        return False

    parsed_lines.append(
        Content(
            content=line,
            dtype=DType.TITLE,
            # Drop the single separator after the marker; any remaining
            # whitespace (e.g. the newline) is stripped by the consumer.
            extra=ExtraTitle(level=level, title=rest[1:]),
        )
    )
    return True
def parse_code(line, config, parse_lines):
    """Record *line* as a line inside a fenced code block.

    The entry is appended to the list passed as `parse_lines` (not to a
    module-level global), so the function works with any target list.
    `config` is unused; it is accepted so all handlers share one signature.

    Always returns True: the line is consumed unconditionally.
    """
    parse_lines.append(Content(content=line, dtype=DType.CODE))
    return True
def parse_content(line, config, parse_lines):
    """Record *line* as ordinary body text.

    The entry is appended to the list passed as `parse_lines` (not to a
    module-level global), so the function works with any target list.
    `config` is unused; it is accepted so all handlers share one signature.

    Always returns True: the line is consumed unconditionally.
    """
    parse_lines.append(Content(content=line, dtype=DType.BODY))
    return True
def if_parse_end_code(line, config, parse_lines):
    """Handle *line* if it closes the current fenced code block.

    A line starting with three backticks is recorded as a CODE line in the
    list passed as `parse_lines` (not in a module-level global) and the
    parser leaves code mode.

    Returns True when the line was consumed, False otherwise.
    """
    if not line.startswith("```"):
        return False
    parse_lines.append(Content(content=line, dtype=DType.CODE))
    config.inside_code = False
    return True
def do_parse_nocode(line, config, parse_lines):
    """Classify a line found outside of any fenced code block.

    The handlers are tried in order (code fence, heading, plain body) and
    the first one that consumes the line stops the chain.

    Returns True if some handler consumed the line, False otherwise.
    """
    handlers = (if_parse_begin_code, if_parse_title, parse_content)
    for handler in handlers:
        if handler(line, config, parse_lines):
            return True
    return False
# Classify every input line. Inside a fenced block a line is either the
# closing fence or code; outside, the regular handler chain decides.
for line in mdlines:
    if config.inside_code:
        if not if_parse_end_code(line, config, parsed_lines):
            parse_code(line, config, parsed_lines)
    else:
        do_parse_nocode(line, config, parsed_lines)
# Split the parsed lines into chapter files. Lines that come before the first
# qualifying heading go to a prelude file.
# NOTE(review): the prelude file is never added to `summaries`, so it is not
# listed in SUMMARY.md - confirm whether that is intended.
filepath_suffix = "_prelude.md"
filepath = OUTPUT_DIR / ("0" + filepath_suffix)
# Running heading counter per level; drives the numeric file name prefix.
num_titles = defaultdict(int)
summaries = []
# Output path -> lines for that file. Each file is written once, in "w" mode,
# so re-running the script replaces chapters instead of appending duplicates.
file_lines = defaultdict(list)
for parsed_line in parsed_lines:
    extra = parsed_line.extra
    if (
        parsed_line.dtype == DType.TITLE
        and extra is not None
        and extra.level <= MAX_LEVEL
    ):
        level = extra.level
        num_titles[level] += 1
        title = extra.title.strip()
        filepath_suffix = f"_{slugify(title)}.md"
        # A new heading restarts the numbering of every deeper level.
        for deeper in [k for k in num_titles if k > level]:
            del num_titles[deeper]
        # e.g. "02.01" for the first level-2 heading under the second level-1.
        filepath_prefix = ".".join(
            f"{count:02d}" for _, count in sorted(num_titles.items())
        )
        basename = filepath_prefix + filepath_suffix
        filepath = OUTPUT_DIR / basename
        summaries.append((level, title, basename))
    # The heading line itself is the first line of its own chapter file.
    file_lines[filepath].append(parsed_line.content)

for path, lines in file_lines.items():
    with open(path, "w") as f:
        f.writelines(lines)
# Write the mdbook table of contents. Nested chapters are indented two spaces
# per level so that each item lines up with its parent's content; a one-space
# step is parsed by Markdown as a sibling item and flattens the hierarchy.
with open(OUTPUT_DIR / "SUMMARY.md", "w") as f:
    for level, title, basename in summaries:
        indent = "  " * (level - 1)
        print(f"{indent}- [{title}](./{basename})", file=f)