refactor nerd font codegen script (#7819)

Mainly added type annotations, cleaned up leftover AI-generated code, and switched to more idiomatic Python constructs. This requires Python 3.12 to run, because it uses `type` alias statements (I can lower the required version if need be).
2025-07-14 15:56:13 +03:00 · 2025-07-05 21:36:04 -07:00
parent 82cad3cf33 2fca0477bc
commit 753f3ea72b
2 changed files with 113 additions and 119 deletions
--- a/src/font/nerd_font_attributes.zig
+++ b/src/font/nerd_font_attributes.zig
@ -1,4 +1,4 @@
-//! This is a generate file, produced by nerd_font_codegen.py
+//! This is a generated file, produced by nerd_font_codegen.py
 //! DO NOT EDIT BY HAND!
 //!
 //! This file provides info extracted from the nerd fonts patcher script,
--- a/src/font/nerd_font_codegen.py
+++ b/src/font/nerd_font_codegen.py
@ -1,102 +1,122 @@
 """
-This file is mostly vibe coded because I don't like Python. It extracts the
+This file extracts the patch sets from the nerd fonts font patcher file in order to
-patch sets from the nerd fonts font patcher file in order to extract scaling
+extract scaling rules and attributes for different codepoint ranges which it then
-rules and attributes for different codepoint ranges which it then codegens
+codegens into a Zig file with a function that switches over codepoints and returns the
-in to a Zig file with a function that switches over codepoints and returns
+attributes and scaling rules.
 the attributes and scaling rules.
-This does include an `eval` call! This is spooky, but we trust
+This does include an `eval` call! This is spooky, but we trust the nerd fonts code to
-the nerd fonts code to be safe and not malicious or anything.
+be safe and not malicious or anything.
 """
 import ast
 import math
-from pathlib import Path
+import sys
 from collections import defaultdict
 from contextlib import suppress
 from types import SimpleNamespace
 from typing import Literal, TypedDict, cast
 type PatchSetAttributes = dict[Literal["default"] | int, PatchSetAttributeEntry]
 type AttributeHash = tuple[str | None, str | None, str, float, float, float]
 type ResolvedSymbol = PatchSetAttributes | PatchSetScaleRules | int | None
 class PatchSetScaleRules(TypedDict):
    ShiftMode: str
    ScaleGroups: list[list[int] | range]
 class PatchSetAttributeEntry(TypedDict):
    align: str
    valign: str
    stretch: str
    params: dict[str, float | bool]
 class PatchSet(TypedDict):
    SymStart: int
    SymEnd: int
    SrcStart: int | None
    ScaleRules: PatchSetScaleRules | None
    Attributes: PatchSetAttributes
 class PatchSetExtractor(ast.NodeVisitor):
-    def __init__(self):
+    def __init__(self) -> None:
-        self.symbol_table = {}
+        self.symbol_table: dict[str, ast.expr] = {}
-        self.patch_set_values = []
+        self.patch_set_values: list[PatchSet] = []
-    def visit_ClassDef(self, node):
+    def visit_ClassDef(self, node: ast.ClassDef) -> None:
-        if node.name == "font_patcher":
+        if node.name != "font_patcher":
            return
        for item in node.body:
            if isinstance(item, ast.FunctionDef) and item.name == "setup_patch_set":
                self.visit_setup_patch_set(item)
-    def visit_setup_patch_set(self, node):
+    def visit_setup_patch_set(self, node: ast.FunctionDef) -> None:
        # First pass: gather variable assignments
        for stmt in node.body:
-            if isinstance(stmt, ast.Assign):
+            match stmt:
                case ast.Assign(targets=[ast.Name(id=symbol)]):
                    # Store simple variable assignments in the symbol table
-                if len(stmt.targets) == 1 and isinstance(stmt.targets[0], ast.Name):
+                    self.symbol_table[symbol] = stmt.value
                    var_name = stmt.targets[0].id
                    self.symbol_table[var_name] = stmt.value
        # Second pass: process self.patch_set
        for stmt in node.body:
-            if isinstance(stmt, ast.Assign):
+            if not isinstance(stmt, ast.Assign):
                continue
            for target in stmt.targets:
-                    if isinstance(target, ast.Attribute) and target.attr == "patch_set":
+                if (
-                        if isinstance(stmt.value, ast.List):
+                    isinstance(target, ast.Attribute)
                    and target.attr == "patch_set"
                    and isinstance(stmt.value, ast.List)
                ):
                    for elt in stmt.value.elts:
                        if isinstance(elt, ast.Dict):
                            self.process_patch_entry(elt)
-    def resolve_symbol(self, node):
+    def resolve_symbol(self, node: ast.expr) -> ResolvedSymbol:
        """Resolve named variables to their actual values from the symbol table."""
        if isinstance(node, ast.Name) and node.id in self.symbol_table:
            return self.safe_literal_eval(self.symbol_table[node.id])
        return self.safe_literal_eval(node)
-    def safe_literal_eval(self, node):
+    def safe_literal_eval(self, node: ast.expr) -> ResolvedSymbol:
        """Try to evaluate or stringify an AST node."""
        try:
            return ast.literal_eval(node)
-        except Exception:
+        except ValueError:
            # Spooky eval! But we trust nerd fonts to be safe...
            if hasattr(ast, "unparse"):
                return eval(
-                    ast.unparse(node), {"box_keep": True}, {"self": SpoofSelf()}
+                    ast.unparse(node),
                    {"box_keep": True},
                    {"self": SimpleNamespace(args=SimpleNamespace(careful=True))},
                )
-            else:
+            msg = f"<cannot eval: {type(node).__name__}>"
-                return f"<cannot eval: {type(node).__name__}>"
+            raise ValueError(msg) from None
-    def process_patch_entry(self, dict_node):
+    def process_patch_entry(self, dict_node: ast.Dict) -> None:
        entry = {}
        disallowed_key_nodes = frozenset({"Enabled", "Name", "Filename", "Exact"})
        for key_node, value_node in zip(dict_node.keys, dict_node.values):
-            if isinstance(key_node, ast.Constant) and key_node.value in (
+            if (
-                "Enabled",
+                isinstance(key_node, ast.Constant)
-                "Name",
+                and key_node.value not in disallowed_key_nodes
                "Filename",
                "Exact",
            ):
-                continue
+                key = ast.literal_eval(cast("ast.Constant", key_node))
-            key = ast.literal_eval(key_node)
+                entry[key] = self.resolve_symbol(value_node)
-            value = self.resolve_symbol(value_node)
+        self.patch_set_values.append(cast("PatchSet", entry))
            entry[key] = value
        self.patch_set_values.append(entry)
-def extract_patch_set_values(source_code):
+def extract_patch_set_values(source_code: str) -> list[PatchSet]:
    tree = ast.parse(source_code)
    extractor = PatchSetExtractor()
    extractor.visit(tree)
    return extractor.patch_set_values
-# We have to spoof `self` and `self.args` for the eval.
+def parse_alignment(val: str) -> str | None:
 class SpoofArgs:
    careful = True
 class SpoofSelf:
    args = SpoofArgs()
 def parse_alignment(val):
    return {
        "l": ".start",
        "r": ".end",
@ -105,28 +125,24 @@ def parse_alignment(val):
    }.get(val, ".none")
-def get_param(d, key, default):
+def attr_key(attr: PatchSetAttributeEntry) -> AttributeHash:
    return float(d.get(key, default))
 def attr_key(attr):
    """Convert attributes to a hashable key for grouping."""
-    stretch = attr.get("stretch", "")
+    params = attr.get("params", {})
    return (
        parse_alignment(attr.get("align", "")),
        parse_alignment(attr.get("valign", "")),
-        stretch,
+        attr.get("stretch", ""),
-        float(attr.get("params", {}).get("overlap", 0.0)),
+        float(params.get("overlap", 0.0)),
-        float(attr.get("params", {}).get("xy-ratio", -1.0)),
+        float(params.get("xy-ratio", -1.0)),
-        float(attr.get("params", {}).get("ypadding", 0.0)),
+        float(params.get("ypadding", 0.0)),
    )
-def coalesce_codepoints_to_ranges(codepoints):
+def coalesce_codepoints_to_ranges(codepoints: list[int]) -> list[tuple[int, int]]:
    """Convert a sorted list of integers to a list of single values and ranges."""
-    ranges = []
+    ranges: list[tuple[int, int]] = []
    cp_iter = iter(sorted(codepoints))
-    try:
+    with suppress(StopIteration):
        start = prev = next(cp_iter)
        for cp in cp_iter:
            if cp == prev + 1:
@ -135,52 +151,49 @@ def coalesce_codepoints_to_ranges(codepoints):
                ranges.append((start, prev))
                start = prev = cp
        ranges.append((start, prev))
    except StopIteration:
        pass
    return ranges
-def emit_zig_entry_multikey(codepoints, attr):
+def emit_zig_entry_multikey(codepoints: list[int], attr: PatchSetAttributeEntry) -> str:
    align = parse_alignment(attr.get("align", ""))
    valign = parse_alignment(attr.get("valign", ""))
    stretch = attr.get("stretch", "")
    params = attr.get("params", {})
-    overlap = get_param(params, "overlap", 0.0)
+    overlap = params.get("overlap", 0.0)
-    xy_ratio = get_param(params, "xy-ratio", -1.0)
+    xy_ratio = params.get("xy-ratio", -1.0)
-    y_padding = get_param(params, "ypadding", 0.0)
+    y_padding = params.get("ypadding", 0.0)
    ranges = coalesce_codepoints_to_ranges(codepoints)
    keys = "\n".join(
-        f"        0x{start:x}...0x{end:x}," if start != end else f"        0x{start:x},"
+        f"        {start:#x}...{end:#x}," if start != end else f"        {start:#x},"
        for start, end in ranges
    )
-    s = f"""{keys}
+    s = f"{keys}\n        => .{{\n"
        => .{{\n"""
    # These translations don't quite capture the way
    # the actual patcher does scaling, but they're a
    # good enough compromise.
-    if ("xy" in stretch):
+    if "xy" in stretch:
        s += "            .size_horizontal = .stretch,\n"
        s += "            .size_vertical = .stretch,\n"
-    elif ("!" in stretch):
+    elif "!" in stretch:
        s += "            .size_horizontal = .cover,\n"
        s += "            .size_vertical = .fit,\n"
-    elif ("^" in stretch):
+    elif "^" in stretch:
        s += "            .size_horizontal = .cover,\n"
        s += "            .size_vertical = .cover,\n"
    else:
        s += "            .size_horizontal = .fit,\n"
        s += "            .size_vertical = .fit,\n"
-    if (align is not None):
+    if align is not None:
        s += f"            .align_horizontal = {align},\n"
-    if (valign is not None):
+    if valign is not None:
        s += f"            .align_vertical = {valign},\n"
-    if (overlap != 0.0):
+    if overlap:
        pad = -overlap
        s += f"            .pad_left = {pad},\n"
        s += f"            .pad_right = {pad},\n"
@ -188,35 +201,33 @@ def emit_zig_entry_multikey(codepoints, attr):
        s += f"            .pad_top = {v_pad},\n"
        s += f"            .pad_bottom = {v_pad},\n"
-    if (xy_ratio > 0):
+    if xy_ratio > 0:
        s += f"            .max_xy_ratio = {xy_ratio},\n"
    s += "        },"
    return s
-def generate_zig_switch_arms(patch_set):
+
-    entries = {}
+def generate_zig_switch_arms(patch_sets: list[PatchSet]) -> str:
-    for entry in patch_set:
+    entries: dict[int, PatchSetAttributeEntry] = {}
    for entry in patch_sets:
        attributes = entry["Attributes"]
        for cp in range(entry["SymStart"], entry["SymEnd"] + 1):
            entries[cp] = attributes["default"]
-        for k, v in attributes.items():
+        entries |= {k: v for k, v in attributes.items() if isinstance(k, int)}
            if isinstance(k, int):
                entries[k] = v
    del entries[0]
    # Group codepoints by attribute key
-    grouped = defaultdict(list)
+    grouped = defaultdict[AttributeHash, list[int]](list)
    for cp, attr in entries.items():
        grouped[attr_key(attr)].append(cp)
    # Emit zig switch arms
-    result = []
+    result: list[str] = []
-    for _, codepoints in sorted(grouped.items(), key=lambda x: x[1]):
+    for codepoints in sorted(grouped.values()):
        # Use one of the attrs in the group to emit the value
        attr = entries[codepoints[0]]
        result.append(emit_zig_entry_multikey(codepoints, attr))
@ -225,23 +236,9 @@ def generate_zig_switch_arms(patch_set):
 if __name__ == "__main__":
-    path = (
+    source = sys.stdin.read()
        Path(__file__).resolve().parent
        / ".."
        / ".."
        / "vendor"
        / "nerd-fonts"
        / "font-patcher.py"
    )
    with open(path, "r", encoding="utf-8") as f:
        source = f.read()
    patch_set = extract_patch_set_values(source)
-
+    print("""//! This is a generated file, produced by nerd_font_codegen.py
    out_path = Path(__file__).resolve().parent / "nerd_font_attributes.zig"
    with open(out_path, "w", encoding="utf-8") as f:
        f.write("""//! This is a generate file, produced by nerd_font_codegen.py
 //! DO NOT EDIT BY HAND!
 //!
 //! This file provides info extracted from the nerd fonts patcher script,
@ -251,9 +248,6 @@ const Constraint = @import("face.zig").RenderOptions.Constraint;
 /// Get the a constraints for the provided codepoint.
 pub fn getConstraint(cp: u21) Constraint {
-    return switch (cp) {
+    return switch (cp) {""")
-""")
+    print(generate_zig_switch_arms(patch_set))
-        f.write(generate_zig_switch_arms(patch_set))
+    print("        else => .none,\n    };\n}")
        f.write("\n")
        f.write("        else => .none,\n    };\n}\n")