refactor nerd font codegen script (#7819)

Mainly added type annotations, cleaned up leftover AI-generated code, and
switched to more idiomatic Python constructs.
This requires Python 3.12 or newer to run, because it uses `type` alias statements (I can lower the required version if need be).
This commit is contained in:
Mitchell Hashimoto
2025-07-05 21:36:04 -07:00
committed by GitHub
2 changed files with 113 additions and 119 deletions

View File

@ -1,4 +1,4 @@
//! This is a generate file, produced by nerd_font_codegen.py //! This is a generated file, produced by nerd_font_codegen.py
//! DO NOT EDIT BY HAND! //! DO NOT EDIT BY HAND!
//! //!
//! This file provides info extracted from the nerd fonts patcher script, //! This file provides info extracted from the nerd fonts patcher script,

View File

@ -1,102 +1,122 @@
""" """
This file is mostly vibe coded because I don't like Python. It extracts the This file extracts the patch sets from the nerd fonts font patcher file in order to
patch sets from the nerd fonts font patcher file in order to extract scaling extract scaling rules and attributes for different codepoint ranges which it then
rules and attributes for different codepoint ranges which it then codegens codegens in to a Zig file with a function that switches over codepoints and returns the
in to a Zig file with a function that switches over codepoints and returns attributes and scaling rules.
the attributes and scaling rules.
This does include an `eval` call! This is spooky, but we trust This does include an `eval` call! This is spooky, but we trust the nerd fonts code to
the nerd fonts code to be safe and not malicious or anything. be safe and not malicious or anything.
""" """
import ast import ast
import math import math
from pathlib import Path import sys
from collections import defaultdict from collections import defaultdict
from contextlib import suppress
from types import SimpleNamespace
from typing import Literal, TypedDict, cast
type PatchSetAttributes = dict[Literal["default"] | int, PatchSetAttributeEntry]
type AttributeHash = tuple[str | None, str | None, str, float, float, float]
type ResolvedSymbol = PatchSetAttributes | PatchSetScaleRules | int | None
class PatchSetScaleRules(TypedDict):
ShiftMode: str
ScaleGroups: list[list[int] | range]
class PatchSetAttributeEntry(TypedDict):
align: str
valign: str
stretch: str
params: dict[str, float | bool]
class PatchSet(TypedDict):
SymStart: int
SymEnd: int
SrcStart: int | None
ScaleRules: PatchSetScaleRules | None
Attributes: PatchSetAttributes
class PatchSetExtractor(ast.NodeVisitor): class PatchSetExtractor(ast.NodeVisitor):
def __init__(self): def __init__(self) -> None:
self.symbol_table = {} self.symbol_table: dict[str, ast.expr] = {}
self.patch_set_values = [] self.patch_set_values: list[PatchSet] = []
def visit_ClassDef(self, node): def visit_ClassDef(self, node: ast.ClassDef) -> None:
if node.name == "font_patcher": if node.name != "font_patcher":
return
for item in node.body: for item in node.body:
if isinstance(item, ast.FunctionDef) and item.name == "setup_patch_set": if isinstance(item, ast.FunctionDef) and item.name == "setup_patch_set":
self.visit_setup_patch_set(item) self.visit_setup_patch_set(item)
def visit_setup_patch_set(self, node): def visit_setup_patch_set(self, node: ast.FunctionDef) -> None:
# First pass: gather variable assignments # First pass: gather variable assignments
for stmt in node.body: for stmt in node.body:
if isinstance(stmt, ast.Assign): match stmt:
case ast.Assign(targets=[ast.Name(id=symbol)]):
# Store simple variable assignments in the symbol table # Store simple variable assignments in the symbol table
if len(stmt.targets) == 1 and isinstance(stmt.targets[0], ast.Name): self.symbol_table[symbol] = stmt.value
var_name = stmt.targets[0].id
self.symbol_table[var_name] = stmt.value
# Second pass: process self.patch_set # Second pass: process self.patch_set
for stmt in node.body: for stmt in node.body:
if isinstance(stmt, ast.Assign): if not isinstance(stmt, ast.Assign):
continue
for target in stmt.targets: for target in stmt.targets:
if isinstance(target, ast.Attribute) and target.attr == "patch_set": if (
if isinstance(stmt.value, ast.List): isinstance(target, ast.Attribute)
and target.attr == "patch_set"
and isinstance(stmt.value, ast.List)
):
for elt in stmt.value.elts: for elt in stmt.value.elts:
if isinstance(elt, ast.Dict): if isinstance(elt, ast.Dict):
self.process_patch_entry(elt) self.process_patch_entry(elt)
def resolve_symbol(self, node): def resolve_symbol(self, node: ast.expr) -> ResolvedSymbol:
"""Resolve named variables to their actual values from the symbol table.""" """Resolve named variables to their actual values from the symbol table."""
if isinstance(node, ast.Name) and node.id in self.symbol_table: if isinstance(node, ast.Name) and node.id in self.symbol_table:
return self.safe_literal_eval(self.symbol_table[node.id]) return self.safe_literal_eval(self.symbol_table[node.id])
return self.safe_literal_eval(node) return self.safe_literal_eval(node)
def safe_literal_eval(self, node): def safe_literal_eval(self, node: ast.expr) -> ResolvedSymbol:
"""Try to evaluate or stringify an AST node.""" """Try to evaluate or stringify an AST node."""
try: try:
return ast.literal_eval(node) return ast.literal_eval(node)
except Exception: except ValueError:
# Spooky eval! But we trust nerd fonts to be safe... # Spooky eval! But we trust nerd fonts to be safe...
if hasattr(ast, "unparse"): if hasattr(ast, "unparse"):
return eval( return eval(
ast.unparse(node), {"box_keep": True}, {"self": SpoofSelf()} ast.unparse(node),
{"box_keep": True},
{"self": SimpleNamespace(args=SimpleNamespace(careful=True))},
) )
else: msg = f"<cannot eval: {type(node).__name__}>"
return f"<cannot eval: {type(node).__name__}>" raise ValueError(msg) from None
def process_patch_entry(self, dict_node): def process_patch_entry(self, dict_node: ast.Dict) -> None:
entry = {} entry = {}
disallowed_key_nodes = frozenset({"Enabled", "Name", "Filename", "Exact"})
for key_node, value_node in zip(dict_node.keys, dict_node.values): for key_node, value_node in zip(dict_node.keys, dict_node.values):
if isinstance(key_node, ast.Constant) and key_node.value in ( if (
"Enabled", isinstance(key_node, ast.Constant)
"Name", and key_node.value not in disallowed_key_nodes
"Filename",
"Exact",
): ):
continue key = ast.literal_eval(cast("ast.Constant", key_node))
key = ast.literal_eval(key_node) entry[key] = self.resolve_symbol(value_node)
value = self.resolve_symbol(value_node) self.patch_set_values.append(cast("PatchSet", entry))
entry[key] = value
self.patch_set_values.append(entry)
def extract_patch_set_values(source_code): def extract_patch_set_values(source_code: str) -> list[PatchSet]:
tree = ast.parse(source_code) tree = ast.parse(source_code)
extractor = PatchSetExtractor() extractor = PatchSetExtractor()
extractor.visit(tree) extractor.visit(tree)
return extractor.patch_set_values return extractor.patch_set_values
# We have to spoof `self` and `self.args` for the eval. def parse_alignment(val: str) -> str | None:
class SpoofArgs:
careful = True
class SpoofSelf:
args = SpoofArgs()
def parse_alignment(val):
return { return {
"l": ".start", "l": ".start",
"r": ".end", "r": ".end",
@ -105,28 +125,24 @@ def parse_alignment(val):
}.get(val, ".none") }.get(val, ".none")
def get_param(d, key, default): def attr_key(attr: PatchSetAttributeEntry) -> AttributeHash:
return float(d.get(key, default))
def attr_key(attr):
"""Convert attributes to a hashable key for grouping.""" """Convert attributes to a hashable key for grouping."""
stretch = attr.get("stretch", "") params = attr.get("params", {})
return ( return (
parse_alignment(attr.get("align", "")), parse_alignment(attr.get("align", "")),
parse_alignment(attr.get("valign", "")), parse_alignment(attr.get("valign", "")),
stretch, attr.get("stretch", ""),
float(attr.get("params", {}).get("overlap", 0.0)), float(params.get("overlap", 0.0)),
float(attr.get("params", {}).get("xy-ratio", -1.0)), float(params.get("xy-ratio", -1.0)),
float(attr.get("params", {}).get("ypadding", 0.0)), float(params.get("ypadding", 0.0)),
) )
def coalesce_codepoints_to_ranges(codepoints): def coalesce_codepoints_to_ranges(codepoints: list[int]) -> list[tuple[int, int]]:
"""Convert a sorted list of integers to a list of single values and ranges.""" """Convert a sorted list of integers to a list of single values and ranges."""
ranges = [] ranges: list[tuple[int, int]] = []
cp_iter = iter(sorted(codepoints)) cp_iter = iter(sorted(codepoints))
try: with suppress(StopIteration):
start = prev = next(cp_iter) start = prev = next(cp_iter)
for cp in cp_iter: for cp in cp_iter:
if cp == prev + 1: if cp == prev + 1:
@ -135,52 +151,49 @@ def coalesce_codepoints_to_ranges(codepoints):
ranges.append((start, prev)) ranges.append((start, prev))
start = prev = cp start = prev = cp
ranges.append((start, prev)) ranges.append((start, prev))
except StopIteration:
pass
return ranges return ranges
def emit_zig_entry_multikey(codepoints, attr): def emit_zig_entry_multikey(codepoints: list[int], attr: PatchSetAttributeEntry) -> str:
align = parse_alignment(attr.get("align", "")) align = parse_alignment(attr.get("align", ""))
valign = parse_alignment(attr.get("valign", "")) valign = parse_alignment(attr.get("valign", ""))
stretch = attr.get("stretch", "") stretch = attr.get("stretch", "")
params = attr.get("params", {}) params = attr.get("params", {})
overlap = get_param(params, "overlap", 0.0) overlap = params.get("overlap", 0.0)
xy_ratio = get_param(params, "xy-ratio", -1.0) xy_ratio = params.get("xy-ratio", -1.0)
y_padding = get_param(params, "ypadding", 0.0) y_padding = params.get("ypadding", 0.0)
ranges = coalesce_codepoints_to_ranges(codepoints) ranges = coalesce_codepoints_to_ranges(codepoints)
keys = "\n".join( keys = "\n".join(
f" 0x{start:x}...0x{end:x}," if start != end else f" 0x{start:x}," f" {start:#x}...{end:#x}," if start != end else f" {start:#x},"
for start, end in ranges for start, end in ranges
) )
s = f"""{keys} s = f"{keys}\n => .{{\n"
=> .{{\n"""
# These translations don't quite capture the way # These translations don't quite capture the way
# the actual patcher does scaling, but they're a # the actual patcher does scaling, but they're a
# good enough compromise. # good enough compromise.
if ("xy" in stretch): if "xy" in stretch:
s += " .size_horizontal = .stretch,\n" s += " .size_horizontal = .stretch,\n"
s += " .size_vertical = .stretch,\n" s += " .size_vertical = .stretch,\n"
elif ("!" in stretch): elif "!" in stretch:
s += " .size_horizontal = .cover,\n" s += " .size_horizontal = .cover,\n"
s += " .size_vertical = .fit,\n" s += " .size_vertical = .fit,\n"
elif ("^" in stretch): elif "^" in stretch:
s += " .size_horizontal = .cover,\n" s += " .size_horizontal = .cover,\n"
s += " .size_vertical = .cover,\n" s += " .size_vertical = .cover,\n"
else: else:
s += " .size_horizontal = .fit,\n" s += " .size_horizontal = .fit,\n"
s += " .size_vertical = .fit,\n" s += " .size_vertical = .fit,\n"
if (align is not None): if align is not None:
s += f" .align_horizontal = {align},\n" s += f" .align_horizontal = {align},\n"
if (valign is not None): if valign is not None:
s += f" .align_vertical = {valign},\n" s += f" .align_vertical = {valign},\n"
if (overlap != 0.0): if overlap:
pad = -overlap pad = -overlap
s += f" .pad_left = {pad},\n" s += f" .pad_left = {pad},\n"
s += f" .pad_right = {pad},\n" s += f" .pad_right = {pad},\n"
@ -188,35 +201,33 @@ def emit_zig_entry_multikey(codepoints, attr):
s += f" .pad_top = {v_pad},\n" s += f" .pad_top = {v_pad},\n"
s += f" .pad_bottom = {v_pad},\n" s += f" .pad_bottom = {v_pad},\n"
if (xy_ratio > 0): if xy_ratio > 0:
s += f" .max_xy_ratio = {xy_ratio},\n" s += f" .max_xy_ratio = {xy_ratio},\n"
s += " }," s += " },"
return s return s
def generate_zig_switch_arms(patch_set):
entries = {} def generate_zig_switch_arms(patch_sets: list[PatchSet]) -> str:
for entry in patch_set: entries: dict[int, PatchSetAttributeEntry] = {}
for entry in patch_sets:
attributes = entry["Attributes"] attributes = entry["Attributes"]
for cp in range(entry["SymStart"], entry["SymEnd"] + 1): for cp in range(entry["SymStart"], entry["SymEnd"] + 1):
entries[cp] = attributes["default"] entries[cp] = attributes["default"]
for k, v in attributes.items(): entries |= {k: v for k, v in attributes.items() if isinstance(k, int)}
if isinstance(k, int):
entries[k] = v
del entries[0] del entries[0]
# Group codepoints by attribute key # Group codepoints by attribute key
grouped = defaultdict(list) grouped = defaultdict[AttributeHash, list[int]](list)
for cp, attr in entries.items(): for cp, attr in entries.items():
grouped[attr_key(attr)].append(cp) grouped[attr_key(attr)].append(cp)
# Emit zig switch arms # Emit zig switch arms
result = [] result: list[str] = []
for _, codepoints in sorted(grouped.items(), key=lambda x: x[1]): for codepoints in sorted(grouped.values()):
# Use one of the attrs in the group to emit the value # Use one of the attrs in the group to emit the value
attr = entries[codepoints[0]] attr = entries[codepoints[0]]
result.append(emit_zig_entry_multikey(codepoints, attr)) result.append(emit_zig_entry_multikey(codepoints, attr))
@ -225,23 +236,9 @@ def generate_zig_switch_arms(patch_set):
if __name__ == "__main__": if __name__ == "__main__":
path = ( source = sys.stdin.read()
Path(__file__).resolve().parent
/ ".."
/ ".."
/ "vendor"
/ "nerd-fonts"
/ "font-patcher.py"
)
with open(path, "r", encoding="utf-8") as f:
source = f.read()
patch_set = extract_patch_set_values(source) patch_set = extract_patch_set_values(source)
print("""//! This is a generated file, produced by nerd_font_codegen.py
out_path = Path(__file__).resolve().parent / "nerd_font_attributes.zig"
with open(out_path, "w", encoding="utf-8") as f:
f.write("""//! This is a generate file, produced by nerd_font_codegen.py
//! DO NOT EDIT BY HAND! //! DO NOT EDIT BY HAND!
//! //!
//! This file provides info extracted from the nerd fonts patcher script, //! This file provides info extracted from the nerd fonts patcher script,
@ -251,9 +248,6 @@ const Constraint = @import("face.zig").RenderOptions.Constraint;
/// Get the a constraints for the provided codepoint. /// Get the a constraints for the provided codepoint.
pub fn getConstraint(cp: u21) Constraint { pub fn getConstraint(cp: u21) Constraint {
return switch (cp) { return switch (cp) {""")
""") print(generate_zig_switch_arms(patch_set))
f.write(generate_zig_switch_arms(patch_set)) print(" else => .none,\n };\n}")
f.write("\n")
f.write(" else => .none,\n };\n}\n")