bench/codepoint-width

2025-07-14 15:56:13 +03:00 · 2024-02-06 17:06:58 -08:00
parent 4e3fdf7243
commit d4fa0fcabf
5 changed files with 150 additions and 1 deletions
--- a/build.zig
+++ b/build.zig
@ -1322,10 +1322,14 @@ fn benchSteps(
            var copy = config;
            copy.static = true;

+            var enum_name: [64]u8 = undefined;
+            @memcpy(enum_name[0..name.len], name);
+            std.mem.replaceScalar(u8, enum_name[0..name.len], '-', '_');
+
            var buf: [64]u8 = undefined;
            copy.exe_entrypoint = std.meta.stringToEnum(
                build_config.ExeEntrypoint,
-                try std.fmt.bufPrint(&buf, "bench_{s}", .{name}),
+                try std.fmt.bufPrint(&buf, "bench_{s}", .{enum_name[0..name.len]}),
            ).?;

            break :config copy;
--- a/src/bench/codepoint-width.sh
+++ b/src/bench/codepoint-width.sh
@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# This is a trivial helper script to help run the codepoint-width benchmark.
+# You probably want to tweak this script depending on what you're
+# trying to measure.
+
+# Options:
+# - "ascii", uniform random ASCII bytes
+# - "utf8", uniform random unicode characters, encoded as utf8
+# - "rand", pure random data, will contain many invalid code sequences.
+DATA="utf8"
+SIZE="25000000"
+
+# Additional arguments passed to every bench-codepoint-width invocation,
+# e.g. ARGS="--buffer-size=8192". They are appended after the mode flag,
+# separated by a space.
+ARGS=""
+
+# Generate the benchmark input ahead of time so it's not included in the time.
+./zig-out/bin/bench-stream --mode="gen-$DATA" | head -c "$SIZE" > /tmp/ghostty_bench_data
+
+# Uncomment to instead use the contents of `stream.txt` as input.
+# yes $(cat ./stream.txt) | head -c $SIZE > /tmp/ghostty_bench_data
+
+# Each command string is run by hyperfine through a shell, so the stdin
+# redirect inside the quotes applies to the benchmark binary.
+hyperfine \
+  --warmup 10 \
+  -n baseline \
+  "./zig-out/bin/bench-codepoint-width --mode=baseline ${ARGS} </tmp/ghostty_bench_data" \
+  -n ziglyph \
+  "./zig-out/bin/bench-codepoint-width --mode=ziglyph ${ARGS} </tmp/ghostty_bench_data"
+
--- a/src/bench/codepoint-width.zig
+++ b/src/bench/codepoint-width.zig
@ -0,0 +1,114 @@
+//! This benchmark tests the throughput of codepoint width calculation.
+//! This is a common operation in terminal character printing and the
+//! motivating factor to write this benchmark was discovering that our
+//! codepoint width function was 30% of the runtime of every character
+//! print.
+//!
+//! This will consume all of the available stdin, so you should run it
+//! with `head` in a pipe to limit the amount of input. For example, to
+//! test ASCII input:
+//!
+//!   bench-stream --mode=gen-ascii | head -c 50M | bench-codepoint-width --mode=ziglyph
+//!
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const ArenaAllocator = std.heap.ArenaAllocator;
+const ziglyph = @import("ziglyph");
+const cli = @import("../cli.zig");
+const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
+
+/// Command-line options accepted by this benchmark.
+const Args = struct {
+    /// Which benchmark loop to run.
+    mode: Mode = .baseline,
+
+    /// Length in bytes of the buffer used for reads. Rarely worth changing;
+    /// it exists mainly so the size is only known at runtime, which keeps
+    /// the compiler from optimizing around a constant.
+    @"buffer-size": usize = 4096,
+
+    /// This is set by the CLI parser for deinit.
+    _arena: ?ArenaAllocator = null,
+
+    /// Tear down the arena if one was set, then mark the whole struct as
+    /// undefined so it is not used again.
+    pub fn deinit(self: *Args) void {
+        defer self.* = undefined;
+        const arena = self._arena orelse return;
+        arena.deinit();
+    }
+};
+
+/// The benchmark variants that can be selected with `--mode`.
+const Mode = enum {
+    /// The baseline mode reads the data from the fd into a buffer and feeds
+    /// every byte through the UTF-8 decoder, discarding the result. This
+    /// is used to show the overhead of reading and decoding the input
+    /// and establishes a baseline for the other modes.
+    baseline,
+
+    /// Use ziglyph library to calculate the display width of each codepoint.
+    ziglyph,
+};
+
+/// Standard library options for this executable: log at debug level.
+pub const std_options = struct {
+    pub const log_level: std.log.Level = .debug;
+};
+
+/// Entry point: parse the CLI arguments, allocate the read buffer, and run
+/// the benchmark loop selected by `--mode` over stdin.
+///
+/// Any argument-parsing, allocation, or read error is returned to the caller.
+pub fn main() !void {
+    // We want to use the c allocator because it is much faster than GPA.
+    const alloc = std.heap.c_allocator;
+
+    // Parse our args
+    var args: Args = .{};
+    defer args.deinit();
+    {
+        var iter = try std.process.argsWithAllocator(alloc);
+        defer iter.deinit();
+        try cli.args.parse(Args, alloc, &args, &iter);
+    }
+
+    const reader = std.io.getStdIn().reader();
+
+    // The buffer length comes from the CLI, so it is only known at runtime.
+    // Free it on every exit path, matching the cleanup of `args` and `iter`.
+    const buf = try alloc.alloc(u8, args.@"buffer-size");
+    defer alloc.free(buf);
+
+    // Dispatch to the benchmark loop for the requested mode.
+    switch (args.mode) {
+        .baseline => try benchBaseline(reader, buf),
+        .ziglyph => try benchZiglyph(reader, buf),
+    }
+}
+
+/// Baseline loop: repeatedly fill `buf` from `reader` and push every byte
+/// through a UTF-8 decoder, throwing the decoder output away.
+///
+/// Returns once a read yields zero bytes; read errors are propagated.
+noinline fn benchBaseline(
+    reader: anytype,
+    buf: []u8,
+) !void {
+    var decoder: UTF8Decoder = .{};
+    while (true) {
+        const len = try reader.read(buf);
+        if (len == 0) return;
+
+        // Feed the decoder one byte at a time in a plain for loop,
+        // i.e. a naive scalar approach.
+        for (buf[0..len]) |byte| _ = decoder.next(byte);
+    }
+}
+
+/// ziglyph loop: repeatedly fill `buf` from `reader`, decode the bytes as
+/// UTF-8, and compute the display width of every decoded codepoint using
+/// ziglyph.
+///
+/// Returns once a read yields zero bytes; read errors are propagated.
+noinline fn benchZiglyph(
+    reader: anytype,
+    buf: []u8,
+) !void {
+    var d: UTF8Decoder = .{};
+    while (true) {
+        const n = try reader.read(buf);
+        if (n == 0) break;
+
+        // Feed the decoder one byte at a time in a plain for loop,
+        // i.e. a naive scalar approach.
+        for (buf[0..n]) |c| {
+            const cp_, const consumed = d.next(c);
+            // NOTE(review): this requires the decoder to consume every
+            // byte; confirm that holds for input containing invalid
+            // sequences (e.g. the "rand" data set).
+            assert(consumed);
+            if (cp_) |cp| {
+                const width = ziglyph.display_width.codePointWidth(cp, .half);
+
+                // Keep the result observable so the width calculation is
+                // not compiled away. This avoids writing into the buffer
+                // we are iterating over and avoids a narrowing cast, which
+                // would be illegal behavior for a negative width.
+                std.mem.doNotOptimizeAway(width);
+            }
+        }
+    }
+}
--- a/src/build_config.zig
+++ b/src/build_config.zig
@ -140,4 +140,5 @@ pub const ExeEntrypoint = enum {
    mdgen_ghostty_5,
    bench_parser,
    bench_stream,
+    bench_codepoint_width,
 };
--- a/src/main.zig
+++ b/src/main.zig
@ -8,4 +8,5 @@ pub usingnamespace switch (build_config.exe_entrypoint) {
    .mdgen_ghostty_5 => @import("build/mdgen/main_ghostty_5.zig"),
    .bench_parser => @import("bench/parser.zig"),
    .bench_stream => @import("bench/stream.zig"),
+    .bench_codepoint_width => @import("bench/codepoint-width.zig"),
 };