bench/codepoint-width

2025-07-15 00:06:09 +03:00 · 2024-02-06 17:06:58 -08:00
parent 4e3fdf7243
commit d4fa0fcabf
5 changed files with 150 additions and 1 deletions
--- a/build.zig
+++ b/build.zig
@ -1322,10 +1322,14 @@ fn benchSteps(
            var copy = config;
            copy.static = true;
            var enum_name: [64]u8 = undefined;
            @memcpy(enum_name[0..name.len], name);
            std.mem.replaceScalar(u8, enum_name[0..name.len], '-', '_');
            var buf: [64]u8 = undefined;
            copy.exe_entrypoint = std.meta.stringToEnum(
                build_config.ExeEntrypoint,
-                try std.fmt.bufPrint(&buf, "bench_{s}", .{name}),
+                try std.fmt.bufPrint(&buf, "bench_{s}", .{enum_name[0..name.len]}),
            ).?;
            break :config copy;
--- a/src/bench/codepoint-width.sh
+++ b/src/bench/codepoint-width.sh
@ -0,0 +1,29 @@
 #!/usr/bin/env bash
 #
 # Helper script for running the codepoint-width benchmark under hyperfine.
 # You probably want to tweak this script depending on what you're
 # trying to measure.
 #
 # DATA selects the generator mode passed to bench-stream:
 # - "ascii", uniform random ASCII bytes
 # - "utf8", uniform random unicode characters, encoded as utf8
 # - "rand", pure random data, will contain many invalid code sequences.
 DATA="utf8"
 # Number of input bytes to generate.
 SIZE="25000000"
 # Additional arguments appended to every benchmark command, for example
 # ARGS="--buffer-size=8192". The separating space is inserted automatically
 # below, so no leading space is needed here.
 ARGS=""
 # Generate the benchmark input ahead of time so it's not included in the time.
 ./zig-out/bin/bench-stream --mode="gen-$DATA" | head -c "$SIZE" > /tmp/ghostty_bench_data
 # Uncomment to instead use the contents of `stream.txt` as input.
 # yes $(cat ./stream.txt) | head -c $SIZE > /tmp/ghostty_bench_data
 # ${ARGS:+ ${ARGS}} expands to nothing when ARGS is empty and to " <ARGS>"
 # otherwise, so extra arguments never get glued onto the --mode value.
 hyperfine \
  --warmup 10 \
  -n baseline \
  "./zig-out/bin/bench-codepoint-width --mode=baseline${ARGS:+ ${ARGS}} </tmp/ghostty_bench_data" \
  -n ziglyph \
  "./zig-out/bin/bench-codepoint-width --mode=ziglyph${ARGS:+ ${ARGS}} </tmp/ghostty_bench_data"
--- a/src/bench/codepoint-width.zig
+++ b/src/bench/codepoint-width.zig
@ -0,0 +1,114 @@
 //! This benchmark tests the throughput of codepoint width calculation.
 //! This is a common operation in terminal character printing and the
 //! motivating factor to write this benchmark was discovering that our
 //! codepoint width function was 30% of the runtime of every character
 //! print.
 //!
 //! This will consume all of the available stdin, so you should run it
 //! with `head` in a pipe to restrict the input size. For example, to test ASCII input:
 //!
 //!   bench-stream --mode=gen-ascii | head -c 50M | bench-codepoint-width --mode=ziglyph
 //!
 const std = @import("std");
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const ArenaAllocator = std.heap.ArenaAllocator;
 const ziglyph = @import("ziglyph");
 const cli = @import("../cli.zig");
 const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
 const Args = struct {
    /// Which benchmark loop to run; `.baseline` when not given.
    mode: Mode = .baseline,
    /// Length in bytes of the buffer used for reads. There is rarely a
    /// reason to change it; it is an argument mainly so that the value is
    /// only known at runtime and the compiler cannot optimize around it.
    @"buffer-size": usize = 4096,
    /// Filled in by the CLI parser so that `deinit` has something to free.
    _arena: ?ArenaAllocator = null,
    /// Releases the parser's arena, if one was set, and then invalidates
    /// the whole struct so that any later use is caught.
    pub fn deinit(self: *Args) void {
        if (self._arena) |owned_arena| {
            owned_arena.deinit();
        }
        self.* = undefined;
    }
 };
 /// Selects which benchmark loop `main` dispatches to.
 const Mode = enum {
    /// The baseline mode reads stdin into the buffer and feeds every byte
    /// through the UTF-8 decoder, discarding the result. It shows the
    /// minimal overhead of reading and decoding the input and establishes
    /// a baseline for the other modes.
    baseline,
    /// Use the ziglyph library to calculate the display width of each
    /// decoded codepoint.
    ziglyph,
 };
 pub const std_options = struct {
    /// Log level override for this executable: debug.
    pub const log_level = std.log.Level.debug;
 };
 /// Entry point: parses the command line into `Args`, allocates a read
 /// buffer of `buffer-size` bytes and runs the benchmark loop selected by
 /// `mode` over everything available on stdin.
 ///
 /// Returns any error from argument parsing, allocation or reading stdin.
 pub fn main() !void {
    // We want to use the c allocator because it is much faster than GPA.
    const alloc = std.heap.c_allocator;
    // Parse our args
    var args: Args = .{};
    defer args.deinit();
    {
        var iter = try std.process.argsWithAllocator(alloc);
        defer iter.deinit();
        try cli.args.parse(Args, alloc, &args, &iter);
    }
    const reader = std.io.getStdIn().reader();
    // The buffer length comes from the CLI so it is only known at runtime.
    const buf = try alloc.alloc(u8, args.@"buffer-size");
    // Pair the allocation with its release; this runs before args.deinit().
    defer alloc.free(buf);
    // Run the selected benchmark over all of stdin.
    switch (args.mode) {
        .baseline => try benchBaseline(reader, buf),
        .ziglyph => try benchZiglyph(reader, buf),
    }
 }
 /// Baseline loop: fills `buf` from `reader` until a read returns zero
 /// bytes, pushing each byte through a UTF-8 decoder and throwing away
 /// whatever it produces. Any read error is returned to the caller.
 noinline fn benchBaseline(
    reader: anytype,
    buf: []u8,
 ) !void {
    var decoder: UTF8Decoder = .{};
    while (true) {
        const chunk = buf[0..try reader.read(buf)];
        // A zero-length read means there is no more input.
        if (chunk.len == 0) return;
        // Plain byte-at-a-time decoding; the decoder output is ignored.
        for (chunk) |byte| _ = decoder.next(byte);
    }
 }
 /// Ziglyph loop: fills `buf` from `reader` until a read returns zero
 /// bytes, decodes the bytes as UTF-8 and asks ziglyph for the display
 /// width of every codepoint the decoder yields. Any read error is
 /// returned to the caller.
 noinline fn benchZiglyph(
    reader: anytype,
    buf: []u8,
 ) !void {
    var d: UTF8Decoder = .{};
    while (true) {
        const n = try reader.read(buf);
        if (n == 0) break;
        // Feed the decoder one byte at a time (naive scalar approach).
        for (buf[0..n]) |c| {
            const cp_, const consumed = d.next(c);
            // NOTE(review): this asserts that the decoder consumed every
            // byte. Presumably that does not hold for malformed input such
            // as the "rand" data mode; confirm against UTF8Decoder.
            assert(consumed);
            if (cp_) |cp| {
                const width = ziglyph.display_width.codePointWidth(cp, .half);
                // Write the width to the buffer to avoid it being compiled
                // away. The width is a small signed integer and can be
                // negative, so reinterpret its bits instead of using
                // @intCast, which is illegal for negative values.
                buf[0] = @bitCast(@as(i8, width));
            }
        }
    }
 }
--- a/src/build_config.zig
+++ b/src/build_config.zig
@ -140,4 +140,5 @@ pub const ExeEntrypoint = enum {
    mdgen_ghostty_5,
    bench_parser,
    bench_stream,
    bench_codepoint_width,
 };
--- a/src/main.zig
+++ b/src/main.zig
@ -8,4 +8,5 @@ pub usingnamespace switch (build_config.exe_entrypoint) {
    .mdgen_ghostty_5 => @import("build/mdgen/main_ghostty_5.zig"),
    .bench_parser => @import("bench/parser.zig"),
    .bench_stream => @import("bench/stream.zig"),
    .bench_codepoint_width => @import("bench/codepoint-width.zig"),
 };