diff --git a/build.zig b/build.zig index 3b109ee4e..ca768bd51 100644 --- a/build.zig +++ b/build.zig @@ -1322,10 +1322,14 @@ fn benchSteps( var copy = config; copy.static = true; + var enum_name: [64]u8 = undefined; + @memcpy(enum_name[0..name.len], name); + std.mem.replaceScalar(u8, enum_name[0..name.len], '-', '_'); + var buf: [64]u8 = undefined; copy.exe_entrypoint = std.meta.stringToEnum( build_config.ExeEntrypoint, - try std.fmt.bufPrint(&buf, "bench_{s}", .{name}), + try std.fmt.bufPrint(&buf, "bench_{s}", .{enum_name[0..name.len]}), ).?; break :config copy; diff --git a/src/bench/codepoint-width.sh b/src/bench/codepoint-width.sh new file mode 100755 index 000000000..6aa3548c5 --- /dev/null +++ b/src/bench/codepoint-width.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# +# This is a trivial helper script to help run the codepoint-width benchmark. +# You probably want to tweak this script depending on what you're +# trying to measure. + +# Options: +# - "ascii", uniform random ASCII bytes +# - "utf8", uniform random unicode characters, encoded as utf8 +# - "rand", pure random data, will contain many invalid code sequences. +DATA="utf8" +SIZE="25000000" + +# Add additional arguments +ARGS="" + +# Generate the benchmark input ahead of time so it's not included in the time. +./zig-out/bin/bench-stream --mode=gen-$DATA | head -c $SIZE > /tmp/ghostty_bench_data + +# Uncomment to instead use the contents of `stream.txt` as input. +# yes $(cat ./stream.txt) | head -c $SIZE > /tmp/ghostty_bench_data + +hyperfine \ + --warmup 10 \ + -n baseline \ + "./zig-out/bin/bench-codepoint-width --mode=baseline${ARGS} try benchBaseline(reader, buf), + .ziglyph => try benchZiglyph(reader, buf), + } +} + +noinline fn benchBaseline( + reader: anytype, + buf: []u8, +) !void { + var d: UTF8Decoder = .{}; + while (true) { + const n = try reader.read(buf); + if (n == 0) break; + + // Feed each byte to the UTF-8 decoder via d.next in a plain for loop + // (naive scalar approach); the decoded result is discarded. 
+ for (buf[0..n]) |c| { + _ = d.next(c); + } + } +} + +noinline fn benchZiglyph( + reader: anytype, + buf: []u8, +) !void { + var d: UTF8Decoder = .{}; + while (true) { + const n = try reader.read(buf); + if (n == 0) break; + + // Feed each byte to the UTF-8 decoder via d.next in a plain for loop + // (naive scalar approach), then look up each decoded codepoint's width. + for (buf[0..n]) |c| { + const cp_, const consumed = d.next(c); + assert(consumed); + if (cp_) |cp| { + const width = ziglyph.display_width.codePointWidth(cp, .half); + + // Write the width to the buffer to avoid it being compiled away + buf[0] = @intCast(width); + } + } + } +} diff --git a/src/build_config.zig b/src/build_config.zig index bfb4699d3..32dee925a 100644 --- a/src/build_config.zig +++ b/src/build_config.zig @@ -140,4 +140,5 @@ pub const ExeEntrypoint = enum { mdgen_ghostty_5, bench_parser, bench_stream, + bench_codepoint_width, }; diff --git a/src/main.zig b/src/main.zig index 393ddd541..46a6d7d3d 100644 --- a/src/main.zig +++ b/src/main.zig @@ -8,4 +8,5 @@ pub usingnamespace switch (build_config.exe_entrypoint) { .mdgen_ghostty_5 => @import("build/mdgen/main_ghostty_5.zig"), .bench_parser => @import("bench/parser.zig"), .bench_stream => @import("bench/stream.zig"), + .bench_codepoint_width => @import("bench/codepoint-width.zig"), };