bench/codepoint-width: add wcwidth

This commit is contained in:
Mitchell Hashimoto
2024-02-07 09:17:26 -08:00
parent d949f1bd84
commit 5692d39067
2 changed files with 33 additions and 1 deletions

View File

@ -8,7 +8,7 @@
# - "ascii", uniform random ASCII bytes
# - "utf8", uniform random unicode characters, encoded as utf8
# - "rand", pure random data, will contain many invalid code sequences.
DATA="utf8"
DATA="ascii"
SIZE="25000000"
# Add additional arguments
@ -25,6 +25,8 @@ hyperfine \
--warmup 10 \
-n baseline \
"./zig-out/bin/bench-codepoint-width --mode=baseline${ARGS} </tmp/ghostty_bench_data" \
-n wcwidth \
"./zig-out/bin/bench-codepoint-width --mode=wcwidth${ARGS} </tmp/ghostty_bench_data" \
-n ziglyph \
"./zig-out/bin/bench-codepoint-width --mode=ziglyph${ARGS} </tmp/ghostty_bench_data" \
-n simd \

View File

@ -42,6 +42,9 @@ const Mode = enum {
/// and establishes a baseline for the other modes.
baseline,
/// libc wcwidth
wcwidth,
/// Use ziglyph library to calculate the display width of each codepoint.
ziglyph,
@ -72,6 +75,7 @@ pub fn main() !void {
// Handle the modes that do not depend on terminal state first.
switch (args.mode) {
.baseline => try benchBaseline(reader, buf),
.wcwidth => try benchWcwidth(reader, buf),
.ziglyph => try benchZiglyph(reader, buf),
.simd => try benchSimd(reader, buf),
}
@ -94,6 +98,32 @@ noinline fn benchBaseline(
}
}
/// libc `wcwidth`, resolved from the linked C library. The result is signed:
/// POSIX specifies -1 when the argument is not a printable wide character,
/// so callers must not assume the value fits in an unsigned type.
extern "c" fn wcwidth(c: u32) c_int;

/// Benchmark mode that reads the input in `buf`-sized chunks, feeds every
/// byte through a `UTF8Decoder`, and calls libc `wcwidth` on each codepoint
/// the decoder yields.
///
/// `buf` doubles as the read buffer and as a sink for the computed width.
/// Returns any error produced by `reader.read`; stops when a read returns 0.
noinline fn benchWcwidth(
    reader: anytype,
    buf: []u8,
) !void {
    var d: UTF8Decoder = .{};
    while (true) {
        const n = try reader.read(buf);
        if (n == 0) break;

        // Feeding the decoder one byte at a time with a plain for loop
        // applies a naive scalar approach.
        for (buf[0..n]) |c| {
            const cp_, const consumed = d.next(c);
            // NOTE(review): this assumes the decoder always consumes the
            // byte it was given; confirm that holds for invalid sequences.
            assert(consumed);
            if (cp_) |cp| {
                const width = wcwidth(cp);

                // Write the width to the buffer to avoid it being compiled
                // away. wcwidth can return -1 (e.g. for control characters),
                // so an @intCast into u8 would be illegal behavior; instead
                // reinterpret the bits as unsigned and truncate, which is
                // defined for every possible return value.
                buf[0] = @truncate(@as(c_uint, @bitCast(width)));
            }
        }
    }
}
noinline fn benchZiglyph(
reader: anytype,
buf: []u8,