diff --git a/src/bench/codepoint-width.sh b/src/bench/codepoint-width.sh
index d7e70f48d..1b94e7717 100755
--- a/src/bench/codepoint-width.sh
+++ b/src/bench/codepoint-width.sh
@@ -8,7 +8,7 @@
 # - "ascii", uniform random ASCII bytes
 # - "utf8", uniform random unicode characters, encoded as utf8
 # - "rand", pure random data, will contain many invalid code sequences.
-DATA="utf8"
+DATA="ascii"
 SIZE="25000000"
 
 # Add additional arguments
@@ -25,6 +25,8 @@ hyperfine \
   --warmup 10 \
   -n baseline \
   "./zig-out/bin/bench-codepoint-width --mode=baseline${ARGS}
         try benchBaseline(reader, buf),
+        .wcwidth => try benchWcwidth(reader, buf),
         .ziglyph => try benchZiglyph(reader, buf),
         .simd => try benchSimd(reader, buf),
     }
@@ -94,6 +98,40 @@ noinline fn benchBaseline(
     }
 }
 
+/// libc `wcwidth`, declared by hand. POSIX specifies a return of -1 for
+/// code points that are not printable, so the result may be negative.
+extern "c" fn wcwidth(c: u32) c_int;
+
+/// Benchmark mode that decodes the input as UTF-8 one byte at a time and
+/// calls libc `wcwidth` for every decoded codepoint. Reads from `reader`
+/// into `buf` until a read returns zero bytes; propagates read errors.
+noinline fn benchWcwidth(
+    reader: anytype,
+    buf: []u8,
+) !void {
+    var d: UTF8Decoder = .{};
+    while (true) {
+        const n = try reader.read(buf);
+        if (n == 0) break;
+
+        // Using stream.next directly with a for loop applies a naive
+        // scalar approach.
+        for (buf[0..n]) |c| {
+            const cp_, const consumed = d.next(c);
+            assert(consumed);
+            if (cp_) |cp| {
+                const width = wcwidth(cp);
+
+                // Write the width to the buffer to avoid it being compiled
+                // away. wcwidth may return -1, which `@intCast` to u8 would
+                // treat as illegal behavior, so reinterpret the bits and
+                // keep the low byte instead.
+                buf[0] = @truncate(@as(c_uint, @bitCast(width)));
+            }
+        }
+    }
+}
+
 noinline fn benchZiglyph(
     reader: anytype,
     buf: []u8,