diff --git a/build.zig b/build.zig
index 0669f27cc..445cf4a98 100644
--- a/build.zig
+++ b/build.zig
@@ -1082,6 +1082,15 @@ fn addDeps(
     step.linkLibrary(utfcpp_dep.artifact("utfcpp"));
     try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin());
 
+    // utf8proc: add the Zig module import, link the library, and track its emitted binary.
+    const utf8proc_dep = b.dependency("utf8proc", .{
+        .target = target,
+        .optimize = optimize,
+    });
+    step.root_module.addImport("utf8proc", utf8proc_dep.module("utf8proc"));
+    step.linkLibrary(utf8proc_dep.artifact("utf8proc"));
+    try static_libs.append(utf8proc_dep.artifact("utf8proc").getEmittedBin());
+
     // Spirv-Cross
     step.linkLibrary(spirv_cross_dep.artifact("spirv_cross"));
     try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin());
diff --git a/build.zig.zon b/build.zig.zon
index a694562ea..535d51c24 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -39,6 +39,7 @@
         .pixman = .{ .path = "./pkg/pixman" },
         .simdutf = .{ .path = "./pkg/simdutf" },
         .utfcpp = .{ .path = "./pkg/utfcpp" },
+        .utf8proc = .{ .path = "./pkg/utf8proc" },
         .zlib = .{ .path = "./pkg/zlib" },
 
         // Shader translation
diff --git a/src/bench/grapheme-break.sh b/src/bench/grapheme-break.sh
new file mode 100755
index 000000000..56bd28dd1
--- /dev/null
+++ b/src/bench/grapheme-break.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+#
+# Helper script for running the grapheme-break benchmark under hyperfine.
+# You will probably want to tweak this script depending on what you are
+# trying to measure.
+
+# Options for DATA (passed to bench-stream as --mode=gen-$DATA):
+# - "ascii", uniform random ASCII bytes
+# - "utf8", uniform random Unicode characters, encoded as UTF-8
+# - "rand", pure random data; will contain many invalid code sequences.
+DATA="utf8"
+SIZE="25000000"
+
+# Additional arguments, appended verbatim right after --mode=... (no separator is inserted).
+ARGS=""
+
+# Generate the benchmark input ahead of time so it is not included in the measured time.
+./zig-out/bin/bench-stream --mode=gen-$DATA | head -c $SIZE > /tmp/ghostty_bench_data
+#cat ~/Downloads/JAPANESEBIBLE.txt > /tmp/ghostty_bench_data
+
+# Uncomment to use the contents of `stream.txt`, repeated up to $SIZE bytes, as input instead.
+# yes $(cat ./stream.txt) | head -c $SIZE > /tmp/ghostty_bench_data
+
+hyperfine \
+  --warmup 10 \
+  -n noop \
+  "./zig-out/bin/bench-grapheme-break --mode=noop${ARGS} try benchNoop(reader, buf),
+        .ziglyph => try benchZiglyph(reader, buf),
+        .utf8proc => try benchUtf8proc(reader, buf),
+    }
+}
+
+/// Baseline benchmark: reads from `reader` into `buf` until a read returns
+/// zero bytes, feeding every byte to a `UTF8Decoder` and discarding the
+/// decoder's result. No grapheme-break function is called.
+///
+/// Errors returned by `reader.read` are propagated to the caller.
+noinline fn benchNoop(
+    reader: anytype,
+    buf: []u8,
+) !void {
+    var d: UTF8Decoder = .{};
+    while (true) {
+        const n = try reader.read(buf);
+        if (n == 0) break;
+
+        // Feed the decoder one byte at a time with a plain for loop,
+        // i.e. a naive scalar approach.
+        for (buf[0..n]) |c| {
+            _ = d.next(c);
+        }
+    }
+}
+
+/// Benchmarks `ziglyph.graphemeBreak`: reads from `reader` into `buf` until a
+/// read returns zero bytes, decodes the bytes with a `UTF8Decoder`, and calls
+/// `ziglyph.graphemeBreak` with the previously decoded codepoint, the newly
+/// decoded codepoint, and a `u3` state that is carried across calls.
+///
+/// Errors returned by `reader.read` are propagated to the caller.
+noinline fn benchZiglyph(
+    reader: anytype,
+    buf: []u8,
+) !void {
+    var d: UTF8Decoder = .{};
+    var state: u3 = 0;
+    // Previous codepoint; starts at 0, so the first decoded codepoint is
+    // compared against U+0000.
+    var cp1: u21 = 0;
+    while (true) {
+        const n = try reader.read(buf);
+        if (n == 0) break;
+
+        // Feed the decoder one byte at a time with a plain for loop,
+        // i.e. a naive scalar approach.
+        for (buf[0..n]) |c| {
+            const cp_, const consumed = d.next(c);
+            // NOTE(review): assumes the decoder consumes every byte; presumably
+            // this can fail on invalid input such as "rand" data. Confirm
+            // against UTF8Decoder.
+            assert(consumed);
+            if (cp_) |cp2| {
+                const v = ziglyph.graphemeBreak(cp1, @intCast(cp2), &state);
+                // Store the result in buf[0] (already read by this loop);
+                // presumably so the call is not optimized away. TODO confirm.
+                buf[0] = @intCast(@intFromBool(v));
+                cp1 = cp2;
+            }
+        }
+    }
+}
+
+/// Benchmarks `utf8proc.graphemeBreakStateful`: reads from `reader` into `buf`
+/// until a read returns zero bytes, decodes the bytes with a `UTF8Decoder`,
+/// and calls `utf8proc.graphemeBreakStateful` with the previously decoded
+/// codepoint, the newly decoded codepoint, and an `i32` state that is carried
+/// across calls.
+///
+/// Errors returned by `reader.read` are propagated to the caller.
+noinline fn benchUtf8proc(
+    reader: anytype,
+    buf: []u8,
+) !void {
+    const utf8proc = @import("utf8proc");
+    var d: UTF8Decoder = .{};
+    var state: i32 = 0;
+    // Previous codepoint; starts at 0, so the first decoded codepoint is
+    // compared against U+0000.
+    var cp1: u21 = 0;
+    while (true) {
+        const n = try reader.read(buf);
+        if (n == 0) break;
+
+        // Feed the decoder one byte at a time with a plain for loop,
+        // i.e. a naive scalar approach.
+        for (buf[0..n]) |c| {
+            const cp_, const consumed = d.next(c);
+            // NOTE(review): assumes the decoder consumes every byte; presumably
+            // this can fail on invalid input such as "rand" data. Confirm
+            // against UTF8Decoder.
+            assert(consumed);
+            if (cp_) |cp2| {
+                const v = utf8proc.graphemeBreakStateful(cp1, @intCast(cp2), &state);
+                // Store the result in buf[0] (already read by this loop);
+                // presumably so the call is not optimized away. TODO confirm.
+                buf[0] = @intCast(@intFromBool(v));
+                cp1 = cp2;
+            }
+        }
+    }
+}
diff --git a/src/build_config.zig b/src/build_config.zig
index 32dee925a..33b76d252 100644
--- a/src/build_config.zig
+++ b/src/build_config.zig
@@ -141,4 +141,5 @@ pub const ExeEntrypoint = enum {
     bench_parser,
     bench_stream,
     bench_codepoint_width,
+    bench_grapheme_break,
 };
diff --git a/src/main.zig b/src/main.zig
index 46a6d7d3d..8cad7ec9f 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -9,4 +9,5 @@ pub usingnamespace switch (build_config.exe_entrypoint) {
     .bench_parser => @import("bench/parser.zig"),
     .bench_stream => @import("bench/stream.zig"),
     .bench_codepoint_width => @import("bench/codepoint-width.zig"),
+    .bench_grapheme_break => @import("bench/grapheme-break.zig"),
 };