bench/grapheme-break

2025-07-14 15:56:13 +03:00 · 2024-02-09 09:12:05 -08:00
parent fc459ad827
commit 6437623500
6 changed files with 188 additions and 0 deletions
--- a/build.zig
+++ b/build.zig
@ -1082,6 +1082,15 @@ fn addDeps(
    step.linkLibrary(utfcpp_dep.artifact("utfcpp"));
    try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin());

+    // utf8proc
+    const utf8proc_dep = b.dependency("utf8proc", .{
+        .target = target,
+        .optimize = optimize,
+    });
+    step.root_module.addImport("utf8proc", utf8proc_dep.module("utf8proc"));
+    step.linkLibrary(utf8proc_dep.artifact("utf8proc"));
+    try static_libs.append(utf8proc_dep.artifact("utf8proc").getEmittedBin());
+
    // Spirv-Cross
    step.linkLibrary(spirv_cross_dep.artifact("spirv_cross"));
    try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin());
--- a/build.zig.zon
+++ b/build.zig.zon
@ -39,6 +39,7 @@
        .pixman = .{ .path = "./pkg/pixman" },
        .simdutf = .{ .path = "./pkg/simdutf" },
        .utfcpp = .{ .path = "./pkg/utfcpp" },
+        .utf8proc = .{ .path = "./pkg/utf8proc" },
        .zlib = .{ .path = "./pkg/zlib" },

        // Shader translation
--- a/src/bench/grapheme-break.sh
+++ b/src/bench/grapheme-break.sh
@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+#
+# This is a trivial helper script to help run the grapheme-break benchmark.
+# You probably want to tweak this script depending on what you're
+# trying to measure.
+
+# Options:
+# - "ascii", uniform random ASCII bytes
+# - "utf8", uniform random unicode characters, encoded as utf8
+# - "rand", pure random data, will contain many invalid code sequences.
+DATA="utf8"
+SIZE="25000000"
+
+# Add additional arguments
+ARGS=""
+
+# Generate the benchmark input ahead of time so it's not included in the time.
+./zig-out/bin/bench-stream --mode=gen-$DATA | head -c $SIZE > /tmp/ghostty_bench_data
+#cat ~/Downloads/JAPANESEBIBLE.txt > /tmp/ghostty_bench_data
+
+# Uncomment to instead use the contents of `stream.txt` as input.
+# yes $(cat ./stream.txt) | head -c $SIZE > /tmp/ghostty_bench_data
+
+hyperfine \
+  --warmup 10 \
+  -n noop \
+  "./zig-out/bin/bench-grapheme-break --mode=noop${ARGS} </tmp/ghostty_bench_data" \
+  -n ziglyph \
+  "./zig-out/bin/bench-grapheme-break --mode=ziglyph${ARGS} </tmp/ghostty_bench_data" \
+  -n utf8proc \
+  "./zig-out/bin/bench-grapheme-break --mode=utf8proc${ARGS} </tmp/ghostty_bench_data"
+
--- a/src/bench/grapheme-break.zig
+++ b/src/bench/grapheme-break.zig
@ -0,0 +1,144 @@
+//! This benchmark tests the throughput of grapheme break calculation.
+//! This is a common operation in terminal character printing for terminals
+//! that support grapheme clustering.
+//!
+//! This will consume all of the available stdin, so you should run it
+//! with `head` in a pipe to restrict. For example, to test ASCII input:
+//!
+//!   bench-stream --mode=gen-ascii | head -c 50M | bench-grapheme-break --mode=ziglyph
+//!
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const ArenaAllocator = std.heap.ArenaAllocator;
+const ziglyph = @import("ziglyph");
+const cli = @import("../cli.zig");
+const simd = @import("../simd/main.zig");
+const table = @import("../unicode/main.zig").table;
+const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
+
+/// Command-line arguments accepted by this benchmark.
+const Args = struct {
+    /// Which implementation to benchmark. Defaults to the baseline.
+    mode: Mode = .noop,
+
+    /// Length in bytes of the buffer used for reads. There is rarely a
+    /// reason to change it; it is kept as a runtime value so that the
+    /// compiler cannot optimize around a known size.
+    @"buffer-size": usize = 4096,
+
+    /// Set by the CLI parser so that `deinit` can release it.
+    _arena: ?ArenaAllocator = null,
+
+    /// Releases the arena (if one was set) and invalidates `self`.
+    pub fn deinit(self: *Args) void {
+        if (self._arena) |arena| {
+            arena.deinit();
+        }
+        self.* = undefined;
+    }
+};
+
+/// The benchmark implementations selectable via `--mode`.
+const Mode = enum {
+    /// The baseline mode reads the data from stdin into a buffer and feeds
+    /// it through the UTF-8 decoder without computing any grapheme breaks.
+    /// This shows the minimal overhead of reading and decoding the input
+    /// and establishes a baseline for the other modes.
+    noop,
+
+    /// Use the ziglyph library to calculate the grapheme break between
+    /// each pair of consecutive codepoints.
+    ziglyph,
+
+    /// Use the utf8proc library to calculate the grapheme break between
+    /// each pair of consecutive codepoints.
+    utf8proc,
+};
+
+/// Standard library options for this executable: sets the log level
+/// to debug.
+pub const std_options = struct {
+    pub const log_level: std.log.Level = .debug;
+};
+
+/// Entry point: parses the CLI arguments, allocates the read buffer, and
+/// runs the benchmark selected by `--mode` against stdin.
+///
+/// Returns any error from argument parsing, buffer allocation, or reading
+/// stdin.
+pub fn main() !void {
+    // We want to use the c allocator because it is much faster than GPA.
+    const alloc = std.heap.c_allocator;
+
+    // Parse our args
+    var args: Args = .{};
+    defer args.deinit();
+    {
+        var iter = try std.process.argsWithAllocator(alloc);
+        defer iter.deinit();
+        try cli.args.parse(Args, alloc, &args, &iter);
+    }
+
+    const reader = std.io.getStdIn().reader();
+
+    // The buffer size comes from the parsed args so it is only known at
+    // runtime. Pair the allocation with its release so it is not leaked.
+    const buf = try alloc.alloc(u8, args.@"buffer-size");
+    defer alloc.free(buf);
+
+    // Dispatch to the benchmark implementation for the selected mode.
+    switch (args.mode) {
+        .noop => try benchNoop(reader, buf),
+        .ziglyph => try benchZiglyph(reader, buf),
+        .utf8proc => try benchUtf8proc(reader, buf),
+    }
+}
+
+/// Baseline benchmark: reads `reader` to the end through `buf` and feeds
+/// every byte to the UTF-8 decoder, discarding whatever it produces.
+/// Returns any error produced by the read.
+noinline fn benchNoop(
+    reader: anytype,
+    buf: []u8,
+) !void {
+    var decoder: UTF8Decoder = .{};
+    while (true) {
+        const len = try reader.read(buf);
+        // A zero-length read ends the benchmark.
+        if (len == 0) return;
+
+        // Plain scalar loop: hand the decoder one byte at a time and
+        // ignore the result.
+        const chunk = buf[0..len];
+        for (chunk) |byte| {
+            _ = decoder.next(byte);
+        }
+    }
+}
+
+/// Reads `reader` to the end through `buf`, decodes the bytes as UTF-8 and
+/// calls `ziglyph.graphemeBreak` for each decoded codepoint paired with the
+/// previously decoded one (the first pair uses codepoint 0 as the previous
+/// value). Returns any error produced by the read.
+noinline fn benchZiglyph(
+    reader: anytype,
+    buf: []u8,
+) !void {
+    var decoder: UTF8Decoder = .{};
+    var break_state: u3 = 0;
+    var prev: u21 = 0;
+    while (true) {
+        const len = try reader.read(buf);
+        // A zero-length read ends the benchmark.
+        if (len == 0) return;
+
+        // Plain scalar loop: hand the decoder one byte at a time.
+        for (buf[0..len]) |byte| {
+            const decoded, const consumed = decoder.next(byte);
+            // NOTE(review): this assumes the decoder consumes every byte;
+            // confirm that holds for input with invalid sequences.
+            assert(consumed);
+
+            // Nothing to do until the decoder yields a codepoint.
+            const cp = decoded orelse continue;
+
+            const is_break = ziglyph.graphemeBreak(prev, @intCast(cp), &break_state);
+            // Store the result in the buffer, presumably so the call is
+            // not optimized away. Index 0 was already read by this loop.
+            buf[0] = @intFromBool(is_break);
+            prev = cp;
+        }
+    }
+}
+
+/// Reads `reader` to the end through `buf`, decodes the bytes as UTF-8 and
+/// calls `utf8proc.graphemeBreakStateful` for each decoded codepoint paired
+/// with the previously decoded one (the first pair uses codepoint 0 as the
+/// previous value). Returns any error produced by the read.
+noinline fn benchUtf8proc(
+    reader: anytype,
+    buf: []u8,
+) !void {
+    const utf8proc = @import("utf8proc");
+    var decoder: UTF8Decoder = .{};
+    var break_state: i32 = 0;
+    var prev: u21 = 0;
+    while (true) {
+        const len = try reader.read(buf);
+        // A zero-length read ends the benchmark.
+        if (len == 0) return;
+
+        // Plain scalar loop: hand the decoder one byte at a time.
+        for (buf[0..len]) |byte| {
+            const decoded, const consumed = decoder.next(byte);
+            // NOTE(review): this assumes the decoder consumes every byte;
+            // confirm that holds for input with invalid sequences.
+            assert(consumed);
+
+            // Nothing to do until the decoder yields a codepoint.
+            const cp = decoded orelse continue;
+
+            const is_break = utf8proc.graphemeBreakStateful(prev, @intCast(cp), &break_state);
+            // Store the result in the buffer, presumably so the call is
+            // not optimized away. Index 0 was already read by this loop.
+            buf[0] = @intFromBool(is_break);
+            prev = cp;
+        }
+    }
+}
--- a/src/build_config.zig
+++ b/src/build_config.zig
@ -141,4 +141,5 @@ pub const ExeEntrypoint = enum {
    bench_parser,
    bench_stream,
    bench_codepoint_width,
+    bench_grapheme_break,
 };
--- a/src/main.zig
+++ b/src/main.zig
@ -9,4 +9,5 @@ pub usingnamespace switch (build_config.exe_entrypoint) {
    .bench_parser => @import("bench/parser.zig"),
    .bench_stream => @import("bench/stream.zig"),
    .bench_codepoint_width => @import("bench/codepoint-width.zig"),
+    .bench_grapheme_break => @import("bench/grapheme-break.zig"),
 };