bench/grapheme-break

2025-07-14 15:56:13 +03:00 · 2024-02-09 09:12:05 -08:00
parent fc459ad827
commit 6437623500
6 changed files with 188 additions and 0 deletions
--- a/build.zig
+++ b/build.zig
@ -1082,6 +1082,15 @@ fn addDeps(
    step.linkLibrary(utfcpp_dep.artifact("utfcpp"));
    try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin());
    // utf8proc
    const utf8proc_dep = b.dependency("utf8proc", .{
        .target = target,
        .optimize = optimize,
    });
    step.root_module.addImport("utf8proc", utf8proc_dep.module("utf8proc"));
    step.linkLibrary(utf8proc_dep.artifact("utf8proc"));
    try static_libs.append(utf8proc_dep.artifact("utf8proc").getEmittedBin());
    // Spirv-Cross
    step.linkLibrary(spirv_cross_dep.artifact("spirv_cross"));
    try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin());
--- a/build.zig.zon
+++ b/build.zig.zon
@ -39,6 +39,7 @@
        .pixman = .{ .path = "./pkg/pixman" },
        .simdutf = .{ .path = "./pkg/simdutf" },
        .utfcpp = .{ .path = "./pkg/utfcpp" },
        .utf8proc = .{ .path = "./pkg/utf8proc" },
        .zlib = .{ .path = "./pkg/zlib" },
        // Shader translation
--- a/src/bench/grapheme-break.sh
+++ b/src/bench/grapheme-break.sh
@ -0,0 +1,32 @@
 #!/usr/bin/env bash
 #
 # This is a trivial helper script to help run the grapheme-break benchmark
 # under hyperfine. You probably want to tweak this script depending on what
 # you're trying to measure.
 #
 # Kind of input data to generate. Options:
 # - "ascii", uniform random ASCII bytes
 # - "utf8", uniform random unicode characters, encoded as utf8
 # - "rand", pure random data, will contain many invalid code sequences.
 DATA="utf8"
 # Number of bytes of input data to generate.
 SIZE="25000000"
 # Additional arguments appended to every bench-grapheme-break invocation,
 # e.g. ARGS="--buffer-size=8192". No leading space is required.
 ARGS=""
 # Generate the benchmark input ahead of time so it's not included in the time.
 ./zig-out/bin/bench-stream --mode="gen-$DATA" | head -c "$SIZE" > /tmp/ghostty_bench_data
 # Uncomment to instead use an existing file as input.
 #cat ~/Downloads/JAPANESEBIBLE.txt > /tmp/ghostty_bench_data
 # Uncomment to instead use the contents of `stream.txt` as input.
 # yes $(cat ./stream.txt) | head -c $SIZE > /tmp/ghostty_bench_data
 # "${ARGS:+ $ARGS}" expands to " $ARGS" only when ARGS is non-empty, so the
 # extra arguments are always separated from the --mode flag by a space.
 hyperfine \
  --warmup 10 \
  -n noop \
  "./zig-out/bin/bench-grapheme-break --mode=noop${ARGS:+ $ARGS} </tmp/ghostty_bench_data" \
  -n ziglyph \
  "./zig-out/bin/bench-grapheme-break --mode=ziglyph${ARGS:+ $ARGS} </tmp/ghostty_bench_data" \
  -n utf8proc \
  "./zig-out/bin/bench-grapheme-break --mode=utf8proc${ARGS:+ $ARGS} </tmp/ghostty_bench_data"
--- a/src/bench/grapheme-break.zig
+++ b/src/bench/grapheme-break.zig
@ -0,0 +1,144 @@
 //! This benchmark tests the throughput of grapheme break calculation.
 //! This is a common operation in terminal character printing for terminals
 //! that support grapheme clustering.
 //!
 //! This will consume all of the available stdin, so you should run it
 //! with `head` in a pipe to limit the input size. For example, to test ASCII input:
 //!
 //!   bench-stream --mode=gen-ascii | head -c 50M | bench-grapheme-break --mode=ziglyph
 //!
 const std = @import("std");
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const ArenaAllocator = std.heap.ArenaAllocator;
 const ziglyph = @import("ziglyph");
 const cli = @import("../cli.zig");
 const simd = @import("../simd/main.zig");
 const table = @import("../unicode/main.zig").table;
 const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
 /// Command-line options for this benchmark. `main` fills this in by
 /// passing it to `cli.args.parse`.
 const Args = struct {
    /// Which benchmark variant to run. Defaults to the `noop` baseline.
    mode: Mode = .noop,
    /// Length in bytes of the buffer that stdin is read into. There is
    /// rarely a reason to change it; it exists mainly so the size is only
    /// known at runtime, which keeps the compiler from optimizing around it.
    @"buffer-size": usize = 4096,
    /// Arena set by the CLI parser so that `deinit` can release it.
    _arena: ?ArenaAllocator = null,
    /// Releases the parser's arena, if one was set, and then invalidates
    /// `self` so it cannot be used again by accident.
    pub fn deinit(self: *Args) void {
        // Runs after the arena (if any) has been released below.
        defer self.* = undefined;
        if (self._arena) |owned_arena| {
            owned_arena.deinit();
        }
    }
 };
 /// The benchmark variants selectable through the `mode` argument.
 const Mode = enum {
    /// The baseline mode reads the data from stdin into a buffer and feeds
    /// every byte through the UTF-8 decoder without computing any grapheme
    /// breaks. This is used to show the minimal overhead of reading and
    /// decoding and establishes a baseline for the other modes.
    noop,
    /// Use the ziglyph library to calculate the grapheme break between each
    /// pair of consecutive codepoints.
    ziglyph,
    /// Use the utf8proc library to calculate the grapheme break between
    /// each pair of consecutive codepoints.
    utf8proc,
 };
 /// Root-level `std_options` declaration for this executable: sets the
 /// log level to `.debug`.
 pub const std_options = struct {
    pub const log_level: std.log.Level = .debug;
 };
 /// Entry point: parses the command-line arguments, allocates the read
 /// buffer, and runs the benchmark selected by `mode` against stdin.
 ///
 /// Returns any error from argument parsing, buffer allocation, or from
 /// the selected benchmark function.
 pub fn main() !void {
    // We want to use the c allocator because it is much faster than GPA.
    const alloc = std.heap.c_allocator;
    // Parse our args
    var args: Args = .{};
    defer args.deinit();
    {
        var iter = try std.process.argsWithAllocator(alloc);
        defer iter.deinit();
        try cli.args.parse(Args, alloc, &args, &iter);
    }
    const reader = std.io.getStdIn().reader();
    // The buffer length comes from the arguments so it is only known at
    // runtime. Pair the allocation with its release immediately.
    const buf = try alloc.alloc(u8, args.@"buffer-size");
    defer alloc.free(buf);
    // Dispatch to the benchmark for the requested mode. The switch is
    // exhaustive so a new mode cannot be added without handling it here.
    switch (args.mode) {
        .noop => try benchNoop(reader, buf),
        .ziglyph => try benchZiglyph(reader, buf),
        .utf8proc => try benchUtf8proc(reader, buf),
    }
 }
 /// Baseline benchmark: reads from `reader` into `buf` until a read
 /// returns zero bytes, passing every byte through a `UTF8Decoder` and
 /// discarding the result. No grapheme break computation is performed.
 ///
 /// Returns any error produced by `reader.read`.
 noinline fn benchNoop(
    reader: anytype,
    buf: []u8,
 ) !void {
    var decoder: UTF8Decoder = .{};
    while (true) {
        const len = try reader.read(buf);
        // A zero-length read marks the end of the input.
        if (len == 0) return;
        // Plain byte-at-a-time (scalar) decoding; the decoded values are
        // intentionally ignored.
        for (buf[0..len]) |byte| _ = decoder.next(byte);
    }
 }
 /// Reads from `reader` into `buf` until a read returns zero bytes,
 /// decodes the bytes with a `UTF8Decoder`, and calls
 /// `ziglyph.graphemeBreak` for each decoded codepoint together with the
 /// codepoint decoded before it.
 ///
 /// Returns any error produced by `reader.read`.
 noinline fn benchZiglyph(
    reader: anytype,
    buf: []u8,
 ) !void {
    var decoder: UTF8Decoder = .{};
    // State threaded through successive `graphemeBreak` calls.
    var break_state: u3 = 0;
    // The previously decoded codepoint; 0 until the first one is seen.
    var prev: u21 = 0;
    while (true) {
        const len = try reader.read(buf);
        // A zero-length read marks the end of the input.
        if (len == 0) return;
        // Plain byte-at-a-time (scalar) decoding.
        for (buf[0..len]) |byte| {
            const decoded = decoder.next(byte);
            // The second tuple element is the decoder's "consumed" flag.
            // NOTE(review): presumably this can be false for invalid
            // UTF-8 input, which would trip this assert — confirm against
            // UTF8Decoder before benchmarking non-UTF-8 data.
            assert(decoded[1]);
            // Nothing to do until a full codepoint has been produced.
            const cur = decoded[0] orelse continue;
            const is_break = ziglyph.graphemeBreak(prev, @intCast(cur), &break_state);
            // Write the result into the buffer; presumably this keeps the
            // computation from being optimized away. Index 0 has already
            // been read by the time any write happens.
            buf[0] = @intCast(@intFromBool(is_break));
            prev = cur;
        }
    }
 }
 /// Reads from `reader` into `buf` until a read returns zero bytes,
 /// decodes the bytes with a `UTF8Decoder`, and calls
 /// `utf8proc.graphemeBreakStateful` for each decoded codepoint together
 /// with the codepoint decoded before it.
 ///
 /// Returns any error produced by `reader.read`.
 noinline fn benchUtf8proc(
    reader: anytype,
    buf: []u8,
 ) !void {
    const utf8proc = @import("utf8proc");
    var decoder: UTF8Decoder = .{};
    // State threaded through successive `graphemeBreakStateful` calls.
    var break_state: i32 = 0;
    // The previously decoded codepoint; 0 until the first one is seen.
    var prev: u21 = 0;
    while (true) {
        const len = try reader.read(buf);
        // A zero-length read marks the end of the input.
        if (len == 0) return;
        // Plain byte-at-a-time (scalar) decoding.
        for (buf[0..len]) |byte| {
            const decoded = decoder.next(byte);
            // The second tuple element is the decoder's "consumed" flag.
            // NOTE(review): presumably this can be false for invalid
            // UTF-8 input, which would trip this assert — confirm against
            // UTF8Decoder before benchmarking non-UTF-8 data.
            assert(decoded[1]);
            // Nothing to do until a full codepoint has been produced.
            const cur = decoded[0] orelse continue;
            const is_break = utf8proc.graphemeBreakStateful(prev, @intCast(cur), &break_state);
            // Write the result into the buffer; presumably this keeps the
            // computation from being optimized away. Index 0 has already
            // been read by the time any write happens.
            buf[0] = @intCast(@intFromBool(is_break));
            prev = cur;
        }
    }
 }
--- a/src/build_config.zig
+++ b/src/build_config.zig
@ -141,4 +141,5 @@ pub const ExeEntrypoint = enum {
    bench_parser,
    bench_stream,
    bench_codepoint_width,
    bench_grapheme_break,
 };
--- a/src/main.zig
+++ b/src/main.zig
@ -9,4 +9,5 @@ pub usingnamespace switch (build_config.exe_entrypoint) {
    .bench_parser => @import("bench/parser.zig"),
    .bench_stream => @import("bench/stream.zig"),
    .bench_codepoint_width => @import("bench/codepoint-width.zig"),
    .bench_grapheme_break => @import("bench/grapheme-break.zig"),
 };