From 99ed984af2a12340a7b5b17326bc037044524ba4 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto
Date: Wed, 9 Jul 2025 14:31:47 -0700
Subject: [PATCH] benchmark: add GraphemeBreak and TerminalParser benchmarks

---
 src/benchmark/GraphemeBreak.zig  | 168 +++++++++++++++++++++++++++++++
 src/benchmark/TerminalParser.zig | 120 ++++++++++++++++++++++
 src/benchmark/cli.zig            |   4 +
 src/benchmark/main.zig           |   2 +
 4 files changed, 294 insertions(+)
 create mode 100644 src/benchmark/GraphemeBreak.zig
 create mode 100644 src/benchmark/TerminalParser.zig

diff --git a/src/benchmark/GraphemeBreak.zig b/src/benchmark/GraphemeBreak.zig
new file mode 100644
index 000000000..57effebe4
--- /dev/null
+++ b/src/benchmark/GraphemeBreak.zig
@@ -0,0 +1,168 @@
+//! This benchmark tests the throughput of grapheme break calculation.
+//! This is a common operation in terminal character printing for terminals
+//! that support grapheme clustering.
+const GraphemeBreak = @This();
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const Benchmark = @import("Benchmark.zig");
+const options = @import("options.zig");
+const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
+const unicode = @import("../unicode/main.zig");
+
+const log = std.log.scoped(.@"grapheme-break-bench");
+
+/// The options this benchmark was created with.
+opts: Options,
+
+/// The file, opened in the setup function.
+data_f: ?std.fs.File = null,
+
+/// Configuration for the grapheme break benchmark.
+pub const Options = struct {
+    /// The grapheme break implementation to benchmark.
+    mode: Mode = .table,
+
+    /// The data to read as a filepath. If this is "-" then
+    /// we will read stdin. If this is unset, then we will
+    /// do nothing (benchmark is a noop). It'd be more unixy to
+    /// use stdin by default but I find that a hanging CLI command
+    /// with no interaction is a bit annoying.
+    data: ?[]const u8 = null,
+};
+
+/// Selects which step function `benchmark` installs.
+pub const Mode = enum {
+    /// The baseline mode reads the data into a buffer and feeds it to the
+    /// UTF-8 decoder without computing any grapheme breaks. This is used
+    /// to show the overhead of reading and decoding the data and
+    /// establishes a baseline for the other modes.
+    noop,
+
+    /// Ghostty's table-based approach.
+    table,
+};
+
+/// Create a new grapheme break benchmark for the given options. The
+/// returned pointer is allocated with `alloc`; release it by passing
+/// the same allocator to `destroy`.
+pub fn create(
+    alloc: Allocator,
+    opts: Options,
+) !*GraphemeBreak {
+    const ptr = try alloc.create(GraphemeBreak);
+    errdefer alloc.destroy(ptr);
+    ptr.* = .{ .opts = opts };
+    return ptr;
+}
+
+/// Free a benchmark previously returned by `create`.
+pub fn destroy(self: *GraphemeBreak, alloc: Allocator) void {
+    alloc.destroy(self);
+}
+
+/// Wrap this instance in a `Benchmark`, choosing the step function
+/// from `opts.mode`.
+pub fn benchmark(self: *GraphemeBreak) Benchmark {
+    return .init(self, .{
+        .stepFn = switch (self.opts.mode) {
+            .noop => stepNoop,
+            .table => stepTable,
+        },
+        .setupFn = setup,
+        .teardownFn = teardown,
+    });
+}
+
+/// Setup callback: obtains the data file via `options.dataFile` and
+/// returns `error.BenchmarkFailed` if that fails.
+fn setup(ptr: *anyopaque) Benchmark.Error!void {
+    const self: *GraphemeBreak = @ptrCast(@alignCast(ptr));
+
+    // Open our data file to prepare for reading. We can do more
+    // validation here eventually.
+    assert(self.data_f == null);
+    self.data_f = options.dataFile(self.opts.data) catch |err| {
+        log.warn("error opening data file err={}", .{err});
+        return error.BenchmarkFailed;
+    };
+}
+
+/// Teardown callback: closes the data file if one is open.
+fn teardown(ptr: *anyopaque) void {
+    const self: *GraphemeBreak = @ptrCast(@alignCast(ptr));
+    if (self.data_f) |f| {
+        f.close();
+        self.data_f = null;
+    }
+}
+
+/// Step callback for `.noop`: reads the data file to EOF and feeds every
+/// byte to the UTF-8 decoder, discarding the results. Does nothing if no
+/// data file is open.
+fn stepNoop(ptr: *anyopaque) Benchmark.Error!void {
+    const self: *GraphemeBreak = @ptrCast(@alignCast(ptr));
+
+    const f = self.data_f orelse return;
+    var r = std.io.bufferedReader(f.reader());
+    var d: UTF8Decoder = .{};
+    var buf: [4096]u8 = undefined;
+    while (true) {
+        const n = r.read(&buf) catch |err| {
+            log.warn("error reading data file err={}", .{err});
+            return error.BenchmarkFailed;
+        };
+        if (n == 0) break; // EOF reached
+
+        for (buf[0..n]) |c| {
+            _ = d.next(c);
+        }
+    }
+}
+
+/// Step callback for `.table`: reads the data file to EOF, decodes it as
+/// UTF-8 and calls `unicode.graphemeBreak` with the previous and current
+/// codepoint for every decoded codepoint. Does nothing if no file is open.
+fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
+    const self: *GraphemeBreak = @ptrCast(@alignCast(ptr));
+
+    const f = self.data_f orelse return;
+    var r = std.io.bufferedReader(f.reader());
+    var d: UTF8Decoder = .{};
+    var state: unicode.GraphemeBreakState = .{};
+    var cp1: u21 = 0;
+    var buf: [4096]u8 = undefined;
+    while (true) {
+        const n = r.read(&buf) catch |err| {
+            log.warn("error reading data file err={}", .{err});
+            return error.BenchmarkFailed;
+        };
+        if (n == 0) break; // EOF reached
+
+        for (buf[0..n]) |c| {
+            const cp_, const consumed = d.next(c);
+            // NOTE(review): assumes the decoder consumes every byte;
+            // confirm this holds for ill-formed UTF-8 input.
+            assert(consumed);
+            if (cp_) |cp2| {
+                const v = unicode.graphemeBreak(cp1, @intCast(cp2), &state);
+                // Presumably a sink for the result so the call above is
+                // not optimized away -- TODO confirm.
+                buf[0] = @intCast(@intFromBool(v));
+                cp1 = cp2;
+            }
+        }
+    }
+}
+
+test GraphemeBreak {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    const impl: *GraphemeBreak = try .create(alloc, .{});
+    defer impl.destroy(alloc);
+
+    const bench = impl.benchmark();
+    _ = try bench.run(.once);
+}
diff --git a/src/benchmark/TerminalParser.zig b/src/benchmark/TerminalParser.zig
new file mode 100644
index 000000000..9107d4555
--- /dev/null
+++ b/src/benchmark/TerminalParser.zig
@@ -0,0 +1,120 @@
+//! This benchmark tests the throughput of the terminal escape code parser.
+const TerminalParser = @This();
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const terminalpkg = @import("../terminal/main.zig");
+const Benchmark = @import("Benchmark.zig");
+const options = @import("options.zig");
+
+const log = std.log.scoped(.@"terminal-parser-bench");
+
+/// The options this benchmark was created with.
+opts: Options,
+
+/// The file, opened in the setup function.
+data_f: ?std.fs.File = null,
+
+/// Configuration for the terminal parser benchmark.
+pub const Options = struct {
+    /// The data to read as a filepath. If this is "-" then
+    /// we will read stdin. If this is unset, then we will
+    /// do nothing (benchmark is a noop). It'd be more unixy to
+    /// use stdin by default but I find that a hanging CLI command
+    /// with no interaction is a bit annoying.
+    data: ?[]const u8 = null,
+};
+
+/// Create a new terminal parser benchmark for the given options. The
+/// returned pointer is allocated with `alloc`; release it by passing
+/// the same allocator to `destroy`.
+pub fn create(
+    alloc: Allocator,
+    opts: Options,
+) !*TerminalParser {
+    const ptr = try alloc.create(TerminalParser);
+    errdefer alloc.destroy(ptr);
+    ptr.* = .{ .opts = opts };
+    return ptr;
+}
+
+/// Free a benchmark previously returned by `create`.
+pub fn destroy(self: *TerminalParser, alloc: Allocator) void {
+    alloc.destroy(self);
+}
+
+/// Wrap this instance in a `Benchmark` with its step, setup and
+/// teardown callbacks.
+pub fn benchmark(self: *TerminalParser) Benchmark {
+    return .init(self, .{
+        .stepFn = step,
+        .setupFn = setup,
+        .teardownFn = teardown,
+    });
+}
+
+/// Setup callback: obtains the data file via `options.dataFile` and
+/// returns `error.BenchmarkFailed` if that fails.
+fn setup(ptr: *anyopaque) Benchmark.Error!void {
+    const self: *TerminalParser = @ptrCast(@alignCast(ptr));
+
+    // Open our data file to prepare for reading. We can do more
+    // validation here eventually.
+    assert(self.data_f == null);
+    self.data_f = options.dataFile(self.opts.data) catch |err| {
+        log.warn("error opening data file err={}", .{err});
+        return error.BenchmarkFailed;
+    };
+}
+
+/// Teardown callback: closes the data file if one is open.
+fn teardown(ptr: *anyopaque) void {
+    const self: *TerminalParser = @ptrCast(@alignCast(ptr));
+    if (self.data_f) |f| {
+        f.close();
+        self.data_f = null;
+    }
+}
+
+/// Step callback: reads the data file to EOF and feeds every byte to a
+/// fresh `terminalpkg.Parser`, discarding the resulting actions. Does
+/// nothing if no data file is open.
+fn step(ptr: *anyopaque) Benchmark.Error!void {
+    const self: *TerminalParser = @ptrCast(@alignCast(ptr));
+
+    // Get our buffered reader so we're not predominantly
+    // waiting on file IO. It'd be better to move this fully into
+    // memory. If we're IO bound though that should show up on
+    // the benchmark results and... I know writing this that we
+    // aren't currently IO bound.
+    const f = self.data_f orelse return;
+    var r = std.io.bufferedReader(f.reader());
+
+    var p: terminalpkg.Parser = .{};
+
+    var buf: [4096]u8 = undefined;
+    while (true) {
+        const n = r.read(&buf) catch |err| {
+            log.warn("error reading data file err={}", .{err});
+            return error.BenchmarkFailed;
+        };
+        if (n == 0) break; // EOF reached
+        for (buf[0..n]) |c| {
+            const actions = p.next(c);
+            //std.log.warn("actions={any}", .{actions});
+            _ = actions;
+        }
+    }
+}
+
+test TerminalParser {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    const impl: *TerminalParser = try .create(alloc, .{});
+    defer impl.destroy(alloc);
+
+    const bench = impl.benchmark();
+    _ = try bench.run(.once);
+}
diff --git a/src/benchmark/cli.zig b/src/benchmark/cli.zig
index b35159c6b..3f59b4a72 100644
--- a/src/benchmark/cli.zig
+++ b/src/benchmark/cli.zig
@@ -7,6 +7,8 @@ const cli = @import("../cli.zig");
 pub const Action = enum {
     @"terminal-stream",
     @"codepoint-width",
+    @"grapheme-break",
+    @"terminal-parser",
 
     /// Returns the struct associated with the action. The struct
     /// should have a few decls:
@@ -20,6 +22,8 @@ pub const Action = enum {
         return switch (action) {
             .@"terminal-stream" => @import("TerminalStream.zig"),
             .@"codepoint-width" => @import("CodepointWidth.zig"),
+            .@"grapheme-break" => @import("GraphemeBreak.zig"),
+            .@"terminal-parser" => @import("TerminalParser.zig"),
         };
     }
 };
diff --git a/src/benchmark/main.zig b/src/benchmark/main.zig
index dd00f72b5..56c515c9d 100644
--- a/src/benchmark/main.zig
+++ b/src/benchmark/main.zig
@@ -3,6 +3,8 @@ pub const Benchmark = @import("Benchmark.zig");
 pub const CApi = @import("CApi.zig");
 pub const TerminalStream = @import("TerminalStream.zig");
 pub const CodepointWidth = @import("CodepointWidth.zig");
+pub const GraphemeBreak = @import("GraphemeBreak.zig");
+pub const TerminalParser = @import("TerminalParser.zig");
 
 test {
     _ = @import("std").testing.refAllDecls(@This());