bench/stream: benchmark for stream processing

2025-07-14 15:56:13 +03:00 · 2024-02-05 11:58:10 -08:00
parent 120273aa1a
commit b030663e03
5 changed files with 174 additions and 3 deletions
--- a/bench.sh
+++ b/bench.sh
@ -0,0 +1,15 @@
 #!/usr/bin/env bash
 # TODO: This script is temporary, remove it from the repo
 #
 # Compares the throughput of the bench-stream modes with hyperfine.
 # Every benchmark pipes generated ASCII through `head` (to bound the
 # input) into bench-stream running the mode under test.
 #
 # Environment overrides (defaults match the previous hard-coded values):
 #   SIZE   amount of input passed to `head -c` (default: 25M)
 #   BENCH  path to the bench-stream binary
 set -euo pipefail

 SIZE="${SIZE:-25M}"
 BENCH="${BENCH:-./zig-out/bin/bench-stream}"

 # Prints the shell pipeline hyperfine should time for the mode given in $1.
 pipeline() {
   echo "${BENCH} --mode=gen-ascii | head -c ${SIZE} | ${BENCH} --mode=$1"
 }

 hyperfine \
   --warmup 10 \
   -n memcpy "$(pipeline noop)" \
   -n scalar "$(pipeline scalar)" \
   -n simd "$(pipeline simd)"
--- a/build.zig
+++ b/build.zig
@ -202,7 +202,7 @@ pub fn build(b: *std.Build) !void {
    if (emit_helpgen) try addHelp(b, null, config);
    // Add our benchmarks
-    try benchSteps(b, target, optimize, config, emit_bench);
+    try benchSteps(b, target, config, emit_bench);
    // TODO: temporary simd tester binary
    {
@ -1285,7 +1285,6 @@ fn buildDocumentation(
 fn benchSteps(
    b: *std.Build,
    target: std.Build.ResolvedTarget,
-    optimize: std.builtin.OptimizeMode,
    config: BuildConfig,
    install: bool,
 ) !void {
@ -1313,8 +1312,11 @@ fn benchSteps(
            .name = bin_name,
            .root_source_file = .{ .path = "src/main.zig" },
            .target = target,
-            .optimize = optimize,
+
            // We always want our benchmarks to be in release mode.
            .optimize = .ReleaseFast,
        });
        c_exe.linkLibC();
        if (install) b.installArtifact(c_exe);
        _ = try addDeps(b, c_exe, config: {
            var copy = config;
--- a/src/bench/stream.zig
+++ b/src/bench/stream.zig
@ -0,0 +1,152 @@
 //! This benchmark tests the throughput of the VT stream. It has a few
 //! modes in order to test different methods of stream processing. It
 //! provides a "noop" mode to give us the `memcpy` speed.
 //!
 //! This will consume all of the available stdin, so you should run it
 //! with `head` in a pipe to restrict the amount of input. For example,
 //! to test ASCII input:
 //!
 //!   bench-stream --mode=gen-ascii | head -c 50M | bench-stream --mode=simd
 //!
 const std = @import("std");
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const ArenaAllocator = std.heap.ArenaAllocator;
 const cli = @import("../cli.zig");
 const terminal = @import("../terminal/main.zig");
 /// Command-line arguments accepted by the stream benchmark.
 const Args = struct {
    /// Which benchmark (or generator) to run. Defaults to `noop`.
    mode: Mode = .noop,

    /// This is set by the CLI parser for deinit.
    _arena: ?ArenaAllocator = null,

    /// Releases the arena (when one was set) and poisons the struct so
    /// any later use is caught.
    pub fn deinit(self: *Args) void {
        if (self._arena) |owned_arena| {
            owned_arena.deinit();
        }

        self.* = undefined;
    }
 };
 // The operation performed by the binary, selected with `--mode=<name>`.
 const Mode = enum {
    // Do nothing, just read from stdin into a large heap-allocated buffer.
    // This is used to benchmark our base-case: it gives us our maximum
    // throughput on a basic read.
    noop,
    // These benchmark the throughput of the terminal stream parsing
    // with and without SIMD. The "simd" option will use whatever is best
    // for the running platform.
    //
    // Note that these run through the full VT parser but do not apply
    // the operations to terminal state, so there is no terminal state
    // overhead.
    scalar,
    simd,
    // Generate an infinite stream of random printable ASCII characters.
    // Unlike the other modes this writes to stdout instead of reading stdin.
    @"gen-ascii",
 };
 // Standard library option overrides for this executable. The log level
 // is pinned to `.debug` -- presumably so that `std.log.info` output is not
 // filtered out in release builds (NOTE(review): confirm against the std
 // version in use).
 pub const std_options = struct {
    pub const log_level: std.log.Level = .debug;
 };
 /// Entry point: parses the CLI arguments and dispatches to the generator
 /// or benchmark selected by `--mode`.
 pub fn main() !void {
    // The C allocator is used because it is much faster than GPA.
    const allocator = std.heap.c_allocator;

    var args: Args = .{};
    defer args.deinit();

    // Parse our args. The iterator is scoped so that it is released as
    // soon as parsing is done, before any benchmark starts.
    {
        var arg_iter = try std.process.argsWithAllocator(allocator);
        defer arg_iter.deinit();
        try cli.args.parse(Args, allocator, &args, &arg_iter);
    }

    const stdin = std.io.getStdIn().reader();
    const stdout = std.io.getStdOut().writer();

    // Benchmarks consume stdin; the generator produces stdout.
    switch (args.mode) {
        .noop => try benchNoop(allocator, stdin),
        .scalar => try benchScalar(allocator, stdin),
        .simd => try benchSimd(allocator, stdin),
        .@"gen-ascii" => try genAscii(stdout),
    }
 }
 /// Writes an endless stream of random printable ASCII to `writer`.
 /// The alphabet is letters, digits and punctuation only: no control
 /// characters (and no whitespace) are ever produced.
 fn genAscii(writer: anytype) !void {
    const printable = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_+-=[]{}|;':\\\",./<>?`~";
    return genData(writer, printable);
 }
 /// Writes an endless stream of bytes drawn at random from `alphabet`.
 ///
 /// The PRNG seed is fixed, so the produced byte sequence is the same on
 /// every run. Data is emitted in 1024-byte chunks. The function returns
 /// normally once the writer reports `error.BrokenPipe` (stdout closed);
 /// any other write error is propagated to the caller.
 fn genData(writer: anytype, alphabet: []const u8) !void {
    var prng = std.rand.DefaultPrng.init(0x12345678);
    const random = prng.random();

    // Every byte is overwritten before each write, so one buffer is reused.
    var chunk: [1024]u8 = undefined;
    while (true) {
        var i: usize = 0;
        while (i < chunk.len) : (i += 1) {
            const pick = random.uintLessThanBiased(usize, alphabet.len);
            chunk[i] = alphabet[pick];
        }

        writer.writeAll(&chunk) catch |err| {
            // stdout closed
            if (err == error.BrokenPipe) return;
            return err;
        };
    }
 }
 /// Baseline benchmark: drains `reader` into a scratch buffer without
 /// processing the data, then logs the total number of bytes read.
 fn benchNoop(alloc: Allocator, reader: anytype) !void {
    // A large-ish (16 MiB) buffer so that heap allocation is not what we
    // end up measuring. The memory is leaked on purpose: the cost of a
    // free should not be part of the benchmark either.
    const scratch_len = 1024 * 1024 * 16;
    const scratch = try alloc.alloc(u8, scratch_len);

    // readAll only returns 0 once the input is exhausted.
    var consumed: usize = 0;
    var n = try reader.readAll(scratch);
    while (n != 0) : (n = try reader.readAll(scratch)) {
        consumed += n;
    }

    std.log.info("total bytes len={}", .{consumed});
 }
 /// Scalar benchmark: reads `reader` in 4 KiB chunks and feeds the data
 /// to the terminal stream one byte at a time. `alloc` is unused.
 fn benchScalar(alloc: Allocator, reader: anytype) !void {
    _ = alloc;

    // The stream is backed by the noop handler so that no terminal
    // state overhead is measured.
    var stream: terminal.Stream(NoopHandler) = .{ .handler = .{} };

    var chunk: [4096]u8 = undefined;
    var len = try reader.read(&chunk);
    while (len != 0) : (len = try reader.read(&chunk)) {
        // Calling stream.next once per byte is the naive scalar approach.
        for (chunk[0..len]) |byte| {
            try stream.next(byte);
        }
    }
 }
 /// Slice benchmark: reads `reader` in 4 KiB chunks and hands each chunk
 /// to the terminal stream in a single `nextSlice` call (presumably the
 /// SIMD-accelerated path -- confirm in terminal.Stream). `alloc` is unused.
 fn benchSimd(alloc: Allocator, reader: anytype) !void {
    _ = alloc;

    // Same noop-backed stream as the scalar benchmark.
    var stream: terminal.Stream(NoopHandler) = .{ .handler = .{} };

    var chunk: [4096]u8 = undefined;
    var len = try reader.read(&chunk);
    while (len != 0) : (len = try reader.read(&chunk)) {
        try stream.nextSlice(chunk[0..len]);
    }
 }
 /// Stream handler that discards everything it is given, so that the
 /// benchmarks measure parsing only and no terminal state updates.
 const NoopHandler = struct {
    /// Accepts a codepoint to print and ignores it.
    ///
    /// This must be `pub`: the handler is driven by `terminal.Stream`,
    /// which is declared in another file, and non-pub declarations are
    /// not visible across files (`@hasDecl` reports them as missing and
    /// a direct call does not compile).
    pub fn print(self: NoopHandler, cp: u21) !void {
        _ = self;
        _ = cp;
    }
 };
--- a/src/build_config.zig
+++ b/src/build_config.zig
@ -139,4 +139,5 @@ pub const ExeEntrypoint = enum {
    mdgen_ghostty_1,
    mdgen_ghostty_5,
    bench_parser,
    bench_stream,
 };
--- a/src/main.zig
+++ b/src/main.zig
@ -7,4 +7,5 @@ pub usingnamespace switch (build_config.exe_entrypoint) {
    .mdgen_ghostty_1 => @import("build/mdgen/main_ghostty_1.zig"),
    .mdgen_ghostty_5 => @import("build/mdgen/main_ghostty_5.zig"),
    .bench_parser => @import("bench/parser.zig"),
    .bench_stream => @import("bench/stream.zig"),
 };