From b030663e0384ed53dea6bff7cacd53a614dc18bd Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto
Date: Mon, 5 Feb 2024 11:58:10 -0800
Subject: [PATCH] bench/stream: benchmark for stream processing

---
 bench.sh             |  15 +++++
 build.zig            |   8 ++-
 src/bench/stream.zig | 152 +++++++++++++++++++++++++++++++++++++++++++
 src/build_config.zig |   1 +
 src/main.zig         |   1 +
 5 files changed, 174 insertions(+), 3 deletions(-)
 create mode 100755 bench.sh
 create mode 100644 src/bench/stream.zig

diff --git a/bench.sh b/bench.sh
new file mode 100755
index 000000000..5cd693a13
--- /dev/null
+++ b/bench.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+# TODO: This script is temporary; remove it from the repo.
+
+# Amount of generated input fed to each benchmark run (passed to `head -c`).
+SIZE="25M"
+
+hyperfine \
+  --warmup 10 \
+  -n memcpy \
+  "./zig-out/bin/bench-stream --mode=gen-ascii | head -c ${SIZE} | ./zig-out/bin/bench-stream --mode=noop" \
+  -n scalar \
+  "./zig-out/bin/bench-stream --mode=gen-ascii | head -c ${SIZE} | ./zig-out/bin/bench-stream --mode=scalar" \
+  -n simd \
+  "./zig-out/bin/bench-stream --mode=gen-ascii | head -c ${SIZE} | ./zig-out/bin/bench-stream --mode=simd"
diff --git a/build.zig b/build.zig
index c3cbc9223..ddac51035 100644
--- a/build.zig
+++ b/build.zig
@@ -202,7 +202,7 @@ pub fn build(b: *std.Build) !void {
     if (emit_helpgen) try addHelp(b, null, config);
 
     // Add our benchmarks
-    try benchSteps(b, target, optimize, config, emit_bench);
+    try benchSteps(b, target, config, emit_bench);
 
     // TODO: temporary simd tester binary
     {
@@ -1285,7 +1285,6 @@ fn buildDocumentation(
 fn benchSteps(
     b: *std.Build,
     target: std.Build.ResolvedTarget,
-    optimize: std.builtin.OptimizeMode,
     config: BuildConfig,
     install: bool,
 ) !void {
@@ -1313,8 +1312,11 @@ fn benchSteps(
             .name = bin_name,
             .root_source_file = .{ .path = "src/main.zig" },
             .target = target,
-            .optimize = optimize,
+
+            // Benchmarks are always built in ReleaseFast; the optimize mode is no longer a parameter.
+            .optimize = .ReleaseFast,
         });
+        c_exe.linkLibC();
         if (install) b.installArtifact(c_exe);
         _ = try addDeps(b, c_exe, config: {
             var copy = config;
diff --git a/src/bench/stream.zig b/src/bench/stream.zig
new file mode 100644
index 000000000..d0f53e504
--- /dev/null
+++ b/src/bench/stream.zig
@@ -0,0 +1,152 @@
+//! This benchmark measures the throughput of the VT stream. It has a few
+//! modes in order to compare different methods of stream processing. It
+//! also provides a "noop" mode to give us the baseline (`memcpy`) speed.
+//!
+//! The benchmarking modes read stdin until EOF, so you should restrict
+//! the input with `head` in a pipe. For example, to test ASCII input:
+//!
+//!   bench-stream --mode=gen-ascii | head -c 50M | bench-stream --mode=simd
+//!
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const ArenaAllocator = std.heap.ArenaAllocator;
+const cli = @import("../cli.zig");
+const terminal = @import("../terminal/main.zig");
+
+const Args = struct {
+    mode: Mode = .noop,
+
+    /// This is set by the CLI parser; `deinit` releases it when present.
+    _arena: ?ArenaAllocator = null,
+
+    pub fn deinit(self: *Args) void {
+        if (self._arena) |arena| arena.deinit();
+        self.* = undefined;
+    }
+};
+
+const Mode = enum {
+    // Do nothing, just read from stdin into a heap-allocated buffer.
+    // This is used to benchmark our base case: it gives us our maximum
+    // throughput on a basic read.
+    noop,
+
+    // These benchmark the throughput of the terminal stream parsing
+    // with and without SIMD. The "simd" option will use whatever is best
+    // for the running platform.
+    //
+    // Note that these run through the full VT parser but do not apply
+    // the operations to terminal state, so there is no terminal state
+    // overhead.
+    scalar,
+    simd,
+
+    // Generate an infinite stream of random printable ASCII characters.
+    @"gen-ascii",
+};
+
+pub const std_options = struct {
+    pub const log_level: std.log.Level = .debug;
+};
+
+pub fn main() !void {
+    // We want to use the c allocator because it is much faster than GPA.
+    const alloc = std.heap.c_allocator;
+
+    // Parse our args
+    var args: Args = .{};
+    defer args.deinit();
+    {
+        var iter = try std.process.argsWithAllocator(alloc);
+        defer iter.deinit();
+        try cli.args.parse(Args, alloc, &args, &iter);
+    }
+
+    const reader = std.io.getStdIn().reader();
+    const writer = std.io.getStdOut().writer();
+    switch (args.mode) {
+        .@"gen-ascii" => try genAscii(writer),
+        .noop => try benchNoop(alloc, reader),
+        .scalar => try benchScalar(alloc, reader),
+        .simd => try benchSimd(alloc, reader),
+    }
+}
+
+/// Generates an infinite stream of random printable ASCII characters.
+/// This has no control characters in it at all.
+fn genAscii(writer: anytype) !void {
+    const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_+-=[]{}|;':\\\",./<>?`~";
+    try genData(writer, alphabet);
+}
+
+/// Writes an endless, fixed-seed pseudo-random stream of bytes drawn from `alphabet`; returns once the pipe is closed.
+fn genData(writer: anytype, alphabet: []const u8) !void {
+    var prng = std.rand.DefaultPrng.init(0x12345678);
+    const rnd = prng.random();
+    while (true) {
+        var buf: [1024]u8 = undefined;
+        for (&buf) |*c| {
+            const idx = rnd.uintLessThanBiased(usize, alphabet.len);
+            c.* = alphabet[idx];
+        }
+
+        writer.writeAll(&buf) catch |err| switch (err) {
+            error.BrokenPipe => return, // stdout closed
+            else => return err,
+        };
+    }
+}
+
+fn benchNoop(alloc: Allocator, reader: anytype) !void {
+    // Large-ish buffer because we want to benchmark reading, not heap
+    // allocation, as much as possible. We purposely leak this memory
+    // because we don't want to benchmark the cost of freeing it
+    // either.
+    const buf = try alloc.alloc(u8, 1024 * 1024 * 16);
+    var total: usize = 0;
+    while (true) {
+        const n = try reader.readAll(buf);
+        if (n == 0) break;
+        total += n;
+    }
+
+    std.log.info("total bytes len={}", .{total});
+}
+
+fn benchScalar(alloc: Allocator, reader: anytype) !void {
+    _ = alloc;
+
+    // Create a stream that uses our noop handler so we don't
+    // have any terminal state overhead.
+    var stream: terminal.Stream(NoopHandler) = .{ .handler = .{} };
+    var buf: [4096]u8 = undefined;
+    while (true) {
+        const n = try reader.read(&buf);
+        if (n == 0) break;
+
+        // Calling stream.next once per byte in a plain for loop is the
+        // naive scalar approach.
+        for (buf[0..n]) |c| try stream.next(c);
+    }
+}
+
+fn benchSimd(alloc: Allocator, reader: anytype) !void {
+    _ = alloc;
+    // Noop-handler stream again, but each chunk read is handed over whole via nextSlice.
+    var stream: terminal.Stream(NoopHandler) = .{ .handler = .{} };
+    var buf: [4096]u8 = undefined;
+    while (true) {
+        const n = try reader.read(&buf);
+        if (n == 0) break;
+        try stream.nextSlice(buf[0..n]);
+    }
+}
+// NOTE(review): `print` is not `pub`; if terminal.Stream finds handler callbacks via @hasDecl from its own file it would not see this one -- confirm.
+const NoopHandler = struct {
+    fn print(self: NoopHandler, cp: u21) !void {
+        _ = self;
+        _ = cp;
+    }
+};
diff --git a/src/build_config.zig b/src/build_config.zig
index 52e975717..bfb4699d3 100644
--- a/src/build_config.zig
+++ b/src/build_config.zig
@@ -139,4 +139,5 @@ pub const ExeEntrypoint = enum {
     mdgen_ghostty_1,
     mdgen_ghostty_5,
     bench_parser,
+    bench_stream,
 };
diff --git a/src/main.zig b/src/main.zig
index b5307340d..393ddd541 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -7,4 +7,5 @@ pub usingnamespace switch (build_config.exe_entrypoint) {
     .mdgen_ghostty_1 => @import("build/mdgen/main_ghostty_1.zig"),
     .mdgen_ghostty_5 => @import("build/mdgen/main_ghostty_5.zig"),
     .bench_parser => @import("bench/parser.zig"),
+    .bench_stream => @import("bench/stream.zig"),
 };