bench/stream: benchmark for stream processing

This commit is contained in:
Mitchell Hashimoto
2024-02-05 11:58:10 -08:00
parent 120273aa1a
commit b030663e03
5 changed files with 174 additions and 3 deletions

15
bench.sh Executable file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# TODO: This script is temporary, remove it from the repo
SIZE="25M"
hyperfine \
--warmup 10 \
-n memcpy \
"./zig-out/bin/bench-stream --mode=gen-ascii | head -c ${SIZE} | ./zig-out/bin/bench-stream --mode=noop" \
-n scalar \
"./zig-out/bin/bench-stream --mode=gen-ascii | head -c ${SIZE} | ./zig-out/bin/bench-stream --mode=scalar" \
-n simd \
"./zig-out/bin/bench-stream --mode=gen-ascii | head -c ${SIZE} | ./zig-out/bin/bench-stream --mode=simd"

View File

@ -202,7 +202,7 @@ pub fn build(b: *std.Build) !void {
if (emit_helpgen) try addHelp(b, null, config); if (emit_helpgen) try addHelp(b, null, config);
// Add our benchmarks // Add our benchmarks
try benchSteps(b, target, optimize, config, emit_bench); try benchSteps(b, target, config, emit_bench);
// TODO: temporary simd tester binary // TODO: temporary simd tester binary
{ {
@ -1285,7 +1285,6 @@ fn buildDocumentation(
fn benchSteps( fn benchSteps(
b: *std.Build, b: *std.Build,
target: std.Build.ResolvedTarget, target: std.Build.ResolvedTarget,
optimize: std.builtin.OptimizeMode,
config: BuildConfig, config: BuildConfig,
install: bool, install: bool,
) !void { ) !void {
@ -1313,8 +1312,11 @@ fn benchSteps(
.name = bin_name, .name = bin_name,
.root_source_file = .{ .path = "src/main.zig" }, .root_source_file = .{ .path = "src/main.zig" },
.target = target, .target = target,
.optimize = optimize,
// We always want our benchmarks to be in release mode.
.optimize = .ReleaseFast,
}); });
c_exe.linkLibC();
if (install) b.installArtifact(c_exe); if (install) b.installArtifact(c_exe);
_ = try addDeps(b, c_exe, config: { _ = try addDeps(b, c_exe, config: {
var copy = config; var copy = config;

152
src/bench/stream.zig Normal file
View File

@ -0,0 +1,152 @@
//! This benchmark tests the throughput of the VT stream. It has a few
//! modes in order to test different methods of stream processing. It
//! provides a "noop" mode to give us the `memcpy` speed.
//!
//! This will consume all of the available stdin, so you should run it
//! with `head` in a pipe to restrict. For example, to test ASCII input:
//!
//! bench-stream --mode=gen-ascii | head -c 50M | bench-stream --mode=simd
//!
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const cli = @import("../cli.zig");
const terminal = @import("../terminal/main.zig");
const Args = struct {
mode: Mode = .noop,
/// This is set by the CLI parser for deinit.
_arena: ?ArenaAllocator = null,
pub fn deinit(self: *Args) void {
if (self._arena) |arena| arena.deinit();
self.* = undefined;
}
};
const Mode = enum {
// Do nothing, just read from stdin into a stack-allocated buffer.
// This is used to benchmark our base-case: it gives us our maximum
// throughput on a basic read.
noop,
// These benchmark the throughput of the terminal stream parsing
// with and without SIMD. The "simd" option will use whatever is best
// for the running platform.
//
// Note that these run through the full VT parser but do not apply
// the operations to terminal state, so there is no terminal state
// overhead.
scalar,
simd,
// Generate an infinite stream of random printable ASCII characters.
@"gen-ascii",
};
pub const std_options = struct {
pub const log_level: std.log.Level = .debug;
};
pub fn main() !void {
// We want to use the c allocator because it is much faster than GPA.
const alloc = std.heap.c_allocator;
// Parse our args
var args: Args = .{};
defer args.deinit();
{
var iter = try std.process.argsWithAllocator(alloc);
defer iter.deinit();
try cli.args.parse(Args, alloc, &args, &iter);
}
const reader = std.io.getStdIn().reader();
const writer = std.io.getStdOut().writer();
switch (args.mode) {
.@"gen-ascii" => try genAscii(writer),
.noop => try benchNoop(alloc, reader),
.scalar => try benchScalar(alloc, reader),
.simd => try benchSimd(alloc, reader),
}
}
/// Generates an infinite stream of random printable ASCII characters.
/// This has no control characters in it at all.
fn genAscii(writer: anytype) !void {
const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_+-=[]{}|;':\\\",./<>?`~";
try genData(writer, alphabet);
}
/// Generates an infinite stream of bytes from the given alphabet.
fn genData(writer: anytype, alphabet: []const u8) !void {
var prng = std.rand.DefaultPrng.init(0x12345678);
const rnd = prng.random();
while (true) {
var buf: [1024]u8 = undefined;
for (&buf) |*c| {
const idx = rnd.uintLessThanBiased(usize, alphabet.len);
c.* = alphabet[idx];
}
writer.writeAll(&buf) catch |err| switch (err) {
error.BrokenPipe => return, // stdout closed
else => return err,
};
}
}
fn benchNoop(alloc: Allocator, reader: anytype) !void {
// Large-ish buffer because we don't want to be benchmarking
// heap allocation as much as possible. We purposely leak this
// memory because we don't want to benchmark a free cost
// either.
const buf = try alloc.alloc(u8, 1024 * 1024 * 16);
var total: usize = 0;
while (true) {
const n = try reader.readAll(buf);
if (n == 0) break;
total += n;
}
std.log.info("total bytes len={}", .{total});
}
fn benchScalar(alloc: Allocator, reader: anytype) !void {
_ = alloc;
// Create a stream that uses our noop handler so we don't
// have any terminal state overhead.
var stream: terminal.Stream(NoopHandler) = .{ .handler = .{} };
var buf: [4096]u8 = undefined;
while (true) {
const n = try reader.read(&buf);
if (n == 0) break;
// Using stream.next directly with a for loop applies a naive
// scalar approach.
for (buf[0..n]) |c| try stream.next(c);
}
}
fn benchSimd(alloc: Allocator, reader: anytype) !void {
_ = alloc;
var stream: terminal.Stream(NoopHandler) = .{ .handler = .{} };
var buf: [4096]u8 = undefined;
while (true) {
const n = try reader.read(&buf);
if (n == 0) break;
try stream.nextSlice(buf[0..n]);
}
}
const NoopHandler = struct {
fn print(self: NoopHandler, cp: u21) !void {
_ = self;
_ = cp;
}
};

View File

@ -139,4 +139,5 @@ pub const ExeEntrypoint = enum {
mdgen_ghostty_1, mdgen_ghostty_1,
mdgen_ghostty_5, mdgen_ghostty_5,
bench_parser, bench_parser,
bench_stream,
}; };

View File

@ -7,4 +7,5 @@ pub usingnamespace switch (build_config.exe_entrypoint) {
.mdgen_ghostty_1 => @import("build/mdgen/main_ghostty_1.zig"), .mdgen_ghostty_1 => @import("build/mdgen/main_ghostty_1.zig"),
.mdgen_ghostty_5 => @import("build/mdgen/main_ghostty_5.zig"), .mdgen_ghostty_5 => @import("build/mdgen/main_ghostty_5.zig"),
.bench_parser => @import("bench/parser.zig"), .bench_parser => @import("bench/parser.zig"),
.bench_stream => @import("bench/stream.zig"),
}; };