From b31099daf48650dc383090792a9aa607de4de553 Mon Sep 17 00:00:00 2001 From: Qwerasd Date: Tue, 6 Feb 2024 18:22:59 -0500 Subject: [PATCH 1/3] bench/stream: only generate benchmark input once, improve utf8 gen --- src/bench/stream.sh | 14 ++++++++++---- src/bench/stream.zig | 22 +++++++++++++++------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/bench/stream.sh b/src/bench/stream.sh index 9321c018e..1099b54a2 100755 --- a/src/bench/stream.sh +++ b/src/bench/stream.sh @@ -4,17 +4,23 @@ # You probably want to tweak this script depending on what you're # trying to measure. +# Options: +# - "ascii", uniform random ASCII bytes +# - "utf8", uniform random unicode characters, encoded as utf8 DATA="ascii" -SIZE="25M" +SIZE="25000000" # Uncomment to test with an active terminal state. #ARGS=" --terminal" +# Generate the benchmark input ahead of time so it's not included in the time. +./zig-out/bin/bench-stream --mode=gen-$DATA | head -c $SIZE > /tmp/ghostty_bench_data + hyperfine \ --warmup 10 \ -n memcpy \ - "./zig-out/bin/bench-stream --mode=gen-${DATA} | head -c ${SIZE} | ./zig-out/bin/bench-stream --mode=noop${ARGS}" \ + "cat /tmp/ghostty_bench_data | ./zig-out/bin/bench-stream --mode=noop${ARGS}" \ -n scalar \ - "./zig-out/bin/bench-stream --mode=gen-${DATA} | head -c ${SIZE} | ./zig-out/bin/bench-stream --mode=scalar${ARGS}" \ + "cat /tmp/ghostty_bench_data | ./zig-out/bin/bench-stream --mode=scalar${ARGS}" \ -n simd \ - "./zig-out/bin/bench-stream --mode=gen-${DATA} | head -c ${SIZE} | ./zig-out/bin/bench-stream --mode=simd${ARGS}" + "cat /tmp/ghostty_bench_data | ./zig-out/bin/bench-stream --mode=simd${ARGS}" diff --git a/src/bench/stream.zig b/src/bench/stream.zig index 7d581d44d..ade0d0f13 100644 --- a/src/bench/stream.zig +++ b/src/bench/stream.zig @@ -133,8 +133,8 @@ fn genAscii(writer: anytype) !void { fn genData(writer: anytype, alphabet: []const u8) !void { var prng = std.rand.DefaultPrng.init(0x12345678); const rnd = prng.random(); + var buf: [1024]u8 = undefined; while (true) { - var buf: [1024]u8 = undefined; for (&buf) |*c| { const idx = rnd.uintLessThanBiased(usize, alphabet.len); c.* = alphabet[idx]; @@ -148,8 +148,21 @@ fn genData(writer: anytype, alphabet: []const u8) !void { } fn genUtf8(writer: anytype) !void { + var prng = std.rand.DefaultPrng.init(0x12345678); + const rnd = prng.random(); + var buf: [1024]u8 = undefined; while (true) { - writer.writeAll(random_utf8) catch |err| switch (err) { + var i: usize = 0; + while (i <= buf.len - 4) { + const cp: u18 = while(true) { + const cp = rnd.int(u18); + if (ziglyph.isPrint(cp)) break cp; + }; + + i += try std.unicode.utf8Encode(cp, buf[i..]); + } + + writer.writeAll(buf[0..i]) catch |err| switch (err) { error.BrokenPipe => return, // stdout closed else => return err, }; @@ -208,8 +221,3 @@ const TerminalHandler = struct { try self.t.print(cp); } }; - -/// Offline-generated random UTF-8 bytes, because generating them at runtime -/// was too slow for our benchmarks. We should replace this if we can come -/// up with something that doesn't bottleneck our benchmark. -const random_utf8 = "⨴⭬∎⯀Ⳟ⳨⍈♍⒄⣹⇚ⱎ⯡⯴↩ⵆ⼳ⶦ⑑⦥➍Ⲡ⽉❞⹀⢧€⣁ⶐ⸲⣷⏝⣶⫿▝⨽⬃ↁ↵⯙ⶵ╡∾⭡′⫼↼┫⮡ↅ⍞‡▱⺁⿒⽛⎭☜Ⱝ⣘✬⢟⁴⟹⪝ℌ❓␆╣┳⽑⴩⺄✽ⳗ␮ⵍ⦵ⱍ⭑⛒ⅉ⛠➌₯ⵔⷋ⹶❷ⱳ⣖⭐⮋ₒ⥚ⷃ╶⌈⸣❥⑎⦿⪶₮╋⅌ⳬⴛ⥚♇╬❜⺷⡬⏠⧥┺⃻❼⏲↍Ⓙ⽕╶⾉⺪⁑⎕⅕⼧⊀ⲡ⊺⪭⟾Ⅵ⍌⛄⠻⃽⣻₮ⰹⴺ⪂⃾∖⊹⤔⵫⦒⽳⫄⍮↷⣌⩐⨼⯂⵺◺⍙⭺⟂⎯ⱼ⴬⫺⹦∌⡉ⳅ⛲⡏⃘⺃⵬ⴜ⾩⭦ⷭ⨟☌⍃⧪⮧ⓛ⃄♮ⲓ∘⣝⤐⎭ⷺⰫⶔ☎⾨⾐≦␢⋔⢟ⶐ⏁⚄⦡⾞✊⾾⫿⴩⪨⮰ⓙ⌽⭲⫬⒈⊻⸣⌳⋡ⱄⲛ⓬➼⌧⟮⹖♞ℚⷱ⭥⚣⏳⟾❠☏⦻⑽−∪ⅆ☁⿑⦣⵽Ⱳ⺧⺊Ⓞ⫽⦀⃐⚽⎌⥰⚪⢌⛗⸋⛂⾽Ⰳ⍧⛗◁❠↺≍‸ⴣ⭰‾⡸⩛⭷ⵒ⵼⚉❚⨳⑫⹾⷟∇┬⚌⨙╘ℹ⢱⏴∸⴨⾀⌟⡄⺣⦦ⱏ⼚​⿇├⌮⸿⯔₮—⥟╖◡⻵ⶕ┧⒞⏖⏧⟀❲➚‏➳Ⰼ┸⬖⸓⁃⹚⫣┭↜〈☶≍☨╟⿹ⳙ⺽⸡⵵⛞⚟⯓⥟┞⿄⮖⃫⭒⠤ⓣ⬱⃅⓼ⱒ⥖✜⛘⠶ⰽ⿉⾣➌⣋⚨⒯◱⢃◔ⱕ⫡⓱⅌Ⱨ⧵⯾┰⁠ⱌ⼳♠⨽⪢⸳⠹⩡Ⓨ⡪⭞⼰⡧ⓖ⤘⽶⵶ⴺ ⨨▅⏟⊕ⴡⴰ␌⚯⦀⫭⨔⬯⨢ⱽ⟓⥫⑤⊘⟧❐▜⵸℅⋣⚏⇭⽁⪂ⲡ⯊⦥⭳⠾⹫⠮℞⒡Ⰼ⦈⭅≉⋆☈▓⺑⡻▷Ⱑ⋖⬜┃ⵍ←⣢ↁ☚⟴⦡⨍⼡◝⯤❓◢⌡⏿⭲✏⎑⧊⼤⪠⋂⚜┯▤⑘⟾⬬Ⓜ⨸⥪ⱘ⳷⷟⒖⋐⡈⏌∠⏁⓳Ⲟ⦽⢯┏Ⲹ⍰ⅹ⚏⍐⟍⣩␖⛂∜❆⤗⒨⓽"; From 2db24fdd57266240ba9fad11634b32f45dac2f92 Mon Sep 17 00:00:00 2001 From: Qwerasd Date: Tue, 6 Feb 2024 19:29:06 -0500 Subject: [PATCH 2/3] bench/stream: add gen-rand (arbitrary random bytes) --- src/bench/stream.zig | 45 +++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/bench/stream.zig b/src/bench/stream.zig index ade0d0f13..61a0b4318 100644 --- a/src/bench/stream.zig +++ b/src/bench/stream.zig @@ -19,6 +19,10 @@ const terminal = @import("../terminal/main.zig"); const Args = struct { mode: Mode = .noop, + /// The PRNG seed used by the input generators. + /// -1 uses a random seed (default) + seed: i64 = -1, + /// Process input with a real terminal. This will be MUCH slower than /// the other modes because it has to maintain terminal state but will /// help get more realistic numbers. @@ -59,10 +63,11 @@ const Mode = enum { // Generate an infinite stream of random printable ASCII characters. @"gen-ascii", - // Generate an infinite stream of repeated UTF-8 characters. We don't - // currently do random generation because trivial implementations are - // too slow and I'm a simple man. + // Generate an infinite stream of random printable unicode characters. @"gen-utf8", + + // Generate an infinite stream of arbitrary random bytes. + @"gen-rand", }; pub const std_options = struct { @@ -86,10 +91,14 @@ pub fn main() !void { const writer = std.io.getStdOut().writer(); const buf = try alloc.alloc(u8, args.@"buffer-size"); + const seed: u64 = if (args.seed >= 0) @bitCast(args.seed) + else @truncate(@as(u128, @bitCast(std.time.nanoTimestamp()))); + // Handle the modes that do not depend on terminal state first. switch (args.mode) { - .@"gen-ascii" => try genAscii(writer), - .@"gen-utf8" => try genUtf8(writer), + .@"gen-ascii" => try genAscii(writer, seed), + .@"gen-utf8" => try genUtf8(writer, seed), + .@"gen-rand" => try genRand(writer, seed), .noop => try benchNoop(reader, buf), // Handle the ones that depend on terminal state next @@ -124,14 +133,14 @@ pub fn main() !void { /// Generates an infinite stream of random printable ASCII characters. /// This has no control characters in it at all. -fn genAscii(writer: anytype) !void { +fn genAscii(writer: anytype, seed: u64) !void { const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_+-=[]{}|;':\\\",./<>?`~"; - try genData(writer, alphabet); + try genData(writer, alphabet, seed); } /// Generates an infinite stream of bytes from the given alphabet. -fn genData(writer: anytype, alphabet: []const u8) !void { - var prng = std.rand.DefaultPrng.init(0x12345678); +fn genData(writer: anytype, alphabet: []const u8, seed: u64) !void { + var prng = std.rand.DefaultPrng.init(seed); const rnd = prng.random(); var buf: [1024]u8 = undefined; while (true) { @@ -147,8 +156,8 @@ fn genData(writer: anytype, alphabet: []const u8) !void { } } -fn genUtf8(writer: anytype) !void { - var prng = std.rand.DefaultPrng.init(0x12345678); +fn genUtf8(writer: anytype, seed: u64) !void { + var prng = std.rand.DefaultPrng.init(seed); const rnd = prng.random(); var buf: [1024]u8 = undefined; while (true) { @@ -169,6 +178,20 @@ fn genUtf8(writer: anytype) !void { } } +fn genRand(writer: anytype, seed: u64) !void { + var prng = std.rand.DefaultPrng.init(seed); + const rnd = prng.random(); + var buf: [1024]u8 = undefined; + while (true) { + rnd.bytes(&buf); + + writer.writeAll(&buf) catch |err| switch (err) { + error.BrokenPipe => return, // stdout closed + else => return err, + }; + } +} + noinline fn benchNoop(reader: anytype, buf: []u8) !void { var total: usize = 0; while (true) { From d96243fa5b01f4b99875e8744200401b15840c72 Mon Sep 17 00:00:00 2001 From: Qwerasd Date: Tue, 6 Feb 2024 19:30:27 -0500 Subject: [PATCH 3/3] bench/stream: script adjustments --- src/bench/stream.sh | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/bench/stream.sh b/src/bench/stream.sh index 1099b54a2..41d62f234 100755 --- a/src/bench/stream.sh +++ b/src/bench/stream.sh @@ -7,20 +7,24 @@ # Options: # - "ascii", uniform random ASCII bytes # - "utf8", uniform random unicode characters, encoded as utf8 -DATA="ascii" +# - "rand", pure random data, will contain many invalid code sequences. +DATA="utf8" SIZE="25000000" # Uncomment to test with an active terminal state. -#ARGS=" --terminal" +# ARGS=" --terminal" # Generate the benchmark input ahead of time so it's not included in the time. ./zig-out/bin/bench-stream --mode=gen-$DATA | head -c $SIZE > /tmp/ghostty_bench_data +# Uncomment to instead use the contents of `stream.txt` as input. +# yes $(cat ./stream.txt) | head -c $SIZE > /tmp/ghostty_bench_data + hyperfine \ --warmup 10 \ -n memcpy \ - "cat /tmp/ghostty_bench_data | ./zig-out/bin/bench-stream --mode=noop${ARGS}" \ + "./zig-out/bin/bench-stream --mode=noop${ARGS}