diff --git a/src/bench/stream.sh b/src/bench/stream.sh
index 41d62f234..5f2e4d311 100755
--- a/src/bench/stream.sh
+++ b/src/bench/stream.sh
@@ -17,8 +17,8 @@ SIZE="25000000"
 # Generate the benchmark input ahead of time so it's not included in the time.
 ./zig-out/bin/bench-stream --mode=gen-$DATA | head -c $SIZE > /tmp/ghostty_bench_data
 
-# Uncomment to instead use the contents of `stream.txt` as input.
-# yes $(cat ./stream.txt) | head -c $SIZE > /tmp/ghostty_bench_data
+# Uncomment to instead use the contents of `stream.txt` as input. (Ignores SIZE)
+# echo $(cat ./stream.txt) > /tmp/ghostty_bench_data
 
 hyperfine \
   --warmup 10 \
diff --git a/src/terminal/Parser.zig b/src/terminal/Parser.zig
index bfb25f2fa..f160619e2 100644
--- a/src/terminal/Parser.zig
+++ b/src/terminal/Parser.zig
@@ -390,7 +390,7 @@ fn doAction(self: *Parser, action: TransitionAction, c: u8) ?Action {
     };
 }
 
-fn clear(self: *Parser) void {
+pub fn clear(self: *Parser) void {
     self.intermediates_idx = 0;
     self.params_idx = 0;
     self.params_sep = .none;
diff --git a/src/terminal/stream.zig b/src/terminal/stream.zig
index 0362780c2..bdfcf4155 100644
--- a/src/terminal/stream.zig
+++ b/src/terminal/stream.zig
@@ -72,7 +72,7 @@ pub fn Stream(comptime Handler: type) type {
             // a code sequence, we continue until it's not.
             while (self.utf8decoder.state != 0) {
                 if (offset >= input.len) return;
-                try self.next(input[offset]);
+                try self.nextUtf8(input[offset]);
                 offset += 1;
             }
 
@@ -80,6 +80,7 @@ pub fn Stream(comptime Handler: type) type {
             // we are. This can happen if the last chunk of input put us
             // in the middle of a control sequence.
             offset += try self.consumeUntilGround(input[offset..]);
+            offset += try self.consumeAllEscapes(input[offset..]);
             if (offset >= input.len) return;
 
             // If we're in the ground state then we can use SIMD to process
@@ -94,9 +95,9 @@ pub fn Stream(comptime Handler: type) type {
                         try self.print(@intCast(cp));
                     }
                 }
-
                 // Consume the bytes we just processed.
                 offset += res.consumed;
+
                 if (offset >= input.len) return;
 
                 // If our offset is NOT an escape then we must have a
@@ -104,20 +105,35 @@ pub fn Stream(comptime Handler: type) type {
                 // to the scalar parser.
                 if (input[offset] != 0x1B) {
                     const rem = input[offset..];
-                    for (rem) |c| try self.next(c);
+                    for (rem) |c| try self.nextUtf8(c);
                     return;
                 }
 
-                // Process our control sequence.
+                // Process control sequences until we run out.
+                offset += try self.consumeAllEscapes(input[offset..]);
+            }
+        }
+
+        /// Parses back-to-back escape sequences until none are left.
+        /// Returns the number of bytes consumed from the provided input.
+        ///
+        /// Returns 0 when input is empty or does not begin with 0x1B. Call
+        /// consumeUntilGround first if the stream may be mid-sequence.
+        fn consumeAllEscapes(self: *Self, input: []const u8) !usize {
+            var offset: usize = 0;
+            while (offset < input.len and input[offset] == 0x1B) {
                 self.parser.state = .escape;
+                self.parser.clear();
                 offset += 1;
                 offset += try self.consumeUntilGround(input[offset..]);
+                if (offset >= input.len) return input.len;
             }
+            return offset;
         }
 
         /// Parses escape sequences until the parser reaches the ground state.
         /// Returns the number of bytes consumed from the provided input.
-        inline fn consumeUntilGround(self: *Self, input: []const u8) !usize {
+        fn consumeUntilGround(self: *Self, input: []const u8) !usize {
             var offset: usize = 0;
             while (self.parser.state != .ground) {
                 if (offset >= input.len) return input.len;
@@ -133,33 +149,42 @@ pub fn Stream(comptime Handler: type) type {
         pub fn next(self: *Self, c: u8) !void {
             // The scalar path can be responsible for decoding UTF-8.
             if (self.parser.state == .ground and c != 0x1B) {
-                const res = self.utf8decoder.next(c);
-                const consumed = res[1];
-                if (res[0]) |codepoint| {
-                    if (codepoint < 0xF) {
+                try self.nextUtf8(c);
+                return;
+            }
+
+            try self.nextNonUtf8(c);
+        }
+
+        /// Process the next byte and print as necessary.
+        ///
+        /// This assumes we're in the UTF-8 decoding state. If we may not
+        /// be in the UTF-8 decoding state call nextSlice or next.
+        fn nextUtf8(self: *Self, c: u8) !void {
+            assert(self.parser.state == .ground and c != 0x1B);
+
+            const res = self.utf8decoder.next(c);
+            const consumed = res[1];
+            if (res[0]) |codepoint| {
+                if (codepoint <= 0xF) {
+                    try self.execute(@intCast(codepoint));
+                } else {
+                    try self.print(@intCast(codepoint));
+                }
+            }
+            if (!consumed) {
+                const retry = self.utf8decoder.next(c);
+                // It should be impossible for the decoder
+                // to not consume the byte twice in a row.
+                assert(retry[1] == true);
+                if (retry[0]) |codepoint| {
+                    if (codepoint <= 0xF) {
                         try self.execute(@intCast(codepoint));
                     } else {
                         try self.print(@intCast(codepoint));
                     }
                 }
-                if (!consumed) {
-                    const retry = self.utf8decoder.next(c);
-                    // It should be impossible for the decoder
-                    // to not consume the byte twice in a row.
-                    assert(retry[1] == true);
-                    if (retry[0]) |codepoint| {
-                        if (codepoint < 0xF) {
-                            try self.execute(@intCast(codepoint));
-                        } else {
-                            try self.print(@intCast(codepoint));
-                        }
-                    }
-                }
-
-                return;
             }
-
-            try self.nextNonUtf8(c);
         }
 
         /// Process the next character and call any callbacks if necessary.
@@ -169,6 +194,57 @@ pub fn Stream(comptime Handler: type) type {
         fn nextNonUtf8(self: *Self, c: u8) !void {
             assert(self.parser.state != .ground or c == 0x1B);
 
+            // Fast path for ESC
+            if (self.parser.state == .ground and c == 0x1B) {
+                self.parser.state = .escape;
+                self.parser.clear();
+                return;
+            }
+            // Fast path for CSI entry.
+            if (self.parser.state == .escape and c == '[') {
+                self.parser.state = .csi_entry;
+                return;
+            }
+            // Fast path for CSI params.
+            if (self.parser.state == .csi_param) csi_param: {
+                switch (c) {
+                    // A C0 escape (yes, this is valid):
+                    0x00...0x0F => try self.execute(c),
+                    // We ignore C0 escapes > 0xF since execute
+                    // doesn't have processing for them anyway:
+                    0x10...0x17, 0x19, 0x1C...0x1F => {},
+                    // We don't currently have any handling for
+                    // 0x18 or 0x1A, but they should still move
+                    // the parser state to ground.
+                    0x18, 0x1A => self.parser.state = .ground,
+                    // A parameter digit (saturating: a long digit run must not overflow):
+                    '0'...'9' => if (self.parser.params_idx < 16) {
+                        self.parser.param_acc *|= 10;
+                        self.parser.param_acc +|= c - '0';
+                        self.parser.param_acc_idx |= 1;
+                    },
+                    // A parameter separator:
+                    ':', ';' => if (self.parser.params_idx < 16) {
+                        self.parser.params[self.parser.params_idx] = self.parser.param_acc;
+                        self.parser.params_idx += 1;
+
+                        self.parser.param_acc = 0;
+                        self.parser.param_acc_idx = 0;
+
+                        // Keep track of separator state.
+                        const sep: Parser.ParamSepState = @enumFromInt(c);
+                        if (self.parser.params_idx == 1) self.parser.params_sep = sep;
+                        if (self.parser.params_sep != sep) self.parser.params_sep = .mixed;
+                    },
+                    // Explicitly ignored:
+                    0x7F => {},
+                    // Defer to the state machine to
+                    // handle any other characters:
+                    else => break :csi_param,
+                }
+                return;
+            }
+
             const actions = self.parser.next(c);
             for (actions) |action_opt| {
                 const action = action_opt orelse continue;