terminal/stream: Added ESC parsing fast tracks

This commit is contained in:
Qwerasd
2024-02-08 21:49:58 -05:00
parent f8c544c119
commit 68c0813397
3 changed files with 105 additions and 29 deletions

View File

@ -17,8 +17,8 @@ SIZE="25000000"
# Generate the benchmark input ahead of time so it's not included in the time. # Generate the benchmark input ahead of time so it's not included in the time.
./zig-out/bin/bench-stream --mode=gen-$DATA | head -c $SIZE > /tmp/ghostty_bench_data ./zig-out/bin/bench-stream --mode=gen-$DATA | head -c $SIZE > /tmp/ghostty_bench_data
# Uncomment to instead use the contents of `stream.txt` as input. # Uncomment to instead use the contents of `stream.txt` as input. (Ignores SIZE)
# yes $(cat ./stream.txt) | head -c $SIZE > /tmp/ghostty_bench_data # echo $(cat ./stream.txt) > /tmp/ghostty_bench_data
hyperfine \ hyperfine \
--warmup 10 \ --warmup 10 \

View File

@ -390,7 +390,7 @@ fn doAction(self: *Parser, action: TransitionAction, c: u8) ?Action {
}; };
} }
fn clear(self: *Parser) void { pub fn clear(self: *Parser) void {
self.intermediates_idx = 0; self.intermediates_idx = 0;
self.params_idx = 0; self.params_idx = 0;
self.params_sep = .none; self.params_sep = .none;

View File

@ -72,7 +72,7 @@ pub fn Stream(comptime Handler: type) type {
// a code sequence, we continue until it's not. // a code sequence, we continue until it's not.
while (self.utf8decoder.state != 0) { while (self.utf8decoder.state != 0) {
if (offset >= input.len) return; if (offset >= input.len) return;
try self.next(input[offset]); try self.nextUtf8(input[offset]);
offset += 1; offset += 1;
} }
@ -80,6 +80,7 @@ pub fn Stream(comptime Handler: type) type {
// we are. This can happen if the last chunk of input put us // we are. This can happen if the last chunk of input put us
// in the middle of a control sequence. // in the middle of a control sequence.
offset += try self.consumeUntilGround(input[offset..]); offset += try self.consumeUntilGround(input[offset..]);
offset += try self.consumeAllEscapes(input[offset..]);
if (offset >= input.len) return; if (offset >= input.len) return;
// If we're in the ground state then we can use SIMD to process // If we're in the ground state then we can use SIMD to process
@ -94,9 +95,9 @@ pub fn Stream(comptime Handler: type) type {
try self.print(@intCast(cp)); try self.print(@intCast(cp));
} }
} }
// Consume the bytes we just processed. // Consume the bytes we just processed.
offset += res.consumed; offset += res.consumed;
if (offset >= input.len) return; if (offset >= input.len) return;
// If our offset is NOT an escape then we must have a // If our offset is NOT an escape then we must have a
@ -104,20 +105,35 @@ pub fn Stream(comptime Handler: type) type {
// to the scalar parser. // to the scalar parser.
if (input[offset] != 0x1B) { if (input[offset] != 0x1B) {
const rem = input[offset..]; const rem = input[offset..];
for (rem) |c| try self.next(c); for (rem) |c| try self.nextUtf8(c);
return; return;
} }
// Process our control sequence. // Process control sequences until we run out.
offset += try self.consumeAllEscapes(input[offset..]);
}
}
/// Parses back-to-back escape sequences until none are left.
/// Returns the number of bytes consumed from the provided input.
///
/// Consumption only starts when the input begins with 0x1B; an empty
/// input, or one that begins with any other byte, consumes nothing and
/// returns 0. Use consumeUntilGround first if the stream may be in the
/// middle of an escape sequence.
fn consumeAllEscapes(self: *Self, input: []const u8) !usize {
    var offset: usize = 0;

    // Check the bounds before indexing: the caller may hand us an empty
    // slice when an earlier step already consumed every byte of input.
    while (offset < input.len and input[offset] == 0x1B) {
        // Put the parser straight into the escape state (with cleared
        // parameters/intermediates) rather than feeding it the ESC byte.
        self.parser.state = .escape;
        self.parser.clear();
        offset += 1;

        // Let the parser run until this sequence finishes.
        offset += try self.consumeUntilGround(input[offset..]);

        // Ran out of input, possibly mid-sequence: everything was consumed.
        if (offset >= input.len) return input.len;
    }

    return offset;
}
/// Parses escape sequences until the parser reaches the ground state. /// Parses escape sequences until the parser reaches the ground state.
/// Returns the number of bytes consumed from the provided input. /// Returns the number of bytes consumed from the provided input.
inline fn consumeUntilGround(self: *Self, input: []const u8) !usize { fn consumeUntilGround(self: *Self, input: []const u8) !usize {
var offset: usize = 0; var offset: usize = 0;
while (self.parser.state != .ground) { while (self.parser.state != .ground) {
if (offset >= input.len) return input.len; if (offset >= input.len) return input.len;
@ -133,33 +149,42 @@ pub fn Stream(comptime Handler: type) type {
pub fn next(self: *Self, c: u8) !void { pub fn next(self: *Self, c: u8) !void {
// The scalar path can be responsible for decoding UTF-8. // The scalar path can be responsible for decoding UTF-8.
if (self.parser.state == .ground and c != 0x1B) { if (self.parser.state == .ground and c != 0x1B) {
const res = self.utf8decoder.next(c); try self.nextUtf8(c);
const consumed = res[1]; return;
if (res[0]) |codepoint| { }
if (codepoint < 0xF) {
try self.nextNonUtf8(c);
}
/// Process the next byte and print as necessary.
///
/// This assumes we're in the UTF-8 decoding state. If we may not
/// be in the UTF-8 decoding state call nextSlice or next.
fn nextUtf8(self: *Self, c: u8) !void {
assert(self.parser.state == .ground and c != 0x1B);
const res = self.utf8decoder.next(c);
const consumed = res[1];
if (res[0]) |codepoint| {
if (codepoint <= 0xF) {
try self.execute(@intCast(codepoint));
} else {
try self.print(@intCast(codepoint));
}
}
if (!consumed) {
const retry = self.utf8decoder.next(c);
// It should be impossible for the decoder
// to not consume the byte twice in a row.
assert(retry[1] == true);
if (retry[0]) |codepoint| {
if (codepoint <= 0xF) {
try self.execute(@intCast(codepoint)); try self.execute(@intCast(codepoint));
} else { } else {
try self.print(@intCast(codepoint)); try self.print(@intCast(codepoint));
} }
} }
if (!consumed) {
const retry = self.utf8decoder.next(c);
// It should be impossible for the decoder
// to not consume the byte twice in a row.
assert(retry[1] == true);
if (retry[0]) |codepoint| {
if (codepoint < 0xF) {
try self.execute(@intCast(codepoint));
} else {
try self.print(@intCast(codepoint));
}
}
}
return;
} }
try self.nextNonUtf8(c);
} }
/// Process the next character and call any callbacks if necessary. /// Process the next character and call any callbacks if necessary.
@ -169,6 +194,57 @@ pub fn Stream(comptime Handler: type) type {
fn nextNonUtf8(self: *Self, c: u8) !void { fn nextNonUtf8(self: *Self, c: u8) !void {
assert(self.parser.state != .ground or c == 0x1B); assert(self.parser.state != .ground or c == 0x1B);
// Fast path for ESC
if (self.parser.state == .ground and c == 0x1B) {
self.parser.state = .escape;
self.parser.clear();
return;
}
// Fast path for CSI entry.
if (self.parser.state == .escape and c == '[') {
self.parser.state = .csi_entry;
return;
}
// Fast path for CSI params.
if (self.parser.state == .csi_param) csi_param: {
switch (c) {
// A C0 escape (yes, this is valid):
0x00...0x0F => try self.execute(c),
// We ignore C0 escapes > 0xF since execute
// doesn't have processing for them anyway:
0x10...0x17, 0x19, 0x1C...0x1F => {},
// We don't currently have any handling for
// 0x18 or 0x1A, but they should still move
// the parser state to ground.
0x18, 0x1A => self.parser.state = .ground,
// A parameter digit:
'0'...'9' => if (self.parser.params_idx < 16) {
self.parser.param_acc *= 10;
self.parser.param_acc += c - '0';
self.parser.param_acc_idx |= 1;
},
// A parameter separator:
':', ';' => if (self.parser.params_idx < 16) {
self.parser.params[self.parser.params_idx] = self.parser.param_acc;
self.parser.params_idx += 1;
self.parser.param_acc = 0;
self.parser.param_acc_idx = 0;
// Keep track of separator state.
const sep: Parser.ParamSepState = @enumFromInt(c);
if (self.parser.params_idx == 1) self.parser.params_sep = sep;
if (self.parser.params_sep != sep) self.parser.params_sep = .mixed;
},
// Explicitly ignored:
0x7F => {},
// Defer to the state machine to
// handle any other characters:
else => break :csi_param,
}
return;
}
const actions = self.parser.next(c); const actions = self.parser.next(c);
for (actions) |action_opt| { for (actions) |action_opt| {
const action = action_opt orelse continue; const action = action_opt orelse continue;