From f8c544c11978dbe34b70518f7adc66fc40f94821 Mon Sep 17 00:00:00 2001 From: Qwerasd Date: Wed, 7 Feb 2024 00:12:37 -0500 Subject: [PATCH] terminal: stream/parser changes --- src/terminal/Parser.zig | 4 +-- src/terminal/stream.zig | 57 ++++++++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/src/terminal/Parser.zig b/src/terminal/Parser.zig index 5746be065..bfb25f2fa 100644 --- a/src/terminal/Parser.zig +++ b/src/terminal/Parser.zig @@ -185,7 +185,7 @@ pub const Action = union(enum) { /// Keeps track of the parameter sep used for CSI params. We allow colons /// to be used ONLY by the 'm' CSI action. -const ParamSepState = enum(u8) { +pub const ParamSepState = enum(u8) { none = 0, semicolon = ';', colon = ':', @@ -279,7 +279,7 @@ pub fn next(self: *Parser, c: u8) [3]?Action { }; } -fn collect(self: *Parser, c: u8) void { +pub fn collect(self: *Parser, c: u8) void { if (self.intermediates_idx >= MAX_INTERMEDIATE) { log.warn("invalid intermediates count", .{}); return; diff --git a/src/terminal/stream.zig b/src/terminal/stream.zig index 982a8ab20..0362780c2 100644 --- a/src/terminal/stream.zig +++ b/src/terminal/stream.zig @@ -74,17 +74,14 @@ pub fn Stream(comptime Handler: type) type { if (offset >= input.len) return; try self.next(input[offset]); offset += 1; - } else if (self.parser.state != .ground) { - // If we're not in the ground state then we process until - // we are. This can happen if the last chunk of input put us - // in the middle of a control sequence. - for (input[offset..]) |single| { - try self.nextNonUtf8(single); - offset += 1; - if (self.parser.state == .ground) break; - } } + // If we're not in the ground state then we process until + // we are. This can happen if the last chunk of input put us + // in the middle of a control sequence. 
+ offset += try self.consumeUntilGround(input[offset..]); + if (offset >= input.len) return; + // If we're in the ground state then we can use SIMD to process // input until we see an ESC (0x1B), since all other characters // up to that point are just UTF-8. @@ -112,26 +109,46 @@ pub fn Stream(comptime Handler: type) type { } // Process our control sequence. - for (input[offset..]) |single| { - try self.nextNonUtf8(single); - offset += 1; - if (self.parser.state == .ground) break; - } + self.parser.state = .escape; + offset += 1; + offset += try self.consumeUntilGround(input[offset..]); } } + /// Parses escape sequences until the parser reaches the ground state. + /// Returns the number of bytes consumed from the provided input. + inline fn consumeUntilGround(self: *Self, input: []const u8) !usize { + var offset: usize = 0; + while (self.parser.state != .ground) { + if (offset >= input.len) return input.len; + try self.nextNonUtf8(input[offset]); + offset += 1; + } + return offset; + } + /// Like nextSlice but takes one byte and is necessarilly a scalar /// operation that can't use SIMD. Prefer nextSlice if you can and /// try to get multiple bytes at once. pub fn next(self: *Self, c: u8) !void { // The scalar path can be responsible for decoding UTF-8. if (self.parser.state == .ground and c != 0x1B) { - var consumed = false; - while (!consumed) { - const res = self.utf8decoder.next(c); - consumed = res[1]; - if (res[0]) |codepoint| { - if (codepoint <= 0xF) { + const res = self.utf8decoder.next(c); + const consumed = res[1]; + if (res[0]) |codepoint| { + if (codepoint <= 0xF) { + try self.execute(@intCast(codepoint)); + } else { + try self.print(@intCast(codepoint)); + } + } + if (!consumed) { + const retry = self.utf8decoder.next(c); + // It should be impossible for the decoder + // to not consume the byte twice in a row. 
+ assert(retry[1] == true); + if (retry[0]) |codepoint| { + if (codepoint <= 0xF) { + try self.execute(@intCast(codepoint)); + } else { + try self.print(@intCast(codepoint));