From f8c544c11978dbe34b70518f7adc66fc40f94821 Mon Sep 17 00:00:00 2001
From: Qwerasd <qwerasd205@users.noreply.github.com>
Date: Wed, 7 Feb 2024 00:12:37 -0500
Subject: [PATCH] terminal: stream/parser changes

---
 src/terminal/Parser.zig |  4 +--
 src/terminal/stream.zig | 57 ++++++++++++++++++++++++++---------------
 2 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/src/terminal/Parser.zig b/src/terminal/Parser.zig
index 5746be065..bfb25f2fa 100644
--- a/src/terminal/Parser.zig
+++ b/src/terminal/Parser.zig
@@ -185,7 +185,7 @@ pub const Action = union(enum) {
 
 /// Keeps track of the parameter sep used for CSI params. We allow colons
 /// to be used ONLY by the 'm' CSI action.
-const ParamSepState = enum(u8) {
+pub const ParamSepState = enum(u8) {
     none = 0,
     semicolon = ';',
     colon = ':',
@@ -279,7 +279,7 @@ pub fn next(self: *Parser, c: u8) [3]?Action {
     };
 }
 
-fn collect(self: *Parser, c: u8) void {
+pub fn collect(self: *Parser, c: u8) void {
     if (self.intermediates_idx >= MAX_INTERMEDIATE) {
         log.warn("invalid intermediates count", .{});
         return;
diff --git a/src/terminal/stream.zig b/src/terminal/stream.zig
index 982a8ab20..0362780c2 100644
--- a/src/terminal/stream.zig
+++ b/src/terminal/stream.zig
@@ -74,17 +74,14 @@ pub fn Stream(comptime Handler: type) type {
                 if (offset >= input.len) return;
                 try self.next(input[offset]);
                 offset += 1;
-            } else if (self.parser.state != .ground) {
-                // If we're not in the ground state then we process until
-                // we are. This can happen if the last chunk of input put us
-                // in the middle of a control sequence.
-                for (input[offset..]) |single| {
-                    try self.nextNonUtf8(single);
-                    offset += 1;
-                    if (self.parser.state == .ground) break;
-                }
             }
 
+            // If we're not in the ground state then we process until
+            // we are. This can happen if the last chunk of input put us
+            // in the middle of a control sequence.
+            offset += try self.consumeUntilGround(input[offset..]);
+            if (offset >= input.len) return;
+
             // If we're in the ground state then we can use SIMD to process
             // input until we see an ESC (0x1B), since all other characters
             // up to that point are just UTF-8.
@@ -112,26 +109,46 @@ pub fn Stream(comptime Handler: type) type {
                 }
 
                 // Process our control sequence.
-                for (input[offset..]) |single| {
-                    try self.nextNonUtf8(single);
-                    offset += 1;
-                    if (self.parser.state == .ground) break;
-                }
+                self.parser.state = .escape;
+                offset += 1;
+                offset += try self.consumeUntilGround(input[offset..]);
             }
         }
 
+        /// Feeds `input` through `nextNonUtf8` one byte at a time, stopping as
+        /// soon as the parser is in the ground state. Returns bytes consumed.
+        inline fn consumeUntilGround(self: *Self, input: []const u8) !usize {
+            var consumed: usize = 0;
+            for (input) |byte| {
+                if (self.parser.state == .ground) break;
+                try self.nextNonUtf8(byte);
+                consumed += 1;
+            }
+            return consumed;
+        }
+
         /// Like nextSlice but takes one byte and is necessarilly a scalar
         /// operation that can't use SIMD. Prefer nextSlice if you can and
         /// try to get multiple bytes at once.
         pub fn next(self: *Self, c: u8) !void {
             // The scalar path can be responsible for decoding UTF-8.
             if (self.parser.state == .ground and c != 0x1B) {
-                var consumed = false;
-                while (!consumed) {
-                    const res = self.utf8decoder.next(c);
-                    consumed = res[1];
-                    if (res[0]) |codepoint| {
-                        if (codepoint <= 0xF) {
+                const res = self.utf8decoder.next(c);
+                const consumed = res[1];
+                if (res[0]) |codepoint| {
+                    if (codepoint <= 0xF) {
+                        try self.execute(@intCast(codepoint));
+                    } else {
+                        try self.print(@intCast(codepoint));
+                    }
+                }
+                if (!consumed) {
+                    const retry = self.utf8decoder.next(c);
+                    // It should be impossible for the decoder
+                    // to not consume the byte twice in a row.
+                    assert(retry[1] == true);
+                    if (retry[0]) |codepoint| {
+                        if (codepoint <= 0xF) {
                             try self.execute(@intCast(codepoint));
                         } else {
                             try self.print(@intCast(codepoint));