From f165d36dd226c613db99df44c29d026be1997f28 Mon Sep 17 00:00:00 2001
From: Qwerasd <qwerasd205@users.noreply.github.com>
Date: Mon, 5 Feb 2024 22:15:58 -0500
Subject: [PATCH 1/4] Add fast DFA utf-8 decoder implementation

---
 src/terminal/utf8.zig | 132 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)
 create mode 100644 src/terminal/utf8.zig

diff --git a/src/terminal/utf8.zig b/src/terminal/utf8.zig
new file mode 100644
index 000000000..c45c7252c
--- /dev/null
+++ b/src/terminal/utf8.zig
@@ -0,0 +1,132 @@
+//! DFA-based non-allocating error-replacing UTF-8 decoder.
+//!
+//! This implementation is based largely on the excellent work of
+//! Bjoern Hoehrmann, with slight modifications to support error-
+//! replacement.
+//!
+//! For details on Bjoern's DFA-based UTF-8 decoder, see
+//! http://bjoern.hoehrmann.de/utf-8/decoder/dfa (MIT licensed)
+
+const std = @import("std");
+const testing = std.testing;
+
+const log = std.log.scoped(.utf8decoder);
+
+//zig fmt: off
+const char_classes = [_]u4{
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+};
+
+const transitions = [_]u8 {
+   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+  12,36,12,12,12,12,12,12,12,12,12,12,
+};
+//zig fmt: on
+
+// This is where we accumulate our current codepoint.
+var accumulator: u21 = 0;
+// The internal state of the DFA.
+const ACCEPT_STATE = 0;
+const REJECT_STATE = 12;
+var state: u8 = ACCEPT_STATE;
+
+/// Takes the next byte in the utf-8 sequence and emits a tuple of
+/// - The codepoint that was generated, if there is one.
+/// - A boolean that indicates whether the provided byte was consumed.
+///
+/// The only case where the byte is not consumed is if an ill-formed
+/// sequence is reached, in which case a replacement character will be
+/// emitted and the byte will not be consumed.
+///
+/// If the byte is not consumed, the caller is responsible for calling
+/// again with the same byte before continuing.
+pub inline fn next(byte: u8) struct { ?u21, bool } {
+    const char_class = char_classes[byte];
+
+    const initial_state = state;
+
+    if (state != ACCEPT_STATE) {
+        accumulator <<= 6;
+        accumulator |= (byte & 0x3F);
+    } else {
+        accumulator = (@as(u21, 0xFF) >> char_class) & (byte);
+    }
+
+    state = transitions[state + char_class];
+
+    if (state == ACCEPT_STATE) {
+        defer { accumulator = 0; }
+        // Emit the fully decoded codepoint.
+        return .{ accumulator, true };
+    } else if (state == REJECT_STATE) {
+        accumulator = 0;
+        state = ACCEPT_STATE;
+        // Emit a replacement character. If we rejected the first byte
+        // in a sequence, then it was consumed, otherwise it was not.
+        return .{ 0xFFFD, initial_state == ACCEPT_STATE };
+    } else {
+        // Emit nothing, we're in the middle of a sequence.
+        return .{ null, true };
+    }
+}
+
+test "ASCII" {
+    var out = std.mem.zeroes([13]u8);
+    for ("Hello, World!", 0..) |byte, i| {
+        const res = next(byte);
+        try testing.expect(res[1]);
+        if (res[0]) |codepoint| {
+            out[i] = @intCast(codepoint);
+        }
+    }
+    try testing.expect(std.mem.eql(u8, &out, "Hello, World!"));
+}
+
+test "Well formed utf-8" {
+    var out = std.mem.zeroes([4]u21);
+    var i: usize = 0;
+    // 4 bytes, 3 bytes, 2 bytes, 1 byte
+    for ("😄✤ÁA") |byte| {
+        var consumed = false;
+        while (!consumed) {
+            const res = next(byte);
+            consumed = res[1];
+            // There are no errors in this sequence, so
+            // every byte should be consumed first try.
+            try testing.expect(consumed == true);
+            if (res[0]) |codepoint| {
+                out[i] = codepoint;
+                i += 1;
+            }
+        }
+    }
+    try testing.expect(std.mem.eql(u21, &out, &[_]u21{ 0x1F604, 0x2724, 0xC1, 0x41 }));
+}
+
+test "Partially invalid utf-8" {
+    var out = std.mem.zeroes([5]u21);
+    var i: usize = 0;
+    // Illegally terminated sequence, valid sequence, illegal surrogate pair.
+    for ("\xF0\x9F😄\xED\xA0\x80") |byte| {
+        var consumed = false;
+        while (!consumed) {
+            const res = next(byte);
+            consumed = res[1];
+            if (res[0]) |codepoint| {
+                out[i] = codepoint;
+                i += 1;
+            }
+        }
+    }
+    try testing.expect(std.mem.eql(u21, &out, &[_]u21{ 0xFFFD, 0x1F604, 0xFFFD, 0xFFFD, 0xFFFD }));
+}

From 846b3421e607aaac920101be132d8f54760da948 Mon Sep 17 00:00:00 2001
From: Qwerasd <qwerasd205@users.noreply.github.com>
Date: Mon, 5 Feb 2024 23:20:47 -0500
Subject: [PATCH 2/4] terminal: replace utf8 decoding with custom decoder in
 stream.zig

(Completely removed utf8 handling from Parser.zig)
---
 src/terminal/Parser.zig                    | 168 ++++-----------------
 src/terminal/{utf8.zig => UTF8Decoder.zig} |  50 +++---
 src/terminal/parse_table.zig               |  11 --
 src/terminal/stream.zig                    |  23 +++
 4 files changed, 82 insertions(+), 170 deletions(-)
 rename src/terminal/{utf8.zig => UTF8Decoder.zig} (81%)

diff --git a/src/terminal/Parser.zig b/src/terminal/Parser.zig
index b242ba6fd..41cca7191 100644
--- a/src/terminal/Parser.zig
+++ b/src/terminal/Parser.zig
@@ -28,9 +28,6 @@ pub const State = enum {
     dcs_ignore,
     osc_string,
     sos_pm_apc_string,
-
-    // Custom states added that aren't present on vt100.net
-    utf8,
 };
 
 /// Transition action is an action that can be taken during a state
@@ -230,11 +227,6 @@ pub fn deinit(self: *Parser) void {
 /// Up to 3 actions may need to be executed -- in order -- representing
 /// the state exit, transition, and entry actions.
 pub fn next(self: *Parser, c: u8) [3]?Action {
-    // If we're processing UTF-8, we handle this manually.
-    if (self.state == .utf8) {
-        return .{ self.next_utf8(c), null, null };
-    }
-
     const effect = table[c][@intFromEnum(self.state)];
 
     // log.info("next: {x}", .{c});
@@ -282,57 +274,11 @@ pub fn next(self: *Parser, c: u8) [3]?Action {
                 },
             },
             .sos_pm_apc_string => Action{ .apc_start = {} },
-            .utf8 => utf8: {
-                // When entering the UTF8 state, we need to grab the
-                // last intermediate as our first byte and reset
-                // the intermediates, because prior actions (i.e. CSI)
-                // can pollute the intermediates and we use it to build
-                // our UTF-8 string.
-                if (self.intermediates_idx > 1) {
-                    const last = self.intermediates_idx - 1;
-                    self.intermediates[0] = self.intermediates[last];
-                    self.clear();
-                    self.intermediates_idx = 1;
-                }
-                break :utf8 null;
-            },
             else => null,
         },
     };
 }
 
-/// Processes the next byte in a UTF8 sequence. It is assumed that
-/// intermediates[0] already has the first byte of a UTF8 sequence
-/// (triggered via the state machine).
-fn next_utf8(self: *Parser, c: u8) ?Action {
-    // Collect the byte into the intermediates array
-    self.collect(c);
-
-    // Error is unreachable because the first byte comes from the state machine.
-    // If we get an error here, it is a bug in the state machine that we want
-    // to chase down.
-    const len = std.unicode.utf8ByteSequenceLength(self.intermediates[0]) catch unreachable;
-
-    // We need to collect more
-    if (self.intermediates_idx < len) return null;
-
-    // No matter what happens, we go back to ground since we know we have
-    // enough bytes for the UTF8 sequence.
-    defer {
-        self.state = .ground;
-        self.intermediates_idx = 0;
-    }
-
-    // We have enough bytes, decode!
-    const bytes = self.intermediates[0..len];
-    const rune = std.unicode.utf8Decode(bytes) catch rune: {
-        log.warn("invalid UTF-8 sequence: {any}", .{bytes});
-        break :rune 0xFFFD; // �
-    };
-
-    return Action{ .print = rune };
-}
-
 fn collect(self: *Parser, c: u8) void {
     if (self.intermediates_idx >= MAX_INTERMEDIATE) {
         log.warn("invalid intermediates count", .{});
@@ -828,91 +774,35 @@ test "osc: 112 incomplete sequence" {
     }
 }
 
-test "print: utf8 2 byte" {
-    var p = init();
-    var a: [3]?Action = undefined;
-    for ("£") |c| a = p.next(c);
-
-    try testing.expect(p.state == .ground);
-    try testing.expect(a[0].? == .print);
-    try testing.expect(a[1] == null);
-    try testing.expect(a[2] == null);
-
-    const rune = a[0].?.print;
-    try testing.expectEqual(try std.unicode.utf8Decode("£"), rune);
-}
-
-test "print: utf8 3 byte" {
-    var p = init();
-    var a: [3]?Action = undefined;
-    for ("€") |c| a = p.next(c);
-
-    try testing.expect(p.state == .ground);
-    try testing.expect(a[0].? == .print);
-    try testing.expect(a[1] == null);
-    try testing.expect(a[2] == null);
-
-    const rune = a[0].?.print;
-    try testing.expectEqual(try std.unicode.utf8Decode("€"), rune);
-}
-
-test "print: utf8 4 byte" {
-    var p = init();
-    var a: [3]?Action = undefined;
-    for ("𐍈") |c| a = p.next(c);
-
-    try testing.expect(p.state == .ground);
-    try testing.expect(a[0].? == .print);
-    try testing.expect(a[1] == null);
-    try testing.expect(a[2] == null);
-
-    const rune = a[0].?.print;
-    try testing.expectEqual(try std.unicode.utf8Decode("𐍈"), rune);
-}
-
-test "print: utf8 invalid" {
-    var p = init();
-    var a: [3]?Action = undefined;
-    for ("\xC3\x28") |c| a = p.next(c);
-
-    try testing.expect(p.state == .ground);
-    try testing.expect(a[0].? == .print);
-    try testing.expect(a[1] == null);
-    try testing.expect(a[2] == null);
-
-    const rune = a[0].?.print;
-    try testing.expectEqual(try std.unicode.utf8Decode("�"), rune);
-}
-
-test "csi followed by utf8" {
-    var p = init();
-    const prefix = &[_]u8{
-        // CSI sequence
-        0x1b, 0x5b, 0x3f, 0x32, 0x30, 0x30, 0x34, 0x64, '\r',
-
-        // UTF8 prefix (not complete)
-        0xe2,
-    };
-    for (prefix) |char| {
-        _ = p.next(char);
-    }
-
-    {
-        const a = p.next(0x94);
-        try testing.expect(p.state == .utf8);
-        try testing.expect(a[0] == null);
-        try testing.expect(a[1] == null);
-        try testing.expect(a[2] == null);
-    }
-
-    {
-        const a = p.next(0x94);
-        try testing.expect(p.state == .ground);
-        try testing.expect(a[0].? == .print);
-        try testing.expect(a[1] == null);
-        try testing.expect(a[2] == null);
-    }
-}
+// test "csi followed by utf8" {
+//     var p = init();
+//     const prefix = &[_]u8{
+//         // CSI sequence
+//         0x1b, 0x5b, 0x3f, 0x32, 0x30, 0x30, 0x34, 0x64, '\r',
+//
+//         // UTF8 prefix (not complete)
+//         0xe2,
+//     };
+//     for (prefix) |char| {
+//         _ = p.next(char);
+//     }
+//
+//     {
+//         const a = p.next(0x94);
+//         try testing.expect(p.state == .utf8);
+//         try testing.expect(a[0] == null);
+//         try testing.expect(a[1] == null);
+//         try testing.expect(a[2] == null);
+//     }
+//
+//     {
+//         const a = p.next(0x94);
+//         try testing.expect(p.state == .ground);
+//         try testing.expect(a[0].? == .print);
+//         try testing.expect(a[1] == null);
+//         try testing.expect(a[2] == null);
+//     }
+// }
 
 test "csi: too many params" {
     var p = init();
diff --git a/src/terminal/utf8.zig b/src/terminal/UTF8Decoder.zig
similarity index 81%
rename from src/terminal/utf8.zig
rename to src/terminal/UTF8Decoder.zig
index c45c7252c..c020549c7 100644
--- a/src/terminal/utf8.zig
+++ b/src/terminal/UTF8Decoder.zig
@@ -6,6 +6,7 @@
 //!
 //! For details on Bjoern's DFA-based UTF-8 decoder, see
 //! http://bjoern.hoehrmann.de/utf-8/decoder/dfa (MIT licensed)
+const UTF8Decoder = @This();
 
 const std = @import("std");
 const testing = std.testing;
@@ -33,12 +34,18 @@ const transitions = [_]u8 {
 };
 //zig fmt: on
 
-// This is where we accumulate our current codepoint.
-var accumulator: u21 = 0;
-// The internal state of the DFA.
+// DFA states
 const ACCEPT_STATE = 0;
 const REJECT_STATE = 12;
-var state: u8 = ACCEPT_STATE;
+
+// This is where we accumulate our current codepoint.
+accumulator: u21 = 0,
+// The internal state of the DFA.
+state: u8 = ACCEPT_STATE,
+
+pub fn init() UTF8Decoder {
+    return .{};
+}
 
 /// Takes the next byte in the utf-8 sequence and emits a tuple of
 /// - The codepoint that was generated, if there is one.
@@ -50,27 +57,27 @@ var state: u8 = ACCEPT_STATE;
 ///
 /// If the byte is not consumed, the caller is responsible for calling
 /// again with the same byte before continuing.
-pub inline fn next(byte: u8) struct { ?u21, bool } {
+pub inline fn next(self: *UTF8Decoder, byte: u8) struct { ?u21, bool } {
     const char_class = char_classes[byte];
 
-    const initial_state = state;
+    const initial_state = self.state;
 
-    if (state != ACCEPT_STATE) {
-        accumulator <<= 6;
-        accumulator |= (byte & 0x3F);
+    if (self.state != ACCEPT_STATE) {
+        self.accumulator <<= 6;
+        self.accumulator |= (byte & 0x3F);
     } else {
-        accumulator = (@as(u21, 0xFF) >> char_class) & (byte);
+        self.accumulator = (@as(u21, 0xFF) >> char_class) & (byte);
     }
 
-    state = transitions[state + char_class];
+    self.state = transitions[self.state + char_class];
 
-    if (state == ACCEPT_STATE) {
-        defer { accumulator = 0; }
+    if (self.state == ACCEPT_STATE) {
+        defer { self.accumulator = 0; }
         // Emit the fully decoded codepoint.
-        return .{ accumulator, true };
-    } else if (state == REJECT_STATE) {
-        accumulator = 0;
-        state = ACCEPT_STATE;
+        return .{ self.accumulator, true };
+    } else if (self.state == REJECT_STATE) {
+        self.accumulator = 0;
+        self.state = ACCEPT_STATE;
         // Emit a replacement character. If we rejected the first byte
         // in a sequence, then it was consumed, otherwise it was not.
         return .{ 0xFFFD, initial_state == ACCEPT_STATE };
@@ -81,9 +88,10 @@ pub inline fn next(byte: u8) struct { ?u21, bool } {
 }
 
 test "ASCII" {
+    var d = init();
     var out = std.mem.zeroes([13]u8);
     for ("Hello, World!", 0..) |byte, i| {
-        const res = next(byte);
+        const res = d.next(byte);
         try testing.expect(res[1]);
         if (res[0]) |codepoint| {
             out[i] = @intCast(codepoint);
@@ -93,13 +101,14 @@ test "ASCII" {
 }
 
 test "Well formed utf-8" {
+    var d = init();
     var out = std.mem.zeroes([4]u21);
     var i: usize = 0;
     // 4 bytes, 3 bytes, 2 bytes, 1 byte
     for ("😄✤ÁA") |byte| {
         var consumed = false;
         while (!consumed) {
-            const res = next(byte);
+            const res = d.next(byte);
             consumed = res[1];
             // There are no errors in this sequence, so
             // every byte should be consumed first try.
@@ -114,13 +123,14 @@ test "Well formed utf-8" {
 }
 
 test "Partially invalid utf-8" {
+    var d = init();
     var out = std.mem.zeroes([5]u21);
     var i: usize = 0;
     // Illegally terminated sequence, valid sequence, illegal surrogate pair.
     for ("\xF0\x9F😄\xED\xA0\x80") |byte| {
         var consumed = false;
         while (!consumed) {
-            const res = next(byte);
+            const res = d.next(byte);
             consumed = res[1];
             if (res[0]) |codepoint| {
                 out[i] = codepoint;
diff --git a/src/terminal/parse_table.zig b/src/terminal/parse_table.zig
index e7542b062..66c443783 100644
--- a/src/terminal/parse_table.zig
+++ b/src/terminal/parse_table.zig
@@ -5,10 +5,6 @@
 //! https://vt100.net/emu/dec_ansi_parser
 //! But has some modifications:
 //!
-//!   * utf8 state introduced to detect UTF8-encoded sequences. The
-//!     actual handling back OUT of the utf8 state is done manually in the
-//!     parser.
-//!
 //!   * csi_param accepts the colon character (':') since the SGR command
 //!     accepts colon as a valid parameter value.
 //!
@@ -92,18 +88,11 @@ fn genTable() Table {
 
     // ground
     {
-        const source = State.ground;
-
         // events
         single(&result, 0x19, .ground, .ground, .execute);
         range(&result, 0, 0x17, .ground, .ground, .execute);
         range(&result, 0x1C, 0x1F, .ground, .ground, .execute);
         range(&result, 0x20, 0x7F, .ground, .ground, .print);
-
-        // => utf8
-        range(&result, 0xC2, 0xDF, source, .utf8, .collect);
-        range(&result, 0xE0, 0xEF, source, .utf8, .collect);
-        range(&result, 0xF0, 0xF4, source, .utf8, .collect);
     }
 
     // escape_intermediate
diff --git a/src/terminal/stream.zig b/src/terminal/stream.zig
index f33f52942..ad884b201 100644
--- a/src/terminal/stream.zig
+++ b/src/terminal/stream.zig
@@ -9,6 +9,7 @@ const kitty = @import("kitty.zig");
 const modes = @import("modes.zig");
 const osc = @import("osc.zig");
 const sgr = @import("sgr.zig");
+const UTF8Decoder = @import("UTF8Decoder.zig");
 const MouseShape = @import("mouse_shape.zig").MouseShape;
 
 const log = std.log.scoped(.stream);
@@ -37,6 +38,7 @@ pub fn Stream(comptime Handler: type) type {
 
         handler: Handler,
         parser: Parser = .{},
+        utf8decoder: UTF8Decoder = .{},
 
         pub fn deinit(self: *Self) void {
             self.parser.deinit();
@@ -50,6 +52,21 @@ pub fn Stream(comptime Handler: type) type {
         /// Process the next character and call any callbacks if necessary.
         pub fn next(self: *Self, c: u8) !void {
             // log.debug("char: {c}", .{c});
+            if (self.parser.state == .ground and c != 0x1B) { // ground fast path; ESC goes to the parser
+                var consumed = false;
+                while (!consumed) { // a rejected sequence hands the byte back: offer it again
+                    const res = self.utf8decoder.next(c);
+                    consumed = res[1];
+                    if (res[0]) |codepoint| {
+                        if (codepoint < 0x20) { // C0 control: the ground table executes these, never prints
+                            try self.execute(@intCast(codepoint));
+                        } else {
+                            try self.print(codepoint);
+                        }
+                    }
+                }
+                return;
+            }
             const actions = self.parser.next(c);
             for (actions) |action_opt| {
                 const action = action_opt orelse continue;
@@ -101,6 +118,12 @@ pub fn Stream(comptime Handler: type) type {
             }
         }
 
+        pub fn print(self: *Self, c: u21) !void { // forward one decoded codepoint to the handler
+            if (@hasDecl(T, "print")) { // no-op when T declares no `print`
+                try self.handler.print(c);
+            }
+        }
+
         pub fn execute(self: *Self, c: u8) !void {
             switch (@as(ansi.C0, @enumFromInt(c))) {
                 // We ignore SOH/STX: https://github.com/microsoft/terminal/issues/10786

From cd570890f640d57745bb1723f1738fad1d468d75 Mon Sep 17 00:00:00 2001
From: Qwerasd <qwerasd205@users.noreply.github.com>
Date: Mon, 5 Feb 2024 23:32:47 -0500
Subject: [PATCH 3/4] remove commented out test

---
 src/terminal/Parser.zig | 30 ------------------------------
 1 file changed, 30 deletions(-)

diff --git a/src/terminal/Parser.zig b/src/terminal/Parser.zig
index 41cca7191..5746be065 100644
--- a/src/terminal/Parser.zig
+++ b/src/terminal/Parser.zig
@@ -774,36 +774,6 @@ test "osc: 112 incomplete sequence" {
     }
 }
 
-// test "csi followed by utf8" {
-//     var p = init();
-//     const prefix = &[_]u8{
-//         // CSI sequence
-//         0x1b, 0x5b, 0x3f, 0x32, 0x30, 0x30, 0x34, 0x64, '\r',
-//
-//         // UTF8 prefix (not complete)
-//         0xe2,
-//     };
-//     for (prefix) |char| {
-//         _ = p.next(char);
-//     }
-//
-//     {
-//         const a = p.next(0x94);
-//         try testing.expect(p.state == .utf8);
-//         try testing.expect(a[0] == null);
-//         try testing.expect(a[1] == null);
-//         try testing.expect(a[2] == null);
-//     }
-//
-//     {
-//         const a = p.next(0x94);
-//         try testing.expect(p.state == .ground);
-//         try testing.expect(a[0].? == .print);
-//         try testing.expect(a[1] == null);
-//         try testing.expect(a[2] == null);
-//     }
-// }
-
 test "csi: too many params" {
     var p = init();
     _ = p.next(0x1B);

From 0c2a87e5fb8b2d2e0e581b7f8b80a7ee806cfb41 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Mon, 5 Feb 2024 21:20:20 -0800
Subject: [PATCH 4/4] terminal: small stylistic tweaks

---
 src/terminal/UTF8Decoder.zig | 26 +++++++++++++-------------
 src/terminal/stream.zig      |  1 +
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/terminal/UTF8Decoder.zig b/src/terminal/UTF8Decoder.zig
index c020549c7..6bb0d9815 100644
--- a/src/terminal/UTF8Decoder.zig
+++ b/src/terminal/UTF8Decoder.zig
@@ -13,7 +13,7 @@ const testing = std.testing;
 
 const log = std.log.scoped(.utf8decoder);
 
-//zig fmt: off
+// zig fmt: off
 const char_classes = [_]u4{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -32,7 +32,7 @@ const transitions = [_]u8 {
   12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
   12,36,12,12,12,12,12,12,12,12,12,12,
 };
-//zig fmt: on
+// zig fmt: on
 
 // DFA states
 const ACCEPT_STATE = 0;
@@ -43,10 +43,6 @@ accumulator: u21 = 0,
 // The internal state of the DFA.
 state: u8 = ACCEPT_STATE,
 
-pub fn init() UTF8Decoder {
-    return .{};
-}
-
 /// Takes the next byte in the utf-8 sequence and emits a tuple of
 /// - The codepoint that was generated, if there is one.
 /// - A boolean that indicates whether the provided byte was consumed.
@@ -72,7 +68,8 @@ pub inline fn next(self: *UTF8Decoder, byte: u8) struct { ?u21, bool } {
     self.state = transitions[self.state + char_class];
 
     if (self.state == ACCEPT_STATE) {
-        defer { self.accumulator = 0; }
+        defer self.accumulator = 0;
+
         // Emit the fully decoded codepoint.
         return .{ self.accumulator, true };
     } else if (self.state == REJECT_STATE) {
@@ -88,8 +85,8 @@ pub inline fn next(self: *UTF8Decoder, byte: u8) struct { ?u21, bool } {
 }
 
 test "ASCII" {
-    var d = init();
-    var out = std.mem.zeroes([13]u8);
+    var d: UTF8Decoder = .{};
+    var out: [13]u8 = undefined;
     for ("Hello, World!", 0..) |byte, i| {
         const res = d.next(byte);
         try testing.expect(res[1]);
@@ -97,12 +94,13 @@ test "ASCII" {
             out[i] = @intCast(codepoint);
         }
     }
+
     try testing.expect(std.mem.eql(u8, &out, "Hello, World!"));
 }
 
 test "Well formed utf-8" {
-    var d = init();
-    var out = std.mem.zeroes([4]u21);
+    var d: UTF8Decoder = .{};
+    var out: [4]u21 = undefined;
     var i: usize = 0;
     // 4 bytes, 3 bytes, 2 bytes, 1 byte
     for ("😄✤ÁA") |byte| {
@@ -119,12 +117,13 @@ test "Well formed utf-8" {
             }
         }
     }
+
     try testing.expect(std.mem.eql(u21, &out, &[_]u21{ 0x1F604, 0x2724, 0xC1, 0x41 }));
 }
 
 test "Partially invalid utf-8" {
-    var d = init();
-    var out = std.mem.zeroes([5]u21);
+    var d: UTF8Decoder = .{};
+    var out: [5]u21 = undefined;
     var i: usize = 0;
     // Illegally terminated sequence, valid sequence, illegal surrogate pair.
     for ("\xF0\x9F😄\xED\xA0\x80") |byte| {
@@ -138,5 +137,6 @@ test "Partially invalid utf-8" {
             }
         }
     }
+
     try testing.expect(std.mem.eql(u21, &out, &[_]u21{ 0xFFFD, 0x1F604, 0xFFFD, 0xFFFD, 0xFFFD }));
 }
diff --git a/src/terminal/stream.zig b/src/terminal/stream.zig
index ad884b201..fe4c5c53a 100644
--- a/src/terminal/stream.zig
+++ b/src/terminal/stream.zig
@@ -67,6 +67,7 @@ pub fn Stream(comptime Handler: type) type {
                 }
                 return;
             }
+
             const actions = self.parser.next(c);
             for (actions) |action_opt| {
                 const action = action_opt orelse continue;