terminal/stream: Added ESC parsing fast tracks

2025-07-16 16:56:09 +03:00 · 2024-02-08 21:49:58 -05:00
parent f8c544c119
commit 68c0813397
3 changed files with 105 additions and 29 deletions
--- a/src/bench/stream.sh
+++ b/src/bench/stream.sh
@ -17,8 +17,8 @@ SIZE="25000000"
 # Generate the benchmark input ahead of time so it's not included in the time.
 ./zig-out/bin/bench-stream --mode=gen-$DATA | head -c $SIZE > /tmp/ghostty_bench_data
-# Uncomment to instead use the contents of `stream.txt` as input.
+# Uncomment to instead use the contents of `stream.txt` as input. (Ignores SIZE)
-# yes $(cat ./stream.txt) | head -c $SIZE > /tmp/ghostty_bench_data
+# echo $(cat ./stream.txt) > /tmp/ghostty_bench_data
 hyperfine \
  --warmup 10 \
--- a/src/terminal/Parser.zig
+++ b/src/terminal/Parser.zig
@ -390,7 +390,7 @@ fn doAction(self: *Parser, action: TransitionAction, c: u8) ?Action {
    };
 }
-fn clear(self: *Parser) void {
+pub fn clear(self: *Parser) void {
    self.intermediates_idx = 0;
    self.params_idx = 0;
    self.params_sep = .none;
--- a/src/terminal/stream.zig
+++ b/src/terminal/stream.zig
@ -72,7 +72,7 @@ pub fn Stream(comptime Handler: type) type {
            // a code sequence, we continue until it's not.
            while (self.utf8decoder.state != 0) {
                if (offset >= input.len) return;
-                try self.next(input[offset]);
+                try self.nextUtf8(input[offset]);
                offset += 1;
            }
@ -80,6 +80,7 @@ pub fn Stream(comptime Handler: type) type {
            // we are. This can happen if the last chunk of input put us
            // in the middle of a control sequence.
            offset += try self.consumeUntilGround(input[offset..]);
            offset += try self.consumeAllEscapes(input[offset..]);
            if (offset >= input.len) return;
            // If we're in the ground state then we can use SIMD to process
@ -94,9 +95,9 @@ pub fn Stream(comptime Handler: type) type {
                        try self.print(@intCast(cp));
                    }
                }
                // Consume the bytes we just processed.
                offset += res.consumed;
                if (offset >= input.len) return;
                // If our offset is NOT an escape then we must have a
@ -104,20 +105,35 @@ pub fn Stream(comptime Handler: type) type {
                // to the scalar parser.
                if (input[offset] != 0x1B) {
                    const rem = input[offset..];
-                    for (rem) |c| try self.next(c);
+                    for (rem) |c| try self.nextUtf8(c);
                    return;
                }
-                // Process our control sequence.
+                // Process control sequences until we run out.
                offset += try self.consumeAllEscapes(input[offset..]);
            }
        }
        /// Parses back-to-back escape sequences until none are left.
        /// Returns the number of bytes consumed from the provided input.
        ///
        /// Expects input to start with 0x1B, use consumeUntilGround first
        /// if the stream may be in the middle of an escape sequence.
        fn consumeAllEscapes(self: *Self, input: []const u8) !usize {
            var offset: usize = 0;
            while (input[offset] == 0x1B) {
                self.parser.state = .escape;
                self.parser.clear();
                offset += 1;
                offset += try self.consumeUntilGround(input[offset..]);
                if (offset >= input.len) return input.len;
            }
            return offset;
        }
        /// Parses escape sequences until the parser reaches the ground state.
        /// Returns the number of bytes consumed from the provided input.
-        inline fn consumeUntilGround(self: *Self, input: []const u8) !usize {
+        fn consumeUntilGround(self: *Self, input: []const u8) !usize {
            var offset: usize = 0;
            while (self.parser.state != .ground) {
                if (offset >= input.len) return input.len;
@ -133,33 +149,42 @@ pub fn Stream(comptime Handler: type) type {
        pub fn next(self: *Self, c: u8) !void {
            // The scalar path can be responsible for decoding UTF-8.
            if (self.parser.state == .ground and c != 0x1B) {
-                const res = self.utf8decoder.next(c);
+                try self.nextUtf8(c);
-                const consumed = res[1];
+                return;
-                if (res[0]) |codepoint| {
+            }
-                    if (codepoint < 0xF) {
+
            try self.nextNonUtf8(c);
        }
        /// Process the next byte and print as necessary.
        ///
        /// This assumes we're in the UTF-8 decoding state. If we may not
        /// be in the UTF-8 decoding state call nextSlice or next.
        fn nextUtf8(self: *Self, c: u8) !void {
            assert(self.parser.state == .ground and c != 0x1B);
            const res = self.utf8decoder.next(c);
            const consumed = res[1];
            if (res[0]) |codepoint| {
                if (codepoint <= 0xF) {
                    try self.execute(@intCast(codepoint));
                } else {
                    try self.print(@intCast(codepoint));
                }
            }
            if (!consumed) {
                const retry = self.utf8decoder.next(c);
                // It should be impossible for the decoder
                // to not consume the byte twice in a row.
                assert(retry[1] == true);
                if (retry[0]) |codepoint| {
                    if (codepoint <= 0xF) {
                        try self.execute(@intCast(codepoint));
                    } else {
                        try self.print(@intCast(codepoint));
                    }
                }
                if (!consumed) {
                    const retry = self.utf8decoder.next(c);
                    // It should be impossible for the decoder
                    // to not consume the byte twice in a row.
                    assert(retry[1] == true);
                    if (retry[0]) |codepoint| {
                        if (codepoint < 0xF) {
                            try self.execute(@intCast(codepoint));
                        } else {
                            try self.print(@intCast(codepoint));
                        }
                    }
                }
                return;
            }
            try self.nextNonUtf8(c);
        }
        /// Process the next character and call any callbacks if necessary.
@ -169,6 +194,57 @@ pub fn Stream(comptime Handler: type) type {
        fn nextNonUtf8(self: *Self, c: u8) !void {
            assert(self.parser.state != .ground or c == 0x1B);
            // Fast path for ESC
            if (self.parser.state == .ground and c == 0x1B) {
                self.parser.state = .escape;
                self.parser.clear();
                return;
            }
            // Fast path for CSI entry.
            if (self.parser.state == .escape and c == '[') {
                self.parser.state = .csi_entry;
                return;
            }
            // Fast path for CSI params.
            if (self.parser.state == .csi_param) csi_param: {
                switch (c) {
                    // A C0 escape (yes, this is valid):
                    0x00...0x0F => try self.execute(c),
                    // We ignore C0 escapes > 0xF since execute
                    // doesn't have processing for them anyway:
                    0x10...0x17, 0x19, 0x1C...0x1F => {},
                    // We don't currently have any handling for
                    // 0x18 or 0x1A, but they should still move
                    // the parser state to ground.
                    0x18, 0x1A => self.parser.state = .ground,
                    // A parameter digit:
                    '0'...'9' => if (self.parser.params_idx < 16) {
                        self.parser.param_acc *= 10;
                        self.parser.param_acc += c - '0';
                        self.parser.param_acc_idx |= 1;
                    },
                    // A parameter separator:
                    ':', ';' => if (self.parser.params_idx < 16) {
                        self.parser.params[self.parser.params_idx] = self.parser.param_acc;
                        self.parser.params_idx += 1;
                        self.parser.param_acc = 0;
                        self.parser.param_acc_idx = 0;
                        // Keep track of separator state.
                        const sep: Parser.ParamSepState = @enumFromInt(c);
                        if (self.parser.params_idx == 1) self.parser.params_sep = sep;
                        if (self.parser.params_sep != sep) self.parser.params_sep = .mixed;
                    },
                    // Explicitly ignored:
                    0x7F => {},
                    // Defer to the state machine to
                    // handle any other characters:
                    else => break :csi_param,
                }
                return;
            }
            const actions = self.parser.next(c);
            for (actions) |action_opt| {
                const action = action_opt orelse continue;