diff --git a/src/terminal/stream.zig b/src/terminal/stream.zig index bb1c7b74e..51ff1ee73 100644 --- a/src/terminal/stream.zig +++ b/src/terminal/stream.zig @@ -73,9 +73,11 @@ pub fn Stream(comptime Handler: type) type { var offset: usize = 0; - // If we have a partial UTF-8 sequence then we process manually. - if (self.partial_utf8_len > 0) { - offset += try self.completePartialUtf8(input); + // If the scalar UTF-8 decoder was in the middle of processing + // a code sequence, we continue until it's not. + while (self.utf8decoder.state != 0) { + try self.next(input[offset]); + offset += 1; } else if (self.parser.state != .ground) { // If we're not in the ground state then we process until // we are. This can happen if the last chunk of input put us @@ -105,13 +107,14 @@ pub fn Stream(comptime Handler: type) type { if (offset >= input.len) return; // If our offset is NOT an escape then we must have a - // partial UTF-8 sequence. In that case, we save it and - // return. + // partial UTF-8 sequence. In that case, we pass it off + // to the scalar parser. if (input[offset] != 0x1B) { const rem = input[offset..]; assert(rem.len <= self.partial_utf8.len); - @memcpy(self.partial_utf8[0..rem.len], rem); - self.partial_utf8_len = @intCast(rem.len); + for (rem) |c| { + try self.next(c); + } return; } @@ -124,53 +127,6 @@ pub fn Stream(comptime Handler: type) type { } } - // Complete a partial UTF-8 sequence from a prior input chunk. - // This processes the UTF-8 sequence and then returns the number - // of bytes consumed from the input. - fn completePartialUtf8(self: *Self, input: []const u8) !usize { - assert(self.partial_utf8_len > 0); - assert(self.parser.state == .ground); - - // This cannot fail because the nature of partial utf8 - // existing means we successfully processed it last time. - const len = std.unicode.utf8ByteSequenceLength(self.partial_utf8[0]) catch - unreachable; - - // This is the length we need in the input in addition to - // our partial_utf8 to complete the sequence. - const input_len = len - self.partial_utf8_len; - - // If we STILL don't have enough bytes, then we copy and continue. - // This is a really bizarre and stupid program thats running to - // send us incomplete UTF-8 sequences over multiple write() calls. - if (input_len > input.len) { - @memcpy( - self.partial_utf8[self.partial_utf8_len .. self.partial_utf8_len + input.len], - input, - ); - self.partial_utf8_len += @intCast(input.len); - return input.len; - } - - // Process the complete UTF-8 sequence. - @memcpy( - self.partial_utf8[self.partial_utf8_len .. self.partial_utf8_len + input_len], - input[0..input_len], - ); - const cp = cp: { - if (std.unicode.utf8Decode(self.partial_utf8[0..len])) |cp| { - break :cp cp; - } else |err| { - log.warn("invalid UTF-8, ignoring err={}", .{err}); - break :cp 0xFFFD; // replacement character - } - }; - - self.partial_utf8_len = 0; - try self.print(cp); - return input_len; - } - /// Like nextSlice but takes one byte and is necessarilly a scalar /// operation that can't use SIMD. Prefer nextSlice if you can and /// try to get multiple bytes at once.