From fdc67a08f4a107fdc63a93021379fb3107762e71 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Tue, 6 Feb 2024 08:45:41 -0800 Subject: [PATCH] terminal: add tests for incomplete utf-8, fix one bug --- src/simd/vt.zig | 17 +++++++++++++++++ src/terminal/stream.zig | 37 ++++++++++++++++++++++++++++++++++--- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/simd/vt.zig b/src/simd/vt.zig index 2e59957c1..dc1c0a511 100644 --- a/src/simd/vt.zig +++ b/src/simd/vt.zig @@ -119,3 +119,20 @@ test "decode invalid UTF-8" { try testing.expectEqual(@as(u32, 0xFFFD), output[5]); } + +// This is testing our current behavior so that we know we have to handle +// this case in terminal/stream.zig. If we change this behavior, we can +// remove the special handling in terminal/stream.zig. +test "decode invalid leading byte isn't consumed or replaced" { + const testing = std.testing; + + var output: [64]u32 = undefined; + + { + const str = "hello\xFF"; + try testing.expectEqual(DecodeResult{ + .consumed = 5, + .decoded = 5, + }, utf8DecodeUntilControlSeq(str, &output)); + } +} diff --git a/src/terminal/stream.zig b/src/terminal/stream.zig index 70a643bf8..5d016bffc 100644 --- a/src/terminal/stream.zig +++ b/src/terminal/stream.zig @@ -71,6 +71,7 @@ pub fn Stream(comptime Handler: type) type { // If the scalar UTF-8 decoder was in the middle of processing // a code sequence, we continue until it's not. while (self.utf8decoder.state != 0) { + if (offset >= input.len) return; try self.next(input[offset]); offset += 1; } else if (self.parser.state != .ground) { @@ -106,9 +107,7 @@ pub fn Stream(comptime Handler: type) type { // to the scalar parser. if (input[offset] != 0x1B) { const rem = input[offset..]; - for (rem) |c| { - try self.next(c); - } + for (rem) |c| try self.next(c); return; } @@ -1456,6 +1455,38 @@ test "stream: print" { try testing.expectEqual(@as(u21, 'x'), s.handler.c.?); } +test "simd: print invalid utf-8" { + const H = struct { + c: ?u21 = 0, + + pub fn print(self: *@This(), c: u21) !void { + self.c = c; + } + }; + + var s: Stream(H) = .{ .handler = .{} }; + try s.nextSlice(&.{0xFF}); + try testing.expectEqual(@as(u21, 0xFFFD), s.handler.c.?); +} + +test "simd: complete incomplete utf-8" { + const H = struct { + c: ?u21 = null, + + pub fn print(self: *@This(), c: u21) !void { + self.c = c; + } + }; + + var s: Stream(H) = .{ .handler = .{} }; + try s.nextSlice(&.{0xE0}); // 3 byte + try testing.expect(s.handler.c == null); + try s.nextSlice(&.{0xA0}); // still incomplete + try testing.expect(s.handler.c == null); + try s.nextSlice(&.{0x80}); + try testing.expectEqual(@as(u21, 0x800), s.handler.c.?); +} + test "stream: cursor right (CUF)" { const H = struct { amount: u16 = 0,