Invalid UTF-8 turns into � (U+FFFD REPLACEMENT CHARACTER)

This commit is contained in:
Mitchell Hashimoto
2022-05-16 09:34:34 -07:00
parent ead4cec159
commit 421a1c3039

View File

@ -225,9 +225,9 @@ fn next_utf8(self: *Parser, c: u8) ?Action {
// We have enough bytes, decode! // We have enough bytes, decode!
const bytes = self.intermediates[0..len]; const bytes = self.intermediates[0..len];
const rune = std.unicode.utf8Decode(bytes) catch { const rune = std.unicode.utf8Decode(bytes) catch rune: {
log.warn("invalid UTF-8 sequence: {any}", .{bytes}); log.warn("invalid UTF-8 sequence: {any}", .{bytes});
return null; break :rune 0xFFFD; // � (U+FFFD)
}; };
return Action{ .print = rune }; return Action{ .print = rune };
@ -534,7 +534,10 @@ test "print: utf8 invalid" {
for ("\xC3\x28") |c| a = p.next(c); for ("\xC3\x28") |c| a = p.next(c);
try testing.expect(p.state == .ground); try testing.expect(p.state == .ground);
try testing.expect(a[0] == null); try testing.expect(a[0].? == .print);
try testing.expect(a[1] == null); try testing.expect(a[1] == null);
try testing.expect(a[2] == null); try testing.expect(a[2] == null);
const rune = a[0].?.print;
try testing.expectEqual(try std.unicode.utf8Decode("�"), rune);
} }