mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-15 16:26:08 +03:00
Merge pull request #1475 from qwerasd205/fix-invalid-boundary-crash
Fix boundary utf-8 decoding crash
This commit is contained in:
@ -119,3 +119,20 @@ test "decode invalid UTF-8" {
|
|||||||
|
|
||||||
try testing.expectEqual(@as(u32, 0xFFFD), output[5]);
|
try testing.expectEqual(@as(u32, 0xFFFD), output[5]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is testing our current behavior so that we know we have to handle
|
||||||
|
// this case in terminal/stream.zig. If we change this behavior, we can
|
||||||
|
// remove the special handling in terminal/stream.zig.
|
||||||
|
test "decode invalid leading byte isn't consumed or replaced" {
|
||||||
|
const testing = std.testing;
|
||||||
|
|
||||||
|
var output: [64]u32 = undefined;
|
||||||
|
|
||||||
|
{
|
||||||
|
const str = "hello\xFF";
|
||||||
|
try testing.expectEqual(DecodeResult{
|
||||||
|
.consumed = 5,
|
||||||
|
.decoded = 5,
|
||||||
|
}, utf8DecodeUntilControlSeq(str, &output));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -42,11 +42,6 @@ pub fn Stream(comptime Handler: type) type {
|
|||||||
parser: Parser = .{},
|
parser: Parser = .{},
|
||||||
utf8decoder: UTF8Decoder = .{},
|
utf8decoder: UTF8Decoder = .{},
|
||||||
|
|
||||||
/// Keep track of any partial UTF-8 sequences that we need to
|
|
||||||
/// process in the next call to nextAssumeUtf8.
|
|
||||||
partial_utf8: [4]u8 = undefined,
|
|
||||||
partial_utf8_len: u3 = 0,
|
|
||||||
|
|
||||||
pub fn deinit(self: *Self) void {
|
pub fn deinit(self: *Self) void {
|
||||||
self.parser.deinit();
|
self.parser.deinit();
|
||||||
}
|
}
|
||||||
@ -73,9 +68,12 @@ pub fn Stream(comptime Handler: type) type {
|
|||||||
|
|
||||||
var offset: usize = 0;
|
var offset: usize = 0;
|
||||||
|
|
||||||
// If we have a partial UTF-8 sequence then we process manually.
|
// If the scalar UTF-8 decoder was in the middle of processing
|
||||||
if (self.partial_utf8_len > 0) {
|
// a code sequence, we continue until it's not.
|
||||||
offset += try self.completePartialUtf8(input);
|
while (self.utf8decoder.state != 0) {
|
||||||
|
if (offset >= input.len) return;
|
||||||
|
try self.next(input[offset]);
|
||||||
|
offset += 1;
|
||||||
} else if (self.parser.state != .ground) {
|
} else if (self.parser.state != .ground) {
|
||||||
// If we're not in the ground state then we process until
|
// If we're not in the ground state then we process until
|
||||||
// we are. This can happen if the last chunk of input put us
|
// we are. This can happen if the last chunk of input put us
|
||||||
@ -105,13 +103,11 @@ pub fn Stream(comptime Handler: type) type {
|
|||||||
if (offset >= input.len) return;
|
if (offset >= input.len) return;
|
||||||
|
|
||||||
// If our offset is NOT an escape then we must have a
|
// If our offset is NOT an escape then we must have a
|
||||||
// partial UTF-8 sequence. In that case, we save it and
|
// partial UTF-8 sequence. In that case, we pass it off
|
||||||
// return.
|
// to the scalar parser.
|
||||||
if (input[offset] != 0x1B) {
|
if (input[offset] != 0x1B) {
|
||||||
const rem = input[offset..];
|
const rem = input[offset..];
|
||||||
assert(rem.len <= self.partial_utf8.len);
|
for (rem) |c| try self.next(c);
|
||||||
@memcpy(self.partial_utf8[0..rem.len], rem);
|
|
||||||
self.partial_utf8_len = @intCast(rem.len);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -124,53 +120,6 @@ pub fn Stream(comptime Handler: type) type {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Complete a partial UTF-8 sequence from a prior input chunk.
|
|
||||||
// This processes the UTF-8 sequence and then returns the number
|
|
||||||
// of bytes consumed from the input.
|
|
||||||
fn completePartialUtf8(self: *Self, input: []const u8) !usize {
|
|
||||||
assert(self.partial_utf8_len > 0);
|
|
||||||
assert(self.parser.state == .ground);
|
|
||||||
|
|
||||||
// This cannot fail because the nature of partial utf8
|
|
||||||
// existing means we successfully processed it last time.
|
|
||||||
const len = std.unicode.utf8ByteSequenceLength(self.partial_utf8[0]) catch
|
|
||||||
unreachable;
|
|
||||||
|
|
||||||
// This is the length we need in the input in addition to
|
|
||||||
// our partial_utf8 to complete the sequence.
|
|
||||||
const input_len = len - self.partial_utf8_len;
|
|
||||||
|
|
||||||
// If we STILL don't have enough bytes, then we copy and continue.
|
|
||||||
// This is a really bizarre and stupid program that's running to
|
|
||||||
// send us incomplete UTF-8 sequences over multiple write() calls.
|
|
||||||
if (input_len > input.len) {
|
|
||||||
@memcpy(
|
|
||||||
self.partial_utf8[self.partial_utf8_len .. self.partial_utf8_len + input.len],
|
|
||||||
input,
|
|
||||||
);
|
|
||||||
self.partial_utf8_len += @intCast(input.len);
|
|
||||||
return input.len;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process the complete UTF-8 sequence.
|
|
||||||
@memcpy(
|
|
||||||
self.partial_utf8[self.partial_utf8_len .. self.partial_utf8_len + input_len],
|
|
||||||
input[0..input_len],
|
|
||||||
);
|
|
||||||
const cp = cp: {
|
|
||||||
if (std.unicode.utf8Decode(self.partial_utf8[0..len])) |cp| {
|
|
||||||
break :cp cp;
|
|
||||||
} else |err| {
|
|
||||||
log.warn("invalid UTF-8, ignoring err={}", .{err});
|
|
||||||
break :cp 0xFFFD; // replacement character
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
self.partial_utf8_len = 0;
|
|
||||||
try self.print(cp);
|
|
||||||
return input_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Like nextSlice but takes one byte and is necessarily a scalar
|
/// Like nextSlice but takes one byte and is necessarily a scalar
|
||||||
/// operation that can't use SIMD. Prefer nextSlice if you can and
|
/// operation that can't use SIMD. Prefer nextSlice if you can and
|
||||||
/// try to get multiple bytes at once.
|
/// try to get multiple bytes at once.
|
||||||
@ -1506,6 +1455,38 @@ test "stream: print" {
|
|||||||
try testing.expectEqual(@as(u21, 'x'), s.handler.c.?);
|
try testing.expectEqual(@as(u21, 'x'), s.handler.c.?);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test "simd: print invalid utf-8" {
|
||||||
|
const H = struct {
|
||||||
|
c: ?u21 = 0,
|
||||||
|
|
||||||
|
pub fn print(self: *@This(), c: u21) !void {
|
||||||
|
self.c = c;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
var s: Stream(H) = .{ .handler = .{} };
|
||||||
|
try s.nextSlice(&.{0xFF});
|
||||||
|
try testing.expectEqual(@as(u21, 0xFFFD), s.handler.c.?);
|
||||||
|
}
|
||||||
|
|
||||||
|
test "simd: complete incomplete utf-8" {
|
||||||
|
const H = struct {
|
||||||
|
c: ?u21 = null,
|
||||||
|
|
||||||
|
pub fn print(self: *@This(), c: u21) !void {
|
||||||
|
self.c = c;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
var s: Stream(H) = .{ .handler = .{} };
|
||||||
|
try s.nextSlice(&.{0xE0}); // 3 byte
|
||||||
|
try testing.expect(s.handler.c == null);
|
||||||
|
try s.nextSlice(&.{0xA0}); // still incomplete
|
||||||
|
try testing.expect(s.handler.c == null);
|
||||||
|
try s.nextSlice(&.{0x80});
|
||||||
|
try testing.expectEqual(@as(u21, 0x800), s.handler.c.?);
|
||||||
|
}
|
||||||
|
|
||||||
test "stream: cursor right (CUF)" {
|
test "stream: cursor right (CUF)" {
|
||||||
const H = struct {
|
const H = struct {
|
||||||
amount: u16 = 0,
|
amount: u16 = 0,
|
||||||
|
Reference in New Issue
Block a user