mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-15 08:16:13 +03:00
terminal: use new VT simd to process slices
This commit is contained in:
@ -52,38 +52,56 @@ pub fn Stream(comptime Handler: type) type {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Process a string of characters.
|
/// Process a string of characters.
|
||||||
pub fn nextSlice(self: *Self, c: []const u8) !void {
|
pub fn nextSlice(self: *Self, input: []const u8) !void {
|
||||||
// If we're not in the ground state then we process until we are.
|
|
||||||
var offset: usize = 0;
|
var offset: usize = 0;
|
||||||
if (self.parser.state != .ground) {
|
|
||||||
for (c[offset..]) |single| {
|
// If we have a partial UTF-8 sequence then we process manually.
|
||||||
|
if (self.partial_utf8_len > 0) {
|
||||||
|
offset += try self.completePartialUtf8(input);
|
||||||
|
} else if (self.parser.state != .ground) {
|
||||||
|
// If we're not in the ground state then we process until
|
||||||
|
// we are. This can happen if the last chunk of input put us
|
||||||
|
// in the middle of a control sequence.
|
||||||
|
for (input[offset..]) |single| {
|
||||||
try self.next(single);
|
try self.next(single);
|
||||||
offset += 1;
|
offset += 1;
|
||||||
if (self.parser.state == .ground) break;
|
if (self.parser.state == .ground) break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: do something better
|
||||||
|
var cp_buf: [4096]u32 = undefined;
|
||||||
|
|
||||||
// If we're in the ground state then we can use SIMD to process
|
// If we're in the ground state then we can use SIMD to process
|
||||||
// input until we see an ESC (0x1B), since all other characters
|
// input until we see an ESC (0x1B), since all other characters
|
||||||
// up to that point are just UTF-8.
|
// up to that point are just UTF-8.
|
||||||
while (self.parser.state == .ground and offset < c.len) {
|
while (self.parser.state == .ground and offset < input.len) {
|
||||||
// Find the next ESC character to trigger a control sequence.
|
const res = simd.vt.utf8DecodeUntilControlSeq(input[offset..], &cp_buf);
|
||||||
//const idx = std.mem.indexOfScalar(u8, c[offset..], 0x1B) orelse {
|
for (cp_buf[0..res.decoded]) |cp| {
|
||||||
const idx = simd.index_of.Hwy.indexOf(c[offset..], 0x1B) orelse {
|
if (cp < 0xF) {
|
||||||
// No ESC character, remainder is all UTF-8.
|
try self.execute(@intCast(cp));
|
||||||
try self.nextAssumeUtf8(c[offset..]);
|
} else {
|
||||||
|
try self.print(@intCast(cp));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consume the bytes we just processed.
|
||||||
|
offset += res.consumed;
|
||||||
|
if (offset >= input.len) return;
|
||||||
|
|
||||||
|
// If our offset is NOT an escape then we must have a
|
||||||
|
// partial UTF-8 sequence. In that case, we save it and
|
||||||
|
// return.
|
||||||
|
if (input[offset] != 0x1B) {
|
||||||
|
const rem = input[offset..];
|
||||||
|
assert(rem.len <= self.partial_utf8.len);
|
||||||
|
@memcpy(self.partial_utf8[0..rem.len], rem);
|
||||||
|
self.partial_utf8_len = @intCast(rem.len);
|
||||||
return;
|
return;
|
||||||
};
|
}
|
||||||
|
|
||||||
// Process the UTF-8 characters up to the ESC character.
|
// Process our control sequence.
|
||||||
const next_offset = offset + idx;
|
for (input[offset..]) |single| {
|
||||||
if (idx > 0) try self.nextAssumeUtf8(c[offset..next_offset]);
|
|
||||||
|
|
||||||
// Process the control sequence and bail out once we reach
|
|
||||||
// the ground state which means we're looking for ESC again.
|
|
||||||
offset = next_offset;
|
|
||||||
assert(c[offset] == 0x1B);
|
|
||||||
for (c[offset..]) |single| {
|
|
||||||
try self.next(single);
|
try self.next(single);
|
||||||
offset += 1;
|
offset += 1;
|
||||||
if (self.parser.state == .ground) break;
|
if (self.parser.state == .ground) break;
|
||||||
@ -91,21 +109,15 @@ pub fn Stream(comptime Handler: type) type {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Process the data in "input" assuming it is all UTF-8. The UTF-8
|
// Complete a partial UTF-8 sequence from a prior input chunk.
|
||||||
/// may be invalid and we will replace any invalid sequences with
|
// This processes the UTF-8 sequence and then returns the number
|
||||||
/// the replacement character (U+FFFD).
|
// of bytes consumed from the input.
|
||||||
///
|
fn completePartialUtf8(self: *Self, input: []const u8) !usize {
|
||||||
/// The input may also be incomplete, i.e. it ends in the middle of
|
assert(self.partial_utf8_len > 0);
|
||||||
/// a UTF-8 sequence. In that case we will process as much as we can
|
assert(self.parser.state == .ground);
|
||||||
/// and save the rest for the next call to nextAssumeUtf8.
|
|
||||||
fn nextAssumeUtf8(self: *Self, input: []const u8) !void {
|
|
||||||
var i: usize = 0;
|
|
||||||
|
|
||||||
// If we have a partial UTF-8 sequence from the last call then
|
// This cannot fail because the existence of a partial UTF-8
|
||||||
// we need to process that first.
|
// sequence means we successfully processed it last time.
|
||||||
if (self.partial_utf8_len > 0) {
|
|
||||||
// This cannot fail because the nature of partial utf8 existing
|
|
||||||
// means we successfully processed it last time.
|
|
||||||
const len = std.unicode.utf8ByteSequenceLength(self.partial_utf8[0]) catch
|
const len = std.unicode.utf8ByteSequenceLength(self.partial_utf8[0]) catch
|
||||||
unreachable;
|
unreachable;
|
||||||
|
|
||||||
@ -122,7 +134,7 @@ pub fn Stream(comptime Handler: type) type {
|
|||||||
input,
|
input,
|
||||||
);
|
);
|
||||||
self.partial_utf8_len += @intCast(input.len);
|
self.partial_utf8_len += @intCast(input.len);
|
||||||
return;
|
return input.len;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process the complete UTF-8 sequence.
|
// Process the complete UTF-8 sequence.
|
||||||
@ -141,48 +153,7 @@ pub fn Stream(comptime Handler: type) type {
|
|||||||
|
|
||||||
self.partial_utf8_len = 0;
|
self.partial_utf8_len = 0;
|
||||||
try self.print(cp);
|
try self.print(cp);
|
||||||
i += input_len;
|
return input_len;
|
||||||
}
|
|
||||||
|
|
||||||
while (i < input.len) {
|
|
||||||
const len = std.unicode.utf8ByteSequenceLength(input[i]) catch |err| {
|
|
||||||
log.warn("invalid UTF-8, ignoring err={}", .{err});
|
|
||||||
i += 1;
|
|
||||||
try self.print(@intCast(input[i]));
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
|
|
||||||
// If we have exactly one byte and it's a control character,
|
|
||||||
// then process it directly.
|
|
||||||
if (len == 1 and input[i] < 0xF) {
|
|
||||||
try self.execute(@intCast(input[i]));
|
|
||||||
i += 1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we have a partial UTF-8 sequence then we save it for
|
|
||||||
// the next call to nextAssumeUtf8.
|
|
||||||
if (i + len > input.len) {
|
|
||||||
const remaining = input.len - i;
|
|
||||||
@memcpy(self.partial_utf8[0..remaining], input[i..]);
|
|
||||||
self.partial_utf8_len = @intCast(remaining);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Decode the UTF-8 sequence and handle any errors by
|
|
||||||
// replacing the character with the replacement character.
|
|
||||||
const cp = cp: {
|
|
||||||
if (std.unicode.utf8Decode(input[i .. i + len])) |cp| {
|
|
||||||
break :cp cp;
|
|
||||||
} else |err| {
|
|
||||||
log.warn("invalid UTF-8, ignoring err={}", .{err});
|
|
||||||
break :cp 0xFFFD; // replacement character
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
try self.print(cp);
|
|
||||||
i += len;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Process the next character and call any callbacks if necessary.
|
/// Process the next character and call any callbacks if necessary.
|
||||||
|
Reference in New Issue
Block a user