mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-22 03:36:14 +03:00
detect and attach grapheme to grapheme data
This commit is contained in:
@ -196,11 +196,20 @@ pub const Cell = struct {
|
|||||||
|
|
||||||
/// A row is a single row in the screen.
|
/// A row is a single row in the screen.
|
||||||
pub const Row = struct {
|
pub const Row = struct {
|
||||||
|
/// The screen this row is part of.
|
||||||
|
screen: *Screen,
|
||||||
|
|
||||||
/// Raw internal storage, do NOT write to this, use only the
|
/// Raw internal storage, do NOT write to this, use only the
|
||||||
/// helpers. Writing directly to this can easily mess up state
|
/// helpers. Writing directly to this can easily mess up state
|
||||||
/// causing future crashes or misrendering.
|
/// causing future crashes or misrendering.
|
||||||
storage: []StorageCell,
|
storage: []StorageCell,
|
||||||
|
|
||||||
|
/// Returns the ID for this row, read from the header stored in the first
/// storage slot. A cell ID is derived from it by adding the cell offset
/// plus 1 (so cell IDs are 1-indexed).
pub fn getId(self: Row) RowHeader.Id {
    const header = self.storage[0].header;
    return header.id;
}
|
||||||
|
|
||||||
/// Set that this row is soft-wrapped. This doesn't change the contents
|
/// Set that this row is soft-wrapped. This doesn't change the contents
|
||||||
/// of this row so the row won't be marked dirty.
|
/// of this row so the row won't be marked dirty.
|
||||||
pub fn setWrapped(self: Row, v: bool) void {
|
pub fn setWrapped(self: Row, v: bool) void {
|
||||||
@ -250,6 +259,39 @@ pub const Row = struct {
|
|||||||
return &self.storage[x + 1].cell;
|
return &self.storage[x + 1].cell;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attach the grapheme codepoint cp to the cell at column x of this row.
///
/// Grapheme data is kept in the owning screen's `graphemes` map, keyed by
/// the row ID plus the 1-indexed cell offset. Returns an error if the map
/// insertion or the grapheme data growth fails.
pub fn attachGrapheme(self: Row, x: usize, cp: u21) !void {
    const screen = self.screen;
    const cell = &self.storage[x + 1].cell;
    const key = self.getId() + x + 1;

    const entry = try screen.graphemes.getOrPut(screen.alloc, key);
    errdefer if (!entry.found_existing) {
        _ = screen.graphemes.remove(key);
    };

    // Mark on the row header that this row now carries a grapheme.
    self.storage[0].header.flags.grapheme = true;

    // The cell is already flagged as a grapheme and has stored data:
    // extend that data with the new codepoint.
    if (entry.found_existing and cell.attrs.grapheme) {
        try entry.value_ptr.append(screen.alloc, cp);
        return;
    }

    // An existing entry for a cell that is not flagged as a grapheme is
    // old grapheme data; free it before it gets overwritten.
    if (entry.found_existing) entry.value_ptr.deinit(screen.alloc);

    // Either way the cell now holds exactly one attached codepoint.
    cell.attrs.grapheme = true;
    entry.value_ptr.* = .{ .one = cp };
}
|
||||||
|
|
||||||
/// Copy the row src into this row. The row can be from another screen.
|
/// Copy the row src into this row. The row can be from another screen.
|
||||||
pub fn copyRow(self: Row, src: Row) void {
|
pub fn copyRow(self: Row, src: Row) void {
|
||||||
const end = @minimum(src.storage.len, self.storage.len);
|
const end = @minimum(src.storage.len, self.storage.len);
|
||||||
@ -408,14 +450,65 @@ pub const GraphemeData = union(enum) {
|
|||||||
four: [4]u21,
|
four: [4]u21,
|
||||||
many: []u21,
|
many: []u21,
|
||||||
|
|
||||||
|
/// Release the memory held by this grapheme data. Only the `many` variant
/// frees anything (its slice, via alloc); every other variant is a no-op.
pub fn deinit(self: GraphemeData, alloc: Allocator) void {
    if (self == .many) alloc.free(self.many);
}
|
||||||
|
|
||||||
|
/// Append the codepoint cp to the grapheme data.
///
/// The `one` through `three` variants step up to the next fixed-size
/// variant without allocating. Appending to `four` allocates a five-element
/// slice from alloc and switches to `many`; appending to `many` reallocates
/// the slice one element larger. Allocation errors are returned to the
/// caller, in which case the data is left as it was.
pub fn append(self: *GraphemeData, alloc: Allocator, cp: u21) !void {
    switch (self.*) {
        .one => |first| self.* = .{ .two = .{ first, cp } },
        .two => |pair| self.* = .{ .three = .{ pair[0], pair[1], cp } },
        .three => |triple| self.* = .{ .four = .{ triple[0], triple[1], triple[2], cp } },

        .four => |fixed| {
            // The fixed-size variants are exhausted; move to a slice.
            const grown = try alloc.alloc(u21, fixed.len + 1);
            std.mem.copy(u21, grown, &fixed);
            grown[fixed.len] = cp;
            self.* = .{ .many = grown };
        },

        .many => |current| {
            // Note: growing by exactly one element per append is super
            // inefficient, we should use an arraylist or something so we
            // have extra capacity.
            const old_len = current.len;
            const grown = try alloc.realloc(current, old_len + 1);
            grown[old_len] = cp;
            self.* = .{ .many = grown };
        },
    }
}
|
||||||
|
|
||||||
test {
|
test {
|
||||||
//log.warn("Grapheme={}", .{@sizeOf(GraphemeData)});
|
log.warn("Grapheme={}", .{@sizeOf(GraphemeData)});
|
||||||
|
}
|
||||||
|
|
||||||
|
test "append" {
    const testing = std.testing;
    const alloc = testing.allocator;

    // Start from a single codepoint and grow one codepoint at a time.
    var grapheme = GraphemeData{ .one = 1 };
    defer grapheme.deinit(alloc);

    // Fixed-size variants: two, three, four.
    try grapheme.append(alloc, 2);
    try testing.expectEqual(GraphemeData{ .two = .{ 1, 2 } }, grapheme);

    try grapheme.append(alloc, 3);
    try testing.expectEqual(GraphemeData{ .three = .{ 1, 2, 3 } }, grapheme);

    try grapheme.append(alloc, 4);
    try testing.expectEqual(GraphemeData{ .four = .{ 1, 2, 3, 4 } }, grapheme);

    // The fifth codepoint switches to the slice-backed `many` variant.
    try grapheme.append(alloc, 5);
    try testing.expect(grapheme == .many);
    try testing.expectEqualSlices(u21, &[_]u21{ 1, 2, 3, 4, 5 }, grapheme.many);

    // Further appends keep extending the slice.
    try grapheme.append(alloc, 6);
    try testing.expect(grapheme == .many);
    try testing.expectEqualSlices(u21, &[_]u21{ 1, 2, 3, 4, 5, 6 }, grapheme.many);
}
|
||||||
|
|
||||||
comptime {
|
comptime {
|
||||||
// We want to keep this at most the size of the tag + []u21 so that
|
// We want to keep this at most the size of the tag + []u21 so that
|
||||||
// at most we're paying for the cost of a slice.
|
// at most we're paying for the cost of a slice.
|
||||||
assert(@sizeOf(GraphemeData) == 24);
|
//assert(@sizeOf(GraphemeData) == 24);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -540,7 +633,7 @@ pub fn getRow(self: *Screen, index: RowIndex) Row {
|
|||||||
const slices = self.storage.getPtrSlice(offset, self.cols + 1);
|
const slices = self.storage.getPtrSlice(offset, self.cols + 1);
|
||||||
assert(slices[0].len == self.cols + 1 and slices[1].len == 0);
|
assert(slices[0].len == self.cols + 1 and slices[1].len == 0);
|
||||||
|
|
||||||
const row: Row = .{ .storage = slices[0] };
|
const row: Row = .{ .screen = self, .storage = slices[0] };
|
||||||
if (row.storage[0].header.id == 0) {
|
if (row.storage[0].header.id == 0) {
|
||||||
const Id = @TypeOf(self.next_row_id);
|
const Id = @TypeOf(self.next_row_id);
|
||||||
const id = self.next_row_id;
|
const id = self.next_row_id;
|
||||||
@ -789,7 +882,7 @@ pub fn selectionString(self: *Screen, alloc: Allocator, sel: Selection) ![:0]con
|
|||||||
// the first row.
|
// the first row.
|
||||||
var skip: usize = if (row_count == 0) slices.top_offset else 0;
|
var skip: usize = if (row_count == 0) slices.top_offset else 0;
|
||||||
|
|
||||||
const row: Row = .{ .storage = slice[start_idx..end_idx] };
|
const row: Row = .{ .screen = self, .storage = slice[start_idx..end_idx] };
|
||||||
var it = row.cellIterator();
|
var it = row.cellIterator();
|
||||||
while (it.next()) |cell| {
|
while (it.next()) |cell| {
|
||||||
if (skip > 0) {
|
if (skip > 0) {
|
||||||
|
@ -449,6 +449,60 @@ pub fn print(self: *Terminal, c: u21) !void {
|
|||||||
// If we're not on the main display, do nothing for now
|
// If we're not on the main display, do nothing for now
|
||||||
if (self.status_display != .main) return;
|
if (self.status_display != .main) return;
|
||||||
|
|
||||||
|
// Get the previous cell so we can detect grapheme clusters. We only
|
||||||
|
// do this if c is outside of Latin-1 because characters in the Latin-1
|
||||||
|
// range cannot possibly be grapheme joiners. This helps keep non-graphemes
|
||||||
|
// extremely fast and we take this much slower path for graphemes. No hate
|
||||||
|
// on graphemes, I'd love to make them much faster, but I wanted to focus
|
||||||
|
// on correctness first.
|
||||||
|
if (c > 255 and self.screen.cursor.x > 0) {
|
||||||
|
// TODO: test this!
|
||||||
|
|
||||||
|
const row = self.screen.getRow(.{ .active = self.screen.cursor.y });
|
||||||
|
const Prev = struct { cell: *Screen.Cell, x: usize };
|
||||||
|
const prev: Prev = prev: {
|
||||||
|
const x = self.screen.cursor.x - 1;
|
||||||
|
const immediate = row.getCellPtr(x);
|
||||||
|
if (!immediate.attrs.wide_spacer_tail) break :prev .{
|
||||||
|
.cell = immediate,
|
||||||
|
.x = x,
|
||||||
|
};
|
||||||
|
|
||||||
|
break :prev .{
|
||||||
|
.cell = row.getCellPtr(x - 1),
|
||||||
|
.x = x - 1,
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
var state: i32 = 0;
|
||||||
|
const grapheme_break = if (!prev.cell.attrs.grapheme)
|
||||||
|
utf8proc.graphemeBreakStateful(@intCast(u21, prev.cell.char), c, &state)
|
||||||
|
else brk: {
|
||||||
|
// We need to rebuild the state by processing the grapheme breaks
|
||||||
|
// for all the codepoints up to this point. This MUST exist because
|
||||||
|
// grapheme is true iff this exists.
|
||||||
|
const points = self.screen.graphemes.getEntry(row.getId() + prev.x + 1).?;
|
||||||
|
const cp1 = switch (points.value_ptr.*) {
|
||||||
|
.one => |v| one: {
|
||||||
|
assert(!utf8proc.graphemeBreakStateful(@intCast(u21, prev.cell.char), v, &state));
|
||||||
|
break :one v;
|
||||||
|
},
|
||||||
|
|
||||||
|
else => @panic("NO"),
|
||||||
|
};
|
||||||
|
|
||||||
|
break :brk utf8proc.graphemeBreakStateful(cp1, c, &state);
|
||||||
|
};
|
||||||
|
|
||||||
|
// If we can NOT break, this means that "c" is part of a grapheme
|
||||||
|
// with the previous char.
|
||||||
|
if (!grapheme_break) {
|
||||||
|
log.debug("c={x} grapheme attach to x={}", .{ c, prev.x });
|
||||||
|
try row.attachGrapheme(prev.x, c);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Determine the width of this character so we can handle
|
// Determine the width of this character so we can handle
|
||||||
// non-single-width characters properly.
|
// non-single-width characters properly.
|
||||||
const width = utf8proc.charwidth(c);
|
const width = utf8proc.charwidth(c);
|
||||||
|
Reference in New Issue
Block a user