test writing graphemes

This commit is contained in:
Mitchell Hashimoto
2022-09-05 10:29:52 -07:00
parent 7ceff79ea9
commit 47242ad12a
2 changed files with 142 additions and 2 deletions

View File

@ -107,6 +107,15 @@ pub const RunIterator = struct {
// Continue with our run // Continue with our run
self.shaper.hb_buf.add(cell.char, @intCast(u32, j)); self.shaper.hb_buf.add(cell.char, @intCast(u32, j));
// If this cell is part of a grapheme cluster, add all the grapheme
// data points.
if (cell.attrs.grapheme) {
var it = self.row.codepointIterator(j);
while (it.next()) |cp| {
self.shaper.hb_buf.add(cp, @intCast(u32, j));
}
}
} }
// Finalize our buffer // Finalize our buffer

View File

@ -353,6 +353,18 @@ pub const Row = struct {
pub fn cellIterator(self: Row) CellIterator { pub fn cellIterator(self: Row) CellIterator {
return .{ .row = self }; return .{ .row = self };
} }
/// Read-only iterator for the grapheme codepoints in a cell. This only
/// iterates over the EXTRA GRAPHEME codepoints and not the primary
/// codepoint in cell.char.
pub fn codepointIterator(self: Row, x: usize) CodepointIterator {
const cell = &self.storage[x + 1].cell;
assert(cell.attrs.grapheme);
const key = self.getId() + x + 1;
const data = self.screen.graphemes.get(key).?;
return .{ .data = data };
}
}; };
/// Used to iterate through the rows of a specific region. /// Used to iterate through the rows of a specific region.
@ -384,6 +396,47 @@ pub const CellIterator = struct {
} }
}; };
/// Used to iterate through the codepoints of a cell. This only iterates
/// over the extra grapheme codepoints and not the primary codepoint.
pub const CodepointIterator = struct {
data: GraphemeData,
i: usize = 0,
pub fn next(self: *CodepointIterator) ?u21 {
switch (self.data) {
.one => |v| {
if (self.i >= 1) return null;
self.i += 1;
return v;
},
.two => |v| {
if (self.i >= v.len) return null;
defer self.i += 1;
return v[self.i];
},
.three => |v| {
if (self.i >= v.len) return null;
defer self.i += 1;
return v[self.i];
},
.four => |v| {
if (self.i >= v.len) return null;
defer self.i += 1;
return v[self.i];
},
.many => |v| {
if (self.i >= v.len) return null;
defer self.i += 1;
return v[self.i];
},
}
}
};
/// RowIndex represents a row within the screen. There are various meanings /// RowIndex represents a row within the screen. There are various meanings
/// of a row index and this union represents the available types. For example, /// of a row index and this union represents the available types. For example,
/// when talking about row "0" you may want the first row in the viewport, /// when talking about row "0" you may want the first row in the viewport,
@ -864,6 +917,19 @@ fn scrollDelta(self: *Screen, delta: isize, grow: bool) !void {
// If we can't fit our rows into our capacity, we delete some scrollback. // If we can't fit our rows into our capacity, we delete some scrollback.
const rows_deleted = if (rows_final > self.rowsCapacity()) deleted: { const rows_deleted = if (rows_final > self.rowsCapacity()) deleted: {
const rows_to_delete = rows_final - self.rowsCapacity(); const rows_to_delete = rows_final - self.rowsCapacity();
// Fast-path: we have no graphemes.
// Slow-path: we have graphemes, we have to check each row
// we're going to delete to see if they contain graphemes and
// clear the ones that do so we clear memory properly.
if (self.graphemes.count() > 0) {
var y: usize = 0;
while (y < rows_to_delete) : (y += 1) {
const row = self.getRow(.{ .active = y });
if (row.storage[0].header.flags.grapheme) row.clear(.{});
}
}
self.viewport -= rows_to_delete; self.viewport -= rows_to_delete;
self.storage.deleteOldest(rows_to_delete * (self.cols + 1)); self.storage.deleteOldest(rows_to_delete * (self.cols + 1));
break :deleted rows_to_delete; break :deleted rows_to_delete;
@ -1403,8 +1469,13 @@ pub fn resize(self: *Screen, rows: usize, cols: usize) !void {
/// each row. If a line is longer than the available columns, soft-wrapping /// each row. If a line is longer than the available columns, soft-wrapping
/// will occur. This will automatically handle basic wide chars. /// will occur. This will automatically handle basic wide chars.
pub fn testWriteString(self: *Screen, text: []const u8) !void { pub fn testWriteString(self: *Screen, text: []const u8) !void {
var y: usize = 0; var y: usize = self.cursor.y;
var x: usize = 0; var x: usize = self.cursor.x;
var grapheme: struct {
x: usize = 0,
cell: ?*Cell = null,
} = .{};
const view = std.unicode.Utf8View.init(text) catch unreachable; const view = std.unicode.Utf8View.init(text) catch unreachable;
var iter = view.iterator(); var iter = view.iterator();
@ -1413,6 +1484,7 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void {
if (c == '\n') { if (c == '\n') {
y += 1; y += 1;
x = 0; x = 0;
grapheme = .{};
continue; continue;
} }
@ -1425,6 +1497,32 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void {
// Get our row // Get our row
var row = self.getRow(.{ .active = y }); var row = self.getRow(.{ .active = y });
if (grapheme.cell) |prev_cell| {
const grapheme_break = brk: {
var state: i32 = 0;
var cp1 = @intCast(u21, prev_cell.char);
if (prev_cell.attrs.grapheme) {
var it = row.codepointIterator(grapheme.x);
while (it.next()) |cp2| {
assert(!utf8proc.graphemeBreakStateful(
cp1,
cp2,
&state,
));
cp1 = cp2;
}
}
break :brk utf8proc.graphemeBreakStateful(cp1, c, &state);
};
if (!grapheme_break) {
try row.attachGrapheme(grapheme.x, c);
continue;
}
}
// If we're writing past the end, we need to soft wrap. // If we're writing past the end, we need to soft wrap.
if (x == self.cols) { if (x == self.cols) {
row.setWrapped(true); row.setWrapped(true);
@ -1444,6 +1542,9 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void {
1 => { 1 => {
const cell = row.getCellPtr(x); const cell = row.getCellPtr(x);
cell.char = @intCast(u32, c); cell.char = @intCast(u32, c);
grapheme.x = x;
grapheme.cell = cell;
}, },
2 => { 2 => {
@ -1467,6 +1568,9 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void {
const cell = row.getCellPtr(x); const cell = row.getCellPtr(x);
cell.char = @intCast(u32, c); cell.char = @intCast(u32, c);
cell.attrs.wide = true; cell.attrs.wide = true;
grapheme.x = x;
grapheme.cell = cell;
} }
{ {
@ -1482,6 +1586,10 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void {
x += 1; x += 1;
} }
// So the cursor doesn't go off screen
self.cursor.x = @minimum(x, self.cols - 1);
self.cursor.y = y;
} }
/// Turns the screen into a string. Different regions of the screen can /// Turns the screen into a string. Different regions of the screen can
@ -1645,6 +1753,25 @@ test "Screen" {
} }
} }
test "Screen: write graphemes" {
const testing = std.testing;
const alloc = testing.allocator;
var s = try init(alloc, 5, 5, 0);
defer s.deinit();
// Sanity check that our test helpers work
var buf: [32]u8 = undefined;
var buf_idx: usize = 0;
buf_idx += try std.unicode.utf8Encode(0x1F44D, buf[buf_idx..]); // Thumbs up plain
buf_idx += try std.unicode.utf8Encode(0x1F44D, buf[buf_idx..]); // Thumbs up plain
buf_idx += try std.unicode.utf8Encode(0x1F3FD, buf[buf_idx..]); // Medium skin tone
try s.testWriteString(buf[0..buf_idx]);
try testing.expect(s.rowsWritten() == 1);
try testing.expectEqual(@as(usize, 4), s.cursor.x);
}
test "Screen: scrolling" { test "Screen: scrolling" {
const testing = std.testing; const testing = std.testing;
const alloc = testing.allocator; const alloc = testing.allocator;
@ -2512,6 +2639,8 @@ test "Screen: resize less rows no scrollback" {
defer s.deinit(); defer s.deinit();
const str = "1ABCD\n2EFGH\n3IJKL"; const str = "1ABCD\n2EFGH\n3IJKL";
try s.testWriteString(str); try s.testWriteString(str);
s.cursor.x = 0;
s.cursor.y = 0;
const cursor = s.cursor; const cursor = s.cursor;
try s.resize(1, 5); try s.resize(1, 5);
@ -2629,6 +2758,8 @@ test "Screen: resize less cols no reflow" {
defer s.deinit(); defer s.deinit();
const str = "1AB\n2EF\n3IJ"; const str = "1AB\n2EF\n3IJ";
try s.testWriteString(str); try s.testWriteString(str);
s.cursor.x = 0;
s.cursor.y = 0;
const cursor = s.cursor; const cursor = s.cursor;
try s.resize(3, 3); try s.resize(3, 3);