From a605ff07e16f979d9ae1b39b354fba694a80c2d7 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sun, 4 Sep 2022 11:17:41 -0700 Subject: [PATCH 1/7] setup flags, storage for graphemes --- src/terminal/Screen.zig | 110 ++++++++++++++++++++++++++++++---------- src/terminal/color.zig | 6 +-- 2 files changed, 86 insertions(+), 30 deletions(-) diff --git a/src/terminal/Screen.zig b/src/terminal/Screen.zig index 5a66893df..552ca8f09 100644 --- a/src/terminal/Screen.zig +++ b/src/terminal/Screen.zig @@ -97,7 +97,7 @@ const StorageCell = union { comptime { // We only check this during ReleaseFast because safety checks // have to be disabled to get this size. - if (builtin.mode == .ReleaseFast) { + if (!std.debug.runtime_safety) { // We want to be at most the size of a cell always. We have WAY // more cells than other fields, so we don't want to pay the cost // of padding due to other fields. @@ -113,12 +113,23 @@ const StorageCell = union { /// The row header is at the start of every row within the storage buffer. /// It can store row-specific data. pub const RowHeader = struct { - /// Used internally to track if this row has been initialized. - init: bool = false, + const Id = u32; - /// If true, this row is soft-wrapped. The first cell of the next - /// row is a continuous of this row. - wrap: bool = false, + /// The ID of this row, used to uniquely identify this row. The cells + /// are also ID'd by id + cell index (0-indexed). This will wrap around + /// when it reaches the maximum value for the type. For caching purposes, + /// when wrapping happens, all rows in the screen will be marked dirty. + id: Id = 0, + + // Packed flags + flags: packed struct { + /// If true, this row is soft-wrapped. The first cell of the next + /// row is a continuous of this row. + wrap: bool = false, + + /// True if any cell in this row has a grapheme associated with it. + grapheme: bool = false, + } = .{}, }; /// Cell is a single cell within the screen. 
@@ -136,8 +147,8 @@ pub const Cell = struct { /// Foreground and background color. attrs.has_{bg/fg} must be checked /// to see if these are useful values. - fg: color.RGB = undefined, - bg: color.RGB = undefined, + fg: color.RGB = .{}, + bg: color.RGB = .{}, /// On/off attributes that can be set attrs: packed struct { @@ -157,6 +168,12 @@ pub const Cell = struct { /// wide character (tail) or following (head). wide_spacer_tail: bool = false, wide_spacer_head: bool = false, + + /// True if this cell has additional codepoints to form a complete + /// grapheme cluster. If this is true, then the row grapheme flag must + /// also be true. The grapheme code points can be looked up in the + /// screen grapheme map. + grapheme: bool = false, } = .{}, /// True if the cell should be skipped for drawing @@ -187,7 +204,7 @@ pub const Row = struct { /// Set that this row is soft-wrapped. This doesn't change the contents /// of this row so the row won't be marked dirty. pub fn setWrapped(self: Row, v: bool) void { - self.storage[0].header.wrap = v; + self.storage[0].header.flags.wrap = v; } /// Retrieve the header for this row. @@ -220,7 +237,6 @@ pub const Row = struct { /// Get a single immutable cell. pub fn getCell(self: Row, x: usize) Cell { - assert(self.header().init); assert(x < self.storage.len - 1); return self.storage[x + 1].cell; } @@ -230,32 +246,20 @@ pub const Row = struct { /// next call to re-render this cell. Any change detection to avoid /// this should be done prior. pub fn getCellPtr(self: Row, x: usize) *Cell { - assert(self.header().init); assert(x < self.storage.len - 1); return &self.storage[x + 1].cell; } /// Copy the row src into this row. The row can be from another screen. pub fn copyRow(self: Row, src: Row) void { - assert(self.header().init); const end = @minimum(src.storage.len, self.storage.len); std.mem.copy(StorageCell, self.storage[1..], src.storage[1..end]); } /// Read-only iterator for the cells in the row. 
pub fn cellIterator(self: Row) CellIterator { - assert(self.header().init); return .{ .row = self }; } - - /// If this row isn't initialized, this sets all our cells to the - /// proper union tag so that it is properly zeroed. - fn initIfNeeded(self: Row) void { - if (!self.storage[0].header.init) { - self.fill(.{}); - self.storage[0].header.init = true; - } - } }; /// Used to iterate through the rows of a specific region. @@ -389,16 +393,53 @@ pub const RowIndexTag = enum { } }; +/// Stores the extra unicode codepoints that form a complete grapheme +/// cluster alongside a cell. We store this separately from a Cell because +/// grapheme clusters are relatively rare (depending on the language) and +/// we don't want to pay for the full cost all the time. +pub const GraphemeData = union(enum) { + // The named counts allow us to avoid allocators. We do this because + // []u21 is sizeof([4]u21) anyways so if we can avoid small allocations + // we prefer it. Grapheme clusters are almost always <= 4 codepoints. + + one: u21, + two: [2]u21, + three: [3]u21, + four: [4]u21, + many: []u21, + + test { + //log.warn("Grapheme={}", .{@sizeOf(GraphemeData)}); + } + + comptime { + // We want to keep this at most the size of the tag + []u21 so that + // at most we're paying for the cost of a slice. + assert(@sizeOf(GraphemeData) == 24); + } +}; + // Initialize to header and not a cell so that we can check header.init // to know if the remainder of the row has been initialized or not. const StorageBuf = CircBuf(StorageCell, .{ .header = .{} }); +/// Stores a mapping of cell ID (row ID + cell offset + 1) to +/// graphemes associated with a cell. To know if a cell has graphemes, +/// check the "grapheme" flag of a cell. +const GraphemeMap = std.AutoHashMapUnmanaged(usize, GraphemeData); + /// The allocator used for all the storage operations alloc: Allocator, /// The full set of storage. storage: StorageBuf, +/// Graphemes associated with our current screen. 
+graphemes: GraphemeMap = .{}, + +/// The next ID to assign to a row. The value of this is NOT assigned. +next_row_id: RowHeader.Id = 1, + /// The number of rows and columns in the visible space. rows: usize, cols: usize, @@ -448,6 +489,10 @@ pub fn init( pub fn deinit(self: *Screen) void { self.storage.deinit(self.alloc); + + var grapheme_it = self.graphemes.valueIterator(); + while (grapheme_it.next()) |data| if (data.* == .many) self.alloc.free(data.many); + self.graphemes.deinit(self.alloc); } /// Returns true if the viewport is scrolled to the bottom of the screen. @@ -496,7 +541,18 @@ pub fn getRow(self: *Screen, index: RowIndex) Row { assert(slices[0].len == self.cols + 1 and slices[1].len == 0); const row: Row = .{ .storage = slices[0] }; - row.initIfNeeded(); + if (row.storage[0].header.id == 0) { + const Id = @TypeOf(self.next_row_id); + const id = self.next_row_id; + self.next_row_id +%= @intCast(Id, self.cols); + + // Store the header + row.storage[0].header.id = id; + + // We only need to fill with runtime safety because unions are + // tag-checked. Otherwise, the default value of zero will be valid. + if (std.debug.runtime_safety) row.fill(.{}); + } return row; } @@ -750,7 +806,7 @@ pub fn selectionString(self: *Screen, alloc: Allocator, sel: Selection) ![:0]con } // If this row is not soft-wrapped, add a newline - if (!row.header().wrap) { + if (!row.header().flags.wrap) { buf[buf_i] = '\n'; buf_i += 1; } @@ -975,7 +1031,7 @@ pub fn resize(self: *Screen, rows: usize, cols: usize) !void { } // If no reflow, just keep going - if (!old_row.header().wrap) { + if (!old_row.header().flags.wrap) { y += 1; continue; } @@ -1029,7 +1085,7 @@ pub fn resize(self: *Screen, rows: usize, cols: usize) !void { // We copied the full amount left in this wrapped row. if (copy_len == wrapped_cells_rem) { // If this row isn't also wrapped, we're done! 
- if (!wrapped_row.header().wrap) { + if (!wrapped_row.header().flags.wrap) { // If we were able to copy the entire row then // we shortened the screen by one. We need to reflect // this in our viewport. @@ -1168,7 +1224,7 @@ pub fn resize(self: *Screen, rows: usize, cols: usize) !void { // If we aren't wrapping, then move to the next row if (trimmed_row.len == 0 or - !old_row.header().wrap) + !old_row.header().flags.wrap) { y += 1; x = 0; diff --git a/src/terminal/color.zig b/src/terminal/color.zig index c8c8a37d4..a12a2bb6d 100644 --- a/src/terminal/color.zig +++ b/src/terminal/color.zig @@ -95,9 +95,9 @@ pub const Name = enum(u8) { /// RGB pub const RGB = packed struct { - r: u8, - g: u8, - b: u8, + r: u8 = 0, + g: u8 = 0, + b: u8 = 0, test { try std.testing.expectEqual(@as(usize, 3), @sizeOf(RGB)); From b3ec7028fb6206f13281439521b17a28b5e7a403 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sun, 4 Sep 2022 21:57:07 -0700 Subject: [PATCH 2/7] detect and attach grapheme to grapheme data --- src/terminal/Screen.zig | 101 ++++++++++++++++++++++++++++++++++++-- src/terminal/Terminal.zig | 54 ++++++++++++++++++++ 2 files changed, 151 insertions(+), 4 deletions(-) diff --git a/src/terminal/Screen.zig b/src/terminal/Screen.zig index 552ca8f09..8601b0f32 100644 --- a/src/terminal/Screen.zig +++ b/src/terminal/Screen.zig @@ -196,11 +196,20 @@ pub const Cell = struct { /// A row is a single row in the screen. pub const Row = struct { + /// The screen this row is part of. + screen: *Screen, + /// Raw internal storage, do NOT write to this, use only the /// helpers. Writing directly to this can easily mess up state /// causing future crashes or misrendering. storage: []StorageCell, + /// Returns the ID for this row. You can turn this into a cell ID + /// by adding the cell offset plus 1 (so it is 1-indexed). + pub fn getId(self: Row) RowHeader.Id { + return self.storage[0].header.id; + } + /// Set that this row is soft-wrapped. 
This doesn't change the contents /// of this row so the row won't be marked dirty. pub fn setWrapped(self: Row, v: bool) void { @@ -250,6 +259,39 @@ pub const Row = struct { return &self.storage[x + 1].cell; } + /// Attach a grapheme codepoint to the given cell. + pub fn attachGrapheme(self: Row, x: usize, cp: u21) !void { + const cell = &self.storage[x + 1].cell; + const key = self.getId() + x + 1; + const gop = try self.screen.graphemes.getOrPut(self.screen.alloc, key); + errdefer if (!gop.found_existing) { + _ = self.screen.graphemes.remove(key); + }; + + // Our row now has a grapheme + self.storage[0].header.flags.grapheme = true; + + // If we weren't previously a grapheme and we found an existing value + // it means that it is old grapheme data. Just delete that. + if (!cell.attrs.grapheme and gop.found_existing) { + cell.attrs.grapheme = true; + gop.value_ptr.deinit(self.screen.alloc); + gop.value_ptr.* = .{ .one = cp }; + return; + } + + // If we didn't have a previous value, attach the single codepoint. + if (!gop.found_existing) { + cell.attrs.grapheme = true; + gop.value_ptr.* = .{ .one = cp }; + return; + } + + // We have an existing value, promote + assert(cell.attrs.grapheme); + try gop.value_ptr.append(self.screen.alloc, cp); + } + /// Copy the row src into this row. The row can be from another screen. pub fn copyRow(self: Row, src: Row) void { const end = @minimum(src.storage.len, self.storage.len); @@ -408,14 +450,65 @@ pub const GraphemeData = union(enum) { four: [4]u21, many: []u21, + pub fn deinit(self: GraphemeData, alloc: Allocator) void { + switch (self) { + .many => |v| alloc.free(v), + else => {}, + } + } + + /// Append the codepoint cp to the grapheme data. 
+ pub fn append(self: *GraphemeData, alloc: Allocator, cp: u21) !void { + switch (self.*) { + .one => |v| self.* = .{ .two = .{ v, cp } }, + .two => |v| self.* = .{ .three = .{ v[0], v[1], cp } }, + .three => |v| self.* = .{ .four = .{ v[0], v[1], v[2], cp } }, + .four => |v| { + const many = try alloc.alloc(u21, 5); + std.mem.copy(u21, many, &v); + many[4] = cp; + self.* = .{ .many = many }; + }, + + .many => |v| { + // Note: this is super inefficient, we should use an arraylist + // or something so we have extra capacity. + const many = try alloc.realloc(v, v.len + 1); + many[v.len] = cp; + self.* = .{ .many = many }; + }, + } + } + test { - //log.warn("Grapheme={}", .{@sizeOf(GraphemeData)}); + log.warn("Grapheme={}", .{@sizeOf(GraphemeData)}); + } + + test "append" { + const testing = std.testing; + const alloc = testing.allocator; + + var data: GraphemeData = .{ .one = 1 }; + defer data.deinit(alloc); + + try data.append(alloc, 2); + try testing.expectEqual(GraphemeData{ .two = .{ 1, 2 } }, data); + try data.append(alloc, 3); + try testing.expectEqual(GraphemeData{ .three = .{ 1, 2, 3 } }, data); + try data.append(alloc, 4); + try testing.expectEqual(GraphemeData{ .four = .{ 1, 2, 3, 4 } }, data); + try data.append(alloc, 5); + try testing.expect(data == .many); + try testing.expectEqualSlices(u21, &[_]u21{ 1, 2, 3, 4, 5 }, data.many); + try data.append(alloc, 6); + try testing.expect(data == .many); + try testing.expectEqualSlices(u21, &[_]u21{ 1, 2, 3, 4, 5, 6 }, data.many); } comptime { // We want to keep this at most the size of the tag + []u21 so that // at most we're paying for the cost of a slice. 
- assert(@sizeOf(GraphemeData) == 24); + //assert(@sizeOf(GraphemeData) == 24); } }; @@ -540,7 +633,7 @@ pub fn getRow(self: *Screen, index: RowIndex) Row { const slices = self.storage.getPtrSlice(offset, self.cols + 1); assert(slices[0].len == self.cols + 1 and slices[1].len == 0); - const row: Row = .{ .storage = slices[0] }; + const row: Row = .{ .screen = self, .storage = slices[0] }; if (row.storage[0].header.id == 0) { const Id = @TypeOf(self.next_row_id); const id = self.next_row_id; @@ -789,7 +882,7 @@ pub fn selectionString(self: *Screen, alloc: Allocator, sel: Selection) ![:0]con // the first row. var skip: usize = if (row_count == 0) slices.top_offset else 0; - const row: Row = .{ .storage = slice[start_idx..end_idx] }; + const row: Row = .{ .screen = self, .storage = slice[start_idx..end_idx] }; var it = row.cellIterator(); while (it.next()) |cell| { if (skip > 0) { diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig index 81b39fda8..aeea76937 100644 --- a/src/terminal/Terminal.zig +++ b/src/terminal/Terminal.zig @@ -449,6 +449,60 @@ pub fn print(self: *Terminal, c: u21) !void { // If we're not on the main display, do nothing for now if (self.status_display != .main) return; + // Get the previous cell so we can detect grapheme clusters. We only + // do this if c is outside of Latin-1 because characters in the Latin-1 + // range cannot possibly be grapheme joiners. This helps keep non-graphemes + // extremely fast and we take this much slower path for graphemes. No hate + // on graphemes, I'd love to make them much faster, but I wanted to focus + // on correctness first. + if (c > 255 and self.screen.cursor.x > 0) { + // TODO: test this! 
+ + const row = self.screen.getRow(.{ .active = self.screen.cursor.y }); + const Prev = struct { cell: *Screen.Cell, x: usize }; + const prev: Prev = prev: { + const x = self.screen.cursor.x - 1; + const immediate = row.getCellPtr(x); + if (!immediate.attrs.wide_spacer_tail) break :prev .{ + .cell = immediate, + .x = x, + }; + + break :prev .{ + .cell = row.getCellPtr(x - 1), + .x = x - 1, + }; + }; + + var state: i32 = 0; + const grapheme_break = if (!prev.cell.attrs.grapheme) + utf8proc.graphemeBreakStateful(@intCast(u21, prev.cell.char), c, &state) + else brk: { + // We need to rebuild the state by processing the grapheme breaks + // for all the codepoints up to this point. This MUST exist because + // grapheme is only true iff this exists. + const points = self.screen.graphemes.getEntry(row.getId() + prev.x + 1).?; + const cp1 = switch (points.value_ptr.*) { + .one => |v| one: { + assert(!utf8proc.graphemeBreakStateful(@intCast(u21, prev.cell.char), v, &state)); + break :one v; + }, + + else => @panic("NO"), + }; + + break :brk utf8proc.graphemeBreakStateful(cp1, c, &state); + }; + + // If we can NOT break, this means that "c" is part of a grapheme + // with the previous char. + if (!grapheme_break) { + log.debug("c={x} grapheme attach to x={}", .{ c, prev.x }); + try row.attachGrapheme(prev.x, c); + return; + } + } + // Determine the width of this character so we can handle // non-single-width characters properly. 
const width = utf8proc.charwidth(c); From 7ceff79ea94e9dba81c5268cc263364168b86c5c Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Mon, 5 Sep 2022 09:47:35 -0700 Subject: [PATCH 3/7] various methods on Row are grapheme-aware and tested --- src/Window.zig | 10 +-- src/terminal/Screen.zig | 164 +++++++++++++++++++++++++++++++++++--- src/terminal/Terminal.zig | 39 ++++----- 3 files changed, 181 insertions(+), 32 deletions(-) diff --git a/src/Window.zig b/src/Window.zig index 03a7eff90..523909fea 100644 --- a/src/Window.zig +++ b/src/Window.zig @@ -1499,7 +1499,7 @@ pub fn eraseChars(self: *Window, count: usize) !void { } pub fn insertLines(self: *Window, count: usize) !void { - self.terminal.insertLines(count); + try self.terminal.insertLines(count); } pub fn insertBlanks(self: *Window, count: usize) !void { @@ -1507,7 +1507,7 @@ pub fn insertBlanks(self: *Window, count: usize) !void { } pub fn deleteLines(self: *Window, count: usize) !void { - self.terminal.deleteLines(count); + try self.terminal.deleteLines(count); } pub fn reverseIndex(self: *Window) !void { @@ -1663,7 +1663,7 @@ pub fn setCursorStyle( } pub fn decaln(self: *Window) !void { - self.terminal.decaln(); + try self.terminal.decaln(); } pub fn tabClear(self: *Window, cmd: terminal.TabClear) !void { @@ -1687,11 +1687,11 @@ pub fn enquiry(self: *Window) !void { } pub fn scrollDown(self: *Window, count: usize) !void { - self.terminal.scrollDown(count); + try self.terminal.scrollDown(count); } pub fn scrollUp(self: *Window, count: usize) !void { - self.terminal.scrollUp(count); + try self.terminal.scrollUp(count); } pub fn setActiveStatusDisplay( diff --git a/src/terminal/Screen.zig b/src/terminal/Screen.zig index 8601b0f32..9f34b29ac 100644 --- a/src/terminal/Screen.zig +++ b/src/terminal/Screen.zig @@ -235,13 +235,32 @@ pub const Row = struct { /// Fill the entire row with a copy of a single cell. 
pub fn fill(self: Row, cell: Cell) void { - std.mem.set(StorageCell, self.storage[1..], .{ .cell = cell }); + self.fillSlice(cell, 0, self.storage.len - 1); } /// Fill a slice of a row. pub fn fillSlice(self: Row, cell: Cell, start: usize, len: usize) void { assert(len <= self.storage.len - 1); - std.mem.set(StorageCell, self.storage[start + 1 .. len + 1], .{ .cell = cell }); + assert(!cell.attrs.grapheme); // you can't fill with graphemes + + // If our row has no graphemes, then this is a fast copy + if (!self.storage[0].header.flags.grapheme) { + std.mem.set(StorageCell, self.storage[start + 1 .. len + 1], .{ .cell = cell }); + return; + } + + // We have graphemes, so we have to clear those first. + for (self.storage[start + 1 .. len + 1]) |*storage_cell, x| { + if (storage_cell.cell.attrs.grapheme) self.clearGraphemes(x); + storage_cell.* = .{ .cell = cell }; + } + + // We only reset the grapheme flag if we fill the whole row, for now. + // We can improve performance by more correctly setting this but I'm + // going to defer that until we can measure. + if (start == 0 and len == self.storage.len - 1) { + self.storage[0].header.flags.grapheme = false; + } } /// Get a single immutable cell. @@ -292,10 +311,42 @@ pub const Row = struct { try gop.value_ptr.append(self.screen.alloc, cp); } + /// Removes all graphemes associated with a cell. + pub fn clearGraphemes(self: Row, x: usize) void { + const cell = &self.storage[x + 1].cell; + const key = self.getId() + x + 1; + cell.attrs.grapheme = false; + _ = self.screen.graphemes.remove(key); + } + /// Copy the row src into this row. The row can be from another screen. - pub fn copyRow(self: Row, src: Row) void { + pub fn copyRow(self: Row, src: Row) !void { + // If we have graphemes, clear first to unset them. + if (self.storage[0].header.flags.grapheme) self.clear(.{}); + + // If the source has no graphemes (likely) then this is fast. 
const end = @minimum(src.storage.len, self.storage.len); - std.mem.copy(StorageCell, self.storage[1..], src.storage[1..end]); + if (!src.storage[0].header.flags.grapheme) { + std.mem.copy(StorageCell, self.storage[1..], src.storage[1..end]); + return; + } + + // Source has graphemes, this is slow. + for (src.storage[1..end]) |storage, x| { + self.storage[x + 1] = .{ .cell = storage.cell }; + + // Copy grapheme data if it exists + if (storage.cell.attrs.grapheme) { + const src_key = src.getId() + x + 1; + const src_data = src.screen.graphemes.get(src_key) orelse continue; + + const dst_key = self.getId() + x + 1; + const dst_gop = try self.screen.graphemes.getOrPut(self.screen.alloc, dst_key); + dst_gop.value_ptr.* = try src_data.copy(self.screen.alloc); + + self.storage[0].header.flags.grapheme = true; + } + } } /// Read-only iterator for the cells in the row. @@ -480,6 +531,14 @@ pub const GraphemeData = union(enum) { } } + pub fn copy(self: GraphemeData, alloc: Allocator) !GraphemeData { + // If we're not many we're not allocated so just copy on stack. + if (self != .many) return self; + + // Heap allocated + return GraphemeData{ .many = try alloc.dupe(u21, self.many) }; + } + test { log.warn("Grapheme={}", .{@sizeOf(GraphemeData)}); } @@ -650,12 +709,12 @@ pub fn getRow(self: *Screen, index: RowIndex) Row { } /// Copy the row at src to dst. -pub fn copyRow(self: *Screen, dst: RowIndex, src: RowIndex) void { +pub fn copyRow(self: *Screen, dst: RowIndex, src: RowIndex) !void { // One day we can make this more efficient but for now // we do the easy thing. 
const dst_row = self.getRow(dst); const src_row = self.getRow(src); - dst_row.copyRow(src_row); + try dst_row.copyRow(src_row); } /// Returns the offset into the storage buffer that the given row can @@ -1032,7 +1091,7 @@ pub fn resizeWithoutReflow(self: *Screen, rows: usize, cols: usize) !void { // Get this row const new_row = self.getRow(.{ .active = y }); - new_row.copyRow(old_row); + try new_row.copyRow(old_row); // Next row y += 1; @@ -1114,7 +1173,7 @@ pub fn resize(self: *Screen, rows: usize, cols: usize) !void { // Get this row var new_row = self.getRow(.{ .active = y }); - new_row.copyRow(old_row); + try new_row.copyRow(old_row); // We need to check if our cursor was on this line. If so, // we set the new cursor. @@ -1458,6 +1517,93 @@ pub fn testString(self: *Screen, alloc: Allocator, tag: RowIndexTag) ![]const u8 return try alloc.realloc(buf, str.len); } +test "Row: clear with graphemes" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try init(alloc, 5, 5, 0); + defer s.deinit(); + + const row = s.getRow(.{ .active = 0 }); + try testing.expect(row.getId() > 0); + try testing.expectEqual(@as(usize, 5), row.lenCells()); + try testing.expect(!row.header().flags.grapheme); + + // Lets add a cell with a grapheme + { + const cell = row.getCellPtr(2); + cell.*.char = 'A'; + try row.attachGrapheme(2, 'B'); + try testing.expect(cell.attrs.grapheme); + try testing.expect(row.header().flags.grapheme); + try testing.expect(s.graphemes.count() == 1); + } + + // Clear the row + row.clear(.{}); + try testing.expect(!row.header().flags.grapheme); + try testing.expect(s.graphemes.count() == 0); +} + +test "Row: copy row with graphemes in destination" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try init(alloc, 5, 5, 0); + defer s.deinit(); + + // Source row does NOT have graphemes + const row_src = s.getRow(.{ .active = 0 }); + { + const cell = row_src.getCellPtr(2); + cell.*.char = 'A'; + } + + // 
Destination has graphemes + const row = s.getRow(.{ .active = 1 }); + { + const cell = row.getCellPtr(1); + cell.*.char = 'B'; + try row.attachGrapheme(1, 'C'); + try testing.expect(cell.attrs.grapheme); + try testing.expect(row.header().flags.grapheme); + try testing.expect(s.graphemes.count() == 1); + } + + // Copy + try row.copyRow(row_src); + try testing.expect(!row.header().flags.grapheme); + try testing.expect(s.graphemes.count() == 0); +} + +test "Row: copy row with graphemes in source" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try init(alloc, 5, 5, 0); + defer s.deinit(); + + // Source row has graphemes + const row_src = s.getRow(.{ .active = 0 }); + { + const cell = row_src.getCellPtr(2); + cell.*.char = 'A'; + try row_src.attachGrapheme(2, 'B'); + try testing.expect(cell.attrs.grapheme); + try testing.expect(row_src.header().flags.grapheme); + try testing.expect(s.graphemes.count() == 1); + } + + // Destination has no graphemes + const row = s.getRow(.{ .active = 1 }); + try row.copyRow(row_src); + try testing.expect(row.header().flags.grapheme); + try testing.expect(s.graphemes.count() == 2); + + row_src.clear(.{}); + try testing.expect(s.graphemes.count() == 1); +} + test "Screen" { const testing = std.testing; const alloc = testing.allocator; @@ -1758,7 +1904,7 @@ test "Screen: row copy" { // Copy try s.scroll(.{ .delta = 1 }); - s.copyRow(.{ .active = 2 }, .{ .active = 0 }); + try s.copyRow(.{ .active = 2 }, .{ .active = 0 }); // Test our contents var contents = try s.testString(alloc, .viewport); diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig index aeea76937..c5e7b6803 100644 --- a/src/terminal/Terminal.zig +++ b/src/terminal/Terminal.zig @@ -608,6 +608,9 @@ fn printCell(self: *Terminal, unmapped_c: u21) *Screen.Cell { } } + // If the prior value had graphemes, clear those + if (cell.attrs.grapheme) row.clearGraphemes(self.screen.cursor.x); + // Write cell.* = self.screen.cursor.pen; 
cell.char = @intCast(u32, c); @@ -640,7 +643,7 @@ fn clearWideSpacerHead(self: *Terminal) void { /// Resets all margins and fills the whole screen with the character 'E' /// /// Sets the cursor to the top left corner. -pub fn decaln(self: *Terminal) void { +pub fn decaln(self: *Terminal) !void { const tracy = trace(@src()); defer tracy.end(); @@ -654,7 +657,7 @@ pub fn decaln(self: *Terminal) void { var row: usize = 1; while (row < self.rows) : (row += 1) { - self.screen.getRow(.{ .active = row }).copyRow(filled); + try self.screen.getRow(.{ .active = row }).copyRow(filled); } } @@ -697,7 +700,7 @@ pub fn index(self: *Terminal) !void { try self.screen.scroll(.{ .delta = 1 }); } else { // TODO: test - self.scrollUp(1); + try self.scrollUp(1); } return; @@ -726,7 +729,7 @@ pub fn reverseIndex(self: *Terminal) !void { // TODO: scrolling region if (self.screen.cursor.y == 0) { - self.scrollDown(1); + try self.scrollDown(1); } else { self.screen.cursor.y -|= 1; } @@ -1128,7 +1131,7 @@ pub fn insertBlanks(self: *Terminal, count: usize) void { /// All cleared space is colored according to the current SGR state. /// /// Moves the cursor to the left margin. -pub fn insertLines(self: *Terminal, count: usize) void { +pub fn insertLines(self: *Terminal, count: usize) !void { const tracy = trace(@src()); defer tracy.end(); @@ -1149,7 +1152,7 @@ pub fn insertLines(self: *Terminal, count: usize) void { // Ensure we have the lines populated to the end while (y > top) : (y -= 1) { - self.screen.copyRow(.{ .active = y }, .{ .active = y - adjusted_count }); + try self.screen.copyRow(.{ .active = y }, .{ .active = y - adjusted_count }); } // Insert count blank lines @@ -1176,7 +1179,7 @@ pub fn insertLines(self: *Terminal, count: usize) void { /// cleared space is colored according to the current SGR state. /// /// Moves the cursor to the left margin. 
-pub fn deleteLines(self: *Terminal, count: usize) void { +pub fn deleteLines(self: *Terminal, count: usize) !void { const tracy = trace(@src()); defer tracy.end(); @@ -1194,7 +1197,7 @@ pub fn deleteLines(self: *Terminal, count: usize) void { // Scroll up the count amount. var y: usize = self.screen.cursor.y; while (y <= self.scrolling_region.bottom - adjusted_count) : (y += 1) { - self.screen.copyRow(.{ .active = y }, .{ .active = y + adjusted_count }); + try self.screen.copyRow(.{ .active = y }, .{ .active = y + adjusted_count }); } while (y <= self.scrolling_region.bottom) : (y += 1) { @@ -1205,7 +1208,7 @@ pub fn deleteLines(self: *Terminal, count: usize) void { /// Scroll the text down by one row. /// TODO: test -pub fn scrollDown(self: *Terminal, count: usize) void { +pub fn scrollDown(self: *Terminal, count: usize) !void { const tracy = trace(@src()); defer tracy.end(); @@ -1215,7 +1218,7 @@ pub fn scrollDown(self: *Terminal, count: usize) void { // Move to the top of the scroll region self.screen.cursor.y = self.scrolling_region.top; - self.insertLines(count); + try self.insertLines(count); } /// Removes amount lines from the top of the scroll region. The remaining lines @@ -1226,14 +1229,14 @@ pub fn scrollDown(self: *Terminal, count: usize) void { /// /// Does not change the (absolute) cursor position. // TODO: test -pub fn scrollUp(self: *Terminal, count: usize) void { +pub fn scrollUp(self: *Terminal, count: usize) !void { // Preserve the cursor const cursor = self.screen.cursor; defer self.screen.cursor = cursor; // Move to the top of the scroll region self.screen.cursor.y = self.scrolling_region.top; - self.deleteLines(count); + try self.deleteLines(count); } /// Options for scrolling the viewport of the terminal grid. 
@@ -1597,7 +1600,7 @@ test "Terminal: deleteLines" { try t.print('D'); t.cursorUp(2); - t.deleteLines(1); + try t.deleteLines(1); try t.print('E'); t.carriageReturn(); @@ -1633,7 +1636,7 @@ test "Terminal: deleteLines with scroll region" { t.setScrollingRegion(1, 3); t.setCursorPos(1, 1); - t.deleteLines(1); + try t.deleteLines(1); try t.print('E'); t.carriageReturn(); @@ -1674,7 +1677,7 @@ test "Terminal: insertLines" { t.setCursorPos(2, 1); // Insert two lines - t.insertLines(2); + try t.insertLines(2); { var str = try t.plainString(testing.allocator); @@ -1705,7 +1708,7 @@ test "Terminal: insertLines with scroll region" { t.setScrollingRegion(1, 2); t.setCursorPos(1, 1); - t.insertLines(1); + try t.insertLines(1); try t.print('X'); @@ -1740,7 +1743,7 @@ test "Terminal: insertLines more than remaining" { t.setCursorPos(2, 1); // Insert a bunch of lines - t.insertLines(20); + try t.insertLines(20); { var str = try t.plainString(testing.allocator); @@ -1881,7 +1884,7 @@ test "Terminal: DECALN" { t.carriageReturn(); try t.linefeed(); try t.print('B'); - t.decaln(); + try t.decaln(); try testing.expectEqual(@as(usize, 0), t.screen.cursor.y); try testing.expectEqual(@as(usize, 0), t.screen.cursor.x); From 47242ad12a8f7a567b72ef854c2120c27759313b Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Mon, 5 Sep 2022 10:29:52 -0700 Subject: [PATCH 4/7] test writing graphemes --- src/font/Shaper.zig | 9 +++ src/terminal/Screen.zig | 135 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 142 insertions(+), 2 deletions(-) diff --git a/src/font/Shaper.zig b/src/font/Shaper.zig index 26dfc9a7d..91e511f0b 100644 --- a/src/font/Shaper.zig +++ b/src/font/Shaper.zig @@ -107,6 +107,15 @@ pub const RunIterator = struct { // Continue with our run self.shaper.hb_buf.add(cell.char, @intCast(u32, j)); + + // If this cell is part of a grapheme cluster, add all the grapheme + // data points. 
+ if (cell.attrs.grapheme) { + var it = self.row.codepointIterator(j); + while (it.next()) |cp| { + self.shaper.hb_buf.add(cp, @intCast(u32, j)); + } + } } // Finalize our buffer diff --git a/src/terminal/Screen.zig b/src/terminal/Screen.zig index 9f34b29ac..d50c67d33 100644 --- a/src/terminal/Screen.zig +++ b/src/terminal/Screen.zig @@ -353,6 +353,18 @@ pub const Row = struct { pub fn cellIterator(self: Row) CellIterator { return .{ .row = self }; } + + /// Read-only iterator for the grapheme codepoints in a cell. This only + /// iterates over the EXTRA GRAPHEME codepoints and not the primary + /// codepoint in cell.char. + pub fn codepointIterator(self: Row, x: usize) CodepointIterator { + const cell = &self.storage[x + 1].cell; + assert(cell.attrs.grapheme); + + const key = self.getId() + x + 1; + const data = self.screen.graphemes.get(key).?; + return .{ .data = data }; + } }; /// Used to iterate through the rows of a specific region. @@ -384,6 +396,47 @@ pub const CellIterator = struct { } }; +/// Used to iterate through the codepoints of a cell. This only iterates +/// over the extra grapheme codepoints and not the primary codepoint. +pub const CodepointIterator = struct { + data: GraphemeData, + i: usize = 0, + + pub fn next(self: *CodepointIterator) ?u21 { + switch (self.data) { + .one => |v| { + if (self.i >= 1) return null; + self.i += 1; + return v; + }, + + .two => |v| { + if (self.i >= v.len) return null; + defer self.i += 1; + return v[self.i]; + }, + + .three => |v| { + if (self.i >= v.len) return null; + defer self.i += 1; + return v[self.i]; + }, + + .four => |v| { + if (self.i >= v.len) return null; + defer self.i += 1; + return v[self.i]; + }, + + .many => |v| { + if (self.i >= v.len) return null; + defer self.i += 1; + return v[self.i]; + }, + } + } +}; + /// RowIndex represents a row within the screen. There are various meanings /// of a row index and this union represents the available types. 
For example, /// when talking about row "0" you may want the first row in the viewport, @@ -864,6 +917,19 @@ fn scrollDelta(self: *Screen, delta: isize, grow: bool) !void { // If we can't fit our rows into our capacity, we delete some scrollback. const rows_deleted = if (rows_final > self.rowsCapacity()) deleted: { const rows_to_delete = rows_final - self.rowsCapacity(); + + // Fast-path: we have no graphemes. + // Slow-path: we have graphemes, we have to check each row + // we're going to delete to see if they contain graphemes and + // clear the ones that do so we clear memory properly. + if (self.graphemes.count() > 0) { + var y: usize = 0; + while (y < rows_to_delete) : (y += 1) { + const row = self.getRow(.{ .active = y }); + if (row.storage[0].header.flags.grapheme) row.clear(.{}); + } + } + self.viewport -= rows_to_delete; self.storage.deleteOldest(rows_to_delete * (self.cols + 1)); break :deleted rows_to_delete; @@ -1403,8 +1469,13 @@ pub fn resize(self: *Screen, rows: usize, cols: usize) !void { /// each row. If a line is longer than the available columns, soft-wrapping /// will occur. This will automatically handle basic wide chars. 
pub fn testWriteString(self: *Screen, text: []const u8) !void { - var y: usize = 0; - var x: usize = 0; + var y: usize = self.cursor.y; + var x: usize = self.cursor.x; + + var grapheme: struct { + x: usize = 0, + cell: ?*Cell = null, + } = .{}; const view = std.unicode.Utf8View.init(text) catch unreachable; var iter = view.iterator(); @@ -1413,6 +1484,7 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void { if (c == '\n') { y += 1; x = 0; + grapheme = .{}; continue; } @@ -1425,6 +1497,32 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void { // Get our row var row = self.getRow(.{ .active = y }); + if (grapheme.cell) |prev_cell| { + const grapheme_break = brk: { + var state: i32 = 0; + var cp1 = @intCast(u21, prev_cell.char); + if (prev_cell.attrs.grapheme) { + var it = row.codepointIterator(grapheme.x); + while (it.next()) |cp2| { + assert(!utf8proc.graphemeBreakStateful( + cp1, + cp2, + &state, + )); + + cp1 = cp2; + } + } + + break :brk utf8proc.graphemeBreakStateful(cp1, c, &state); + }; + + if (!grapheme_break) { + try row.attachGrapheme(grapheme.x, c); + continue; + } + } + // If we're writing past the end, we need to soft wrap. if (x == self.cols) { row.setWrapped(true); @@ -1444,6 +1542,9 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void { 1 => { const cell = row.getCellPtr(x); cell.char = @intCast(u32, c); + + grapheme.x = x; + grapheme.cell = cell; }, 2 => { @@ -1467,6 +1568,9 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void { const cell = row.getCellPtr(x); cell.char = @intCast(u32, c); cell.attrs.wide = true; + + grapheme.x = x; + grapheme.cell = cell; } { @@ -1482,6 +1586,10 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void { x += 1; } + + // So the cursor doesn't go off screen + self.cursor.x = @minimum(x, self.cols - 1); + self.cursor.y = y; } /// Turns the screen into a string. 
Different regions of the screen can @@ -1645,6 +1753,25 @@ test "Screen" { } } +test "Screen: write graphemes" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try init(alloc, 5, 5, 0); + defer s.deinit(); + + // Sanity check that our test helpers work + var buf: [32]u8 = undefined; + var buf_idx: usize = 0; + buf_idx += try std.unicode.utf8Encode(0x1F44D, buf[buf_idx..]); // Thumbs up plain + buf_idx += try std.unicode.utf8Encode(0x1F44D, buf[buf_idx..]); // Thumbs up plain + buf_idx += try std.unicode.utf8Encode(0x1F3FD, buf[buf_idx..]); // Medium skin tone + + try s.testWriteString(buf[0..buf_idx]); + try testing.expect(s.rowsWritten() == 1); + try testing.expectEqual(@as(usize, 4), s.cursor.x); +} + test "Screen: scrolling" { const testing = std.testing; const alloc = testing.allocator; @@ -2512,6 +2639,8 @@ test "Screen: resize less rows no scrollback" { defer s.deinit(); const str = "1ABCD\n2EFGH\n3IJKL"; try s.testWriteString(str); + s.cursor.x = 0; + s.cursor.y = 0; const cursor = s.cursor; try s.resize(1, 5); @@ -2629,6 +2758,8 @@ test "Screen: resize less cols no reflow" { defer s.deinit(); const str = "1AB\n2EF\n3IJ"; try s.testWriteString(str); + s.cursor.x = 0; + s.cursor.y = 0; const cursor = s.cursor; try s.resize(3, 3); From 802f230fa2d2c879eeb7754e21080a86c956f2d1 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Mon, 5 Sep 2022 10:35:22 -0700 Subject: [PATCH 5/7] handle graphemes with more than 1 joiner --- src/terminal/Screen.zig | 1 + src/terminal/Terminal.zig | 29 ++++++++++++++--------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/terminal/Screen.zig b/src/terminal/Screen.zig index d50c67d33..158b5b18c 100644 --- a/src/terminal/Screen.zig +++ b/src/terminal/Screen.zig @@ -1497,6 +1497,7 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void { // Get our row var row = self.getRow(.{ .active = y }); + // If we have a previous cell, we check if we're part of a grapheme. 
if (grapheme.cell) |prev_cell| { const grapheme_break = brk: { var state: i32 = 0; diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig index c5e7b6803..d42fb1d29 100644 --- a/src/terminal/Terminal.zig +++ b/src/terminal/Terminal.zig @@ -474,22 +474,21 @@ pub fn print(self: *Terminal, c: u21) !void { }; }; - var state: i32 = 0; - const grapheme_break = if (!prev.cell.attrs.grapheme) - utf8proc.graphemeBreakStateful(@intCast(u21, prev.cell.char), c, &state) - else brk: { - // We need to rebuild the state by processing the grapheme breaks - // for all the codepoints up to this point. This MUST exist because - // grapheme is only true iff this exists. - const points = self.screen.graphemes.getEntry(row.getId() + prev.x + 1).?; - const cp1 = switch (points.value_ptr.*) { - .one => |v| one: { - assert(!utf8proc.graphemeBreakStateful(@intCast(u21, prev.cell.char), v, &state)); - break :one v; - }, + const grapheme_break = brk: { + var state: i32 = 0; + var cp1 = @intCast(u21, prev.cell.char); + if (prev.cell.attrs.grapheme) { + var it = row.codepointIterator(prev.x); + while (it.next()) |cp2| { + assert(!utf8proc.graphemeBreakStateful( + cp1, + cp2, + &state, + )); - else => @panic("NO"), - }; + cp1 = cp2; + } + } break :brk utf8proc.graphemeBreakStateful(cp1, c, &state); }; From 8ef31aaaf988a9f246f6213255c4ce338ec048c4 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Mon, 5 Sep 2022 11:22:29 -0700 Subject: [PATCH 6/7] Disable --- src/terminal/Terminal.zig | 7 ++++++- src/terminal/stream.zig | 4 +--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig index d42fb1d29..97657ae92 100644 --- a/src/terminal/Terminal.zig +++ b/src/terminal/Terminal.zig @@ -455,7 +455,12 @@ pub fn print(self: *Terminal, c: u21) !void { // extremely fast and we take this much slower path for graphemes. No hate // on graphemes, I'd love to make them much faster, but I wanted to focus // on correctness first. 
- if (c > 255 and self.screen.cursor.x > 0) { + // + // NOTE: This is disabled because no shells handle this correctly. We'll + // need to work with shells and other emulators to probably figure out + // a way to support this. In the meantime, I'm going to keep all the + // grapheme detection and keep it up to date so we're ready to go. + if (false and c > 255 and self.screen.cursor.x > 0) { // TODO: test this! const row = self.screen.getRow(.{ .active = self.screen.cursor.y }); diff --git a/src/terminal/stream.zig b/src/terminal/stream.zig index e554c59fe..e1904a70e 100644 --- a/src/terminal/stream.zig +++ b/src/terminal/stream.zig @@ -51,9 +51,7 @@ pub fn Stream(comptime Handler: type) type { const actions = self.parser.next(c); for (actions) |action_opt| { // if (action_opt) |action| { - // if (action != .print) { - // log.info("action: {}", .{action}); - // } + // log.info("action: {}", .{action}); // } switch (action_opt orelse continue) { .print => |p| if (@hasDecl(T, "print")) try self.handler.print(p), From 9e8d00f140affab6b30f4f7d91ec90e5c524c8ce Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Mon, 5 Sep 2022 11:32:14 -0700 Subject: [PATCH 7/7] attach zero-width joiners to cells --- src/terminal/Terminal.zig | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig index 97657ae92..1eae05331 100644 --- a/src/terminal/Terminal.zig +++ b/src/terminal/Terminal.zig @@ -512,9 +512,20 @@ pub fn print(self: *Terminal, c: u21) !void { const width = utf8proc.charwidth(c); assert(width <= 2); - // For now, we ignore zero-width characters. When we support ligatures, - // this will have to change. - if (width == 0) return; + // Attach zero-width characters to our cell as grapheme data.
+ if (width == 0) { + // Find our previous cell + const row = self.screen.getRow(.{ .active = self.screen.cursor.y }); + const prev: usize = prev: { + const x = self.screen.cursor.x - 1; + const immediate = row.getCellPtr(x); + if (!immediate.attrs.wide_spacer_tail) break :prev x; + break :prev x - 1; + }; + + try row.attachGrapheme(prev, c); + return; + } // If we're soft-wrapping, then handle that first. if (self.screen.cursor.pending_wrap and self.modes.autowrap)