From bcefbfd7b42dcb6389c738e0ebad95b0fc3c16cc Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sat, 9 Nov 2024 09:37:03 -0800 Subject: [PATCH] terminal: move UTF8 encoding to Page and wrap around it --- src/terminal/PageList.zig | 38 ++++++++++++++ src/terminal/Screen.zig | 86 ++----------------------------- src/terminal/page.zig | 106 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+), 83 deletions(-) diff --git a/src/terminal/PageList.zig b/src/terminal/PageList.zig index 01e7ed71d..175e3f64f 100644 --- a/src/terminal/PageList.zig +++ b/src/terminal/PageList.zig @@ -2544,6 +2544,44 @@ pub fn getCell(self: *const PageList, pt: point.Point) ?Cell { }; } +pub const EncodeUtf8Options = struct { + /// The start and end points of the dump, both inclusive. The x will + /// be ignored and the full row will always be dumped. + tl: Pin, + br: ?Pin = null, + + /// If true, this will unwrap soft-wrapped lines. If false, this will + /// dump the screen as it is visually seen in a rendered window. + unwrap: bool = true, +}; + +/// Encode the pagelist to utf8 to the given writer. +/// +/// The writer should be buffered; this function does not attempt to +/// efficiently write and often writes one byte at a time. +/// +/// Note: this is tested via Screen.dumpString, which predates this function +/// and is now a thin wrapper around it, so the tests all live there. +pub fn encodeUtf8( + self: *const PageList, + writer: anytype, + opts: EncodeUtf8Options, +) anyerror!void { + // We don't currently use self at all. There is an argument that this + // function should live on Pin instead, but in the future we might need + // state from self here, so we are leaving it on PageList for now. 
+ _ = self; + + var page_opts: Page.EncodeUtf8Options = .{ .unwrap = opts.unwrap }; + var iter = opts.tl.pageIterator(.right_down, opts.br); + while (iter.next()) |chunk| { + const page: *const Page = &chunk.node.data; + page_opts.start_y = chunk.start; + page_opts.end_y = chunk.end; + page_opts.preceding = try page.encodeUtf8(writer, page_opts); + } +} + /// Log a debug diagram of the page list to the provided writer. /// /// EXAMPLE: diff --git a/src/terminal/Screen.zig b/src/terminal/Screen.zig index d8787487f..bf63e7e05 100644 --- a/src/terminal/Screen.zig +++ b/src/terminal/Screen.zig @@ -2731,95 +2731,15 @@ pub fn promptPath( return .{ .x = to_x - from_x, .y = to_y - from_y }; } -pub const DumpString = struct { - /// The start and end points of the dump, both inclusive. The x will - /// be ignored and the full row will always be dumped. - tl: Pin, - br: ?Pin = null, - - /// If true, this will unwrap soft-wrapped lines. If false, this will - /// dump the screen as it is visually seen in a rendered window. - unwrap: bool = true, -}; - /// Dump the screen to a string. The writer given should be buffered; /// this function does not attempt to efficiently write and generally writes /// one byte at a time. pub fn dumpString( self: *const Screen, writer: anytype, - opts: DumpString, -) !void { - var blank_rows: usize = 0; - var blank_cells: usize = 0; - - var iter = opts.tl.rowIterator(.right_down, opts.br); - while (iter.next()) |row_offset| { - const rac = row_offset.rowAndCell(); - const row = rac.row; - const cells = cells: { - const cells: [*]pagepkg.Cell = @ptrCast(rac.cell); - break :cells cells[0..self.pages.cols]; - }; - - if (!pagepkg.Cell.hasTextAny(cells)) { - blank_rows += 1; - continue; - } - if (blank_rows > 0) { - for (0..blank_rows) |_| try writer.writeByte('\n'); - blank_rows = 0; - } - - if (!row.wrap or !opts.unwrap) { - // If we're not wrapped, we always add a newline. 
- // If we are wrapped, we only add a new line if we're unwrapping - // soft-wrapped lines. - blank_rows += 1; - } - - if (!row.wrap_continuation or !opts.unwrap) { - // We should also reset blank cell counts at the start of each row - // unless we're unwrapping and this row is a wrap continuation. - blank_cells = 0; - } - - for (cells) |*cell| { - // Skip spacers - switch (cell.wide) { - .narrow, .wide => {}, - .spacer_head, .spacer_tail => continue, - } - - // If we have a zero value, then we accumulate a counter. We - // only want to turn zero values into spaces if we have a non-zero - // char sometime later. - if (!cell.hasText()) { - blank_cells += 1; - continue; - } - if (blank_cells > 0) { - try writer.writeByteNTimes(' ', blank_cells); - blank_cells = 0; - } - - switch (cell.content_tag) { - .codepoint => { - try writer.print("{u}", .{cell.content.codepoint}); - }, - - .codepoint_grapheme => { - try writer.print("{u}", .{cell.content.codepoint}); - const cps = row_offset.node.data.lookupGrapheme(cell).?; - for (cps) |cp| { - try writer.print("{u}", .{cp}); - } - }, - - else => unreachable, - } - } - } + opts: PageList.EncodeUtf8Options, +) anyerror!void { + try self.pages.encodeUtf8(writer, opts); } /// You should use dumpString, this is a restricted version mostly for diff --git a/src/terminal/page.zig b/src/terminal/page.zig index 8c470d726..d41f37e8d 100644 --- a/src/terminal/page.zig +++ b/src/terminal/page.zig @@ -1481,6 +1481,112 @@ pub const Page = struct { return self.grapheme_map.map(self.memory).capacity(); } + /// Options for encoding the page as UTF-8. + pub const EncodeUtf8Options = struct { + /// The range of rows to encode. If end_y is null, then it will + /// encode to the end of the page. + start_y: size.CellCountInt = 0, + end_y: ?size.CellCountInt = null, + + /// If true, this will unwrap soft-wrapped lines. If false, this will + /// dump the screen as it is visually seen in a rendered window. 
+ unwrap: bool = true, + + /// Preceding state from encoding the prior page. Used to preserve + /// blanks properly across multiple pages. + preceding: TrailingUtf8State = .{}, + + /// Trailing state for UTF-8 encoding. + pub const TrailingUtf8State = struct { + rows: usize = 0, + cells: usize = 0, + }; + }; + + /// Encode the page contents as UTF-8. + /// + /// The preceding state in opts (zero counts by default) initializes our + /// blank rows/cells counts so that we can accumulate blanks across + /// multiple pages. + /// + /// Note: The tests for this function are done via Screen.dumpString + /// tests since that function is a thin wrapper around this one and + /// it makes it easier to test input contents. + pub fn encodeUtf8( + self: *const Page, + writer: anytype, + opts: EncodeUtf8Options, + ) anyerror!EncodeUtf8Options.TrailingUtf8State { + var blank_rows: usize = opts.preceding.rows; + var blank_cells: usize = opts.preceding.cells; + + const start_y: size.CellCountInt = opts.start_y; + const end_y: size.CellCountInt = opts.end_y orelse self.size.rows; + for (start_y..end_y) |y| { + const row: *Row = self.getRow(y); + const cells: []const Cell = self.getCells(row); + + // If this row is blank, accumulate to avoid a bunch of extra + // work later. If it isn't blank, make sure we dump all our + // blanks. + if (!Cell.hasTextAny(cells)) { + blank_rows += 1; + continue; + } + for (0..blank_rows) |_| try writer.writeByte('\n'); + blank_rows = 0; + + // If this row isn't soft-wrapped (or we aren't unwrapping), queue + // a newline to be written after the row is printed. + if (!row.wrap or !opts.unwrap) blank_rows += 1; + + // If the row doesn't continue a wrap then we need to reset + // our blank cell count. 
+ if (!row.wrap_continuation or !opts.unwrap) blank_cells = 0; + + // Go through each cell and print it + for (cells) |*cell| { + // Skip spacers + switch (cell.wide) { + .narrow, .wide => {}, + .spacer_head, .spacer_tail => continue, + } + + // If we have a zero value, then we accumulate a counter. We + // only want to turn zero values into spaces if we have a non-zero + // char sometime later. + if (!cell.hasText()) { + blank_cells += 1; + continue; + } + if (blank_cells > 0) { + try writer.writeByteNTimes(' ', blank_cells); + blank_cells = 0; + } + + switch (cell.content_tag) { + .codepoint => { + try writer.print("{u}", .{cell.content.codepoint}); + }, + + .codepoint_grapheme => { + try writer.print("{u}", .{cell.content.codepoint}); + for (self.lookupGrapheme(cell).?) |cp| { + try writer.print("{u}", .{cp}); + } + }, + + // Unreachable since we do hasText() above + .bg_color_palette, + .bg_color_rgb, + => unreachable, + } + } + } + + return .{ .rows = blank_rows, .cells = blank_cells }; + } + /// Returns the bitset for the dirty bits on this page. /// /// The returned value is a DynamicBitSetUnmanaged but it is NOT