terminal: move UTF8 encoding to Page and wrap around it

2025-07-14 15:56:13 +03:00 · 2024-11-09 09:37:03 -08:00
parent 50dc4b75d7
commit bcefbfd7b4
3 changed files with 147 additions and 83 deletions
--- a/src/terminal/PageList.zig
+++ b/src/terminal/PageList.zig
@ -2544,6 +2544,44 @@ pub fn getCell(self: *const PageList, pt: point.Point) ?Cell {
    };
 }

+pub const EncodeUtf8Options = struct {
+    /// Top-left and bottom-right pins of the dump, both inclusive. Only the
+    /// rows matter: x is ignored and every selected row is dumped in full.
+    tl: Pin,
+    br: ?Pin = null,
+
+    /// If true, soft-wrapped rows are joined back into one line. If false,
+    /// the output matches what is visually seen in a rendered window.
+    unwrap: bool = true,
+};
+
+/// Encode the rows from opts.tl through opts.br as UTF-8 to the writer.
+///
+/// Each page chunk is encoded by Page.encodeUtf8, and the trailing blank
+/// state it returns is fed into the next chunk. The writer should be
+/// buffered: output is often written one byte at a time.
+/// Note: this is tested via Screen.dumpString, an older function that is now
+/// a thin wrapper around this one, so the tests all live there.
+pub fn encodeUtf8(
+    self: *const PageList,
+    writer: anytype,
+    opts: EncodeUtf8Options,
+) anyerror!void {
+    // self is currently unused: everything is reached through the pins in
+    // opts. Arguably this belongs on Pin, but a future version may need
+    // PageList state, so it stays here.
+    _ = self;
+
+    var page_opts: Page.EncodeUtf8Options = .{ .unwrap = opts.unwrap };
+    var iter = opts.tl.pageIterator(.right_down, opts.br);
+    while (iter.next()) |chunk| {
+        const page: *const Page = &chunk.node.data;
+        page_opts.start_y = chunk.start;
+        page_opts.end_y = chunk.end;
+        page_opts.preceding = try page.encodeUtf8(writer, page_opts);
+    }
+}
+
 /// Log a debug diagram of the page list to the provided writer.
 ///
 /// EXAMPLE:
--- a/src/terminal/Screen.zig
+++ b/src/terminal/Screen.zig
@ -2731,95 +2731,15 @@ pub fn promptPath(
    return .{ .x = to_x - from_x, .y = to_y - from_y };
 }

-pub const DumpString = struct {
-    /// The start and end points of the dump, both inclusive. The x will
-    /// be ignored and the full row will always be dumped.
-    tl: Pin,
-    br: ?Pin = null,
-
-    /// If true, this will unwrap soft-wrapped lines. If false, this will
-    /// dump the screen as it is visually seen in a rendered window.
-    unwrap: bool = true,
-};
-
 /// Dump the screen to a string. The writer given should be buffered;
 /// this function does not attempt to efficiently write and generally writes
 /// one byte at a time.
 pub fn dumpString(
    self: *const Screen,
    writer: anytype,
-    opts: DumpString,
-) !void {
-    var blank_rows: usize = 0;
-    var blank_cells: usize = 0;
-
-    var iter = opts.tl.rowIterator(.right_down, opts.br);
-    while (iter.next()) |row_offset| {
-        const rac = row_offset.rowAndCell();
-        const row = rac.row;
-        const cells = cells: {
-            const cells: [*]pagepkg.Cell = @ptrCast(rac.cell);
-            break :cells cells[0..self.pages.cols];
-        };
-
-        if (!pagepkg.Cell.hasTextAny(cells)) {
-            blank_rows += 1;
-            continue;
-        }
-        if (blank_rows > 0) {
-            for (0..blank_rows) |_| try writer.writeByte('\n');
-            blank_rows = 0;
-        }
-
-        if (!row.wrap or !opts.unwrap) {
-            // If we're not wrapped, we always add a newline.
-            // If we are wrapped, we only add a new line if we're unwrapping
-            // soft-wrapped lines.
-            blank_rows += 1;
-        }
-
-        if (!row.wrap_continuation or !opts.unwrap) {
-            // We should also reset blank cell counts at the start of each row
-            // unless we're unwrapping and this row is a wrap continuation.
-            blank_cells = 0;
-        }
-
-        for (cells) |*cell| {
-            // Skip spacers
-            switch (cell.wide) {
-                .narrow, .wide => {},
-                .spacer_head, .spacer_tail => continue,
-            }
-
-            // If we have a zero value, then we accumulate a counter. We
-            // only want to turn zero values into spaces if we have a non-zero
-            // char sometime later.
-            if (!cell.hasText()) {
-                blank_cells += 1;
-                continue;
-            }
-            if (blank_cells > 0) {
-                try writer.writeByteNTimes(' ', blank_cells);
-                blank_cells = 0;
-            }
-
-            switch (cell.content_tag) {
-                .codepoint => {
-                    try writer.print("{u}", .{cell.content.codepoint});
-                },
-
-                .codepoint_grapheme => {
-                    try writer.print("{u}", .{cell.content.codepoint});
-                    const cps = row_offset.node.data.lookupGrapheme(cell).?;
-                    for (cps) |cp| {
-                        try writer.print("{u}", .{cp});
-                    }
-                },
-
-                else => unreachable,
-            }
-        }
-    }
+    opts: PageList.EncodeUtf8Options,
+) anyerror!void {
+    try self.pages.encodeUtf8(writer, opts);
 }

 /// You should use dumpString, this is a restricted version mostly for
--- a/src/terminal/page.zig
+++ b/src/terminal/page.zig
@ -1481,6 +1481,112 @@ pub const Page = struct {
        return self.grapheme_map.map(self.memory).capacity();
    }

+    /// Options for encoding the page as UTF-8.
+    pub const EncodeUtf8Options = struct {
+        /// Rows to encode: start_y is inclusive, end_y is exclusive. A null
+        /// end_y encodes through the last row of the page.
+        start_y: size.CellCountInt = 0,
+        end_y: ?size.CellCountInt = null,
+
+        /// If true, soft-wrapped rows are joined back into one line. If false,
+        /// the output matches what is visually seen in a rendered window.
+        unwrap: bool = true,
+
+        /// Blank counts returned by encoding the prior page. They seed this
+        /// call so that pending blanks carry across multiple pages.
+        preceding: TrailingUtf8State = .{},
+
+        /// Blank rows/cells counted but not yet written when encoding ended.
+        pub const TrailingUtf8State = struct {
+            rows: usize = 0,
+            cells: usize = 0,
+        };
+    };
+
+    /// Encode rows [opts.start_y, opts.end_y) of this page as UTF-8.
+    ///
+    /// Blank rows and cells are only written once later text follows them.
+    /// opts.preceding seeds those pending counts and the still-pending
+    /// counts are returned, so blanks can accumulate across multiple pages.
+    ///
+    /// Note: The tests for this function are done via Screen.dumpString
+    /// tests since that function wraps this one (via PageList.encodeUtf8)
+    /// and it makes it easier to test input contents.
+    pub fn encodeUtf8(
+        self: *const Page,
+        writer: anytype,
+        opts: EncodeUtf8Options,
+    ) anyerror!EncodeUtf8Options.TrailingUtf8State {
+        var blank_rows: usize = opts.preceding.rows;
+        var blank_cells: usize = opts.preceding.cells;
+
+        const start_y: size.CellCountInt = opts.start_y;
+        const end_y: size.CellCountInt = opts.end_y orelse self.size.rows;
+        for (start_y..end_y) |y| {
+            const row: *Row = self.getRow(y);
+            const cells: []const Cell = self.getCells(row);
+
+            // A row with no text is only counted for now; its newline is
+            // written later if a row with text follows. For a row with
+            // text, flush every pending newline first.
+            if (!Cell.hasTextAny(cells)) {
+                blank_rows += 1;
+                continue;
+            }
+            for (0..blank_rows) |_| try writer.writeByte('\n');
+            blank_rows = 0;
+
+            // Unless this row soft-wraps and we're unwrapping, it ends in a
+            // newline: keep it pending until the next row with text.
+            if (!row.wrap or !opts.unwrap) blank_rows += 1;
+
+            // Pending blank cells only carry over into a wrap continuation
+            // that is being unwrapped; otherwise they are dropped here.
+            if (!row.wrap_continuation or !opts.unwrap) blank_cells = 0;
+
+            // Write each cell, deferring blanks until text follows them
+            for (cells) |*cell| {
+                // Spacer cells are skipped without counting as blanks
+                switch (cell.wide) {
+                    .narrow, .wide => {},
+                    .spacer_head, .spacer_tail => continue,
+                }
+
+                // A cell without text only bumps the pending counter. The
+                // pending blanks are written as spaces only once a cell
+                // with text follows them.
+                if (!cell.hasText()) {
+                    blank_cells += 1;
+                    continue;
+                }
+                if (blank_cells > 0) {
+                    try writer.writeByteNTimes(' ', blank_cells);
+                    blank_cells = 0;
+                }
+
+                switch (cell.content_tag) {
+                    .codepoint => {
+                        try writer.print("{u}", .{cell.content.codepoint});
+                    },
+
+                    .codepoint_grapheme => {
+                        try writer.print("{u}", .{cell.content.codepoint});
+                        for (self.lookupGrapheme(cell).?) |cp| {
+                            try writer.print("{u}", .{cp});
+                        }
+                    },
+
+                    // Unreachable since we do hasText() above
+                    .bg_color_palette,
+                    .bg_color_rgb,
+                    => unreachable,
+                }
+            }
+        }
+
+        return .{ .rows = blank_rows, .cells = blank_cells };
+    }
+
    /// Returns the bitset for the dirty bits on this page.
    ///
    /// The returned value is a DynamicBitSetUnmanaged but it is NOT