terminal: move UTF8 encoding to Page and wrap around it

This commit is contained in:
Mitchell Hashimoto
2024-11-09 09:37:03 -08:00
parent 50dc4b75d7
commit bcefbfd7b4
3 changed files with 147 additions and 83 deletions

View File

@ -2544,6 +2544,44 @@ pub fn getCell(self: *const PageList, pt: point.Point) ?Cell {
};
}
/// Options accepted by `encodeUtf8` on the page list.
pub const EncodeUtf8Options = struct {
/// The start and end points of the dump, both inclusive. The x will
/// be ignored and the full row will always be dumped.
tl: Pin,
/// Optional end point; defaults to null. It is passed unchanged as the
/// limit to `tl.pageIterator(.right_down, br)`.
/// NOTE(review): null presumably means "no limit" -- confirm against
/// pageIterator.
br: ?Pin = null,
/// If true, this will unwrap soft-wrapped lines. If false, this will
/// dump the screen as it is visually seen in a rendered window.
/// Defaults to true.
unwrap: bool = true,
};
/// Write the rows selected by `opts` to `writer` as UTF-8.
///
/// The range is walked one page chunk at a time and each chunk is handed
/// to `Page.encodeUtf8`. The trailing blank state returned for one chunk
/// is passed as the `preceding` state of the next, so blanks that span a
/// page boundary are accounted for.
///
/// Output is produced in small pieces (frequently a single byte), so the
/// caller should supply a buffered writer.
///
/// Any error returned by the per-page encoder is propagated.
///
/// Note: the tests for this live with Screen.dumpString, an older function
/// that is now a thin wrapper around this one.
pub fn encodeUtf8(
    self: *const PageList,
    writer: anytype,
    opts: EncodeUtf8Options,
) anyerror!void {
    // Nothing on the page list itself is consulted today. One could argue
    // this belongs on Pin, but it is kept here in case list-level state is
    // needed in the future.
    _ = self;

    // Blank rows/cells left pending by the previously encoded chunk.
    var carry: Page.EncodeUtf8Options.TrailingUtf8State = .{};

    var chunks = opts.tl.pageIterator(.right_down, opts.br);
    while (chunks.next()) |chunk| {
        const page: *const Page = &chunk.node.data;
        carry = try page.encodeUtf8(writer, .{
            .start_y = chunk.start,
            .end_y = chunk.end,
            .unwrap = opts.unwrap,
            .preceding = carry,
        });
    }
}
/// Log a debug diagram of the page list to the provided writer.
///
/// EXAMPLE:

View File

@ -2731,95 +2731,15 @@ pub fn promptPath(
return .{ .x = to_x - from_x, .y = to_y - from_y };
}
/// Options for `dumpString`.
// NOTE(review): the fields and defaults here are identical to the
// `EncodeUtf8Options` struct declared next to the page list's `encodeUtf8`;
// this looks like the removed side of the diff -- confirm before relying on it.
pub const DumpString = struct {
/// The start and end points of the dump, both inclusive. The x will
/// be ignored and the full row will always be dumped.
tl: Pin,
/// Optional end point; defaults to null.
br: ?Pin = null,
/// If true, this will unwrap soft-wrapped lines. If false, this will
/// dump the screen as it is visually seen in a rendered window.
/// Defaults to true.
unwrap: bool = true,
};
// NOTE(review): this span appears to interleave the removed and the added
// lines of a diff: there are two `opts:` parameters and two return types.
// The trailing three lines (taking `PageList.EncodeUtf8Options` and calling
// `self.pages.encodeUtf8`) look like the new body; everything in between
// looks like the old inline implementation. Confirm against the real file.
/// Dump the screen to a string. The writer given should be buffered;
/// this function does not attempt to efficiently write and generally writes
/// one byte at a time.
pub fn dumpString(
self: *const Screen,
writer: anytype,
opts: DumpString,
) !void {
// Newlines and spaces are not written eagerly. They are counted here and
// only flushed once a later row/cell that has text is encountered.
var blank_rows: usize = 0;
var blank_cells: usize = 0;
var iter = opts.tl.rowIterator(.right_down, opts.br);
while (iter.next()) |row_offset| {
const rac = row_offset.rowAndCell();
const row = rac.row;
// View the row as a slice of `self.pages.cols` cells starting at rac.cell.
const cells = cells: {
const cells: [*]pagepkg.Cell = @ptrCast(rac.cell);
break :cells cells[0..self.pages.cols];
};
// A row with no text only bumps the pending newline count.
if (!pagepkg.Cell.hasTextAny(cells)) {
blank_rows += 1;
continue;
}
// This row has text: flush the newlines owed from earlier rows.
if (blank_rows > 0) {
for (0..blank_rows) |_| try writer.writeByte('\n');
blank_rows = 0;
}
if (!row.wrap or !opts.unwrap) {
// If this row is not soft-wrapped, it always ends with a newline.
// If it is soft-wrapped, it only ends with a newline when we are
// NOT unwrapping soft-wrapped lines.
blank_rows += 1;
}
if (!row.wrap_continuation or !opts.unwrap) {
// We should also reset blank cell counts at the start of each row
// unless we're unwrapping and this row is a wrap continuation.
blank_cells = 0;
}
for (cells) |*cell| {
// Skip spacers
switch (cell.wide) {
.narrow, .wide => {},
.spacer_head, .spacer_tail => continue,
}
// If we have a zero value, then we accumulate a counter. We
// only want to turn zero values into spaces if we have a non-zero
// char sometime later.
if (!cell.hasText()) {
blank_cells += 1;
continue;
}
// This cell has text: flush the spaces owed from earlier blank cells.
if (blank_cells > 0) {
try writer.writeByteNTimes(' ', blank_cells);
blank_cells = 0;
}
switch (cell.content_tag) {
.codepoint => {
try writer.print("{u}", .{cell.content.codepoint});
},
.codepoint_grapheme => {
// Write the base codepoint, then every extra codepoint stored for
// this cell in the owning page's grapheme lookup.
try writer.print("{u}", .{cell.content.codepoint});
const cps = row_offset.node.data.lookupGrapheme(cell).?;
for (cps) |cp| {
try writer.print("{u}", .{cp});
}
},
// Other content tags are not expected once hasText() has passed.
else => unreachable,
}
}
}
opts: PageList.EncodeUtf8Options,
) anyerror!void {
// Thin wrapper: all of the encoding is done by the page list.
try self.pages.encodeUtf8(writer, opts);
}
/// You should use dumpString, this is a restricted version mostly for

View File

@ -1481,6 +1481,112 @@ pub const Page = struct {
return self.grapheme_map.map(self.memory).capacity();
}
/// Options for encoding the page as UTF-8.
pub const EncodeUtf8Options = struct {
/// The range of rows to encode. `start_y` is the first row encoded and
/// defaults to 0.
start_y: size.CellCountInt = 0,
/// Upper bound of the row range; the encoder loops over
/// `start_y..end_y`, so this row itself is not encoded. If null, the
/// page's row count is used and encoding runs to the end of the page.
end_y: ?size.CellCountInt = null,
/// If true, this will unwrap soft-wrapped lines. If false, this will
/// dump the screen as it is visually seen in a rendered window.
unwrap: bool = true,
/// Preceding state from encoding the prior page. Used to preserve
/// blanks properly across multiple pages. Defaults to zero counts.
preceding: TrailingUtf8State = .{},
/// Trailing state for UTF-8 encoding: blanks that were counted but
/// not yet written when encoding stopped.
pub const TrailingUtf8State = struct {
/// Number of newlines still pending.
rows: usize = 0,
/// Number of blank cells still pending.
cells: usize = 0,
};
};
/// Write the text content of this page's rows to `writer` as UTF-8.
///
/// Rows `opts.start_y` up to, but not including, `opts.end_y` are visited;
/// a null `end_y` means "up to `self.size.rows`".
///
/// Blank rows and blank cells are never written eagerly. They are counted
/// and only turned into '\n' and ' ' bytes once a later cell with text is
/// found. The counters start from `opts.preceding` (zero by default), and
/// whatever is still pending at the end is returned so the caller can pass
/// it as `preceding` when encoding the next page.
///
/// Errors from the writer are propagated.
///
/// Note: this is tested through the Screen.dumpString tests; that function
/// is a thin wrapper over this one and makes it easier to set up input
/// contents.
pub fn encodeUtf8(
    self: *const Page,
    writer: anytype,
    opts: EncodeUtf8Options,
) anyerror!EncodeUtf8Options.TrailingUtf8State {
    // Newlines (`rows`) and spaces (`cells`) owed to the writer but not
    // yet emitted, seeded from what the previous page left behind.
    var pending: EncodeUtf8Options.TrailingUtf8State = opts.preceding;

    const first_row: size.CellCountInt = opts.start_y;
    const row_limit: size.CellCountInt = opts.end_y orelse self.size.rows;

    for (first_row..row_limit) |y| {
        const row: *Row = self.getRow(y);
        const cells: []const Cell = self.getCells(row);

        // A row without any text just adds to the pending newline count;
        // this avoids walking its cells at all.
        if (!Cell.hasTextAny(cells)) {
            pending.rows += 1;
            continue;
        }

        // The row has text, so every newline owed so far becomes real.
        if (pending.rows > 0) try writer.writeByteNTimes('\n', pending.rows);

        // The row ends in a newline unless it soft-wraps into the next row
        // and we are unwrapping. The newline is deferred until more text
        // shows up.
        const joins_next_row = row.wrap and opts.unwrap;
        pending.rows = if (joins_next_row) 0 else 1;

        // Pending spaces only survive into this row when it continues a
        // soft-wrapped row and we are unwrapping.
        const continues_prev_row = row.wrap_continuation and opts.unwrap;
        if (!continues_prev_row) pending.cells = 0;

        for (cells) |*cell| {
            // Spacer cells carry no content of their own.
            switch (cell.wide) {
                .spacer_head, .spacer_tail => continue,
                .narrow, .wide => {},
            }

            // Empty cells are only counted; they become spaces if and when
            // a cell with text follows.
            if (!cell.hasText()) {
                pending.cells += 1;
                continue;
            }

            if (pending.cells > 0) {
                try writer.writeByteNTimes(' ', pending.cells);
                pending.cells = 0;
            }

            // hasText() passed above, so background-only tags cannot occur.
            switch (cell.content_tag) {
                .bg_color_palette, .bg_color_rgb => unreachable,
                .codepoint, .codepoint_grapheme => {},
            }

            // Base codepoint first, then any extra grapheme codepoints.
            try writer.print("{u}", .{cell.content.codepoint});
            if (cell.content_tag == .codepoint_grapheme) {
                for (self.lookupGrapheme(cell).?) |extra| {
                    try writer.print("{u}", .{extra});
                }
            }
        }
    }

    return pending;
}
/// Returns the bitset for the dirty bits on this page.
///
/// The returned value is a DynamicBitSetUnmanaged but it is NOT