From bcefbfd7b42dcb6389c738e0ebad95b0fc3c16cc Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sat, 9 Nov 2024 09:37:03 -0800 Subject: [PATCH 01/18] terminal: move UTF8 encoding to Page and wrap around it --- src/terminal/PageList.zig | 38 ++++++++++++++ src/terminal/Screen.zig | 86 ++----------------------------- src/terminal/page.zig | 106 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+), 83 deletions(-) diff --git a/src/terminal/PageList.zig b/src/terminal/PageList.zig index 01e7ed71d..175e3f64f 100644 --- a/src/terminal/PageList.zig +++ b/src/terminal/PageList.zig @@ -2544,6 +2544,44 @@ pub fn getCell(self: *const PageList, pt: point.Point) ?Cell { }; } +pub const EncodeUtf8Options = struct { + /// The start and end points of the dump, both inclusive. The x will + /// be ignored and the full row will always be dumped. + tl: Pin, + br: ?Pin = null, + + /// If true, this will unwrap soft-wrapped lines. If false, this will + /// dump the screen as it is visually seen in a rendered window. + unwrap: bool = true, +}; + +/// Encode the pagelist to utf8 to the given writer. +/// +/// The writer should be buffered; this function does not attempt to +/// efficiently write and often writes one byte at a time. +/// +/// Note: this is tested using Screen.dumpString. This is a function that +/// predates this and is a thin wrapper around it so the tests all live there. +pub fn encodeUtf8( + self: *const PageList, + writer: anytype, + opts: EncodeUtf8Options, +) anyerror!void { + // We don't currently use self at all. There is an argument that this + // function should live on Pin instead but there is some future we might + // need state on here so... letting it go. 
+ _ = self; + + var page_opts: Page.EncodeUtf8Options = .{ .unwrap = opts.unwrap }; + var iter = opts.tl.pageIterator(.right_down, opts.br); + while (iter.next()) |chunk| { + const page: *const Page = &chunk.node.data; + page_opts.start_y = chunk.start; + page_opts.end_y = chunk.end; + page_opts.preceding = try page.encodeUtf8(writer, page_opts); + } +} + /// Log a debug diagram of the page list to the provided writer. /// /// EXAMPLE: diff --git a/src/terminal/Screen.zig b/src/terminal/Screen.zig index d8787487f..bf63e7e05 100644 --- a/src/terminal/Screen.zig +++ b/src/terminal/Screen.zig @@ -2731,95 +2731,15 @@ pub fn promptPath( return .{ .x = to_x - from_x, .y = to_y - from_y }; } -pub const DumpString = struct { - /// The start and end points of the dump, both inclusive. The x will - /// be ignored and the full row will always be dumped. - tl: Pin, - br: ?Pin = null, - - /// If true, this will unwrap soft-wrapped lines. If false, this will - /// dump the screen as it is visually seen in a rendered window. - unwrap: bool = true, -}; - /// Dump the screen to a string. The writer given should be buffered; /// this function does not attempt to efficiently write and generally writes /// one byte at a time. pub fn dumpString( self: *const Screen, writer: anytype, - opts: DumpString, -) !void { - var blank_rows: usize = 0; - var blank_cells: usize = 0; - - var iter = opts.tl.rowIterator(.right_down, opts.br); - while (iter.next()) |row_offset| { - const rac = row_offset.rowAndCell(); - const row = rac.row; - const cells = cells: { - const cells: [*]pagepkg.Cell = @ptrCast(rac.cell); - break :cells cells[0..self.pages.cols]; - }; - - if (!pagepkg.Cell.hasTextAny(cells)) { - blank_rows += 1; - continue; - } - if (blank_rows > 0) { - for (0..blank_rows) |_| try writer.writeByte('\n'); - blank_rows = 0; - } - - if (!row.wrap or !opts.unwrap) { - // If we're not wrapped, we always add a newline. 
- // If we are wrapped, we only add a new line if we're unwrapping - // soft-wrapped lines. - blank_rows += 1; - } - - if (!row.wrap_continuation or !opts.unwrap) { - // We should also reset blank cell counts at the start of each row - // unless we're unwrapping and this row is a wrap continuation. - blank_cells = 0; - } - - for (cells) |*cell| { - // Skip spacers - switch (cell.wide) { - .narrow, .wide => {}, - .spacer_head, .spacer_tail => continue, - } - - // If we have a zero value, then we accumulate a counter. We - // only want to turn zero values into spaces if we have a non-zero - // char sometime later. - if (!cell.hasText()) { - blank_cells += 1; - continue; - } - if (blank_cells > 0) { - try writer.writeByteNTimes(' ', blank_cells); - blank_cells = 0; - } - - switch (cell.content_tag) { - .codepoint => { - try writer.print("{u}", .{cell.content.codepoint}); - }, - - .codepoint_grapheme => { - try writer.print("{u}", .{cell.content.codepoint}); - const cps = row_offset.node.data.lookupGrapheme(cell).?; - for (cps) |cp| { - try writer.print("{u}", .{cp}); - } - }, - - else => unreachable, - } - } - } + opts: PageList.EncodeUtf8Options, +) anyerror!void { + try self.pages.encodeUtf8(writer, opts); } /// You should use dumpString, this is a restricted version mostly for diff --git a/src/terminal/page.zig b/src/terminal/page.zig index 8c470d726..d41f37e8d 100644 --- a/src/terminal/page.zig +++ b/src/terminal/page.zig @@ -1481,6 +1481,112 @@ pub const Page = struct { return self.grapheme_map.map(self.memory).capacity(); } + /// Options for encoding the page as UTF-8. + pub const EncodeUtf8Options = struct { + /// The range of rows to encode. If end_y is null, then it will + /// encode to the end of the page. + start_y: size.CellCountInt = 0, + end_y: ?size.CellCountInt = null, + + /// If true, this will unwrap soft-wrapped lines. If false, this will + /// dump the screen as it is visually seen in a rendered window. 
+ unwrap: bool = true, + + /// Preceding state from encoding the prior page. Used to preserve + /// blanks properly across multiple pages. + preceding: TrailingUtf8State = .{}, + + /// Trailing state for UTF-8 encoding. + pub const TrailingUtf8State = struct { + rows: usize = 0, + cells: usize = 0, + }; + }; + + /// Encode the page contents as UTF-8. + /// + /// If preceding is non-null, then it will be used to initialize our + /// blank rows/cells count so that we can accumulate blanks across + /// multiple pages. + /// + /// Note: The tests for this function are done via Screen.dumpString + /// tests since that function is a thin wrapper around this one and + /// it makes it easier to test input contents. + pub fn encodeUtf8( + self: *const Page, + writer: anytype, + opts: EncodeUtf8Options, + ) anyerror!EncodeUtf8Options.TrailingUtf8State { + var blank_rows: usize = opts.preceding.rows; + var blank_cells: usize = opts.preceding.cells; + + const start_y: size.CellCountInt = opts.start_y; + const end_y: size.CellCountInt = opts.end_y orelse self.size.rows; + for (start_y..end_y) |y| { + const row: *Row = self.getRow(y); + const cells: []const Cell = self.getCells(row); + + // If this row is blank, accumulate to avoid a bunch of extra + // work later. If it isn't blank, make sure we dump all our + // blanks. + if (!Cell.hasTextAny(cells)) { + blank_rows += 1; + continue; + } + for (0..blank_rows) |_| try writer.writeByte('\n'); + blank_rows = 0; + + // If we're not wrapped, we always add a newline so after + // the row is printed we can add a newline. + if (!row.wrap or !opts.unwrap) blank_rows += 1; + + // If the row doesn't continue a wrap then we need to reset + // our blank cell count. 
+ if (!row.wrap_continuation or !opts.unwrap) blank_cells = 0; + + // Go through each cell and print it + for (cells) |*cell| { + // Skip spacers + switch (cell.wide) { + .narrow, .wide => {}, + .spacer_head, .spacer_tail => continue, + } + + // If we have a zero value, then we accumulate a counter. We + // only want to turn zero values into spaces if we have a non-zero + // char sometime later. + if (!cell.hasText()) { + blank_cells += 1; + continue; + } + if (blank_cells > 0) { + try writer.writeByteNTimes(' ', blank_cells); + blank_cells = 0; + } + + switch (cell.content_tag) { + .codepoint => { + try writer.print("{u}", .{cell.content.codepoint}); + }, + + .codepoint_grapheme => { + try writer.print("{u}", .{cell.content.codepoint}); + for (self.lookupGrapheme(cell).?) |cp| { + try writer.print("{u}", .{cp}); + } + }, + + // Unreachable since we do hasText() above + .bg_color_palette, + .bg_color_rgb, + => unreachable, + } + } + } + + return .{ .rows = blank_rows, .cells = blank_cells }; + } + /// Returns the bitset for the dirty bits on this page. /// /// The returned value is a DynamicBitSetUnmanaged but it is NOT From 204e4f86634451422e4ba3a6e3d0f1f855af480d Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sat, 9 Nov 2024 09:37:03 -0800 Subject: [PATCH 02/18] terminal: support cell_map for encodeUtf8 --- src/terminal/PageList.zig | 8 +++- src/terminal/Screen.zig | 78 +++++++++++++++++++++++++++++++++++++++ src/terminal/page.zig | 75 +++++++++++++++++++++++++++++++++++-- 3 files changed, 156 insertions(+), 5 deletions(-) diff --git a/src/terminal/PageList.zig b/src/terminal/PageList.zig index 175e3f64f..f8afc801a 100644 --- a/src/terminal/PageList.zig +++ b/src/terminal/PageList.zig @@ -2553,6 +2553,9 @@ pub const EncodeUtf8Options = struct { /// If true, this will unwrap soft-wrapped lines. If false, this will /// dump the screen as it is visually seen in a rendered window. unwrap: bool = true, + + /// See Page.EncodeUtf8Options. 
+ cell_map: ?*Page.CellMap = null, }; /// Encode the pagelist to utf8 to the given writer. @@ -2572,7 +2575,10 @@ pub fn encodeUtf8( // need state on here so... letting it go. _ = self; - var page_opts: Page.EncodeUtf8Options = .{ .unwrap = opts.unwrap }; + var page_opts: Page.EncodeUtf8Options = .{ + .unwrap = opts.unwrap, + .cell_map = opts.cell_map, + }; var iter = opts.tl.pageIterator(.right_down, opts.br); while (iter.next()) |chunk| { const page: *const Page = &chunk.node.data; diff --git a/src/terminal/Screen.zig b/src/terminal/Screen.zig index bf63e7e05..ac9483742 100644 --- a/src/terminal/Screen.zig +++ b/src/terminal/Screen.zig @@ -8468,3 +8468,81 @@ test "Screen: adjustCapacity cursor style ref count" { ); } } + +test "Screen UTF8 cell map with newlines" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("A\n\nB\n\nC"); + + var cell_map = Page.CellMap.init(alloc); + defer cell_map.deinit(); + var builder = std.ArrayList(u8).init(alloc); + defer builder.deinit(); + try s.dumpString(builder.writer(), .{ + .tl = s.pages.getTopLeft(.screen), + .br = s.pages.getBottomRight(.screen), + .cell_map = &cell_map, + }); + + try testing.expectEqual(7, builder.items.len); + try testing.expectEqualStrings("A\n\nB\n\nC", builder.items); + try testing.expectEqual(builder.items.len, cell_map.items.len); + try testing.expectEqual(Page.CellMapEntry{ + .x = 0, + .y = 0, + }, cell_map.items[0]); + try testing.expectEqual(Page.CellMapEntry{ + .x = 1, + .y = 0, + }, cell_map.items[1]); + try testing.expectEqual(Page.CellMapEntry{ + .x = 0, + .y = 1, + }, cell_map.items[2]); + try testing.expectEqual(Page.CellMapEntry{ + .x = 0, + .y = 2, + }, cell_map.items[3]); +} + +test "Screen UTF8 cell map with blank prefix" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + s.cursorAbsolute(2, 1); 
+ try s.testWriteString("B"); + + var cell_map = Page.CellMap.init(alloc); + defer cell_map.deinit(); + var builder = std.ArrayList(u8).init(alloc); + defer builder.deinit(); + try s.dumpString(builder.writer(), .{ + .tl = s.pages.getTopLeft(.screen), + .br = s.pages.getBottomRight(.screen), + .cell_map = &cell_map, + }); + + try testing.expectEqualStrings("\n B", builder.items); + try testing.expectEqual(builder.items.len, cell_map.items.len); + try testing.expectEqual(Page.CellMapEntry{ + .x = 0, + .y = 0, + }, cell_map.items[0]); + try testing.expectEqual(Page.CellMapEntry{ + .x = 0, + .y = 1, + }, cell_map.items[1]); + try testing.expectEqual(Page.CellMapEntry{ + .x = 1, + .y = 1, + }, cell_map.items[2]); + try testing.expectEqual(Page.CellMapEntry{ + .x = 2, + .y = 1, + }, cell_map.items[3]); +} diff --git a/src/terminal/page.zig b/src/terminal/page.zig index d41f37e8d..83164e163 100644 --- a/src/terminal/page.zig +++ b/src/terminal/page.zig @@ -1496,6 +1496,13 @@ pub const Page = struct { /// blanks properly across multiple pages. preceding: TrailingUtf8State = .{}, + /// If non-null, this will be cleared and filled with the x/y + /// coordinates of each byte in the UTF-8 encoded output. + /// The index in the array is the byte offset in the output + /// where 0 is the cursor of the writer when the function is + /// called. + cell_map: ?*CellMap = null, + /// Trailing state for UTF-8 encoding. pub const TrailingUtf8State = struct { rows: usize = 0, @@ -1503,13 +1510,22 @@ pub const Page = struct { }; }; + /// See cell_map + pub const CellMap = std.ArrayList(CellMapEntry); + + /// The x/y coordinate of a single cell in the cell map. + pub const CellMapEntry = struct { + y: size.CellCountInt, + x: size.CellCountInt, + }; + /// Encode the page contents as UTF-8. /// /// If preceding is non-null, then it will be used to initialize our /// blank rows/cells count so that we can accumulate blanks across /// multiple pages. 
/// - /// Note: The tests for this function are done via Screen.dumpString + /// Note: Many tests for this function are done via Screen.dumpString /// tests since that function is a thin wrapper around this one and /// it makes it easier to test input contents. pub fn encodeUtf8( @@ -1522,7 +1538,18 @@ pub const Page = struct { const start_y: size.CellCountInt = opts.start_y; const end_y: size.CellCountInt = opts.end_y orelse self.size.rows; - for (start_y..end_y) |y| { + + // We can probably avoid this by doing the logic below in a different + // way. The reason this exists is so that when we end a non-blank + // line with a newline, we can correctly map the cell map over to + // the correct x value. + // + // For example "A\nB". The cell map for "\n" should be (1, 0). + // This is tested in Screen.zig so feel free to refactor this. + var last_x: size.CellCountInt = 0; + + for (start_y..end_y) |y_usize| { + const y: size.CellCountInt = @intCast(y_usize); const row: *Row = self.getRow(y); const cells: []const Cell = self.getCells(row); @@ -1533,7 +1560,19 @@ pub const Page = struct { blank_rows += 1; continue; } - for (0..blank_rows) |_| try writer.writeByte('\n'); + for (1..blank_rows + 1) |i| { + try writer.writeByte('\n'); + + // This is tested in Screen.zig, i.e. one test is + // "cell map with newlines" + if (opts.cell_map) |cell_map| { + try cell_map.append(.{ + .x = last_x, + .y = @intCast(y - blank_rows + i - 1), + }); + last_x = 0; + } + } blank_rows = 0; // If we're not wrapped, we always add a newline so after @@ -1545,7 +1584,9 @@ pub const Page = struct { if (!row.wrap_continuation or !opts.unwrap) blank_cells = 0; // Go through each cell and print it - for (cells) |*cell| { + for (cells, 0..) 
|*cell, x_usize| { + const x: size.CellCountInt = @intCast(x_usize); + // Skip spacers switch (cell.wide) { .narrow, .wide => {}, @@ -1561,18 +1602,44 @@ pub const Page = struct { } if (blank_cells > 0) { try writer.writeByteNTimes(' ', blank_cells); + if (opts.cell_map) |cell_map| { + for (0..blank_cells) |i| try cell_map.append(.{ + .x = @intCast(x - blank_cells + i), + .y = y, + }); + } + blank_cells = 0; } switch (cell.content_tag) { .codepoint => { try writer.print("{u}", .{cell.content.codepoint}); + if (opts.cell_map) |cell_map| { + last_x = x + 1; + try cell_map.append(.{ + .x = x, + .y = y, + }); + } }, .codepoint_grapheme => { try writer.print("{u}", .{cell.content.codepoint}); + if (opts.cell_map) |cell_map| { + last_x = x + 1; + try cell_map.append(.{ + .x = x, + .y = y, + }); + } + for (self.lookupGrapheme(cell).?) |cp| { try writer.print("{u}", .{cp}); + if (opts.cell_map) |cell_map| try cell_map.append(.{ + .x = x, + .y = y, + }); } }, From 61c5fb81150a7924e1cba399dbe526a8ef254285 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sat, 9 Nov 2024 09:37:03 -0800 Subject: [PATCH 03/18] terminal: single pagelist node search --- src/terminal/main.zig | 1 + src/terminal/search.zig | 148 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+) create mode 100644 src/terminal/search.zig diff --git a/src/terminal/main.zig b/src/terminal/main.zig index 3fc7d2600..df3788d30 100644 --- a/src/terminal/main.zig +++ b/src/terminal/main.zig @@ -18,6 +18,7 @@ pub const kitty = @import("kitty.zig"); pub const modes = @import("modes.zig"); pub const page = @import("page.zig"); pub const parse_table = @import("parse_table.zig"); +pub const search = @import("search.zig"); pub const size = @import("size.zig"); pub const tmux = @import("tmux.zig"); pub const x11_color = @import("x11_color.zig"); diff --git a/src/terminal/search.zig b/src/terminal/search.zig new file mode 100644 index 000000000..96a7b56a7 --- /dev/null +++ b/src/terminal/search.zig 
@@ -0,0 +1,148 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const terminal = @import("main.zig"); +const point = terminal.point; +const Page = terminal.Page; +const PageList = terminal.PageList; +const Selection = terminal.Selection; +const Screen = terminal.Screen; + +pub const PageSearch = struct { + alloc: Allocator, + node: *PageList.List.Node, + needle: []const u8, + cell_map: Page.CellMap, + encoded: std.ArrayListUnmanaged(u8) = .{}, + i: usize = 0, + + pub fn init( + alloc: Allocator, + node: *PageList.List.Node, + needle: []const u8, + ) !PageSearch { + var result: PageSearch = .{ + .alloc = alloc, + .node = node, + .needle = needle, + .cell_map = Page.CellMap.init(alloc), + }; + errdefer result.deinit(); + + const page: *const Page = &node.data; + _ = try page.encodeUtf8(result.encoded.writer(alloc), .{ .cell_map = &result.cell_map }); + + return result; + } + + pub fn deinit(self: *PageSearch) void { + self.encoded.deinit(self.alloc); + self.cell_map.deinit(); + } + + pub fn next(self: *PageSearch) ?Selection { + // An empty needle covers no cells, so it can never produce a selection. + if (self.needle.len == 0) return null; + + // Search our haystack; the index found is relative to self.i, not absolute. + const haystack: []const u8 = self.encoded.items[self.i..]; + const i_offset = std.mem.indexOf(u8, haystack, self.needle) orelse { + self.i = self.encoded.items.len; + return null; + }; + + // Get our full index into the encoded buffer. + const idx = self.i + i_offset; + + // Advance one past the start of the match so repeated matches are found. 
+ self.i = idx + 1; + + const tl: PageList.Pin = tl: { + const map = self.cell_map.items[idx]; + break :tl .{ + .node = self.node, + .y = map.y, + .x = map.x, + }; + }; + const br: PageList.Pin = br: { + const map = self.cell_map.items[idx + self.needle.len - 1]; + break :br .{ + .node = self.node, + .y = map.y, + .x = map.x, + }; + }; + + return Selection.init(tl, br, false); + } +}; + +test "search single page one match" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello, world"); + + // We want to test single-page cases. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + + var it = try PageSearch.init(alloc, node, "world"); + defer it.deinit(); + + const sel = it.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 11, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + + try testing.expect(it.next() == null); +} + +test "search single page multiple match" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. boo!"); + + // We want to test single-page cases. 
+ try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + + var it = try PageSearch.init(alloc, node, "boo!"); + defer it.deinit(); + + { + const sel = it.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 10, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = it.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 19, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 22, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + + try testing.expect(it.next() == null); +} From eaddb695009e94f6c28fec22b63720506cc7ed4c Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 27 Nov 2024 09:48:26 -0800 Subject: [PATCH 04/18] datastruct: CircBuf can be initialized empty --- src/datastruct/circ_buf.zig | 53 +++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/src/datastruct/circ_buf.zig b/src/datastruct/circ_buf.zig index ccee41801..c0c658447 100644 --- a/src/datastruct/circ_buf.zig +++ b/src/datastruct/circ_buf.zig @@ -48,7 +48,7 @@ pub fn CircBuf(comptime T: type, comptime default: T) type { }; /// Initialize a new circular buffer that can store size elements. - pub fn init(alloc: Allocator, size: usize) !Self { + pub fn init(alloc: Allocator, size: usize) Allocator.Error!Self { const buf = try alloc.alloc(T, size); @memset(buf, default); @@ -56,7 +56,7 @@ pub fn CircBuf(comptime T: type, comptime default: T) type { .storage = buf, .head = 0, .tail = 0, - .full = false, + .full = size == 0, }; } @@ -67,7 +67,7 @@ pub fn CircBuf(comptime T: type, comptime default: T) type { /// Append a single value to the buffer. If the buffer is full, /// an error will be returned. 
- pub fn append(self: *Self, v: T) !void { + pub fn append(self: *Self, v: T) Allocator.Error!void { if (self.full) return error.OutOfMemory; self.storage[self.head] = v; self.head += 1; @@ -256,7 +256,7 @@ test { try testing.expectEqual(@as(usize, 0), buf.len()); } -test "append" { +test "CircBuf append" { const testing = std.testing; const alloc = testing.allocator; @@ -273,7 +273,7 @@ test "append" { try testing.expectError(error.OutOfMemory, buf.append(5)); } -test "forward iterator" { +test "CircBuf forward iterator" { const testing = std.testing; const alloc = testing.allocator; @@ -319,7 +319,7 @@ test "forward iterator" { } } -test "reverse iterator" { +test "CircBuf reverse iterator" { const testing = std.testing; const alloc = testing.allocator; @@ -365,7 +365,7 @@ test "reverse iterator" { } } -test "getPtrSlice fits" { +test "CircBuf getPtrSlice fits" { const testing = std.testing; const alloc = testing.allocator; @@ -379,7 +379,7 @@ test "getPtrSlice fits" { try testing.expectEqual(@as(usize, 11), buf.len()); } -test "getPtrSlice wraps" { +test "CircBuf getPtrSlice wraps" { const testing = std.testing; const alloc = testing.allocator; @@ -435,7 +435,7 @@ test "getPtrSlice wraps" { } } -test "rotateToZero" { +test "CircBuf rotateToZero" { const testing = std.testing; const alloc = testing.allocator; @@ -447,7 +447,7 @@ test "rotateToZero" { try buf.rotateToZero(alloc); } -test "rotateToZero offset" { +test "CircBuf rotateToZero offset" { const testing = std.testing; const alloc = testing.allocator; @@ -471,7 +471,7 @@ test "rotateToZero offset" { try testing.expectEqual(@as(usize, 1), buf.head); } -test "rotateToZero wraps" { +test "CircBuf rotateToZero wraps" { const testing = std.testing; const alloc = testing.allocator; @@ -511,7 +511,7 @@ test "rotateToZero wraps" { } } -test "rotateToZero full no wrap" { +test "CircBuf rotateToZero full no wrap" { const testing = std.testing; const alloc = testing.allocator; @@ -549,7 +549,32 @@ test "rotateToZero 
full no wrap" { } } -test "resize grow" { +test "CircBuf resize grow from zero" { + const testing = std.testing; + const alloc = testing.allocator; + + const Buf = CircBuf(u8, 0); + var buf = try Buf.init(alloc, 0); + defer buf.deinit(alloc); + try testing.expect(buf.full); + + // Resize + try buf.resize(alloc, 2); + try testing.expect(!buf.full); + try testing.expectEqual(@as(usize, 0), buf.len()); + try testing.expectEqual(@as(usize, 2), buf.capacity()); + + try buf.append(1); + try buf.append(2); + + { + const slices = buf.getPtrSlice(0, 2); + try testing.expectEqual(@as(u8, 1), slices[0][0]); + try testing.expectEqual(@as(u8, 2), slices[0][1]); + } +} + +test "CircBuf resize grow" { const testing = std.testing; const alloc = testing.allocator; @@ -582,7 +607,7 @@ test "resize grow" { } } -test "resize shrink" { +test "CircBuf resize shrink" { const testing = std.testing; const alloc = testing.allocator; From 8abbd80e06c9fb07fc265cb54214c10b7d4b5eb8 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 27 Nov 2024 09:48:26 -0800 Subject: [PATCH 05/18] CircBuf: add ensureUnusedCapacity, appendSlice --- src/datastruct/circ_buf.zig | 129 ++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/src/datastruct/circ_buf.zig b/src/datastruct/circ_buf.zig index c0c658447..e6378c855 100644 --- a/src/datastruct/circ_buf.zig +++ b/src/datastruct/circ_buf.zig @@ -75,6 +75,19 @@ pub fn CircBuf(comptime T: type, comptime default: T) type { self.full = self.head == self.tail; } + /// Append a slice to the buffer. If the buffer cannot fit the + /// entire slice then an error will be returned. It is up to the + /// caller to rotate the circular buffer if they want to overwrite + /// the oldest data. 
+ pub fn appendSlice(self: *Self, slice: []const T) Allocator.Error!void { + // getPtrSlice has no error path, so enforce the documented + // contract here: refuse slices that exceed the free space. + if (slice.len > self.capacity() - self.len()) return error.OutOfMemory; + const storage = self.getPtrSlice(self.len(), slice.len); + fastmem.copy(T, storage[0], slice[0..storage[0].len]); + fastmem.copy(T, storage[1], slice[storage[0].len..]); + } + /// Clear the buffer. pub fn clear(self: *Self) void { self.head = 0; @@ -91,6 +104,34 @@ pub fn CircBuf(comptime T: type, comptime default: T) type { }; } + /// Get the first (oldest) value in the buffer. + pub fn first(self: Self) ?*T { + // Note: this can be more efficient by not using the + // iterator, but this was an easy way to implement it. + var it = self.iterator(.forward); + return it.next(); + } + + /// Get the last (newest) value in the buffer. + pub fn last(self: Self) ?*T { + // Note: this can be more efficient by not using the + // iterator, but this was an easy way to implement it. + var it = self.iterator(.reverse); + return it.next(); + } + + /// Ensures that there is enough capacity to store amount more + /// items via append. + pub fn ensureUnusedCapacity( + self: *Self, + alloc: Allocator, + amount: usize, + ) Allocator.Error!void { + const new_cap = self.len() + amount; + if (new_cap <= self.capacity()) return; + try self.resize(alloc, new_cap); + } + /// Resize the buffer to the given size (larger or smaller). /// If larger, new values will be set to the default value. 
pub fn resize(self: *Self, alloc: Allocator, size: usize) Allocator.Error!void { @@ -365,6 +406,94 @@ test "CircBuf reverse iterator" { } } +test "CircBuf first/last" { + const testing = std.testing; + const alloc = testing.allocator; + + const Buf = CircBuf(u8, 0); + var buf = try Buf.init(alloc, 3); + defer buf.deinit(alloc); + + try buf.append(1); + try buf.append(2); + try buf.append(3); + try testing.expectEqual(3, buf.last().?.*); + try testing.expectEqual(1, buf.first().?.*); +} + +test "CircBuf first/last empty" { + const testing = std.testing; + const alloc = testing.allocator; + + const Buf = CircBuf(u8, 0); + var buf = try Buf.init(alloc, 0); + defer buf.deinit(alloc); + + try testing.expect(buf.first() == null); + try testing.expect(buf.last() == null); +} + +test "CircBuf first/last empty with cap" { + const testing = std.testing; + const alloc = testing.allocator; + + const Buf = CircBuf(u8, 0); + var buf = try Buf.init(alloc, 3); + defer buf.deinit(alloc); + + try testing.expect(buf.first() == null); + try testing.expect(buf.last() == null); +} + +test "CircBuf append slice" { + const testing = std.testing; + const alloc = testing.allocator; + + const Buf = CircBuf(u8, 0); + var buf = try Buf.init(alloc, 5); + defer buf.deinit(alloc); + + try buf.appendSlice("hello"); + { + var it = buf.iterator(.forward); + try testing.expect(it.next().?.* == 'h'); + try testing.expect(it.next().?.* == 'e'); + try testing.expect(it.next().?.* == 'l'); + try testing.expect(it.next().?.* == 'l'); + try testing.expect(it.next().?.* == 'o'); + try testing.expect(it.next() == null); + } +} + +test "CircBuf append slice with wrap" { + const testing = std.testing; + const alloc = testing.allocator; + + const Buf = CircBuf(u8, 0); + var buf = try Buf.init(alloc, 4); + defer buf.deinit(alloc); + + // Fill the buffer + _ = buf.getPtrSlice(0, buf.capacity()); + try testing.expect(buf.full); + try testing.expectEqual(@as(usize, 4), buf.len()); + + // Delete + 
buf.deleteOldest(2); + try testing.expect(!buf.full); + try testing.expectEqual(@as(usize, 2), buf.len()); + + try buf.appendSlice("AB"); + { + var it = buf.iterator(.forward); + try testing.expect(it.next().?.* == 0); + try testing.expect(it.next().?.* == 0); + try testing.expect(it.next().?.* == 'A'); + try testing.expect(it.next().?.* == 'B'); + try testing.expect(it.next() == null); + } +} + test "CircBuf getPtrSlice fits" { const testing = std.testing; const alloc = testing.allocator; From 2a13c6b6a35a1689d3642aeaf123485dc0a0e66c Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 27 Nov 2024 09:48:26 -0800 Subject: [PATCH 06/18] terminal: working on a pagelist sliding window for search --- src/terminal/search.zig | 206 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) diff --git a/src/terminal/search.zig b/src/terminal/search.zig index 96a7b56a7..f35249ab2 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -1,6 +1,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; +const CircBuf = @import("../datastruct/main.zig").CircBuf; const terminal = @import("main.zig"); const point = terminal.point; const Page = terminal.Page; @@ -8,6 +9,211 @@ const PageList = terminal.PageList; const Selection = terminal.Selection; const Screen = terminal.Screen; +pub const PageListSearch = struct { + alloc: Allocator, + + /// The list we're searching. + list: *PageList, + + /// The search term we're searching for. + needle: []const u8, + + /// The window is our sliding window of pages that we're searching so + /// we can handle boundary cases where a needle is partially on the end + /// of one page and the beginning of the next. + /// + /// Note that we're not guaranteed to straddle exactly two pages. If + /// the needle is large enough and/or the pages are small enough then + /// the needle can straddle N pages. 
Additionally, pages aren't guaranteed + /// to be equal size so we can't precompute the window size. + window: SlidingWindow, + + pub fn init( + alloc: Allocator, + list: *PageList, + needle: []const u8, + ) !PageListSearch { + // Start with an empty window. + var window = try SlidingWindow.initEmpty(alloc); + errdefer window.deinit(alloc); + + return .{ + .alloc = alloc, + .list = list, + .needle = needle, + .window = window, + }; + } + + pub fn deinit(self: *PageListSearch) void { + // The window does not store its allocator, so hand it ours to + // release its buffers and the per-page cell maps. + self.window.deinit(self.alloc); + } +}; + +/// The sliding window of the pages we're searching. The window is always +/// big enough so that the needle can fit in it. +const SlidingWindow = struct { + /// The data buffer is a circular buffer of u8 that contains the + /// encoded page text that we can use to search for the needle. + data: DataBuf, + + /// The meta buffer is a circular buffer that contains the metadata + /// about the pages we're searching. This usually isn't that large + /// so callers must iterate through it to find the offset to map + /// data to meta. + meta: MetaBuf, + + const DataBuf = CircBuf(u8, 0); + const MetaBuf = CircBuf(Meta, undefined); + const Meta = struct { + node: *PageList.List.Node, + cell_map: Page.CellMap, + + pub fn deinit(self: *Meta) void { + self.cell_map.deinit(); + } + }; + + pub fn initEmpty(alloc: Allocator) Allocator.Error!SlidingWindow { + var data = try DataBuf.init(alloc, 0); + errdefer data.deinit(alloc); + + var meta = try MetaBuf.init(alloc, 0); + errdefer meta.deinit(alloc); + + return .{ + .data = data, + .meta = meta, + }; + } + + pub fn deinit(self: *SlidingWindow, alloc: Allocator) void { + self.data.deinit(alloc); + + var meta_it = self.meta.iterator(.forward); + while (meta_it.next()) |meta| meta.deinit(); + self.meta.deinit(alloc); + } + + /// Add a new node to the sliding window. + /// + /// The window will prune itself if it can while always maintaining + /// the invariant that the `required_size` always fits within the window. 
+ /// + /// Note it is possible for the window to be smaller than `fixed_size` + /// if not enough nodes have been added yet or the screen is just + /// smaller than the needle. + pub fn append( + self: *SlidingWindow, + alloc: Allocator, + node: *PageList.List.Node, + required_size: usize, + ) Allocator.Error!void { + // Initialize our metadata for the node. + var meta: Meta = .{ + .node = node, + .cell_map = Page.CellMap.init(alloc), + }; + errdefer meta.deinit(); + + // This is suboptimal but we need to encode the page once to + // temporary memory, and then copy it into our circular buffer. + // In the future, we should benchmark and see if we can encode + // directly into the circular buffer. + var encoded: std.ArrayListUnmanaged(u8) = .{}; + defer encoded.deinit(alloc); + + // Encode the page into the buffer. + const page: *const Page = &meta.node.data; + _ = page.encodeUtf8( + encoded.writer(alloc), + .{ .cell_map = &meta.cell_map }, + ) catch { + // writer uses anyerror but the only realistic error on + // an ArrayList is out of memory. + return error.OutOfMemory; + }; + assert(meta.cell_map.items.len == encoded.items.len); + + // Now that we know our buffer length, we can consider if we can + // prune our circular buffer or if we need to grow it. + prune: { + // Our buffer size after adding the new node. + const before_size: usize = self.data.len() + encoded.items.len; + + // Prune as long as removing the first (oldest) node retains + // our required size invariant. + var after_size: usize = before_size; + while (self.meta.first()) |oldest_meta| { + const new_size = after_size - oldest_meta.cell_map.items.len; + if (new_size < required_size) break :prune; + + // We can prune this node and retain our invariant. + // Update our new size, deinitialize the memory, and + // remove from the circular buffer. 
+ after_size = new_size; + oldest_meta.deinit(); + self.meta.deleteOldest(1); + } + assert(after_size <= before_size); + + // If we didn't prune anything then we're done. + if (after_size == before_size) break :prune; + + // We need to prune our data buffer as well. + self.data.deleteOldest(before_size - after_size); + } + + // Ensure our buffers are big enough to store what we need. + try self.data.ensureUnusedCapacity(alloc, encoded.items.len); + try self.meta.ensureUnusedCapacity(alloc, 1); + + // Append our new node to the circular buffer. + try self.data.appendSlice(encoded.items); + try self.meta.append(meta); + + // Integrity check: verify our data matches our metadata exactly. + if (comptime std.debug.runtime_safety) { + var meta_it = self.meta.iterator(.forward); + var data_len: usize = 0; + while (meta_it.next()) |m| data_len += m.cell_map.items.len; + assert(data_len == self.data.len()); + } + } +}; + +test "SlidingWindow empty on init" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.initEmpty(alloc); + defer w.deinit(alloc); + try testing.expectEqual(0, w.data.len()); + try testing.expectEqual(0, w.meta.len()); +} + +test "SlidingWindow single append" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.initEmpty(alloc); + defer w.deinit(alloc); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. boo!"); + + // Imaginary needle for search + const needle = "boo!"; + + // We want to test single-page cases. 
+ try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(alloc, node, needle.len); +} + pub const PageSearch = struct { alloc: Allocator, node: *PageList.List.Node, From 6ed298c9c1a46689440d4db2d690d0d7618bd156 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Tue, 3 Dec 2024 08:04:36 -0500 Subject: [PATCH 07/18] terminal: sliding window search starts working --- ' | 555 ++++++++++++++++++++++++++++++++++++ src/datastruct/circ_buf.zig | 11 + src/terminal/search.zig | 136 +++++++++ 3 files changed, 702 insertions(+) create mode 100644 ' diff --git a/' b/' new file mode 100644 index 000000000..0b79f1879 --- /dev/null +++ b/' @@ -0,0 +1,555 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const CircBuf = @import("../datastruct/main.zig").CircBuf; +const terminal = @import("main.zig"); +const point = terminal.point; +const Page = terminal.Page; +const PageList = terminal.PageList; +const Pin = PageList.Pin; +const Selection = terminal.Selection; +const Screen = terminal.Screen; + +pub const PageListSearch = struct { + alloc: Allocator, + + /// The list we're searching. + list: *PageList, + + /// The search term we're searching for. + needle: []const u8, + + /// The window is our sliding window of pages that we're searching so + /// we can handle boundary cases where a needle is partially on the end + /// of one page and the beginning of the next. + /// + /// Note that we're not guaranteed to straddle exactly two pages. If + /// the needle is large enough and/or the pages are small enough then + /// the needle can straddle N pages. Additionally, pages aren't guaranteed + /// to be equal size so we can't precompute the window size. 
+ window: SlidingWindow, + + pub fn init( + alloc: Allocator, + list: *PageList, + needle: []const u8, + ) !PageListSearch { + var window = try CircBuf.init(alloc, 0); + errdefer window.deinit(); + + return .{ + .alloc = alloc, + .list = list, + .current = list.pages.first, + .needle = needle, + .window = window, + }; + } + + pub fn deinit(self: *PageListSearch) void { + _ = self; + + // TODO: deinit window + } +}; + +/// The sliding window of the pages we're searching. The window is always +/// big enough so that the needle can fit in it. +const SlidingWindow = struct { + /// The data buffer is a circular buffer of u8 that contains the + /// encoded page text that we can use to search for the needle. + data: DataBuf, + + /// The meta buffer is a circular buffer that contains the metadata + /// about the pages we're searching. This usually isn't that large + /// so callers must iterate through it to find the offset to map + /// data to meta. + meta: MetaBuf, + + /// The cursor into the data buffer for our current search. + i: usize = 0, + + const DataBuf = CircBuf(u8, 0); + const MetaBuf = CircBuf(Meta, undefined); + const Meta = struct { + node: *PageList.List.Node, + cell_map: Page.CellMap, + + pub fn deinit(self: *Meta) void { + self.cell_map.deinit(); + } + }; + + pub fn initEmpty(alloc: Allocator) Allocator.Error!SlidingWindow { + var data = try DataBuf.init(alloc, 0); + errdefer data.deinit(alloc); + + var meta = try MetaBuf.init(alloc, 0); + errdefer meta.deinit(alloc); + + return .{ + .data = data, + .meta = meta, + }; + } + + pub fn deinit(self: *SlidingWindow, alloc: Allocator) void { + self.data.deinit(alloc); + + var meta_it = self.meta.iterator(.forward); + while (meta_it.next()) |meta| meta.deinit(); + self.meta.deinit(alloc); + } + + /// Search the window for the next occurrence of the needle. 
+ pub fn next(self: *SlidingWindow, needle: []const u8) void { + const slices = self.data.getPtrSlice(0, self.data.len()); + + // Search the first slice for the needle. + if (std.mem.indexOf(u8, slices[0][self.i..], needle)) |idx| { + // Found, map the match to a selection. + var meta_it = self.meta.iterator(.forward); + var i: usize = 0; + while (meta_it.next()) |meta| { + const meta_idx = idx - i; + if (meta.cell_map.items.len < meta_idx) { + // This meta doesn't contain the match. + i += meta.cell_map.items.len; + continue; + } + + // We found the meta that contains the start of the match. + const tl: PageList.Pin = tl: { + const map = meta.cell_map.items[meta_idx]; + break :tl .{ + .node = meta.node, + .y = map.y, + .x = map.x, + }; + }; + + _ = tl; + } + + // Found, we can move our index to the next character + // after the match. This let's us find all matches even if + // they overlap. + + self.i = idx + 1; + + @panic("TODO"); + } + } + + /// Return a selection for the given start and length into the data + /// buffer and also prune the data/meta buffers if possible up to + /// this start index. + fn selectAndPrune( + self: *SlidingWindow, + start: usize, + len: usize, + ) Selection { + assert(start < self.data.len()); + assert(start + len < self.data.len()); + + var meta_it = self.meta.iterator(.forward); + var meta_: ?Meta = meta_it.next(); + + // Find the start of the match + var offset: usize = 0; + var skip_nodes: usize = 0; + const tl: PageList.Pin = tl: { + while (meta_) |meta| : (meta_ = meta_it.next()) { + // meta_i is the index we expect to find the match in the + // cell map within this meta if it contains it. + const meta_i = start - offset; + if (meta_i >= meta.cell_map.items.len) { + // This meta doesn't contain the match. This means we + // can also prune this set of data because we only look + // forward. + offset += meta.cell_map.items.len; + skip_nodes += 1; + continue; + } + + // We found the meta that contains the start of the match. 
+ const map = meta.cell_map.items[start]; + break :tl .{ + .node = meta.node, + .y = map.y, + .x = map.x, + }; + } + + // We never found the top-left. This is unreachable because + // we assert that the start index is within the data buffer, + // and when building the data buffer we assert the cell map + // length exactly matches the data buffer length. + unreachable; + }; + + // Keep track of the number of nodes we skipped for the tl. + const tl_skip_nodes = skip_nodes; + skip_nodes = 0; + + // Find the end of the match + const br: PageList.Pin = br: { + const end_idx = start + len - 1; + while (meta_) |meta| : (meta_ = meta_it.next()) { + const meta_i = end_idx - offset; + if (meta_i >= meta.cell_map.items.len) { + offset += meta.cell_map.items.len; + skip_nodes += 1; + continue; + } + + // We found the meta that contains the start of the match. + const map = meta.cell_map.items[end_idx]; + break :br .{ + .node = meta.node, + .y = map.y, + .x = map.x, + }; + } + }; + + // If we skipped any nodes for the bottom-right then we can prune + // all the way up to the total. If we didn't, it means we found + // the bottom-right in the same node as the top-left and we can't + // prune the node that the match is on because there may be + // more matches. + if (skip_nodes > 0) skip_nodes += tl_skip_nodes; + + _ = tl; + _ = br; + } + + /// Convert a data index into a pin. + fn pin( + self: *const SlidingWindow, + idx: usize, + it: ?*MetaBuf.Iterator, + ) struct { + /// The pin for the data index. + pin: Pin, + + /// The offset into the meta buffer that the pin was found. + /// This can be used to prune the meta buffer (its safe to prune + /// before this i). + meta_i: usize, + } { + _ = self; + _ = idx; + _ = start; + + while (it.next()) |meta| { + // meta_i is the index we expect to find the match in the + // cell map within this meta if it contains it. + const meta_i = start - offset; + if (meta_i >= meta.cell_map.items.len) { + // This meta doesn't contain the match. 
This means we + // can also prune this set of data because we only look + // forward. + offset += meta.cell_map.items.len; + skip_nodes += 1; + continue; + } + + // We found the meta that contains the start of the match. + const map = meta.cell_map.items[start]; + break :tl .{ + .node = meta.node, + .y = map.y, + .x = map.x, + }; + } + + } + + /// Add a new node to the sliding window. + /// + /// The window will prune itself if it can while always maintaining + /// the invariant that the `fixed_size` always fits within the window. + /// + /// Note it is possible for the window to be smaller than `fixed_size` + /// if not enough nodes have been added yet or the screen is just + /// smaller than the needle. + pub fn append( + self: *SlidingWindow, + alloc: Allocator, + node: *PageList.List.Node, + required_size: usize, + ) Allocator.Error!void { + // Initialize our metadata for the node. + var meta: Meta = .{ + .node = node, + .cell_map = Page.CellMap.init(alloc), + }; + errdefer meta.deinit(); + + // This is suboptimal but we need to encode the page once to + // temporary memory, and then copy it into our circular buffer. + // In the future, we should benchmark and see if we can encode + // directly into the circular buffer. + var encoded: std.ArrayListUnmanaged(u8) = .{}; + defer encoded.deinit(alloc); + + // Encode the page into the buffer. + const page: *const Page = &meta.node.data; + _ = page.encodeUtf8( + encoded.writer(alloc), + .{ .cell_map = &meta.cell_map }, + ) catch { + // writer uses anyerror but the only realistic error on + // an ArrayList is out of memory. + return error.OutOfMemory; + }; + assert(meta.cell_map.items.len == encoded.items.len); + + // Now that we know our buffer length, we can consider if we can + // prune our circular buffer or if we need to grow it. + prune: { + // Our buffer size after adding the new node. 
+ const before_size: usize = self.data.len() + encoded.items.len; + + // Prune as long as removing the first (oldest) node retains + // our required size invariant. + var after_size: usize = before_size; + while (self.meta.first()) |oldest_meta| { + const new_size = after_size - oldest_meta.cell_map.items.len; + if (new_size < required_size) break :prune; + + // We can prune this node and retain our invariant. + // Update our new size, deinitialize the memory, and + // remove from the circular buffer. + after_size = new_size; + oldest_meta.deinit(); + self.meta.deleteOldest(1); + } + assert(after_size <= before_size); + + // If we didn't prune anything then we're done. + if (after_size == before_size) break :prune; + + // We need to prune our data buffer as well. + self.data.deleteOldest(before_size - after_size); + } + + // Ensure our buffers are big enough to store what we need. + try self.data.ensureUnusedCapacity(alloc, encoded.items.len); + try self.meta.ensureUnusedCapacity(alloc, 1); + + // Append our new node to the circular buffer. + try self.data.appendSlice(encoded.items); + try self.meta.append(meta); + + // Integrity check: verify our data matches our metadata exactly. + if (comptime std.debug.runtime_safety) { + var meta_it = self.meta.iterator(.forward); + var data_len: usize = 0; + while (meta_it.next()) |m| data_len += m.cell_map.items.len; + assert(data_len == self.data.len()); + } + } +}; + +test "SlidingWindow empty on init" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.initEmpty(alloc); + defer w.deinit(alloc); + try testing.expectEqual(0, w.data.len()); + try testing.expectEqual(0, w.meta.len()); +} + +test "SlidingWindow single append" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.initEmpty(alloc); + defer w.deinit(alloc); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. 
boo!"); + + // Imaginary needle for search + const needle = "boo!"; + + // We want to test single-page cases. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(alloc, node, needle.len); +} + +test "SlidingWindow two pages" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.initEmpty(alloc); + defer w.deinit(alloc); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" + const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. 
boo!"); + + // Imaginary needle for search + const needle = "boo!"; + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(alloc, node, needle.len); + try w.append(alloc, node.next.?, needle.len); + + // Ensure our data is correct +} + +pub const PageSearch = struct { + alloc: Allocator, + node: *PageList.List.Node, + needle: []const u8, + cell_map: Page.CellMap, + encoded: std.ArrayListUnmanaged(u8) = .{}, + i: usize = 0, + + pub fn init( + alloc: Allocator, + node: *PageList.List.Node, + needle: []const u8, + ) !PageSearch { + var result: PageSearch = .{ + .alloc = alloc, + .node = node, + .needle = needle, + .cell_map = Page.CellMap.init(alloc), + }; + + const page: *const Page = &node.data; + _ = try page.encodeUtf8(result.encoded.writer(alloc), .{ + .cell_map = &result.cell_map, + }); + + return result; + } + + pub fn deinit(self: *PageSearch) void { + self.encoded.deinit(self.alloc); + self.cell_map.deinit(); + } + + pub fn next(self: *PageSearch) ?Selection { + // Search our haystack for the needle. The resulting index is + // the offset from self.i not the absolute index. + const haystack: []const u8 = self.encoded.items[self.i..]; + const i_offset = std.mem.indexOf(u8, haystack, self.needle) orelse { + self.i = self.encoded.items.len; + return null; + }; + + // Get our full index into the encoded buffer. + const idx = self.i + i_offset; + + // We found our search term. Move the cursor forward one beyond + // the match. This lets us find every repeated match. 
+ self.i = idx + 1; + + const tl: PageList.Pin = tl: { + const map = self.cell_map.items[idx]; + break :tl .{ + .node = self.node, + .y = map.y, + .x = map.x, + }; + }; + const br: PageList.Pin = br: { + const map = self.cell_map.items[idx + self.needle.len - 1]; + break :br .{ + .node = self.node, + .y = map.y, + .x = map.x, + }; + }; + + return Selection.init(tl, br, false); + } +}; + +test "search single page one match" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello, world"); + + // We want to test single-page cases. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + + var it = try PageSearch.init(alloc, node, "world"); + defer it.deinit(); + + const sel = it.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 11, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + + try testing.expect(it.next() == null); +} + +test "search single page multiple match" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. boo!"); + + // We want to test single-page cases. 
+ try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + + var it = try PageSearch.init(alloc, node, "boo!"); + defer it.deinit(); + + { + const sel = it.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 10, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = it.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 19, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 22, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + + try testing.expect(it.next() == null); +} diff --git a/src/datastruct/circ_buf.zig b/src/datastruct/circ_buf.zig index e6378c855..c13bcc192 100644 --- a/src/datastruct/circ_buf.zig +++ b/src/datastruct/circ_buf.zig @@ -45,6 +45,17 @@ pub fn CircBuf(comptime T: type, comptime default: T) type { self.idx += 1; return &self.buf.storage[storage_idx]; } + + /// Seek the iterator by a given amount. This will clamp + /// the values to the bounds of the buffer so overflows are + /// not possible. + pub fn seekBy(self: *Iterator, amount: isize) void { + if (amount > 0) { + self.idx +|= @intCast(amount); + } else { + self.idx -|= @intCast(@abs(amount)); + } + } }; /// Initialize a new circular buffer that can store size elements. 
diff --git a/src/terminal/search.zig b/src/terminal/search.zig index f35249ab2..05b2919e0 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -6,6 +6,7 @@ const terminal = @import("main.zig"); const point = terminal.point; const Page = terminal.Page; const PageList = terminal.PageList; +const Pin = PageList.Pin; const Selection = terminal.Selection; const Screen = terminal.Screen; @@ -97,6 +98,85 @@ const SlidingWindow = struct { self.meta.deinit(alloc); } + /// Search the window for the next occurrence of the needle. As + /// the window moves, the window will prune itself while maintaining + /// the invariant that the window is always big enough to contain + /// the needle. + pub fn next(self: *SlidingWindow, needle: []const u8) ?Selection { + const slices = self.data.getPtrSlice(0, self.data.len()); + + // Search the first slice for the needle. + if (std.mem.indexOf(u8, slices[0], needle)) |idx| { + return self.selection(idx, needle.len); + } + + @panic("TODO"); + } + + /// Return a selection for the given start and length into the data + /// buffer and also prune the data/meta buffers if possible up to + /// this start index. + fn selection( + self: *SlidingWindow, + start: usize, + len: usize, + ) Selection { + assert(start < self.data.len()); + assert(start + len < self.data.len()); + + var meta_it = self.meta.iterator(.forward); + const tl: Pin = pin(&meta_it, start); + + // We have to seek back so that we reinspect our current + // iterator value again in case the start and end are in the + // same segment. + meta_it.seekBy(-1); + const br: Pin = pin(&meta_it, start + len - 1); + + // TODO: prune based on meta_it.idx + + return Selection.init(tl, br, false); + } + + /// Convert a data index into a pin. + /// + /// Tip: you can get the offset into the meta buffer we searched + /// by inspecting the iterator index after this function returns. 
+ /// I note this because this is useful if you want to prune the + /// meta buffer after you find a match. + /// + /// Precondition: the index must be within the data buffer. + fn pin( + it: *MetaBuf.Iterator, + idx: usize, + ) Pin { + var offset: usize = 0; + while (it.next()) |meta| { + // meta_i is the index we expect to find the match in the + // cell map within this meta if it contains it. + const meta_i = idx - offset; + if (meta_i >= meta.cell_map.items.len) { + // This meta doesn't contain the match. This means we + // can also prune this set of data because we only look + // forward. + offset += meta.cell_map.items.len; + continue; + } + + // We found the meta that contains the start of the match. + const map = meta.cell_map.items[meta_i]; + return .{ + .node = meta.node, + .y = map.y, + .x = map.x, + }; + } + + // Unreachable because it is a precondition that the index is + // within the data buffer. + unreachable; + } + /// Add a new node to the sliding window. /// /// The window will prune itself if it can while always maintaining @@ -212,6 +292,62 @@ test "SlidingWindow single append" { try testing.expect(s.pages.pages.first == s.pages.pages.last); const node: *PageList.List.Node = s.pages.pages.first.?; try w.append(alloc, node, needle.len); + + // We should be able to find two matches. 
+ { + const sel = w.next(needle).?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 10, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = w.next(needle).?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 19, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 22, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } +} + +test "SlidingWindow two pages" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.initEmpty(alloc); + defer w.deinit(alloc); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" + const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. 
boo!"); + + // Imaginary needle for search + const needle = "boo!"; + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(alloc, node, needle.len); + try w.append(alloc, node.next.?, needle.len); + + // Ensure our data is correct } pub const PageSearch = struct { From d307b02e40de5816e0ee8e49d70e0ce555e13c18 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Tue, 3 Dec 2024 13:06:52 -0500 Subject: [PATCH 08/18] terminal: sliding window search can move the cursor --- src/datastruct/circ_buf.zig | 5 +++ src/terminal/search.zig | 75 ++++++++++++++++++++++++++++++++++--- 2 files changed, 75 insertions(+), 5 deletions(-) diff --git a/src/datastruct/circ_buf.zig b/src/datastruct/circ_buf.zig index c13bcc192..065bf6a1d 100644 --- a/src/datastruct/circ_buf.zig +++ b/src/datastruct/circ_buf.zig @@ -56,6 +56,11 @@ pub fn CircBuf(comptime T: type, comptime default: T) type { self.idx -|= @intCast(@abs(amount)); } } + + /// Reset the iterator back to the first value. + pub fn reset(self: *Iterator) void { + self.idx = 0; + } }; /// Initialize a new circular buffer that can store size elements. diff --git a/src/terminal/search.zig b/src/terminal/search.zig index 05b2919e0..e217f649e 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -66,6 +66,11 @@ const SlidingWindow = struct { /// data to meta. meta: MetaBuf, + /// Offset into data for our current state. This handles the + /// situation where our search moved through meta[0] but didn't + /// do enough to prune it. + data_offset: usize = 0, + const DataBuf = CircBuf(u8, 0); const MetaBuf = CircBuf(Meta, undefined); const Meta = struct { @@ -98,31 +103,60 @@ const SlidingWindow = struct { self.meta.deinit(alloc); } + /// Clear all data but retain allocated capacity. 
+ pub fn clearAndRetainCapacity(self: *SlidingWindow) void { + var meta_it = self.meta.iterator(.forward); + while (meta_it.next()) |meta| meta.deinit(); + self.meta.clear(); + self.data.clear(); + self.data_offset = 0; + } + /// Search the window for the next occurrence of the needle. As /// the window moves, the window will prune itself while maintaining /// the invariant that the window is always big enough to contain /// the needle. pub fn next(self: *SlidingWindow, needle: []const u8) ?Selection { - const slices = self.data.getPtrSlice(0, self.data.len()); + const data_len = self.data.len(); + if (data_len == 0) return null; + const slices = self.data.getPtrSlice( + self.data_offset, + data_len - self.data_offset, + ); // Search the first slice for the needle. if (std.mem.indexOf(u8, slices[0], needle)) |idx| { return self.selection(idx, needle.len); } - @panic("TODO"); + // TODO: search overlap + + // Search the last slice for the needle. + if (std.mem.indexOf(u8, slices[1], needle)) |idx| { + if (true) @panic("TODO: test"); + return self.selection(slices[0].len + idx, needle.len); + } + + // No match. Clear everything. + self.clearAndRetainCapacity(); + return null; } /// Return a selection for the given start and length into the data /// buffer and also prune the data/meta buffers if possible up to /// this start index. + /// + /// The start index is assumed to be relative to the offset. i.e. + /// index zero is actually at `self.data[self.data_offset]`. The + /// selection will account for the offset. fn selection( self: *SlidingWindow, - start: usize, + start_offset: usize, len: usize, ) Selection { + const start = start_offset + self.data_offset; assert(start < self.data.len()); - assert(start + len < self.data.len()); + assert(start + len <= self.data.len()); var meta_it = self.meta.iterator(.forward); const tl: Pin = pin(&meta_it, start); @@ -132,8 +166,37 @@ const SlidingWindow = struct { // same segment. 
meta_it.seekBy(-1); const br: Pin = pin(&meta_it, start + len - 1); + assert(meta_it.idx >= 1); - // TODO: prune based on meta_it.idx + // meta_it.idx is now the index after the br pin. We can + // safely prune our data up to this index. (It is after + // because next() is called at least once). + const br_meta_idx: usize = meta_it.idx - 1; + meta_it.reset(); + var offset: usize = 0; + while (meta_it.next()) |meta| { + const meta_idx = start - offset; + if (meta_idx >= meta.cell_map.items.len) { + // Prior to our matches, we can prune it. + offset += meta.cell_map.items.len; + meta.deinit(); + } + + assert(meta_it.idx == br_meta_idx + 1); + break; + } + + // If we have metas to prune, then prune them. They should be + // deinitialized already from the while loop above. + if (br_meta_idx > 0) { + assert(offset > 0); + self.meta.deleteOldest(br_meta_idx); + self.data.deleteOldest(offset); + @panic("TODO: TEST"); + } + + // Move our data one beyond so we don't rematch. + self.data_offset = start - offset + 1; return Selection.init(tl, br, false); } @@ -316,6 +379,8 @@ test "SlidingWindow single append" { .y = 0, } }, s.pages.pointFromPin(.active, sel.end()).?); } + try testing.expect(w.next(needle) == null); + try testing.expect(w.next(needle) == null); } test "SlidingWindow two pages" { From b487aa8e1fbd1981103a00464333467068661006 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Tue, 3 Dec 2024 13:40:48 -0500 Subject: [PATCH 09/18] terminal: search across two pages and pruning appears to be working --- src/terminal/search.zig | 208 +++++++++++++++++++++++----------------- 1 file changed, 121 insertions(+), 87 deletions(-) diff --git a/src/terminal/search.zig b/src/terminal/search.zig index e217f649e..35f79ed23 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -53,8 +53,9 @@ pub const PageListSearch = struct { } }; -/// The sliding window of the pages we're searching. The window is always -/// big enough so that the needle can fit in it. 
+/// Search pages via a sliding window. The sliding window always maintains +/// the invariant that data isn't pruned until we've searched it and +/// accounted for overlaps across pages. const SlidingWindow = struct { /// The data buffer is a circular buffer of u8 that contains the /// encoded page text that we can use to search for the needle. @@ -137,8 +138,42 @@ const SlidingWindow = struct { return self.selection(slices[0].len + idx, needle.len); } - // No match. Clear everything. - self.clearAndRetainCapacity(); + // No match. We keep `needle.len - 1` bytes available to + // handle the future overlap case. + var meta_it = self.meta.iterator(.reverse); + prune: { + var saved: usize = 0; + while (meta_it.next()) |meta| { + const needed = needle.len - 1 - saved; + if (meta.cell_map.items.len >= needed) { + // We save up to this meta. We set our data offset + // to exactly where it needs to be to continue + // searching. + self.data_offset = meta.cell_map.items.len - needed; + break; + } + + saved += meta.cell_map.items.len; + } else { + // If we exited the while loop naturally then we + // never got the amount we needed and so there is + // nothing to prune. + assert(saved < needle.len - 1); + break :prune; + } + + const prune_count = self.meta.len() - meta_it.idx; + if (prune_count == 0) { + // This can happen if we need to save up to the first + // meta value to retain our window. + break :prune; + } + + // We can now delete all the metas up to but NOT including + // the meta we found through meta_it. + @panic("TODO: test"); + } + return null; } @@ -158,71 +193,74 @@ const SlidingWindow = struct { assert(start < self.data.len()); assert(start + len <= self.data.len()); + // meta_consumed is the number of bytes we've consumed in the + // data buffer up to and NOT including the meta where we've + // found our pin. 
This is important because it tells us the + // amount of data we can safely deleted from self.data since + // we can't partially delete a meta block's data. (The partial + // amount is represented by self.data_offset). var meta_it = self.meta.iterator(.forward); - const tl: Pin = pin(&meta_it, start); + var meta_consumed: usize = 0; + const tl: Pin = pin(&meta_it, &meta_consumed, start); // We have to seek back so that we reinspect our current // iterator value again in case the start and end are in the // same segment. meta_it.seekBy(-1); - const br: Pin = pin(&meta_it, start + len - 1); + const br: Pin = pin(&meta_it, &meta_consumed, start + len - 1); assert(meta_it.idx >= 1); - // meta_it.idx is now the index after the br pin. We can - // safely prune our data up to this index. (It is after - // because next() is called at least once). - const br_meta_idx: usize = meta_it.idx - 1; - meta_it.reset(); - var offset: usize = 0; - while (meta_it.next()) |meta| { - const meta_idx = start - offset; - if (meta_idx >= meta.cell_map.items.len) { - // Prior to our matches, we can prune it. - offset += meta.cell_map.items.len; - meta.deinit(); + // Our offset into the current meta block is the start index + // minus the amount of data fully consumed. We then add one + // to move one past the match so we don't repeat it. + self.data_offset = start - meta_consumed + 1; + + // meta_it.idx is br's meta index plus one (because the iterator + // moves one past the end; we call next() one last time). So + // we compare against one to check that the meta that we matched + // in has prior meta blocks we can prune. + if (meta_it.idx > 1) { + // Deinit all our memory in the meta blocks prior to our + // match. 
+ const meta_count = meta_it.idx - 1; + meta_it.reset(); + for (0..meta_count) |_| meta_it.next().?.deinit(); + if (comptime std.debug.runtime_safety) { + assert(meta_it.idx == meta_count); + assert(meta_it.next().?.node == br.node); } + self.meta.deleteOldest(meta_count); - assert(meta_it.idx == br_meta_idx + 1); - break; + // Delete all the data up to our current index. + assert(meta_consumed > 0); + self.data.deleteOldest(meta_consumed); } - // If we have metas to prune, then prune them. They should be - // deinitialized already from the while loop above. - if (br_meta_idx > 0) { - assert(offset > 0); - self.meta.deleteOldest(br_meta_idx); - self.data.deleteOldest(offset); - @panic("TODO: TEST"); - } - - // Move our data one beyond so we don't rematch. - self.data_offset = start - offset + 1; - + self.assertIntegrity(); return Selection.init(tl, br, false); } /// Convert a data index into a pin. /// - /// Tip: you can get the offset into the meta buffer we searched - /// by inspecting the iterator index after this function returns. - /// I note this because this is useful if you want to prune the - /// meta buffer after you find a match. + /// The iterator and offset are both expected to be passed by + /// pointer so that the pin can be efficiently called for multiple + /// indexes (in order). See selection() for an example. /// /// Precondition: the index must be within the data buffer. fn pin( it: *MetaBuf.Iterator, + offset: *usize, idx: usize, ) Pin { - var offset: usize = 0; while (it.next()) |meta| { // meta_i is the index we expect to find the match in the // cell map within this meta if it contains it. - const meta_i = idx - offset; + const meta_i = idx - offset.*; if (meta_i >= meta.cell_map.items.len) { // This meta doesn't contain the match. This means we // can also prune this set of data because we only look // forward. 
- offset += meta.cell_map.items.len; + offset.* += meta.cell_map.items.len; continue; } @@ -240,19 +278,13 @@ const SlidingWindow = struct { unreachable; } - /// Add a new node to the sliding window. - /// - /// The window will prune itself if it can while always maintaining - /// the invariant that the `fixed_size` always fits within the window. - /// - /// Note it is possible for the window to be smaller than `fixed_size` - /// if not enough nodes have been added yet or the screen is just - /// smaller than the needle. + /// Add a new node to the sliding window. This will always grow + /// the sliding window; data isn't pruned until it is consumed + /// via a search (via next()). pub fn append( self: *SlidingWindow, alloc: Allocator, node: *PageList.List.Node, - required_size: usize, ) Allocator.Error!void { // Initialize our metadata for the node. var meta: Meta = .{ @@ -280,35 +312,6 @@ const SlidingWindow = struct { }; assert(meta.cell_map.items.len == encoded.items.len); - // Now that we know our buffer length, we can consider if we can - // prune our circular buffer or if we need to grow it. - prune: { - // Our buffer size after adding the new node. - const before_size: usize = self.data.len() + encoded.items.len; - - // Prune as long as removing the first (oldest) node retains - // our required size invariant. - var after_size: usize = before_size; - while (self.meta.first()) |oldest_meta| { - const new_size = after_size - oldest_meta.cell_map.items.len; - if (new_size < required_size) break :prune; - - // We can prune this node and retain our invariant. - // Update our new size, deinitialize the memory, and - // remove from the circular buffer. - after_size = new_size; - oldest_meta.deinit(); - self.meta.deleteOldest(1); - } - assert(after_size <= before_size); - - // If we didn't prune anything then we're done. - if (after_size == before_size) break :prune; - - // We need to prune our data buffer as well. 
- self.data.deleteOldest(before_size - after_size); - } - // Ensure our buffers are big enough to store what we need. try self.data.ensureUnusedCapacity(alloc, encoded.items.len); try self.meta.ensureUnusedCapacity(alloc, 1); @@ -317,13 +320,20 @@ const SlidingWindow = struct { try self.data.appendSlice(encoded.items); try self.meta.append(meta); + self.assertIntegrity(); + } + + fn assertIntegrity(self: *const SlidingWindow) void { + if (comptime !std.debug.runtime_safety) return; + // Integrity check: verify our data matches our metadata exactly. - if (comptime std.debug.runtime_safety) { - var meta_it = self.meta.iterator(.forward); - var data_len: usize = 0; - while (meta_it.next()) |m| data_len += m.cell_map.items.len; - assert(data_len == self.data.len()); - } + var meta_it = self.meta.iterator(.forward); + var data_len: usize = 0; + while (meta_it.next()) |m| data_len += m.cell_map.items.len; + assert(data_len == self.data.len()); + + // Integrity check: verify our data offset is within bounds. + assert(self.data_offset < self.data.len()); } }; @@ -354,7 +364,7 @@ test "SlidingWindow single append" { // We want to test single-page cases. try testing.expect(s.pages.pages.first == s.pages.pages.last); const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(alloc, node, needle.len); + try w.append(alloc, node); // We should be able to find two matches. 
{ @@ -409,10 +419,34 @@ test "SlidingWindow two pages" { // Add both pages const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(alloc, node, needle.len); - try w.append(alloc, node.next.?, needle.len); + try w.append(alloc, node); + try w.append(alloc, node.next.?); - // Ensure our data is correct + // Search should find two matches + { + const sel = w.next(needle).?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 76, + .y = 22, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 79, + .y = 22, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = w.next(needle).?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 23, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 10, + .y = 23, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next(needle) == null); + try testing.expect(w.next(needle) == null); } pub const PageSearch = struct { From 09e4cccd2c891d3a2e242fce486f8ff8fd83db02 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Tue, 3 Dec 2024 15:39:36 -0800 Subject: [PATCH 10/18] terminal: remove unused pagesearch --- src/terminal/search.zig | 139 ---------------------------------------- 1 file changed, 139 deletions(-) diff --git a/src/terminal/search.zig b/src/terminal/search.zig index 35f79ed23..40462491a 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -448,142 +448,3 @@ test "SlidingWindow two pages" { try testing.expect(w.next(needle) == null); try testing.expect(w.next(needle) == null); } - -pub const PageSearch = struct { - alloc: Allocator, - node: *PageList.List.Node, - needle: []const u8, - cell_map: Page.CellMap, - encoded: std.ArrayListUnmanaged(u8) = .{}, - i: usize = 0, - - pub fn init( - alloc: Allocator, - node: *PageList.List.Node, - needle: []const u8, - ) !PageSearch { - var result: PageSearch = .{ - 
.alloc = alloc, - .node = node, - .needle = needle, - .cell_map = Page.CellMap.init(alloc), - }; - - const page: *const Page = &node.data; - _ = try page.encodeUtf8(result.encoded.writer(alloc), .{ - .cell_map = &result.cell_map, - }); - - return result; - } - - pub fn deinit(self: *PageSearch) void { - self.encoded.deinit(self.alloc); - self.cell_map.deinit(); - } - - pub fn next(self: *PageSearch) ?Selection { - // Search our haystack for the needle. The resulting index is - // the offset from self.i not the absolute index. - const haystack: []const u8 = self.encoded.items[self.i..]; - const i_offset = std.mem.indexOf(u8, haystack, self.needle) orelse { - self.i = self.encoded.items.len; - return null; - }; - - // Get our full index into the encoded buffer. - const idx = self.i + i_offset; - - // We found our search term. Move the cursor forward one beyond - // the match. This lets us find every repeated match. - self.i = idx + 1; - - const tl: PageList.Pin = tl: { - const map = self.cell_map.items[idx]; - break :tl .{ - .node = self.node, - .y = map.y, - .x = map.x, - }; - }; - const br: PageList.Pin = br: { - const map = self.cell_map.items[idx + self.needle.len - 1]; - break :br .{ - .node = self.node, - .y = map.y, - .x = map.x, - }; - }; - - return Selection.init(tl, br, false); - } -}; - -test "search single page one match" { - const testing = std.testing; - const alloc = testing.allocator; - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("hello, world"); - - // We want to test single-page cases. 
- try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - - var it = try PageSearch.init(alloc, node, "world"); - defer it.deinit(); - - const sel = it.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 7, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 11, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - - try testing.expect(it.next() == null); -} - -test "search single page multiple match" { - const testing = std.testing; - const alloc = testing.allocator; - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("hello. boo! hello. boo!"); - - // We want to test single-page cases. - try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - - var it = try PageSearch.init(alloc, node, "boo!"); - defer it.deinit(); - - { - const sel = it.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 7, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 10, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - { - const sel = it.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 19, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 22, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - - try testing.expect(it.next() == null); -} From 79026a114837ba0945d0f6948c7c2efc9f549516 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Tue, 3 Dec 2024 15:52:48 -0800 Subject: [PATCH 11/18] terminal: test no match pruning --- src/terminal/search.zig | 116 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) diff --git a/src/terminal/search.zig b/src/terminal/search.zig 
index 40462491a..71ac6aea4 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -171,9 +171,18 @@ const SlidingWindow = struct { // We can now delete all the metas up to but NOT including // the meta we found through meta_it. - @panic("TODO: test"); + meta_it = self.meta.iterator(.forward); + var prune_data_len: usize = 0; + for (0..prune_count) |_| { + const meta = meta_it.next().?; + prune_data_len += meta.cell_map.items.len; + meta.deinit(); + } + self.meta.deleteOldest(prune_count); + self.data.deleteOldest(prune_data_len); } + self.assertIntegrity(); return null; } @@ -393,6 +402,33 @@ test "SlidingWindow single append" { try testing.expect(w.next(needle) == null); } +test "SlidingWindow single append no match" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.initEmpty(alloc); + defer w.deinit(alloc); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. boo!"); + + // Imaginary needle for search + const needle = "nope!"; + + // We want to test single-page cases. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(alloc, node); + + // No matches + try testing.expect(w.next(needle) == null); + try testing.expect(w.next(needle) == null); + + // Should still keep the page + try testing.expectEqual(1, w.meta.len()); +} + test "SlidingWindow two pages" { const testing = std.testing; const alloc = testing.allocator; @@ -448,3 +484,81 @@ test "SlidingWindow two pages" { try testing.expect(w.next(needle) == null); try testing.expect(w.next(needle) == null); } + +test "SlidingWindow two pages no match prunes first page" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.initEmpty(alloc); + defer w.deinit(alloc); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. 
The final bytes in the first page + // are "boo!" + const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. boo!"); + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(alloc, node); + try w.append(alloc, node.next.?); + + // Imaginary needle for search. Doesn't match! + const needle = "nope!"; + + // Search should find nothing + try testing.expect(w.next(needle) == null); + try testing.expect(w.next(needle) == null); + + // We should've pruned our page because the second page + // has enough text to contain our needle. + try testing.expectEqual(1, w.meta.len()); +} + +test "SlidingWindow two pages no match keeps both pages" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.initEmpty(alloc); + defer w.deinit(alloc); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" + const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("\n"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + try s.testWriteString("hello. boo!"); + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(alloc, node); + try w.append(alloc, node.next.?); + + // Imaginary needle for search. Doesn't match! 
+ var needle_list = std.ArrayList(u8).init(alloc); + defer needle_list.deinit(); + try needle_list.appendNTimes('x', first_page_rows * s.pages.cols); + const needle: []const u8 = needle_list.items; + + // Search should find nothing + try testing.expect(w.next(needle) == null); + try testing.expect(w.next(needle) == null); + + // No pruning because both pages are needed to fit needle. + try testing.expectEqual(2, w.meta.len()); +} From af1ee4d95f645d9a33841dcb5b77a93c8bdc9745 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 4 Dec 2024 10:36:14 -0800 Subject: [PATCH 12/18] terminal: search match across page boundary --- src/terminal/search.zig | 65 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/src/terminal/search.zig b/src/terminal/search.zig index 71ac6aea4..88da8304d 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -212,6 +212,12 @@ const SlidingWindow = struct { var meta_consumed: usize = 0; const tl: Pin = pin(&meta_it, &meta_consumed, start); + // Store the information required to prune later. We store this + // now because we only want to prune up to our START so we can + // find overlapping matches. + const tl_meta_idx = meta_it.idx - 1; + const tl_meta_consumed = meta_consumed; + // We have to seek back so that we reinspect our current // iterator value again in case the start and end are in the // same segment. @@ -222,27 +228,27 @@ const SlidingWindow = struct { // Our offset into the current meta block is the start index // minus the amount of data fully consumed. We then add one // to move one past the match so we don't repeat it. - self.data_offset = start - meta_consumed + 1; + self.data_offset = start - tl_meta_consumed + 1; // meta_it.idx is br's meta index plus one (because the iterator // moves one past the end; we call next() one last time). So // we compare against one to check that the meta that we matched // in has prior meta blocks we can prune. 
- if (meta_it.idx > 1) { + if (tl_meta_idx > 0) { // Deinit all our memory in the meta blocks prior to our // match. - const meta_count = meta_it.idx - 1; + const meta_count = tl_meta_idx; meta_it.reset(); for (0..meta_count) |_| meta_it.next().?.deinit(); if (comptime std.debug.runtime_safety) { assert(meta_it.idx == meta_count); - assert(meta_it.next().?.node == br.node); + assert(meta_it.next().?.node == tl.node); } self.meta.deleteOldest(meta_count); // Delete all the data up to our current index. - assert(meta_consumed > 0); - self.data.deleteOldest(meta_consumed); + assert(tl_meta_consumed > 0); + self.data.deleteOldest(tl_meta_consumed); } self.assertIntegrity(); @@ -485,6 +491,53 @@ test "SlidingWindow two pages" { try testing.expect(w.next(needle) == null); } +test "SlidingWindow two pages match across boundary" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.initEmpty(alloc); + defer w.deinit(alloc); + + var s = try Screen.init(alloc, 80, 24, 1000); + defer s.deinit(); + + // Fill up the first page. The final bytes in the first page + // are "boo!" 
+ const first_page_rows = s.pages.pages.first.?.data.capacity.rows; + for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); + for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); + try s.testWriteString("hell"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + try s.testWriteString("o, world!"); + try testing.expect(s.pages.pages.first != s.pages.pages.last); + + // Imaginary needle for search + const needle = "hello, world"; + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(alloc, node); + try w.append(alloc, node.next.?); + + // Search should find a match + { + const sel = w.next(needle).?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 76, + .y = 22, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 23, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next(needle) == null); + try testing.expect(w.next(needle) == null); + + // We shouldn't prune because we don't have enough space + try testing.expectEqual(2, w.meta.len()); +} + test "SlidingWindow two pages no match prunes first page" { const testing = std.testing; const alloc = testing.allocator; From 852e04fa009eca31727762f2d72d2fec4fcea273 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 4 Dec 2024 10:58:22 -0800 Subject: [PATCH 13/18] terminal: test for match in second slice of circ buf --- src/terminal/search.zig | 73 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 7 deletions(-) diff --git a/src/terminal/search.zig b/src/terminal/search.zig index 88da8304d..7b6486429 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -118,12 +118,16 @@ const SlidingWindow = struct { /// the invariant that the window is always big enough to contain /// the needle. 
pub fn next(self: *SlidingWindow, needle: []const u8) ?Selection { - const data_len = self.data.len(); - if (data_len == 0) return null; - const slices = self.data.getPtrSlice( - self.data_offset, - data_len - self.data_offset, - ); + const slices = slices: { + // If we have less data then the needle then we can't possibly match + const data_len = self.data.len(); + if (data_len < needle.len) return null; + + break :slices self.data.getPtrSlice( + self.data_offset, + data_len - self.data_offset, + ); + }; // Search the first slice for the needle. if (std.mem.indexOf(u8, slices[0], needle)) |idx| { @@ -134,7 +138,6 @@ const SlidingWindow = struct { // Search the last slice for the needle. if (std.mem.indexOf(u8, slices[1], needle)) |idx| { - if (true) @panic("TODO: test"); return self.selection(slices[0].len + idx, needle.len); } @@ -182,6 +185,10 @@ const SlidingWindow = struct { self.data.deleteOldest(prune_data_len); } + // Our data offset now moves to needle.len - 1 from the end so + // that we can handle the overlap case. + self.data_offset = self.data.len() - needle.len + 1; + self.assertIntegrity(); return null; } @@ -615,3 +622,55 @@ test "SlidingWindow two pages no match keeps both pages" { // No pruning because both pages are needed to fit needle. try testing.expectEqual(2, w.meta.len()); } + +test "SlidingWindow single append across circular buffer boundary" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.initEmpty(alloc); + defer w.deinit(alloc); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("XXXXXXXXXXXXXXXXXXXboo!XXXXX"); + + // We are trying to break a circular buffer boundary so the way we + // do this is to duplicate the data then do a failing search. This + // will cause the first page to be pruned. The next time we append we'll + // put it in the middle of the circ buffer. We assert this so that if + // our implementation changes our test will fail. 
+ try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(alloc, node); + try w.append(alloc, node); + { + // No wrap around yet + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len == 0); + } + + // Search non-match, prunes page + try testing.expect(w.next("abc") == null); + try testing.expectEqual(1, w.meta.len()); + + // Add new page, now wraps + try w.append(alloc, node); + { + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len > 0); + } + { + const sel = w.next("boo!").?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 19, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 22, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next("boo!") == null); +} From 34fb840cf99fdfdd970557df8b8accbbc156b3ce Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 4 Dec 2024 11:16:36 -0800 Subject: [PATCH 14/18] terminal: search match on overlap case --- src/terminal/search.zig | 215 ++++++++++++++++++++++++++++------------ 1 file changed, 153 insertions(+), 62 deletions(-) diff --git a/src/terminal/search.zig b/src/terminal/search.zig index 7b6486429..304cc5a4e 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -72,6 +72,14 @@ const SlidingWindow = struct { /// do enough to prune it. data_offset: usize = 0, + /// The needle we're searching for. Does not own the memory. + needle: []const u8, + + /// A buffer to store the overlap search data. This is used to search + /// overlaps between pages where the match starts on one page and + /// ends on another. The length is always `needle.len * 2`. 
+ overlap_buf: []u8, + const DataBuf = CircBuf(u8, 0); const MetaBuf = CircBuf(Meta, undefined); const Meta = struct { @@ -83,20 +91,29 @@ const SlidingWindow = struct { } }; - pub fn initEmpty(alloc: Allocator) Allocator.Error!SlidingWindow { + pub fn init( + alloc: Allocator, + needle: []const u8, + ) Allocator.Error!SlidingWindow { var data = try DataBuf.init(alloc, 0); errdefer data.deinit(alloc); var meta = try MetaBuf.init(alloc, 0); errdefer meta.deinit(alloc); + const overlap_buf = try alloc.alloc(u8, needle.len * 2); + errdefer alloc.free(overlap_buf); + return .{ .data = data, .meta = meta, + .needle = needle, + .overlap_buf = overlap_buf, }; } pub fn deinit(self: *SlidingWindow, alloc: Allocator) void { + alloc.free(self.overlap_buf); self.data.deinit(alloc); var meta_it = self.meta.iterator(.forward); @@ -117,11 +134,11 @@ const SlidingWindow = struct { /// the window moves, the window will prune itself while maintaining /// the invariant that the window is always big enough to contain /// the needle. - pub fn next(self: *SlidingWindow, needle: []const u8) ?Selection { + pub fn next(self: *SlidingWindow) ?Selection { const slices = slices: { // If we have less data then the needle then we can't possibly match const data_len = self.data.len(); - if (data_len < needle.len) return null; + if (data_len < self.needle.len) return null; break :slices self.data.getPtrSlice( self.data_offset, @@ -130,15 +147,46 @@ const SlidingWindow = struct { }; // Search the first slice for the needle. - if (std.mem.indexOf(u8, slices[0], needle)) |idx| { - return self.selection(idx, needle.len); + if (std.mem.indexOf(u8, slices[0], self.needle)) |idx| { + return self.selection(idx, self.needle.len); } - // TODO: search overlap + // Search the overlap buffer for the needle. + if (slices[0].len > 0 and slices[1].len > 0) overlap: { + // Get up to needle.len - 1 bytes from each side (as much as + // we can) and store it in the overlap buffer. 
+ const prefix: []const u8 = prefix: { + const len = @min(slices[0].len, self.needle.len - 1); + const idx = slices[0].len - len; + break :prefix slices[0][idx..]; + }; + const suffix: []const u8 = suffix: { + const len = @min(slices[1].len, self.needle.len - 1); + break :suffix slices[1][0..len]; + }; + const overlap_len = prefix.len + suffix.len; + assert(overlap_len <= self.overlap_buf.len); + @memcpy(self.overlap_buf[0..prefix.len], prefix); + @memcpy(self.overlap_buf[prefix.len..overlap_len], suffix); + + // Search the overlap + const idx = std.mem.indexOf( + u8, + self.overlap_buf[0..overlap_len], + self.needle, + ) orelse break :overlap; + + // We found a match in the overlap buffer. We need to map the + // index back to the data buffer in order to get our selection. + return self.selection( + slices[0].len - prefix.len + idx, + self.needle.len, + ); + } // Search the last slice for the needle. - if (std.mem.indexOf(u8, slices[1], needle)) |idx| { - return self.selection(slices[0].len + idx, needle.len); + if (std.mem.indexOf(u8, slices[1], self.needle)) |idx| { + return self.selection(slices[0].len + idx, self.needle.len); } // No match. We keep `needle.len - 1` bytes available to @@ -147,7 +195,7 @@ const SlidingWindow = struct { prune: { var saved: usize = 0; while (meta_it.next()) |meta| { - const needed = needle.len - 1 - saved; + const needed = self.needle.len - 1 - saved; if (meta.cell_map.items.len >= needed) { // We save up to this meta. We set our data offset // to exactly where it needs to be to continue @@ -161,7 +209,7 @@ const SlidingWindow = struct { // If we exited the while loop naturally then we // never got the amount we needed and so there is // nothing to prune. - assert(saved < needle.len - 1); + assert(saved < self.needle.len - 1); break :prune; } @@ -187,7 +235,7 @@ const SlidingWindow = struct { // Our data offset now moves to needle.len - 1 from the end so // that we can handle the overlap case. 
- self.data_offset = self.data.len() - needle.len + 1; + self.data_offset = self.data.len() - self.needle.len + 1; self.assertIntegrity(); return null; @@ -363,7 +411,7 @@ test "SlidingWindow empty on init" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.initEmpty(alloc); + var w = try SlidingWindow.init(alloc, "boo!"); defer w.deinit(alloc); try testing.expectEqual(0, w.data.len()); try testing.expectEqual(0, w.meta.len()); @@ -373,16 +421,13 @@ test "SlidingWindow single append" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.initEmpty(alloc); + var w = try SlidingWindow.init(alloc, "boo!"); defer w.deinit(alloc); var s = try Screen.init(alloc, 80, 24, 0); defer s.deinit(); try s.testWriteString("hello. boo! hello. boo!"); - // Imaginary needle for search - const needle = "boo!"; - // We want to test single-page cases. try testing.expect(s.pages.pages.first == s.pages.pages.last); const node: *PageList.List.Node = s.pages.pages.first.?; @@ -390,7 +435,7 @@ test "SlidingWindow single append" { // We should be able to find two matches. 
{ - const sel = w.next(needle).?; + const sel = w.next().?; try testing.expectEqual(point.Point{ .active = .{ .x = 7, .y = 0, @@ -401,7 +446,7 @@ test "SlidingWindow single append" { } }, s.pages.pointFromPin(.active, sel.end()).?); } { - const sel = w.next(needle).?; + const sel = w.next().?; try testing.expectEqual(point.Point{ .active = .{ .x = 19, .y = 0, @@ -411,32 +456,29 @@ test "SlidingWindow single append" { .y = 0, } }, s.pages.pointFromPin(.active, sel.end()).?); } - try testing.expect(w.next(needle) == null); - try testing.expect(w.next(needle) == null); + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); } test "SlidingWindow single append no match" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.initEmpty(alloc); + var w = try SlidingWindow.init(alloc, "nope!"); defer w.deinit(alloc); var s = try Screen.init(alloc, 80, 24, 0); defer s.deinit(); try s.testWriteString("hello. boo! hello. boo!"); - // Imaginary needle for search - const needle = "nope!"; - // We want to test single-page cases. try testing.expect(s.pages.pages.first == s.pages.pages.last); const node: *PageList.List.Node = s.pages.pages.first.?; try w.append(alloc, node); // No matches - try testing.expect(w.next(needle) == null); - try testing.expect(w.next(needle) == null); + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); // Should still keep the page try testing.expectEqual(1, w.meta.len()); @@ -446,7 +488,7 @@ test "SlidingWindow two pages" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.initEmpty(alloc); + var w = try SlidingWindow.init(alloc, "boo!"); defer w.deinit(alloc); var s = try Screen.init(alloc, 80, 24, 1000); @@ -463,9 +505,6 @@ test "SlidingWindow two pages" { try testing.expect(s.pages.pages.first != s.pages.pages.last); try s.testWriteString("hello. 
boo!"); - // Imaginary needle for search - const needle = "boo!"; - // Add both pages const node: *PageList.List.Node = s.pages.pages.first.?; try w.append(alloc, node); @@ -473,7 +512,7 @@ test "SlidingWindow two pages" { // Search should find two matches { - const sel = w.next(needle).?; + const sel = w.next().?; try testing.expectEqual(point.Point{ .active = .{ .x = 76, .y = 22, @@ -484,7 +523,7 @@ test "SlidingWindow two pages" { } }, s.pages.pointFromPin(.active, sel.end()).?); } { - const sel = w.next(needle).?; + const sel = w.next().?; try testing.expectEqual(point.Point{ .active = .{ .x = 7, .y = 23, @@ -494,15 +533,15 @@ test "SlidingWindow two pages" { .y = 23, } }, s.pages.pointFromPin(.active, sel.end()).?); } - try testing.expect(w.next(needle) == null); - try testing.expect(w.next(needle) == null); + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); } test "SlidingWindow two pages match across boundary" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.initEmpty(alloc); + var w = try SlidingWindow.init(alloc, "hello, world"); defer w.deinit(alloc); var s = try Screen.init(alloc, 80, 24, 1000); @@ -518,9 +557,6 @@ test "SlidingWindow two pages match across boundary" { try s.testWriteString("o, world!"); try testing.expect(s.pages.pages.first != s.pages.pages.last); - // Imaginary needle for search - const needle = "hello, world"; - // Add both pages const node: *PageList.List.Node = s.pages.pages.first.?; try w.append(alloc, node); @@ -528,7 +564,7 @@ test "SlidingWindow two pages match across boundary" { // Search should find a match { - const sel = w.next(needle).?; + const sel = w.next().?; try testing.expectEqual(point.Point{ .active = .{ .x = 76, .y = 22, @@ -538,8 +574,8 @@ test "SlidingWindow two pages match across boundary" { .y = 23, } }, s.pages.pointFromPin(.active, sel.end()).?); } - try testing.expect(w.next(needle) == null); - try testing.expect(w.next(needle) == 
null); + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); // We shouldn't prune because we don't have enough space try testing.expectEqual(2, w.meta.len()); @@ -549,7 +585,7 @@ test "SlidingWindow two pages no match prunes first page" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.initEmpty(alloc); + var w = try SlidingWindow.init(alloc, "nope!"); defer w.deinit(alloc); var s = try Screen.init(alloc, 80, 24, 1000); @@ -571,12 +607,9 @@ test "SlidingWindow two pages no match prunes first page" { try w.append(alloc, node); try w.append(alloc, node.next.?); - // Imaginary needle for search. Doesn't match! - const needle = "nope!"; - // Search should find nothing - try testing.expect(w.next(needle) == null); - try testing.expect(w.next(needle) == null); + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); // We should've pruned our page because the second page // has enough text to contain our needle. @@ -587,9 +620,6 @@ test "SlidingWindow two pages no match keeps both pages" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.initEmpty(alloc); - defer w.deinit(alloc); - var s = try Screen.init(alloc, 80, 24, 1000); defer s.deinit(); @@ -604,20 +634,23 @@ test "SlidingWindow two pages no match keeps both pages" { try testing.expect(s.pages.pages.first != s.pages.pages.last); try s.testWriteString("hello. boo!"); - // Add both pages - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(alloc, node); - try w.append(alloc, node.next.?); - // Imaginary needle for search. Doesn't match! 
var needle_list = std.ArrayList(u8).init(alloc); defer needle_list.deinit(); try needle_list.appendNTimes('x', first_page_rows * s.pages.cols); const needle: []const u8 = needle_list.items; + var w = try SlidingWindow.init(alloc, needle); + defer w.deinit(alloc); + + // Add both pages + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(alloc, node); + try w.append(alloc, node.next.?); + // Search should find nothing - try testing.expect(w.next(needle) == null); - try testing.expect(w.next(needle) == null); + try testing.expect(w.next() == null); + try testing.expect(w.next() == null); // No pruning because both pages are needed to fit needle. try testing.expectEqual(2, w.meta.len()); @@ -627,7 +660,7 @@ test "SlidingWindow single append across circular buffer boundary" { const testing = std.testing; const alloc = testing.allocator; - var w = try SlidingWindow.initEmpty(alloc); + var w = try SlidingWindow.init(alloc, "abc"); defer w.deinit(alloc); var s = try Screen.init(alloc, 80, 24, 0); @@ -651,9 +684,12 @@ test "SlidingWindow single append across circular buffer boundary" { } // Search non-match, prunes page - try testing.expect(w.next("abc") == null); + try testing.expect(w.next() == null); try testing.expectEqual(1, w.meta.len()); + // Change the needle, just needs to be the same length (not a real API) + w.needle = "boo"; + // Add new page, now wraps try w.append(alloc, node); { @@ -662,15 +698,70 @@ test "SlidingWindow single append across circular buffer boundary" { try testing.expect(slices[1].len > 0); } { - const sel = w.next("boo!").?; + const sel = w.next().?; try testing.expectEqual(point.Point{ .active = .{ .x = 19, .y = 0, } }, s.pages.pointFromPin(.active, sel.start()).?); try testing.expectEqual(point.Point{ .active = .{ - .x = 22, + .x = 21, .y = 0, } }, s.pages.pointFromPin(.active, sel.end()).?); } - try testing.expect(w.next("boo!") == null); + try testing.expect(w.next() == null); +} + +test "SlidingWindow single append 
match on boundary" { + const testing = std.testing; + const alloc = testing.allocator; + + var w = try SlidingWindow.init(alloc, "abcd"); + defer w.deinit(alloc); + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("o!XXXXXXXXXXXXXXXXXXXbo"); + + // We are trying to break a circular buffer boundary so the way we + // do this is to duplicate the data then do a failing search. This + // will cause the first page to be pruned. The next time we append we'll + // put it in the middle of the circ buffer. We assert this so that if + // our implementation changes our test will fail. + try testing.expect(s.pages.pages.first == s.pages.pages.last); + const node: *PageList.List.Node = s.pages.pages.first.?; + try w.append(alloc, node); + try w.append(alloc, node); + { + // No wrap around yet + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len == 0); + } + + // Search non-match, prunes page + try testing.expect(w.next() == null); + try testing.expectEqual(1, w.meta.len()); + + // Change the needle, just needs to be the same length (not a real API) + w.needle = "boo!"; + + // Add new page, now wraps + try w.append(alloc, node); + { + const slices = w.data.getPtrSlice(0, w.data.len()); + try testing.expect(slices[0].len > 0); + try testing.expect(slices[1].len > 0); + } + { + const sel = w.next().?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 21, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 1, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect(w.next() == null); } From 6361bf47f7f8fda0993a15046829e8d42b085419 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 4 Dec 2024 11:23:09 -0800 Subject: [PATCH 15/18] terminal: update comments/docs on sliding window search --- src/terminal/search.zig | 19 ++++++++++++++++--- 1 file 
changed, 16 insertions(+), 3 deletions(-) diff --git a/src/terminal/search.zig b/src/terminal/search.zig index 304cc5a4e..09078ae28 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -53,9 +53,22 @@ pub const PageListSearch = struct { } }; -/// Search pages via a sliding window. The sliding window always maintains -/// the invariant that data isn't pruned until we've searched it and -/// accounted for overlaps across pages. +/// Searches page nodes via a sliding window. The sliding window maintains +/// the invariant that data isn't pruned until (1) we've searched it and +/// (2) we've accounted for overlaps across pages to fit the needle. +/// +/// The sliding window is first initialized empty. Pages are then appended +/// in the order to search them. If you're doing a reverse search then the +/// pages should be appended in reverse order and the needle should be +/// reversed. +/// +/// All appends grow the window. The window is only pruned when a search +/// is done (positive or negative match) via `next()`. +/// +/// To avoid unnecessary memory growth, the recommended usage is to +/// call `next()` until it returns null and then `append` the next page +/// and repeat the process. This will always maintain the minimum +/// required memory to search for the needle. const SlidingWindow = struct { /// The data buffer is a circular buffer of u8 that contains the /// encoded page text that we can use to search for the needle. From b9dda6ad87fff1ec01699a0b12ad28cbbe51b856 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 4 Dec 2024 12:15:32 -0800 Subject: [PATCH 16/18] terminal: PageListSearch works!
--- src/terminal/search.zig | 101 +++++++++++++++++++++++++++++++--------- 1 file changed, 79 insertions(+), 22 deletions(-) diff --git a/src/terminal/search.zig b/src/terminal/search.zig index 09078ae28..fe5ac0c29 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -10,46 +10,64 @@ const Pin = PageList.Pin; const Selection = terminal.Selection; const Screen = terminal.Screen; +/// Searches for a term in a PageList structure. pub const PageListSearch = struct { - alloc: Allocator, - /// The list we're searching. list: *PageList, - /// The search term we're searching for. - needle: []const u8, - - /// The window is our sliding window of pages that we're searching so - /// we can handle boundary cases where a needle is partially on the end - /// of one page and the beginning of the next. - /// - /// Note that we're not guaranteed to straddle exactly two pages. If - /// the needle is large enough and/or the pages are small enough then - /// the needle can straddle N pages. Additionally, pages aren't guaranteed - /// to be equal size so we can't precompute the window size. + /// The sliding window of page contents and nodes to search. window: SlidingWindow, + /// Initialize the page list search. + /// + /// The needle is not copied and must be kept alive for the duration + /// of the search operation. pub fn init( alloc: Allocator, list: *PageList, needle: []const u8, - ) !PageListSearch { - var window = try CircBuf.init(alloc, 0); - errdefer window.deinit(); + ) Allocator.Error!PageListSearch { + var window = try SlidingWindow.init(alloc, needle); + errdefer window.deinit(alloc); return .{ - .alloc = alloc, .list = list, - .current = list.pages.first, - .needle = needle, .window = window, }; } - pub fn deinit(self: *PageListSearch) void { - _ = self; + pub fn deinit(self: *PageListSearch, alloc: Allocator) void { + self.window.deinit(alloc); + } - // TODO: deinit window + /// Find the next match for the needle in the pagelist. 
This returns + /// null when there are no more matches. + pub fn next( + self: *PageListSearch, + alloc: Allocator, + ) Allocator.Error!?Selection { + // Try to search for the needle in the window. If we find a match + // then we can return that and we're done. + if (self.window.next()) |sel| return sel; + + // Get our next node. If we have a value in our window then we + // can determine the next node. If we don't, we've never set up the + // window so we use our first node. + var node_: ?*PageList.List.Node = if (self.window.meta.last()) |meta| + meta.node.next + else + self.list.pages.first; + + // Add one pagelist node at a time, look for matches, and repeat + // until we find a match or we reach the end of the pagelist. + // This append then next pattern limits memory usage of the window. + while (node_) |node| : (node_ = node.next) { + try self.window.append(alloc, node); + if (self.window.next()) |sel| return sel; + } + + // We've reached the end of the pagelist, no matches. + return null; } }; @@ -420,6 +438,45 @@ const SlidingWindow = struct { } }; +test "PageListSearch single page" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try Screen.init(alloc, 80, 24, 0); + defer s.deinit(); + try s.testWriteString("hello. boo! hello. boo!"); + try testing.expect(s.pages.pages.first == s.pages.pages.last); + + var search = try PageListSearch.init(alloc, &s.pages, "boo!"); + defer search.deinit(alloc); + + // We should be able to find two matches.
+ { + const sel = (try search.next(alloc)).?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 7, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 10, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + { + const sel = (try search.next(alloc)).?; + try testing.expectEqual(point.Point{ .active = .{ + .x = 19, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.start()).?); + try testing.expectEqual(point.Point{ .active = .{ + .x = 22, + .y = 0, + } }, s.pages.pointFromPin(.active, sel.end()).?); + } + try testing.expect((try search.next(alloc)) == null); + try testing.expect((try search.next(alloc)) == null); +} + test "SlidingWindow empty on init" { const testing = std.testing; const alloc = testing.allocator; From 50b36c5d8606a4fd0be5fafd4e641751f2625861 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 4 Dec 2024 12:38:29 -0800 Subject: [PATCH 17/18] comments --- src/terminal/search.zig | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/terminal/search.zig b/src/terminal/search.zig index fe5ac0c29..56b181c48 100644 --- a/src/terminal/search.zig +++ b/src/terminal/search.zig @@ -1,3 +1,26 @@ +//! Search functionality for the terminal. +//! +//! At the time of writing this comment, this is a **work in progress**. +//! +//! Search at the time of writing is implemented using a simple +//! boyer-moore-horspool algorithm. The suboptimal part of the implementation +//! is that we need to encode each terminal page into a text buffer in order +//! to apply BMH to it. This is because the terminal page is not laid out +//! in a flat text form. +//! +//! To minimize memory usage, we use a sliding window to search for the +//! needle. The sliding window only keeps the minimum amount of page data +//! in memory to search for a needle (i.e. `needle.len - 1` bytes of overlap +//! between terminal pages). +//! +//! Future work: +//! +//! 
- PageListSearch on a PageList concurrently with another thread +//! - Handle pruned pages in a PageList to ensure we don't keep references +//! - Repeat search a changing active area of the screen +//! - Reverse search so that more recent matches are found first +//! + const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; @@ -11,6 +34,10 @@ const Selection = terminal.Selection; const Screen = terminal.Screen; /// Searches for a term in a PageList structure. +/// +/// At the time of writing, this does not support searching a pagelist +/// simultaneously as it's being used by another thread. This will be resolved +/// in the future. pub const PageListSearch = struct { /// The list we're searching. list: *PageList, From 7dd8e7c43f4c1da2a310dbd5d9c96c3a9de1d6e4 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 4 Dec 2024 12:48:00 -0800 Subject: [PATCH 18/18] remove unused file --- ' | 555 -------------------------------------------------------------- 1 file changed, 555 deletions(-) delete mode 100644 ' diff --git a/' b/' deleted file mode 100644 index 0b79f1879..000000000 --- a/' +++ /dev/null @@ -1,555 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const CircBuf = @import("../datastruct/main.zig").CircBuf; -const terminal = @import("main.zig"); -const point = terminal.point; -const Page = terminal.Page; -const PageList = terminal.PageList; -const Pin = PageList.Pin; -const Selection = terminal.Selection; -const Screen = terminal.Screen; - -pub const PageListSearch = struct { - alloc: Allocator, - - /// The list we're searching. - list: *PageList, - - /// The search term we're searching for. - needle: []const u8, - - /// The window is our sliding window of pages that we're searching so - /// we can handle boundary cases where a needle is partially on the end - /// of one page and the beginning of the next.
- /// - /// Note that we're not guaranteed to straddle exactly two pages. If - /// the needle is large enough and/or the pages are small enough then - /// the needle can straddle N pages. Additionally, pages aren't guaranteed - /// to be equal size so we can't precompute the window size. - window: SlidingWindow, - - pub fn init( - alloc: Allocator, - list: *PageList, - needle: []const u8, - ) !PageListSearch { - var window = try CircBuf.init(alloc, 0); - errdefer window.deinit(); - - return .{ - .alloc = alloc, - .list = list, - .current = list.pages.first, - .needle = needle, - .window = window, - }; - } - - pub fn deinit(self: *PageListSearch) void { - _ = self; - - // TODO: deinit window - } -}; - -/// The sliding window of the pages we're searching. The window is always -/// big enough so that the needle can fit in it. -const SlidingWindow = struct { - /// The data buffer is a circular buffer of u8 that contains the - /// encoded page text that we can use to search for the needle. - data: DataBuf, - - /// The meta buffer is a circular buffer that contains the metadata - /// about the pages we're searching. This usually isn't that large - /// so callers must iterate through it to find the offset to map - /// data to meta. - meta: MetaBuf, - - /// The cursor into the data buffer for our current search. 
- i: usize = 0, - - const DataBuf = CircBuf(u8, 0); - const MetaBuf = CircBuf(Meta, undefined); - const Meta = struct { - node: *PageList.List.Node, - cell_map: Page.CellMap, - - pub fn deinit(self: *Meta) void { - self.cell_map.deinit(); - } - }; - - pub fn initEmpty(alloc: Allocator) Allocator.Error!SlidingWindow { - var data = try DataBuf.init(alloc, 0); - errdefer data.deinit(alloc); - - var meta = try MetaBuf.init(alloc, 0); - errdefer meta.deinit(alloc); - - return .{ - .data = data, - .meta = meta, - }; - } - - pub fn deinit(self: *SlidingWindow, alloc: Allocator) void { - self.data.deinit(alloc); - - var meta_it = self.meta.iterator(.forward); - while (meta_it.next()) |meta| meta.deinit(); - self.meta.deinit(alloc); - } - - /// Search the window for the next occurrence of the needle. - pub fn next(self: *SlidingWindow, needle: []const u8) void { - const slices = self.data.getPtrSlice(0, self.data.len()); - - // Search the first slice for the needle. - if (std.mem.indexOf(u8, slices[0][self.i..], needle)) |idx| { - // Found, map the match to a selection. - var meta_it = self.meta.iterator(.forward); - var i: usize = 0; - while (meta_it.next()) |meta| { - const meta_idx = idx - i; - if (meta.cell_map.items.len < meta_idx) { - // This meta doesn't contain the match. - i += meta.cell_map.items.len; - continue; - } - - // We found the meta that contains the start of the match. - const tl: PageList.Pin = tl: { - const map = meta.cell_map.items[meta_idx]; - break :tl .{ - .node = meta.node, - .y = map.y, - .x = map.x, - }; - }; - - _ = tl; - } - - // Found, we can move our index to the next character - // after the match. This let's us find all matches even if - // they overlap. - - self.i = idx + 1; - - @panic("TODO"); - } - } - - /// Return a selection for the given start and length into the data - /// buffer and also prune the data/meta buffers if possible up to - /// this start index. 
- fn selectAndPrune( - self: *SlidingWindow, - start: usize, - len: usize, - ) Selection { - assert(start < self.data.len()); - assert(start + len < self.data.len()); - - var meta_it = self.meta.iterator(.forward); - var meta_: ?Meta = meta_it.next(); - - // Find the start of the match - var offset: usize = 0; - var skip_nodes: usize = 0; - const tl: PageList.Pin = tl: { - while (meta_) |meta| : (meta_ = meta_it.next()) { - // meta_i is the index we expect to find the match in the - // cell map within this meta if it contains it. - const meta_i = start - offset; - if (meta_i >= meta.cell_map.items.len) { - // This meta doesn't contain the match. This means we - // can also prune this set of data because we only look - // forward. - offset += meta.cell_map.items.len; - skip_nodes += 1; - continue; - } - - // We found the meta that contains the start of the match. - const map = meta.cell_map.items[start]; - break :tl .{ - .node = meta.node, - .y = map.y, - .x = map.x, - }; - } - - // We never found the top-left. This is unreachable because - // we assert that the start index is within the data buffer, - // and when building the data buffer we assert the cell map - // length exactly matches the data buffer length. - unreachable; - }; - - // Keep track of the number of nodes we skipped for the tl. - const tl_skip_nodes = skip_nodes; - skip_nodes = 0; - - // Find the end of the match - const br: PageList.Pin = br: { - const end_idx = start + len - 1; - while (meta_) |meta| : (meta_ = meta_it.next()) { - const meta_i = end_idx - offset; - if (meta_i >= meta.cell_map.items.len) { - offset += meta.cell_map.items.len; - skip_nodes += 1; - continue; - } - - // We found the meta that contains the start of the match. - const map = meta.cell_map.items[end_idx]; - break :br .{ - .node = meta.node, - .y = map.y, - .x = map.x, - }; - } - }; - - // If we skipped any nodes for the bottom-right then we can prune - // all the way up to the total. 
If we didn't, it means we found - // the bottom-right in the same node as the top-left and we can't - // prune the node that the match is on because there may be - // more matches. - if (skip_nodes > 0) skip_nodes += tl_skip_nodes; - - _ = tl; - _ = br; - } - - /// Convert a data index into a pin. - fn pin( - self: *const SlidingWindow, - idx: usize, - it: ?*MetaBuf.Iterator, - ) struct { - /// The pin for the data index. - pin: Pin, - - /// The offset into the meta buffer that the pin was found. - /// This can be used to prune the meta buffer (its safe to prune - /// before this i). - meta_i: usize, - } { - _ = self; - _ = idx; - _ = start; - - while (it.next()) |meta| { - // meta_i is the index we expect to find the match in the - // cell map within this meta if it contains it. - const meta_i = start - offset; - if (meta_i >= meta.cell_map.items.len) { - // This meta doesn't contain the match. This means we - // can also prune this set of data because we only look - // forward. - offset += meta.cell_map.items.len; - skip_nodes += 1; - continue; - } - - // We found the meta that contains the start of the match. - const map = meta.cell_map.items[start]; - break :tl .{ - .node = meta.node, - .y = map.y, - .x = map.x, - }; - } - - } - - /// Add a new node to the sliding window. - /// - /// The window will prune itself if it can while always maintaining - /// the invariant that the `fixed_size` always fits within the window. - /// - /// Note it is possible for the window to be smaller than `fixed_size` - /// if not enough nodes have been added yet or the screen is just - /// smaller than the needle. - pub fn append( - self: *SlidingWindow, - alloc: Allocator, - node: *PageList.List.Node, - required_size: usize, - ) Allocator.Error!void { - // Initialize our metadata for the node. 
- var meta: Meta = .{ - .node = node, - .cell_map = Page.CellMap.init(alloc), - }; - errdefer meta.deinit(); - - // This is suboptimal but we need to encode the page once to - // temporary memory, and then copy it into our circular buffer. - // In the future, we should benchmark and see if we can encode - // directly into the circular buffer. - var encoded: std.ArrayListUnmanaged(u8) = .{}; - defer encoded.deinit(alloc); - - // Encode the page into the buffer. - const page: *const Page = &meta.node.data; - _ = page.encodeUtf8( - encoded.writer(alloc), - .{ .cell_map = &meta.cell_map }, - ) catch { - // writer uses anyerror but the only realistic error on - // an ArrayList is out of memory. - return error.OutOfMemory; - }; - assert(meta.cell_map.items.len == encoded.items.len); - - // Now that we know our buffer length, we can consider if we can - // prune our circular buffer or if we need to grow it. - prune: { - // Our buffer size after adding the new node. - const before_size: usize = self.data.len() + encoded.items.len; - - // Prune as long as removing the first (oldest) node retains - // our required size invariant. - var after_size: usize = before_size; - while (self.meta.first()) |oldest_meta| { - const new_size = after_size - oldest_meta.cell_map.items.len; - if (new_size < required_size) break :prune; - - // We can prune this node and retain our invariant. - // Update our new size, deinitialize the memory, and - // remove from the circular buffer. - after_size = new_size; - oldest_meta.deinit(); - self.meta.deleteOldest(1); - } - assert(after_size <= before_size); - - // If we didn't prune anything then we're done. - if (after_size == before_size) break :prune; - - // We need to prune our data buffer as well. - self.data.deleteOldest(before_size - after_size); - } - - // Ensure our buffers are big enough to store what we need. 
- try self.data.ensureUnusedCapacity(alloc, encoded.items.len); - try self.meta.ensureUnusedCapacity(alloc, 1); - - // Append our new node to the circular buffer. - try self.data.appendSlice(encoded.items); - try self.meta.append(meta); - - // Integrity check: verify our data matches our metadata exactly. - if (comptime std.debug.runtime_safety) { - var meta_it = self.meta.iterator(.forward); - var data_len: usize = 0; - while (meta_it.next()) |m| data_len += m.cell_map.items.len; - assert(data_len == self.data.len()); - } - } -}; - -test "SlidingWindow empty on init" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.initEmpty(alloc); - defer w.deinit(alloc); - try testing.expectEqual(0, w.data.len()); - try testing.expectEqual(0, w.meta.len()); -} - -test "SlidingWindow single append" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.initEmpty(alloc); - defer w.deinit(alloc); - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("hello. boo! hello. boo!"); - - // Imaginary needle for search - const needle = "boo!"; - - // We want to test single-page cases. - try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(alloc, node, needle.len); -} - -test "SlidingWindow two pages" { - const testing = std.testing; - const alloc = testing.allocator; - - var w = try SlidingWindow.initEmpty(alloc); - defer w.deinit(alloc); - - var s = try Screen.init(alloc, 80, 24, 1000); - defer s.deinit(); - - // Fill up the first page. The final bytes in the first page - // are "boo!" 
- const first_page_rows = s.pages.pages.first.?.data.capacity.rows; - for (0..first_page_rows - 1) |_| try s.testWriteString("\n"); - for (0..s.pages.cols - 4) |_| try s.testWriteString("x"); - try s.testWriteString("boo!"); - try testing.expect(s.pages.pages.first == s.pages.pages.last); - try s.testWriteString("\n"); - try testing.expect(s.pages.pages.first != s.pages.pages.last); - try s.testWriteString("hello. boo!"); - - // Imaginary needle for search - const needle = "boo!"; - - // Add both pages - const node: *PageList.List.Node = s.pages.pages.first.?; - try w.append(alloc, node, needle.len); - try w.append(alloc, node.next.?, needle.len); - - // Ensure our data is correct -} - -pub const PageSearch = struct { - alloc: Allocator, - node: *PageList.List.Node, - needle: []const u8, - cell_map: Page.CellMap, - encoded: std.ArrayListUnmanaged(u8) = .{}, - i: usize = 0, - - pub fn init( - alloc: Allocator, - node: *PageList.List.Node, - needle: []const u8, - ) !PageSearch { - var result: PageSearch = .{ - .alloc = alloc, - .node = node, - .needle = needle, - .cell_map = Page.CellMap.init(alloc), - }; - - const page: *const Page = &node.data; - _ = try page.encodeUtf8(result.encoded.writer(alloc), .{ - .cell_map = &result.cell_map, - }); - - return result; - } - - pub fn deinit(self: *PageSearch) void { - self.encoded.deinit(self.alloc); - self.cell_map.deinit(); - } - - pub fn next(self: *PageSearch) ?Selection { - // Search our haystack for the needle. The resulting index is - // the offset from self.i not the absolute index. - const haystack: []const u8 = self.encoded.items[self.i..]; - const i_offset = std.mem.indexOf(u8, haystack, self.needle) orelse { - self.i = self.encoded.items.len; - return null; - }; - - // Get our full index into the encoded buffer. - const idx = self.i + i_offset; - - // We found our search term. Move the cursor forward one beyond - // the match. This lets us find every repeated match. 
- self.i = idx + 1; - - const tl: PageList.Pin = tl: { - const map = self.cell_map.items[idx]; - break :tl .{ - .node = self.node, - .y = map.y, - .x = map.x, - }; - }; - const br: PageList.Pin = br: { - const map = self.cell_map.items[idx + self.needle.len - 1]; - break :br .{ - .node = self.node, - .y = map.y, - .x = map.x, - }; - }; - - return Selection.init(tl, br, false); - } -}; - -test "search single page one match" { - const testing = std.testing; - const alloc = testing.allocator; - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("hello, world"); - - // We want to test single-page cases. - try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - - var it = try PageSearch.init(alloc, node, "world"); - defer it.deinit(); - - const sel = it.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 7, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 11, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - - try testing.expect(it.next() == null); -} - -test "search single page multiple match" { - const testing = std.testing; - const alloc = testing.allocator; - - var s = try Screen.init(alloc, 80, 24, 0); - defer s.deinit(); - try s.testWriteString("hello. boo! hello. boo!"); - - // We want to test single-page cases. 
- try testing.expect(s.pages.pages.first == s.pages.pages.last); - const node: *PageList.List.Node = s.pages.pages.first.?; - - var it = try PageSearch.init(alloc, node, "boo!"); - defer it.deinit(); - - { - const sel = it.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 7, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 10, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - { - const sel = it.next().?; - try testing.expectEqual(point.Point{ .active = .{ - .x = 19, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.start()).?); - try testing.expectEqual(point.Point{ .active = .{ - .x = 22, - .y = 0, - } }, s.pages.pointFromPin(.active, sel.end()).?); - } - - try testing.expect(it.next() == null); -}