Merge pull request #2885 from ghostty-org/search

Naive search internals (core only)
This commit is contained in:
Mitchell Hashimoto
2024-12-04 13:48:10 -08:00
committed by GitHub
6 changed files with 1347 additions and 97 deletions

View File

@ -45,10 +45,26 @@ pub fn CircBuf(comptime T: type, comptime default: T) type {
self.idx += 1;
return &self.buf.storage[storage_idx];
}
/// Move the iterator position by `amount` elements; negative values
/// move backwards. The arithmetic saturates, so the position stops at
/// zero when seeking backwards and cannot wrap around when seeking
/// forwards.
pub fn seekBy(self: *Iterator, amount: isize) void {
    // Backwards (or zero) seek: subtract the magnitude, stopping at 0.
    if (amount <= 0) {
        self.idx -|= @intCast(@abs(amount));
        return;
    }

    // Forwards seek: add, saturating at the maximum index value.
    self.idx +|= @intCast(amount);
}
/// Reset the iterator back to the first value by setting its
/// position index to zero.
pub fn reset(self: *Iterator) void {
self.idx = 0;
}
};
/// Initialize a new circular buffer that can store size elements.
pub fn init(alloc: Allocator, size: usize) !Self {
pub fn init(alloc: Allocator, size: usize) Allocator.Error!Self {
const buf = try alloc.alloc(T, size);
@memset(buf, default);
@ -56,7 +72,7 @@ pub fn CircBuf(comptime T: type, comptime default: T) type {
.storage = buf,
.head = 0,
.tail = 0,
.full = false,
.full = size == 0,
};
}
@ -67,7 +83,7 @@ pub fn CircBuf(comptime T: type, comptime default: T) type {
/// Append a single value to the buffer. If the buffer is full,
/// an error will be returned.
pub fn append(self: *Self, v: T) !void {
pub fn append(self: *Self, v: T) Allocator.Error!void {
if (self.full) return error.OutOfMemory;
self.storage[self.head] = v;
self.head += 1;
@ -75,6 +91,19 @@ pub fn CircBuf(comptime T: type, comptime default: T) type {
self.full = self.head == self.tail;
}
/// Append a slice to the buffer.
///
/// Returns `error.OutOfMemory` if the buffer does not have enough unused
/// capacity to hold the entire slice; in that case nothing is written.
/// It is up to the caller to grow the buffer (see `ensureUnusedCapacity`)
/// or rotate/delete old data if they want to make room.
pub fn appendSlice(
    self: *Self,
    slice: []const T,
) Allocator.Error!void {
    // Enforce the documented contract explicitly. Without this check the
    // function has no error path at all and relies entirely on whatever
    // getPtrSlice does when asked for more than the free space.
    if (slice.len > self.capacity() - self.len()) return error.OutOfMemory;

    // Reserve the region directly after the current contents. The region
    // may wrap around the end of the storage, hence two destination slices.
    const storage = self.getPtrSlice(self.len(), slice.len);
    fastmem.copy(T, storage[0], slice[0..storage[0].len]);
    fastmem.copy(T, storage[1], slice[storage[0].len..]);
}
/// Clear the buffer.
pub fn clear(self: *Self) void {
self.head = 0;
@ -91,6 +120,34 @@ pub fn CircBuf(comptime T: type, comptime default: T) type {
};
}
/// Return a pointer to the oldest value in the buffer, or null if the
/// forward iterator yields nothing.
pub fn first(self: Self) ?*T {
    // Implemented via the forward iterator for simplicity; a direct
    // index computation would be cheaper.
    var forward_it = self.iterator(.forward);
    const oldest = forward_it.next();
    return oldest;
}
/// Return a pointer to the newest value in the buffer, or null if the
/// reverse iterator yields nothing.
pub fn last(self: Self) ?*T {
    // Implemented via the reverse iterator for simplicity; a direct
    // index computation would be cheaper.
    var reverse_it = self.iterator(.reverse);
    const newest = reverse_it.next();
    return newest;
}
/// Make sure at least `amount` more items can be appended without
/// running out of room. If the current capacity already suffices this
/// is a no-op; otherwise the buffer is resized to exactly
/// `len() + amount`.
pub fn ensureUnusedCapacity(
    self: *Self,
    alloc: Allocator,
    amount: usize,
) Allocator.Error!void {
    const required = self.len() + amount;
    if (required > self.capacity()) try self.resize(alloc, required);
}
/// Resize the buffer to the given size (larger or smaller).
/// If larger, new values will be set to the default value.
pub fn resize(self: *Self, alloc: Allocator, size: usize) Allocator.Error!void {
@ -256,7 +313,7 @@ test {
try testing.expectEqual(@as(usize, 0), buf.len());
}
test "append" {
test "CircBuf append" {
const testing = std.testing;
const alloc = testing.allocator;
@ -273,7 +330,7 @@ test "append" {
try testing.expectError(error.OutOfMemory, buf.append(5));
}
test "forward iterator" {
test "CircBuf forward iterator" {
const testing = std.testing;
const alloc = testing.allocator;
@ -319,7 +376,7 @@ test "forward iterator" {
}
}
test "reverse iterator" {
test "CircBuf reverse iterator" {
const testing = std.testing;
const alloc = testing.allocator;
@ -365,7 +422,95 @@ test "reverse iterator" {
}
}
test "getPtrSlice fits" {
// Verifies that first() yields the oldest element and last() the newest
// after the buffer has been filled to its capacity of three.
test "CircBuf first/last" {
const testing = std.testing;
const alloc = testing.allocator;
const Buf = CircBuf(u8, 0);
var buf = try Buf.init(alloc, 3);
defer buf.deinit(alloc);
// Append 1, 2, 3 in order so 1 is the oldest and 3 is the newest.
try buf.append(1);
try buf.append(2);
try buf.append(3);
try testing.expectEqual(3, buf.last().?.*);
try testing.expectEqual(1, buf.first().?.*);
}
// Verifies that first() and last() return null on a buffer that was
// initialized with zero capacity.
test "CircBuf first/last empty" {
const testing = std.testing;
const alloc = testing.allocator;
const Buf = CircBuf(u8, 0);
var buf = try Buf.init(alloc, 0);
defer buf.deinit(alloc);
try testing.expect(buf.first() == null);
try testing.expect(buf.last() == null);
}
// Verifies that first() and last() return null on a buffer that has
// capacity (three slots) but has had nothing appended to it.
test "CircBuf first/last empty with cap" {
const testing = std.testing;
const alloc = testing.allocator;
const Buf = CircBuf(u8, 0);
var buf = try Buf.init(alloc, 3);
defer buf.deinit(alloc);
try testing.expect(buf.first() == null);
try testing.expect(buf.last() == null);
}
// Verifies that appendSlice() on an empty buffer stores the bytes in
// order when the slice exactly fills the capacity.
test "CircBuf append slice" {
const testing = std.testing;
const alloc = testing.allocator;
const Buf = CircBuf(u8, 0);
var buf = try Buf.init(alloc, 5);
defer buf.deinit(alloc);
try buf.appendSlice("hello");
{
// Forward iteration must yield the bytes in insertion order and
// then terminate.
var it = buf.iterator(.forward);
try testing.expect(it.next().?.* == 'h');
try testing.expect(it.next().?.* == 'e');
try testing.expect(it.next().?.* == 'l');
try testing.expect(it.next().?.* == 'l');
try testing.expect(it.next().?.* == 'o');
try testing.expect(it.next() == null);
}
}
// Verifies appendSlice() after the buffer has been filled and the two
// oldest elements deleted, so the appended bytes follow the two
// remaining default-valued elements in iteration order.
test "CircBuf append slice with wrap" {
const testing = std.testing;
const alloc = testing.allocator;
const Buf = CircBuf(u8, 0);
var buf = try Buf.init(alloc, 4);
defer buf.deinit(alloc);
// Fill the buffer
_ = buf.getPtrSlice(0, buf.capacity());
try testing.expect(buf.full);
try testing.expectEqual(@as(usize, 4), buf.len());
// Delete the two oldest elements, leaving two default (0) values.
buf.deleteOldest(2);
try testing.expect(!buf.full);
try testing.expectEqual(@as(usize, 2), buf.len());
try buf.appendSlice("AB");
{
// Expect the two remaining zeros followed by the appended bytes.
var it = buf.iterator(.forward);
try testing.expect(it.next().?.* == 0);
try testing.expect(it.next().?.* == 0);
try testing.expect(it.next().?.* == 'A');
try testing.expect(it.next().?.* == 'B');
try testing.expect(it.next() == null);
}
}
test "CircBuf getPtrSlice fits" {
const testing = std.testing;
const alloc = testing.allocator;
@ -379,7 +524,7 @@ test "getPtrSlice fits" {
try testing.expectEqual(@as(usize, 11), buf.len());
}
test "getPtrSlice wraps" {
test "CircBuf getPtrSlice wraps" {
const testing = std.testing;
const alloc = testing.allocator;
@ -435,7 +580,7 @@ test "getPtrSlice wraps" {
}
}
test "rotateToZero" {
test "CircBuf rotateToZero" {
const testing = std.testing;
const alloc = testing.allocator;
@ -447,7 +592,7 @@ test "rotateToZero" {
try buf.rotateToZero(alloc);
}
test "rotateToZero offset" {
test "CircBuf rotateToZero offset" {
const testing = std.testing;
const alloc = testing.allocator;
@ -471,7 +616,7 @@ test "rotateToZero offset" {
try testing.expectEqual(@as(usize, 1), buf.head);
}
test "rotateToZero wraps" {
test "CircBuf rotateToZero wraps" {
const testing = std.testing;
const alloc = testing.allocator;
@ -511,7 +656,7 @@ test "rotateToZero wraps" {
}
}
test "rotateToZero full no wrap" {
test "CircBuf rotateToZero full no wrap" {
const testing = std.testing;
const alloc = testing.allocator;
@ -549,7 +694,32 @@ test "rotateToZero full no wrap" {
}
}
test "resize grow" {
// Verifies that a zero-capacity buffer (which reports itself as full)
// can be grown with resize() and then accepts appends.
test "CircBuf resize grow from zero" {
const testing = std.testing;
const alloc = testing.allocator;
const Buf = CircBuf(u8, 0);
var buf = try Buf.init(alloc, 0);
defer buf.deinit(alloc);
// A buffer with no capacity has no free slots, so it starts out full.
try testing.expect(buf.full);
// Resize
try buf.resize(alloc, 2);
try testing.expect(!buf.full);
try testing.expectEqual(@as(usize, 0), buf.len());
try testing.expectEqual(@as(usize, 2), buf.capacity());
try buf.append(1);
try buf.append(2);
{
// Both values must be readable, in order, from the first slice.
const slices = buf.getPtrSlice(0, 2);
try testing.expectEqual(@as(u8, 1), slices[0][0]);
try testing.expectEqual(@as(u8, 2), slices[0][1]);
}
}
test "CircBuf resize grow" {
const testing = std.testing;
const alloc = testing.allocator;
@ -582,7 +752,7 @@ test "resize grow" {
}
}
test "resize shrink" {
test "CircBuf resize shrink" {
const testing = std.testing;
const alloc = testing.allocator;

View File

@ -2544,6 +2544,50 @@ pub fn getCell(self: *const PageList, pt: point.Point) ?Cell {
};
}
/// Options for PageList.encodeUtf8.
pub const EncodeUtf8Options = struct {
/// The start and end points of the dump, both inclusive. The x will
/// be ignored and the full row will always be dumped.
tl: Pin,
/// End point of the dump. Passed as-is to the page iterator created
/// from `tl`; null is the default.
br: ?Pin = null,
/// If true, this will unwrap soft-wrapped lines. If false, this will
/// dump the screen as it is visually seen in a rendered window.
unwrap: bool = true,
/// Forwarded unchanged to every per-page encode call.
/// See Page.EncodeUtf8Options.
cell_map: ?*Page.CellMap = null,
};
/// Write the contents of the page list between `opts.tl` and `opts.br`
/// to `writer` as UTF-8.
///
/// Use a buffered writer: output is produced in very small pieces,
/// frequently a single byte at a time.
///
/// Note: the tests for this live with Screen.dumpString, which predates
/// this function and is now a thin wrapper around it.
pub fn encodeUtf8(
    self: *const PageList,
    writer: anytype,
    opts: EncodeUtf8Options,
) anyerror!void {
    // No PageList state is needed today. This could arguably be a Pin
    // method, but it stays here in case state is required later.
    _ = self;

    // Trailing blank rows/cells reported by the previous page, carried
    // into the next page so blanks spanning pages are preserved.
    var carry: Page.EncodeUtf8Options.TrailingUtf8State = .{};

    var chunks = opts.tl.pageIterator(.right_down, opts.br);
    while (chunks.next()) |chunk| {
        carry = try chunk.node.data.encodeUtf8(writer, .{
            .start_y = chunk.start,
            .end_y = chunk.end,
            .unwrap = opts.unwrap,
            .preceding = carry,
            .cell_map = opts.cell_map,
        });
    }
}
/// Log a debug diagram of the page list to the provided writer.
///
/// EXAMPLE:

View File

@ -2731,95 +2731,15 @@ pub fn promptPath(
return .{ .x = to_x - from_x, .y = to_y - from_y };
}
pub const DumpString = struct {
/// The start and end points of the dump, both inclusive. The x will
/// be ignored and the full row will always be dumped.
tl: Pin,
br: ?Pin = null,
/// If true, this will unwrap soft-wrapped lines. If false, this will
/// dump the screen as it is visually seen in a rendered window.
unwrap: bool = true,
};
/// Dump the screen to a string. The writer given should be buffered;
/// this function does not attempt to efficiently write and generally writes
/// one byte at a time.
pub fn dumpString(
self: *const Screen,
writer: anytype,
opts: DumpString,
) !void {
var blank_rows: usize = 0;
var blank_cells: usize = 0;
var iter = opts.tl.rowIterator(.right_down, opts.br);
while (iter.next()) |row_offset| {
const rac = row_offset.rowAndCell();
const row = rac.row;
const cells = cells: {
const cells: [*]pagepkg.Cell = @ptrCast(rac.cell);
break :cells cells[0..self.pages.cols];
};
if (!pagepkg.Cell.hasTextAny(cells)) {
blank_rows += 1;
continue;
}
if (blank_rows > 0) {
for (0..blank_rows) |_| try writer.writeByte('\n');
blank_rows = 0;
}
if (!row.wrap or !opts.unwrap) {
// If we're not wrapped, we always add a newline.
// If we are wrapped, we only add a new line if we're unwrapping
// soft-wrapped lines.
blank_rows += 1;
}
if (!row.wrap_continuation or !opts.unwrap) {
// We should also reset blank cell counts at the start of each row
// unless we're unwrapping and this row is a wrap continuation.
blank_cells = 0;
}
for (cells) |*cell| {
// Skip spacers
switch (cell.wide) {
.narrow, .wide => {},
.spacer_head, .spacer_tail => continue,
}
// If we have a zero value, then we accumulate a counter. We
// only want to turn zero values into spaces if we have a non-zero
// char sometime later.
if (!cell.hasText()) {
blank_cells += 1;
continue;
}
if (blank_cells > 0) {
try writer.writeByteNTimes(' ', blank_cells);
blank_cells = 0;
}
switch (cell.content_tag) {
.codepoint => {
try writer.print("{u}", .{cell.content.codepoint});
},
.codepoint_grapheme => {
try writer.print("{u}", .{cell.content.codepoint});
const cps = row_offset.node.data.lookupGrapheme(cell).?;
for (cps) |cp| {
try writer.print("{u}", .{cp});
}
},
else => unreachable,
}
}
}
opts: PageList.EncodeUtf8Options,
) anyerror!void {
try self.pages.encodeUtf8(writer, opts);
}
/// You should use dumpString, this is a restricted version mostly for
@ -8548,3 +8468,81 @@ test "Screen: adjustCapacity cursor style ref count" {
);
}
}
// Verifies that the cell map produced while dumping the screen has one
// entry per output byte and that newline bytes are mapped to cells.
test "Screen UTF8 cell map with newlines" {
const testing = std.testing;
const alloc = testing.allocator;
var s = try Screen.init(alloc, 80, 24, 0);
defer s.deinit();
try s.testWriteString("A\n\nB\n\nC");
var cell_map = Page.CellMap.init(alloc);
defer cell_map.deinit();
var builder = std.ArrayList(u8).init(alloc);
defer builder.deinit();
try s.dumpString(builder.writer(), .{
.tl = s.pages.getTopLeft(.screen),
.br = s.pages.getBottomRight(.screen),
.cell_map = &cell_map,
});
try testing.expectEqual(7, builder.items.len);
try testing.expectEqualStrings("A\n\nB\n\nC", builder.items);
// One cell map entry per encoded byte.
try testing.expectEqual(builder.items.len, cell_map.items.len);
// Byte 0 is 'A' at the first cell of the first row.
try testing.expectEqual(Page.CellMapEntry{
.x = 0,
.y = 0,
}, cell_map.items[0]);
// Byte 1 is the newline ending row 0; it maps one past the 'A'.
try testing.expectEqual(Page.CellMapEntry{
.x = 1,
.y = 0,
}, cell_map.items[1]);
// Byte 2 is the newline for the blank row 1.
try testing.expectEqual(Page.CellMapEntry{
.x = 0,
.y = 1,
}, cell_map.items[2]);
// Byte 3 is 'B' at the start of row 2.
try testing.expectEqual(Page.CellMapEntry{
.x = 0,
.y = 2,
}, cell_map.items[3]);
}
// Verifies the cell map when the only text is preceded by a blank row
// and by blank cells on its own row.
test "Screen UTF8 cell map with blank prefix" {
const testing = std.testing;
const alloc = testing.allocator;
var s = try Screen.init(alloc, 80, 24, 0);
defer s.deinit();
// Place the cursor so that "B" lands at x=2, y=1.
s.cursorAbsolute(2, 1);
try s.testWriteString("B");
var cell_map = Page.CellMap.init(alloc);
defer cell_map.deinit();
var builder = std.ArrayList(u8).init(alloc);
defer builder.deinit();
try s.dumpString(builder.writer(), .{
.tl = s.pages.getTopLeft(.screen),
.br = s.pages.getBottomRight(.screen),
.cell_map = &cell_map,
});
try testing.expectEqualStrings("\n  B", builder.items);
// One cell map entry per encoded byte.
try testing.expectEqual(builder.items.len, cell_map.items.len);
// Byte 0 is the newline for the blank first row.
try testing.expectEqual(Page.CellMapEntry{
.x = 0,
.y = 0,
}, cell_map.items[0]);
// Bytes 1 and 2 are the spaces emitted for the two blank cells.
try testing.expectEqual(Page.CellMapEntry{
.x = 0,
.y = 1,
}, cell_map.items[1]);
try testing.expectEqual(Page.CellMapEntry{
.x = 1,
.y = 1,
}, cell_map.items[2]);
// Byte 3 is 'B' itself.
try testing.expectEqual(Page.CellMapEntry{
.x = 2,
.y = 1,
}, cell_map.items[3]);
}

View File

@ -18,6 +18,7 @@ pub const kitty = @import("kitty.zig");
pub const modes = @import("modes.zig");
pub const page = @import("page.zig");
pub const parse_table = @import("parse_table.zig");
pub const search = @import("search.zig");
pub const size = @import("size.zig");
pub const tmux = @import("tmux.zig");
pub const x11_color = @import("x11_color.zig");

View File

@ -1481,6 +1481,179 @@ pub const Page = struct {
return self.grapheme_map.map(self.memory).capacity();
}
/// Options for encoding the page as UTF-8.
pub const EncodeUtf8Options = struct {
/// The range of rows to encode. If end_y is null, then it will
/// encode to the end of the page.
start_y: size.CellCountInt = 0,
end_y: ?size.CellCountInt = null,
/// If true, this will unwrap soft-wrapped lines. If false, this will
/// dump the screen as it is visually seen in a rendered window.
unwrap: bool = true,
/// Preceding state from encoding the prior page. Used to preserve
/// blanks properly across multiple pages. The default (all zero)
/// means there are no pending blanks.
preceding: TrailingUtf8State = .{},
/// If non-null, one entry holding the x/y coordinates of the source
/// cell is appended to this list for every byte written to the
/// UTF-8 encoded output. For a list that starts out empty, the index
/// in the array is the byte offset in the output where 0 is the
/// cursor of the writer when the function is called.
///
/// NOTE(review): encodeUtf8 only appends to this list and does not
/// clear it first, so pass an empty list if indices must line up
/// with byte offsets.
cell_map: ?*CellMap = null,
/// Trailing state for UTF-8 encoding: the counts of blank rows and
/// blank cells that were seen but not yet written out.
pub const TrailingUtf8State = struct {
rows: usize = 0,
cells: usize = 0,
};
};
/// List of cell coordinates, one entry per encoded output byte.
/// See the cell_map field of EncodeUtf8Options.
pub const CellMap = std.ArrayList(CellMapEntry);
/// The x/y coordinate of a single cell in the cell map.
pub const CellMapEntry = struct {
/// Row index of the cell.
y: size.CellCountInt,
/// Column index of the cell.
x: size.CellCountInt,
};
/// Encode the page contents as UTF-8.
///
/// `opts.preceding` seeds the blank row/cell counters so that blanks can
/// be accumulated across multiple pages: pass the value returned from
/// encoding the previous page (the default means "no pending blanks").
/// The returned state holds the blanks that were seen at the end of this
/// page but not yet written.
///
/// If `opts.cell_map` is set, one entry is appended to it for every byte
/// written to `writer`. Errors from the writer and from growing the cell
/// map are propagated.
///
/// Note: Many tests for this function are done via Screen.dumpString
/// tests since that function is a thin wrapper around this one and
/// it makes it easier to test input contents.
pub fn encodeUtf8(
    self: *const Page,
    writer: anytype,
    opts: EncodeUtf8Options,
) anyerror!EncodeUtf8Options.TrailingUtf8State {
    var blank_rows: usize = opts.preceding.rows;
    var blank_cells: usize = opts.preceding.cells;

    const start_y: size.CellCountInt = opts.start_y;
    const end_y: size.CellCountInt = opts.end_y orelse self.size.rows;

    // We can probably avoid this by doing the logic below in a different
    // way. The reason this exists is so that when we end a non-blank
    // line with a newline, we can correctly map the cell map over to
    // the correct x value.
    //
    // For example "A\nB". The cell map for "\n" should be (1, 0).
    // This is tested in Screen.zig so feel free to refactor this.
    var last_x: size.CellCountInt = 0;

    for (start_y..end_y) |y_usize| {
        const y: size.CellCountInt = @intCast(y_usize);
        const row: *Row = self.getRow(y);
        const cells: []const Cell = self.getCells(row);

        // If this row is blank, accumulate to avoid a bunch of extra
        // work later. If it isn't blank, make sure we dump all our
        // blanks.
        if (!Cell.hasTextAny(cells)) {
            blank_rows += 1;
            continue;
        }

        for (1..blank_rows + 1) |i| {
            try writer.writeByte('\n');

            // This is tested in Screen.zig, i.e. one test is
            // "cell map with newlines"
            if (opts.cell_map) |cell_map| {
                try cell_map.append(.{
                    .x = last_x,
                    // Row that this newline terminates. The pending
                    // blank rows may have been carried in through
                    // `opts.preceding`, in which case there can be more
                    // of them than rows above `y` in this page. Add
                    // before subtracting and saturate at zero so the
                    // unsigned arithmetic can never underflow; such
                    // carried-over newlines are attributed to row 0.
                    .y = @intCast((y_usize + i - 1) -| blank_rows),
                });

                last_x = 0;
            }
        }
        blank_rows = 0;

        // If we're not wrapped, we always add a newline so after
        // the row is printed we can add a newline.
        if (!row.wrap or !opts.unwrap) blank_rows += 1;

        // If the row doesn't continue a wrap then we need to reset
        // our blank cell count.
        if (!row.wrap_continuation or !opts.unwrap) blank_cells = 0;

        // Go through each cell and print it
        for (cells, 0..) |*cell, x_usize| {
            const x: size.CellCountInt = @intCast(x_usize);

            // Skip spacers
            switch (cell.wide) {
                .narrow, .wide => {},
                .spacer_head, .spacer_tail => continue,
            }

            // If we have a zero value, then we accumulate a counter. We
            // only want to turn zero values into spaces if we have a non-zero
            // char sometime later.
            if (!cell.hasText()) {
                blank_cells += 1;
                continue;
            }
            if (blank_cells > 0) {
                try writer.writeByteNTimes(' ', blank_cells);
                if (opts.cell_map) |cell_map| {
                    for (0..blank_cells) |i| try cell_map.append(.{
                        // Blank cells can be carried over from a previous
                        // row or page (unwrapped continuation), so there
                        // may be more of them than columns to the left of
                        // `x`. Same underflow-safe form as for rows.
                        .x = @intCast((x_usize + i) -| blank_cells),
                        .y = y,
                    });
                }

                blank_cells = 0;
            }

            switch (cell.content_tag) {
                .codepoint => {
                    try writer.print("{u}", .{cell.content.codepoint});
                    if (opts.cell_map) |cell_map| {
                        last_x = x + 1;
                        try cell_map.append(.{
                            .x = x,
                            .y = y,
                        });
                    }
                },

                .codepoint_grapheme => {
                    try writer.print("{u}", .{cell.content.codepoint});
                    if (opts.cell_map) |cell_map| {
                        last_x = x + 1;
                        try cell_map.append(.{
                            .x = x,
                            .y = y,
                        });
                    }

                    // Every extra codepoint of the grapheme cluster
                    // belongs to the same cell.
                    for (self.lookupGrapheme(cell).?) |cp| {
                        try writer.print("{u}", .{cp});
                        if (opts.cell_map) |cell_map| try cell_map.append(.{
                            .x = x,
                            .y = y,
                        });
                    }
                },

                // Unreachable since we do hasText() above
                .bg_color_palette,
                .bg_color_rgb,
                => unreachable,
            }
        }
    }

    return .{ .rows = blank_rows, .cells = blank_cells };
}
/// Returns the bitset for the dirty bits on this page.
///
/// The returned value is a DynamicBitSetUnmanaged but it is NOT

864
src/terminal/search.zig Normal file
View File

@ -0,0 +1,864 @@
//! Search functionality for the terminal.
//!
//! At the time of writing this comment, this is a **work in progress**.
//!
//! Search at the time of writing is implemented using a simple
//! boyer-moore-horspool algorithm. The suboptimal part of the implementation
//! is that we need to encode each terminal page into a text buffer in order
//! to apply BMH to it. This is because the terminal page is not laid out
//! in a flat text form.
//!
//! To minimize memory usage, we use a sliding window to search for the
//! needle. The sliding window only keeps the minimum amount of page data
//! in memory to search for a needle (i.e. `needle.len - 1` bytes of overlap
//! between terminal pages).
//!
//! Future work:
//!
//! - PageListSearch on a PageList concurrently with another thread
//! - Handle pruned pages in a PageList to ensure we don't keep references
//! - Repeat search a changing active area of the screen
//! - Reverse search so that more recent matches are found first
//!
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const CircBuf = @import("../datastruct/main.zig").CircBuf;
const terminal = @import("main.zig");
const point = terminal.point;
const Page = terminal.Page;
const PageList = terminal.PageList;
const Pin = PageList.Pin;
const Selection = terminal.Selection;
const Screen = terminal.Screen;
/// Searches for a term in a PageList structure.
///
/// At the time of writing, this does not support searching a pagelist
/// while it is being used by another thread. This will be resolved
/// in the future.
pub const PageListSearch = struct {
    /// The list we're searching.
    list: *PageList,

    /// The sliding window of page contents and nodes to search.
    window: SlidingWindow,

    /// Create a search over `list` for `needle`.
    ///
    /// The needle is borrowed, not copied: it must stay valid for as
    /// long as the search is in use.
    pub fn init(
        alloc: Allocator,
        list: *PageList,
        needle: []const u8,
    ) Allocator.Error!PageListSearch {
        return .{
            .list = list,
            .window = try SlidingWindow.init(alloc, needle),
        };
    }

    /// Release the memory held by the sliding window.
    pub fn deinit(self: *PageListSearch, alloc: Allocator) void {
        self.window.deinit(alloc);
    }

    /// Return the next match for the needle in the pagelist, or null
    /// once there are no more matches.
    pub fn next(
        self: *PageListSearch,
        alloc: Allocator,
    ) Allocator.Error!?Selection {
        // Anything still pending in the current window wins.
        if (self.window.next()) |sel| return sel;

        // Decide where to continue: after the newest node already in
        // the window, or at the head of the list if the window has
        // never been filled.
        var cursor: ?*PageList.List.Node = start: {
            const newest = self.window.meta.last() orelse
                break :start self.list.pages.first;
            break :start newest.node.next;
        };

        // Feed one node at a time and search after each append. Doing
        // it in this order keeps the window's memory usage bounded.
        while (cursor) |node| {
            try self.window.append(alloc, node);
            if (self.window.next()) |sel| return sel;
            cursor = node.next;
        }

        // Ran off the end of the pagelist without a match.
        return null;
    }
};
/// Searches page nodes via a sliding window. The sliding window maintains
/// the invariant that data isn't pruned until (1) we've searched it and
/// (2) we've accounted for overlaps across pages to fit the needle.
///
/// The sliding window is first initialized empty. Pages are then appended
/// in the order to search them. If you're doing a reverse search then the
/// pages should be appended in reverse order and the needle should be
/// reversed.
///
/// All appends grow the window. The window is only pruned when a search
/// is done (positive or negative match) via `next()`.
///
/// To avoid unnecessary memory growth, the recommended usage is to
/// call `next()` until it returns null and then `append` the next page
/// and repeat the process. This will always maintain the minimum
/// required memory to search for the needle.
const SlidingWindow = struct {
/// The data buffer is a circular buffer of u8 that contains the
/// encoded page text that we can use to search for the needle.
data: DataBuf,
/// The meta buffer is a circular buffer that contains the metadata
/// about the pages we're searching. This usually isn't that large
/// so callers must iterate through it to find the offset to map
/// data to meta.
meta: MetaBuf,
/// Offset into data for our current state. This handles the
/// situation where our search moved through meta[0] but didn't
/// do enough to prune it.
data_offset: usize = 0,
/// The needle we're searching for. Does not own the memory.
needle: []const u8,
/// A buffer to store the overlap search data. This is used to search
/// overlaps between pages where the match starts on one page and
/// ends on another. The length is always `needle.len * 2`.
overlap_buf: []u8,
const DataBuf = CircBuf(u8, 0);
const MetaBuf = CircBuf(Meta, undefined);
/// Metadata for one page node that has been appended to the window.
const Meta = struct {
/// The page list node this entry describes.
node: *PageList.List.Node,
/// Cell coordinates for the bytes encoded from this node's page;
/// filled in by append().
cell_map: Page.CellMap,
/// Free the memory held by the cell map.
pub fn deinit(self: *Meta) void {
self.cell_map.deinit();
}
};
/// Create an empty sliding window that searches for `needle`.
///
/// The needle is borrowed and must outlive the window. Both circular
/// buffers start with zero capacity; the overlap scratch buffer is
/// allocated up front with `needle.len * 2` bytes.
pub fn init(
    alloc: Allocator,
    needle: []const u8,
) Allocator.Error!SlidingWindow {
    var data_buf = try DataBuf.init(alloc, 0);
    errdefer data_buf.deinit(alloc);

    var meta_buf = try MetaBuf.init(alloc, 0);
    errdefer meta_buf.deinit(alloc);

    // Last fallible step: if it fails the errdefers above clean up.
    return .{
        .data = data_buf,
        .meta = meta_buf,
        .needle = needle,
        .overlap_buf = try alloc.alloc(u8, needle.len * 2),
    };
}
/// Free everything owned by the window: the per-node cell maps, both
/// circular buffers and the overlap scratch buffer.
pub fn deinit(self: *SlidingWindow, alloc: Allocator) void {
    // Each meta entry owns its cell map, so release those before the
    // buffer that stores the entries.
    var it = self.meta.iterator(.forward);
    while (it.next()) |entry| entry.deinit();
    self.meta.deinit(alloc);

    self.data.deinit(alloc);
    alloc.free(self.overlap_buf);
}
/// Drop all buffered page data and metadata and reset the search
/// offset, while keeping the circular buffers' allocated capacity.
pub fn clearAndRetainCapacity(self: *SlidingWindow) void {
    // The cell maps are owned per entry and must be freed before the
    // entries are discarded.
    var it = self.meta.iterator(.forward);
    while (it.next()) |entry| entry.deinit();

    self.data_offset = 0;
    self.data.clear();
    self.meta.clear();
}
/// Search the window for the next occurrence of the needle. As
/// the window moves, the window will prune itself while maintaining
/// the invariant that the window is always big enough to contain
/// the needle.
///
/// Returns the selection covering the match, or null if the data
/// currently in the window contains no (further) match. An empty
/// needle never matches.
pub fn next(self: *SlidingWindow) ?Selection {
    // An empty needle has nothing to find. Bail out early: the logic
    // below computes `needle.len - 1`, which would underflow, and a
    // zero-length "match" cannot be turned into a selection.
    if (self.needle.len == 0) return null;

    const slices = slices: {
        // If we have less data than the needle then we can't possibly match
        const data_len = self.data.len();
        if (data_len < self.needle.len) return null;

        // Only look at data at or after our current offset. Because
        // the buffer is circular this can be split into two slices.
        break :slices self.data.getPtrSlice(
            self.data_offset,
            data_len - self.data_offset,
        );
    };

    // Search the first slice for the needle.
    if (std.mem.indexOf(u8, slices[0], self.needle)) |idx| {
        return self.selection(idx, self.needle.len);
    }

    // Search the overlap buffer for the needle.
    if (slices[0].len > 0 and slices[1].len > 0) overlap: {
        // Get up to needle.len - 1 bytes from each side (as much as
        // we can) and store it in the overlap buffer.
        const prefix: []const u8 = prefix: {
            const len = @min(slices[0].len, self.needle.len - 1);
            const idx = slices[0].len - len;
            break :prefix slices[0][idx..];
        };
        const suffix: []const u8 = suffix: {
            const len = @min(slices[1].len, self.needle.len - 1);
            break :suffix slices[1][0..len];
        };
        const overlap_len = prefix.len + suffix.len;
        assert(overlap_len <= self.overlap_buf.len);
        @memcpy(self.overlap_buf[0..prefix.len], prefix);
        @memcpy(self.overlap_buf[prefix.len..overlap_len], suffix);

        // Search the overlap
        const idx = std.mem.indexOf(
            u8,
            self.overlap_buf[0..overlap_len],
            self.needle,
        ) orelse break :overlap;

        // We found a match in the overlap buffer. We need to map the
        // index back to the data buffer in order to get our selection.
        return self.selection(
            slices[0].len - prefix.len + idx,
            self.needle.len,
        );
    }

    // Search the last slice for the needle.
    if (std.mem.indexOf(u8, slices[1], self.needle)) |idx| {
        return self.selection(slices[0].len + idx, self.needle.len);
    }

    // No match. We keep `needle.len - 1` bytes available to
    // handle the future overlap case.
    var meta_it = self.meta.iterator(.reverse);
    prune: {
        // Walk the metas from newest to oldest, counting how many
        // bytes we have saved so far.
        var saved: usize = 0;
        while (meta_it.next()) |meta| {
            const needed = self.needle.len - 1 - saved;
            if (meta.cell_map.items.len >= needed) {
                // We save up to this meta. We set our data offset
                // to exactly where it needs to be to continue
                // searching.
                self.data_offset = meta.cell_map.items.len - needed;
                break;
            }

            saved += meta.cell_map.items.len;
        } else {
            // If we exited the while loop naturally then we
            // never got the amount we needed and so there is
            // nothing to prune.
            assert(saved < self.needle.len - 1);
            break :prune;
        }

        // meta_it.idx counts the metas we visited (and must keep);
        // everything older than those can go.
        const prune_count = self.meta.len() - meta_it.idx;
        if (prune_count == 0) {
            // This can happen if we need to save up to the first
            // meta value to retain our window.
            break :prune;
        }

        // We can now delete all the metas up to but NOT including
        // the meta we found through meta_it.
        meta_it = self.meta.iterator(.forward);
        var prune_data_len: usize = 0;
        for (0..prune_count) |_| {
            const meta = meta_it.next().?;
            prune_data_len += meta.cell_map.items.len;
            meta.deinit();
        }
        self.meta.deleteOldest(prune_count);
        self.data.deleteOldest(prune_data_len);
    }

    // Our data offset now moves to needle.len - 1 from the end so
    // that we can handle the overlap case.
    self.data_offset = self.data.len() - self.needle.len + 1;

    self.assertIntegrity();
    return null;
}
/// Return a selection for the given start and length into the data
/// buffer and also prune the data/meta buffers if possible up to
/// this start index.
///
/// The start index is assumed to be relative to the offset. i.e.
/// index zero is actually at `self.data[self.data_offset]`. The
/// selection will account for the offset.
///
/// On return, `self.data_offset` points one byte past the start of
/// the match so the same match is not reported twice.
fn selection(
self: *SlidingWindow,
start_offset: usize,
len: usize,
) Selection {
// Convert the offset-relative index into an index into the data buffer.
const start = start_offset + self.data_offset;
assert(start < self.data.len());
assert(start + len <= self.data.len());
// meta_consumed is the number of bytes we've consumed in the
// data buffer up to and NOT including the meta where we've
// found our pin. This is important because it tells us the
// amount of data we can safely delete from self.data since
// we can't partially delete a meta block's data. (The partial
// amount is represented by self.data_offset).
var meta_it = self.meta.iterator(.forward);
var meta_consumed: usize = 0;
const tl: Pin = pin(&meta_it, &meta_consumed, start);
// Store the information required to prune later. We store this
// now because we only want to prune up to our START so we can
// find overlapping matches.
const tl_meta_idx = meta_it.idx - 1;
const tl_meta_consumed = meta_consumed;
// We have to seek back so that we reinspect our current
// iterator value again in case the start and end are in the
// same segment.
meta_it.seekBy(-1);
// The bottom-right pin is the last byte of the match (inclusive).
const br: Pin = pin(&meta_it, &meta_consumed, start + len - 1);
assert(meta_it.idx >= 1);
// Our offset into the current meta block is the start index
// minus the amount of data fully consumed. We then add one
// to move one past the match so we don't repeat it.
self.data_offset = start - tl_meta_consumed + 1;
// tl_meta_idx is the index of the meta block that contains the
// start of the match. If it is greater than zero then there are
// older meta blocks in front of it that we can prune.
if (tl_meta_idx > 0) {
// Deinit all our memory in the meta blocks prior to our
// match.
const meta_count = tl_meta_idx;
meta_it.reset();
for (0..meta_count) |_| meta_it.next().?.deinit();
if (comptime std.debug.runtime_safety) {
// Sanity check: the next meta after the pruned ones must be
// the one holding the start of the match.
assert(meta_it.idx == meta_count);
assert(meta_it.next().?.node == tl.node);
}
self.meta.deleteOldest(meta_count);
// Delete all the data up to our current index.
assert(tl_meta_consumed > 0);
self.data.deleteOldest(tl_meta_consumed);
}
self.assertIntegrity();
return Selection.init(tl, br, false);
}
/// Translate an index into the data buffer into a pin.
///
/// `it` and `offset` are in/out parameters: the iterator is advanced
/// and `offset` is increased by the size of every meta block that was
/// skipped. This lets a caller resolve several increasing indexes
/// without restarting from the front; see selection().
///
/// Precondition: the index must be within the data buffer.
fn pin(
    it: *MetaBuf.Iterator,
    offset: *usize,
    idx: usize,
) Pin {
    while (it.next()) |meta| {
        const block_len = meta.cell_map.items.len;

        // Where the index would fall inside this meta's cell map,
        // given everything skipped so far.
        const local = idx - offset.*;
        if (local < block_len) {
            // This meta contains the index.
            const entry = meta.cell_map.items[local];
            return .{
                .node = meta.node,
                .y = entry.y,
                .x = entry.x,
            };
        }

        // The index lies beyond this meta: account for its bytes and
        // move on. We only ever look forward.
        offset.* += block_len;
    }

    // The precondition guarantees that some meta contains the index.
    unreachable;
}
/// Append a node to the sliding window.
///
/// The node's page is encoded as UTF-8 and the resulting bytes, together
/// with a cell map of the same length, are added to the window. This
/// always grows the window; nothing is pruned here. Pruning only happens
/// when data is consumed by a search (via next()).
///
/// Returns error.OutOfMemory if encoding fails or if either buffer
/// cannot be grown.
pub fn append(
    self: *SlidingWindow,
    alloc: Allocator,
    node: *PageList.List.Node,
) Allocator.Error!void {
    // Metadata for this node. It is released again if anything below
    // fails before the window has stored it.
    var meta: Meta = .{
        .node = node,
        .cell_map = Page.CellMap.init(alloc),
    };
    errdefer meta.deinit();

    // The page is first encoded into scratch memory and then copied
    // into the circular buffer. This is suboptimal; in the future we
    // should benchmark whether encoding directly into the circular
    // buffer is worthwhile.
    var scratch: std.ArrayListUnmanaged(u8) = .{};
    defer scratch.deinit(alloc);

    // The writer's error set is anyerror, but the only realistic
    // failure when writing to an ArrayList is running out of memory.
    const page: *const Page = &node.data;
    _ = page.encodeUtf8(
        scratch.writer(alloc),
        .{ .cell_map = &meta.cell_map },
    ) catch return error.OutOfMemory;

    // There must be exactly one cell map entry per encoded byte.
    const bytes = scratch.items;
    assert(meta.cell_map.items.len == bytes.len);

    // Reserve room in both buffers before storing anything.
    try self.data.ensureUnusedCapacity(alloc, bytes.len);
    try self.meta.ensureUnusedCapacity(alloc, 1);

    // Store the encoded bytes and their metadata.
    try self.data.appendSlice(bytes);
    try self.meta.append(meta);

    self.assertIntegrity();
}
/// Consistency check for the window's internal state. This does nothing
/// unless runtime safety is enabled.
fn assertIntegrity(self: *const SlidingWindow) void {
    if (comptime std.debug.runtime_safety) {
        const data_len = self.data.len();

        // The metadata must describe the data exactly: the cell map
        // lengths of all meta blocks add up to the data length.
        var mapped: usize = 0;
        var it = self.meta.iterator(.forward);
        while (it.next()) |m| mapped += m.cell_map.items.len;
        assert(mapped == data_len);

        // The data offset must point inside the data. Note that this
        // also requires the data to be non-empty whenever this runs.
        assert(self.data_offset < data_len);
    }
}
};
test "PageListSearch single page" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var s = try Screen.init(alloc, 80, 24, 0);
    defer s.deinit();
    try s.testWriteString("hello. boo! hello. boo!");

    // Everything we wrote must live in a single page.
    try testing.expect(s.pages.pages.first == s.pages.pages.last);

    var search = try PageListSearch.init(alloc, &s.pages, "boo!");
    defer search.deinit(alloc);

    // Expected start/end of each match, in the order they are found.
    const Match = struct { start: point.Point, end: point.Point };
    const expected = [_]Match{
        .{
            .start = .{ .active = .{ .x = 7, .y = 0 } },
            .end = .{ .active = .{ .x = 10, .y = 0 } },
        },
        .{
            .start = .{ .active = .{ .x = 19, .y = 0 } },
            .end = .{ .active = .{ .x = 22, .y = 0 } },
        },
    };
    for (expected) |want| {
        const sel = (try search.next(alloc)).?;
        try testing.expectEqual(
            want.start,
            s.pages.pointFromPin(.active, sel.start()).?,
        );
        try testing.expectEqual(
            want.end,
            s.pages.pointFromPin(.active, sel.end()).?,
        );
    }

    // Once exhausted, the search keeps reporting no match.
    for (0..2) |_| try testing.expect((try search.next(alloc)) == null);
}
test "SlidingWindow empty on init" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var window = try SlidingWindow.init(alloc, "boo!");
    defer window.deinit(alloc);

    // A freshly initialized window holds neither data nor metadata.
    try testing.expectEqual(0, window.data.len());
    try testing.expectEqual(0, window.meta.len());
}
test "SlidingWindow single append" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var w = try SlidingWindow.init(alloc, "boo!");
    defer w.deinit(alloc);

    var s = try Screen.init(alloc, 80, 24, 0);
    defer s.deinit();
    try s.testWriteString("hello. boo! hello. boo!");

    // This test only covers the single-page case.
    try testing.expect(s.pages.pages.first == s.pages.pages.last);
    const only_page: *PageList.List.Node = s.pages.pages.first.?;
    try w.append(alloc, only_page);

    // Expected start/end of each match, in the order they are found.
    const Match = struct { start: point.Point, end: point.Point };
    const expected = [_]Match{
        .{
            .start = .{ .active = .{ .x = 7, .y = 0 } },
            .end = .{ .active = .{ .x = 10, .y = 0 } },
        },
        .{
            .start = .{ .active = .{ .x = 19, .y = 0 } },
            .end = .{ .active = .{ .x = 22, .y = 0 } },
        },
    };
    for (expected) |want| {
        const sel = w.next().?;
        try testing.expectEqual(
            want.start,
            s.pages.pointFromPin(.active, sel.start()).?,
        );
        try testing.expectEqual(
            want.end,
            s.pages.pointFromPin(.active, sel.end()).?,
        );
    }

    // Once exhausted, the window keeps reporting no match.
    for (0..2) |_| try testing.expect(w.next() == null);
}
test "SlidingWindow single append no match" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var w = try SlidingWindow.init(alloc, "nope!");
    defer w.deinit(alloc);

    var s = try Screen.init(alloc, 80, 24, 0);
    defer s.deinit();
    try s.testWriteString("hello. boo! hello. boo!");

    // This test only covers the single-page case.
    try testing.expect(s.pages.pages.first == s.pages.pages.last);
    const only_page: *PageList.List.Node = s.pages.pages.first.?;
    try w.append(alloc, only_page);

    // The needle does not occur, no matter how often we ask.
    for (0..2) |_| try testing.expect(w.next() == null);

    // The page must still be held by the window afterwards.
    try testing.expectEqual(1, w.meta.len());
}
test "SlidingWindow two pages" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var w = try SlidingWindow.init(alloc, "boo!");
    defer w.deinit(alloc);

    var s = try Screen.init(alloc, 80, 24, 1000);
    defer s.deinit();

    // Fill the first page so that its final bytes are "boo!".
    const rows = s.pages.pages.first.?.data.capacity.rows;
    for (0..rows - 1) |_| try s.testWriteString("\n");
    for (0..s.pages.cols - 4) |_| try s.testWriteString("x");
    try s.testWriteString("boo!");
    try testing.expect(s.pages.pages.first == s.pages.pages.last);

    // The newline spills over into a second page, which also gets a
    // "boo!".
    try s.testWriteString("\n");
    try testing.expect(s.pages.pages.first != s.pages.pages.last);
    try s.testWriteString("hello. boo!");

    // Feed both pages to the window, oldest first.
    const head: *PageList.List.Node = s.pages.pages.first.?;
    try w.append(alloc, head);
    try w.append(alloc, head.next.?);

    // Expected start/end of each match, in the order they are found:
    // one at the end of the first page, one in the second page.
    const Match = struct { start: point.Point, end: point.Point };
    const expected = [_]Match{
        .{
            .start = .{ .active = .{ .x = 76, .y = 22 } },
            .end = .{ .active = .{ .x = 79, .y = 22 } },
        },
        .{
            .start = .{ .active = .{ .x = 7, .y = 23 } },
            .end = .{ .active = .{ .x = 10, .y = 23 } },
        },
    };
    for (expected) |want| {
        const sel = w.next().?;
        try testing.expectEqual(
            want.start,
            s.pages.pointFromPin(.active, sel.start()).?,
        );
        try testing.expectEqual(
            want.end,
            s.pages.pointFromPin(.active, sel.end()).?,
        );
    }

    // Once exhausted, the window keeps reporting no match.
    for (0..2) |_| try testing.expect(w.next() == null);
}
test "SlidingWindow two pages match across boundary" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var w = try SlidingWindow.init(alloc, "hello, world");
    defer w.deinit(alloc);

    var s = try Screen.init(alloc, 80, 24, 1000);
    defer s.deinit();

    // Fill the first page so that its final bytes are "hell", i.e. the
    // beginning of the needle.
    const rows = s.pages.pages.first.?.data.capacity.rows;
    for (0..rows - 1) |_| try s.testWriteString("\n");
    for (0..s.pages.cols - 4) |_| try s.testWriteString("x");
    try s.testWriteString("hell");
    try testing.expect(s.pages.pages.first == s.pages.pages.last);

    // The rest of the needle spills over into a second page.
    try s.testWriteString("o, world!");
    try testing.expect(s.pages.pages.first != s.pages.pages.last);

    // Feed both pages to the window, oldest first.
    const head: *PageList.List.Node = s.pages.pages.first.?;
    try w.append(alloc, head);
    try w.append(alloc, head.next.?);

    // The single match starts on the first page and ends on the second.
    {
        const want_start: point.Point = .{ .active = .{ .x = 76, .y = 22 } };
        const want_end: point.Point = .{ .active = .{ .x = 7, .y = 23 } };

        const sel = w.next().?;
        try testing.expectEqual(
            want_start,
            s.pages.pointFromPin(.active, sel.start()).?,
        );
        try testing.expectEqual(
            want_end,
            s.pages.pointFromPin(.active, sel.end()).?,
        );
    }

    // Once exhausted, the window keeps reporting no match.
    for (0..2) |_| try testing.expect(w.next() == null);

    // We shouldn't prune because we don't have enough space, so both
    // pages are still held.
    try testing.expectEqual(2, w.meta.len());
}
test "SlidingWindow two pages no match prunes first page" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var w = try SlidingWindow.init(alloc, "nope!");
    defer w.deinit(alloc);

    var s = try Screen.init(alloc, 80, 24, 1000);
    defer s.deinit();

    // Fill the first page so that its final bytes are "boo!".
    const rows = s.pages.pages.first.?.data.capacity.rows;
    for (0..rows - 1) |_| try s.testWriteString("\n");
    for (0..s.pages.cols - 4) |_| try s.testWriteString("x");
    try s.testWriteString("boo!");
    try testing.expect(s.pages.pages.first == s.pages.pages.last);

    // The newline spills over into a second page.
    try s.testWriteString("\n");
    try testing.expect(s.pages.pages.first != s.pages.pages.last);
    try s.testWriteString("hello. boo!");

    // Feed both pages to the window, oldest first.
    const head: *PageList.List.Node = s.pages.pages.first.?;
    try w.append(alloc, head);
    try w.append(alloc, head.next.?);

    // The needle does not occur, no matter how often we ask.
    for (0..2) |_| try testing.expect(w.next() == null);

    // The first page should have been pruned, because the second page
    // alone has enough text to contain the needle.
    try testing.expectEqual(1, w.meta.len());
}
test "SlidingWindow two pages no match keeps both pages" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var s = try Screen.init(alloc, 80, 24, 1000);
    defer s.deinit();

    // Fill the first page so that its final bytes are "boo!".
    const rows = s.pages.pages.first.?.data.capacity.rows;
    for (0..rows - 1) |_| try s.testWriteString("\n");
    for (0..s.pages.cols - 4) |_| try s.testWriteString("x");
    try s.testWriteString("boo!");
    try testing.expect(s.pages.pages.first == s.pages.pages.last);

    // The newline spills over into a second page.
    try s.testWriteString("\n");
    try testing.expect(s.pages.pages.first != s.pages.pages.last);
    try s.testWriteString("hello. boo!");

    // Imaginary needle made only of 'x', with one byte per cell of the
    // first page's capacity. It doesn't match anything!
    const needle = try alloc.alloc(u8, rows * s.pages.cols);
    defer alloc.free(needle);
    @memset(needle, 'x');

    var w = try SlidingWindow.init(alloc, needle);
    defer w.deinit(alloc);

    // Feed both pages to the window, oldest first.
    const head: *PageList.List.Node = s.pages.pages.first.?;
    try w.append(alloc, head);
    try w.append(alloc, head.next.?);

    // The needle does not occur, no matter how often we ask.
    for (0..2) |_| try testing.expect(w.next() == null);

    // Nothing is pruned because both pages are needed to fit the needle.
    try testing.expectEqual(2, w.meta.len());
}
test "SlidingWindow single append across circular buffer boundary" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var w = try SlidingWindow.init(alloc, "abc");
    defer w.deinit(alloc);

    var s = try Screen.init(alloc, 80, 24, 0);
    defer s.deinit();
    try s.testWriteString("XXXXXXXXXXXXXXXXXXXboo!XXXXX");

    // The goal is to have data straddle the circular buffer's wrap
    // point. To get there we append the same page twice and run a
    // failing search, which prunes the first copy. The next append then
    // lands in the middle of the circular buffer. Each step is asserted
    // so this test fails if the implementation stops behaving that way.
    try testing.expect(s.pages.pages.first == s.pages.pages.last);
    const page: *PageList.List.Node = s.pages.pages.first.?;
    try w.append(alloc, page);
    try w.append(alloc, page);
    {
        // Nothing wraps around yet: all data is in the first slice.
        const parts = w.data.getPtrSlice(0, w.data.len());
        const front = parts[0];
        const wrapped = parts[1];
        try testing.expect(front.len > 0);
        try testing.expect(wrapped.len == 0);
    }

    // A search without a match prunes one page.
    try testing.expect(w.next() == null);
    try testing.expectEqual(1, w.meta.len());

    // Swap the needle for one that matches. It only needs to have the
    // same length (this is not a real API).
    w.needle = "boo";

    // Appending again now makes the data wrap around.
    try w.append(alloc, page);
    {
        const parts = w.data.getPtrSlice(0, w.data.len());
        const front = parts[0];
        const wrapped = parts[1];
        try testing.expect(front.len > 0);
        try testing.expect(wrapped.len > 0);
    }

    // Exactly one match is found.
    {
        const want_start: point.Point = .{ .active = .{ .x = 19, .y = 0 } };
        const want_end: point.Point = .{ .active = .{ .x = 21, .y = 0 } };

        const sel = w.next().?;
        try testing.expectEqual(
            want_start,
            s.pages.pointFromPin(.active, sel.start()).?,
        );
        try testing.expectEqual(
            want_end,
            s.pages.pointFromPin(.active, sel.end()).?,
        );
    }
    try testing.expect(w.next() == null);
}
test "SlidingWindow single append match on boundary" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var w = try SlidingWindow.init(alloc, "abcd");
    defer w.deinit(alloc);

    var s = try Screen.init(alloc, 80, 24, 0);
    defer s.deinit();
    try s.testWriteString("o!XXXXXXXXXXXXXXXXXXXbo");

    // The goal is to have data straddle the circular buffer's wrap
    // point. To get there we append the same page twice and run a
    // failing search, which prunes the first copy. The next append then
    // lands in the middle of the circular buffer. Each step is asserted
    // so this test fails if the implementation stops behaving that way.
    try testing.expect(s.pages.pages.first == s.pages.pages.last);
    const page: *PageList.List.Node = s.pages.pages.first.?;
    try w.append(alloc, page);
    try w.append(alloc, page);
    {
        // Nothing wraps around yet: all data is in the first slice.
        const parts = w.data.getPtrSlice(0, w.data.len());
        const front = parts[0];
        const wrapped = parts[1];
        try testing.expect(front.len > 0);
        try testing.expect(wrapped.len == 0);
    }

    // A search without a match prunes one page.
    try testing.expect(w.next() == null);
    try testing.expectEqual(1, w.meta.len());

    // Swap the needle for one that matches. It only needs to have the
    // same length (this is not a real API).
    w.needle = "boo!";

    // Appending again now makes the data wrap around.
    try w.append(alloc, page);
    {
        const parts = w.data.getPtrSlice(0, w.data.len());
        const front = parts[0];
        const wrapped = parts[1];
        try testing.expect(front.len > 0);
        try testing.expect(wrapped.len > 0);
    }

    // The match begins with the trailing "bo" of one copy of the page
    // and ends with the leading "o!" of the next copy.
    {
        const want_start: point.Point = .{ .active = .{ .x = 21, .y = 0 } };
        const want_end: point.Point = .{ .active = .{ .x = 1, .y = 0 } };

        const sel = w.next().?;
        try testing.expectEqual(
            want_start,
            s.pages.pointFromPin(.active, sel.start()).?,
        );
        try testing.expectEqual(
            want_end,
            s.pages.pointFromPin(.active, sel.end()).?,
        );
    }
    try testing.expect(w.next() == null);
}