terminal: search across two pages and pruning appears to be working

This commit is contained in:
Mitchell Hashimoto
2024-12-03 13:40:48 -05:00
parent d307b02e40
commit b487aa8e1f

View File

@ -53,8 +53,9 @@ pub const PageListSearch = struct {
} }
}; };
/// The sliding window of the pages we're searching. The window is always /// Search pages via a sliding window. The sliding window always maintains
/// big enough so that the needle can fit in it. /// the invariant that data isn't pruned until we've searched it and
/// accounted for overlaps across pages.
const SlidingWindow = struct { const SlidingWindow = struct {
/// The data buffer is a circular buffer of u8 that contains the /// The data buffer is a circular buffer of u8 that contains the
/// encoded page text that we can use to search for the needle. /// encoded page text that we can use to search for the needle.
@ -137,8 +138,42 @@ const SlidingWindow = struct {
return self.selection(slices[0].len + idx, needle.len); return self.selection(slices[0].len + idx, needle.len);
} }
// No match. Clear everything. // No match. We keep `needle.len - 1` bytes available to
self.clearAndRetainCapacity(); // handle the future overlap case.
var meta_it = self.meta.iterator(.reverse);
prune: {
var saved: usize = 0;
while (meta_it.next()) |meta| {
const needed = needle.len - 1 - saved;
if (meta.cell_map.items.len >= needed) {
// We save up to this meta. We set our data offset
// to exactly where it needs to be to continue
// searching.
self.data_offset = meta.cell_map.items.len - needed;
break;
}
saved += meta.cell_map.items.len;
} else {
// If we exited the while loop naturally then we
// never got the amount we needed and so there is
// nothing to prune.
assert(saved < needle.len - 1);
break :prune;
}
const prune_count = self.meta.len() - meta_it.idx;
if (prune_count == 0) {
// This can happen if we need to save up to the first
// meta value to retain our window.
break :prune;
}
// We can now delete all the metas up to but NOT including
// the meta we found through meta_it.
@panic("TODO: test");
}
return null; return null;
} }
@ -158,71 +193,74 @@ const SlidingWindow = struct {
assert(start < self.data.len()); assert(start < self.data.len());
assert(start + len <= self.data.len()); assert(start + len <= self.data.len());
// meta_consumed is the number of bytes we've consumed in the
// data buffer up to and NOT including the meta where we've
// found our pin. This is important because it tells us the
// amount of data we can safely deleted from self.data since
// we can't partially delete a meta block's data. (The partial
// amount is represented by self.data_offset).
var meta_it = self.meta.iterator(.forward); var meta_it = self.meta.iterator(.forward);
const tl: Pin = pin(&meta_it, start); var meta_consumed: usize = 0;
const tl: Pin = pin(&meta_it, &meta_consumed, start);
// We have to seek back so that we reinspect our current // We have to seek back so that we reinspect our current
// iterator value again in case the start and end are in the // iterator value again in case the start and end are in the
// same segment. // same segment.
meta_it.seekBy(-1); meta_it.seekBy(-1);
const br: Pin = pin(&meta_it, start + len - 1); const br: Pin = pin(&meta_it, &meta_consumed, start + len - 1);
assert(meta_it.idx >= 1); assert(meta_it.idx >= 1);
// meta_it.idx is now the index after the br pin. We can // Our offset into the current meta block is the start index
// safely prune our data up to this index. (It is after // minus the amount of data fully consumed. We then add one
// because next() is called at least once). // to move one past the match so we don't repeat it.
const br_meta_idx: usize = meta_it.idx - 1; self.data_offset = start - meta_consumed + 1;
meta_it.reset();
var offset: usize = 0; // meta_it.idx is br's meta index plus one (because the iterator
while (meta_it.next()) |meta| { // moves one past the end; we call next() one last time). So
const meta_idx = start - offset; // we compare against one to check that the meta that we matched
if (meta_idx >= meta.cell_map.items.len) { // in has prior meta blocks we can prune.
// Prior to our matches, we can prune it. if (meta_it.idx > 1) {
offset += meta.cell_map.items.len; // Deinit all our memory in the meta blocks prior to our
meta.deinit(); // match.
const meta_count = meta_it.idx - 1;
meta_it.reset();
for (0..meta_count) |_| meta_it.next().?.deinit();
if (comptime std.debug.runtime_safety) {
assert(meta_it.idx == meta_count);
assert(meta_it.next().?.node == br.node);
} }
self.meta.deleteOldest(meta_count);
assert(meta_it.idx == br_meta_idx + 1); // Delete all the data up to our current index.
break; assert(meta_consumed > 0);
self.data.deleteOldest(meta_consumed);
} }
// If we have metas to prune, then prune them. They should be self.assertIntegrity();
// deinitialized already from the while loop above.
if (br_meta_idx > 0) {
assert(offset > 0);
self.meta.deleteOldest(br_meta_idx);
self.data.deleteOldest(offset);
@panic("TODO: TEST");
}
// Move our data one beyond so we don't rematch.
self.data_offset = start - offset + 1;
return Selection.init(tl, br, false); return Selection.init(tl, br, false);
} }
/// Convert a data index into a pin. /// Convert a data index into a pin.
/// ///
/// Tip: you can get the offset into the meta buffer we searched /// The iterator and offset are both expected to be passed by
/// by inspecting the iterator index after this function returns. /// pointer so that the pin can be efficiently called for multiple
/// I note this because this is useful if you want to prune the /// indexes (in order). See selection() for an example.
/// meta buffer after you find a match.
/// ///
/// Precondition: the index must be within the data buffer. /// Precondition: the index must be within the data buffer.
fn pin( fn pin(
it: *MetaBuf.Iterator, it: *MetaBuf.Iterator,
offset: *usize,
idx: usize, idx: usize,
) Pin { ) Pin {
var offset: usize = 0;
while (it.next()) |meta| { while (it.next()) |meta| {
// meta_i is the index we expect to find the match in the // meta_i is the index we expect to find the match in the
// cell map within this meta if it contains it. // cell map within this meta if it contains it.
const meta_i = idx - offset; const meta_i = idx - offset.*;
if (meta_i >= meta.cell_map.items.len) { if (meta_i >= meta.cell_map.items.len) {
// This meta doesn't contain the match. This means we // This meta doesn't contain the match. This means we
// can also prune this set of data because we only look // can also prune this set of data because we only look
// forward. // forward.
offset += meta.cell_map.items.len; offset.* += meta.cell_map.items.len;
continue; continue;
} }
@ -240,19 +278,13 @@ const SlidingWindow = struct {
unreachable; unreachable;
} }
/// Add a new node to the sliding window. /// Add a new node to the sliding window. This will always grow
/// /// the sliding window; data isn't pruned until it is consumed
/// The window will prune itself if it can while always maintaining /// via a search (via next()).
/// the invariant that the `fixed_size` always fits within the window.
///
/// Note it is possible for the window to be smaller than `fixed_size`
/// if not enough nodes have been added yet or the screen is just
/// smaller than the needle.
pub fn append( pub fn append(
self: *SlidingWindow, self: *SlidingWindow,
alloc: Allocator, alloc: Allocator,
node: *PageList.List.Node, node: *PageList.List.Node,
required_size: usize,
) Allocator.Error!void { ) Allocator.Error!void {
// Initialize our metadata for the node. // Initialize our metadata for the node.
var meta: Meta = .{ var meta: Meta = .{
@ -280,35 +312,6 @@ const SlidingWindow = struct {
}; };
assert(meta.cell_map.items.len == encoded.items.len); assert(meta.cell_map.items.len == encoded.items.len);
// Now that we know our buffer length, we can consider if we can
// prune our circular buffer or if we need to grow it.
prune: {
// Our buffer size after adding the new node.
const before_size: usize = self.data.len() + encoded.items.len;
// Prune as long as removing the first (oldest) node retains
// our required size invariant.
var after_size: usize = before_size;
while (self.meta.first()) |oldest_meta| {
const new_size = after_size - oldest_meta.cell_map.items.len;
if (new_size < required_size) break :prune;
// We can prune this node and retain our invariant.
// Update our new size, deinitialize the memory, and
// remove from the circular buffer.
after_size = new_size;
oldest_meta.deinit();
self.meta.deleteOldest(1);
}
assert(after_size <= before_size);
// If we didn't prune anything then we're done.
if (after_size == before_size) break :prune;
// We need to prune our data buffer as well.
self.data.deleteOldest(before_size - after_size);
}
// Ensure our buffers are big enough to store what we need. // Ensure our buffers are big enough to store what we need.
try self.data.ensureUnusedCapacity(alloc, encoded.items.len); try self.data.ensureUnusedCapacity(alloc, encoded.items.len);
try self.meta.ensureUnusedCapacity(alloc, 1); try self.meta.ensureUnusedCapacity(alloc, 1);
@ -317,13 +320,20 @@ const SlidingWindow = struct {
try self.data.appendSlice(encoded.items); try self.data.appendSlice(encoded.items);
try self.meta.append(meta); try self.meta.append(meta);
self.assertIntegrity();
}
fn assertIntegrity(self: *const SlidingWindow) void {
if (comptime !std.debug.runtime_safety) return;
// Integrity check: verify our data matches our metadata exactly. // Integrity check: verify our data matches our metadata exactly.
if (comptime std.debug.runtime_safety) { var meta_it = self.meta.iterator(.forward);
var meta_it = self.meta.iterator(.forward); var data_len: usize = 0;
var data_len: usize = 0; while (meta_it.next()) |m| data_len += m.cell_map.items.len;
while (meta_it.next()) |m| data_len += m.cell_map.items.len; assert(data_len == self.data.len());
assert(data_len == self.data.len());
} // Integrity check: verify our data offset is within bounds.
assert(self.data_offset < self.data.len());
} }
}; };
@ -354,7 +364,7 @@ test "SlidingWindow single append" {
// We want to test single-page cases. // We want to test single-page cases.
try testing.expect(s.pages.pages.first == s.pages.pages.last); try testing.expect(s.pages.pages.first == s.pages.pages.last);
const node: *PageList.List.Node = s.pages.pages.first.?; const node: *PageList.List.Node = s.pages.pages.first.?;
try w.append(alloc, node, needle.len); try w.append(alloc, node);
// We should be able to find two matches. // We should be able to find two matches.
{ {
@ -409,10 +419,34 @@ test "SlidingWindow two pages" {
// Add both pages // Add both pages
const node: *PageList.List.Node = s.pages.pages.first.?; const node: *PageList.List.Node = s.pages.pages.first.?;
try w.append(alloc, node, needle.len); try w.append(alloc, node);
try w.append(alloc, node.next.?, needle.len); try w.append(alloc, node.next.?);
// Ensure our data is correct // Search should find two matches
{
const sel = w.next(needle).?;
try testing.expectEqual(point.Point{ .active = .{
.x = 76,
.y = 22,
} }, s.pages.pointFromPin(.active, sel.start()).?);
try testing.expectEqual(point.Point{ .active = .{
.x = 79,
.y = 22,
} }, s.pages.pointFromPin(.active, sel.end()).?);
}
{
const sel = w.next(needle).?;
try testing.expectEqual(point.Point{ .active = .{
.x = 7,
.y = 23,
} }, s.pages.pointFromPin(.active, sel.start()).?);
try testing.expectEqual(point.Point{ .active = .{
.x = 10,
.y = 23,
} }, s.pages.pointFromPin(.active, sel.end()).?);
}
try testing.expect(w.next(needle) == null);
try testing.expect(w.next(needle) == null);
} }
pub const PageSearch = struct { pub const PageSearch = struct {