// ghostty/src/terminal/ref_counted_set.zig

const std = @import("std");
const assert = std.debug.assert;
const size = @import("size.zig");
const Offset = size.Offset;
const OffsetBuf = size.OffsetBuf;
const fastmem = @import("../fastmem.zig");
/// A reference counted set.
///
/// This set is created with some capacity in mind. You can determine
/// the exact memory requirement of a given capacity by calling `layout`
/// and checking the total size.
///
/// When the set exceeds capacity, an `OutOfMemory` or `NeedsRehash` error
/// is returned from any memory-using methods. The caller is responsible
/// for determining a path forward.
///
/// This set is reference counted. Each item in the set has an associated
/// reference count. The caller is responsible for calling release for an
/// item when it is no longer being used. Items with 0 references will be
/// kept until another item is written to their bucket. This allows items
/// to be resurrected if they are re-added before they get overwritten.
///
/// The backing data structure of this set is an open addressed hash table
/// with linear probing and Robin Hood hashing, and a flat array of items.
///
/// The table maps values to item IDs, which are indices into the item
/// array; each item stores its value and its reference count. Item IDs
/// can be used to efficiently access an item and update its reference
/// count after it has been added to the table, without having to use
/// the hash map to look the value up again.
///
/// ID 0 is reserved and will never be assigned.
///
/// Parameters:
///
/// `Context`
/// A type containing methods to define behaviors.
/// - `fn hash(*Context, T) u64` - Return a hash for an item.
/// - `fn eql(*Context, T, T) bool` - Check two items for equality.
/// - `fn deleted(*Context, T) void` - [OPTIONAL] Deletion callback.
/// If present, called whenever an item is finally deleted.
/// Useful if the item has memory that needs to be freed.
///
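/// As an illustrative sketch only (not part of this file), a minimal
/// context for `u32` values might look like the following, where
/// `IntContext` is a hypothetical name:
///
///     const IntContext = struct {
///         pub fn hash(self: *const IntContext, v: u32) u64 {
///             _ = self;
///             return std.hash.Wyhash.hash(0, std.mem.asBytes(&v));
///         }
///
///         pub fn eql(self: *const IntContext, a: u32, b: u32) bool {
///             _ = self;
///             return a == b;
///         }
///     };
///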
pub fn RefCountedSet(
comptime T: type,
comptime IdT: type,
comptime RefCountInt: type,
comptime ContextT: type,
) type {
return struct {
const Self = @This();
pub const base_align = @max(
@alignOf(Context),
@alignOf(Layout),
@alignOf(Item),
@alignOf(Id),
);
/// Set item
pub const Item = struct {
/// The value this item represents.
value: T = undefined,
/// Metadata for this item.
meta: Metadata = .{},
pub const Metadata = struct {
/// The bucket in the hash table where this item
/// is referenced.
bucket: Id = std.math.maxInt(Id),
/// The length of the probe sequence between this
/// item's starting bucket and the bucket it's in,
/// used for Robin Hood hashing.
psl: Id = 0,
/// The reference count for this item.
ref: RefCountInt = 0,
};
};
// Re-export these types so they can be referenced by the caller.
pub const Id = IdT;
pub const Context = ContextT;
/// A hash table of item indices
table: Offset(Id),
/// By keeping track of the max probe sequence length
/// we can bail out early when looking up values that
/// aren't present.
max_psl: Id = 0,
/// We keep track of how many items have a PSL of any
/// given length, so that we can shrink max_psl when
/// we delete items.
///
/// A probe sequence of length 32 or more is astronomically
/// unlikely -- roughly (1/table_cap)^32 -- so with any normal
/// table capacity it is not worth handling.
psl_stats: [32]Id = [_]Id{0} ** 32,
/// The backing store of items
items: Offset(Item),
/// The number of living items currently stored in the set.
living: Id = 0,
/// The next index to store an item at.
/// Id 0 is reserved for unused items.
next_id: Id = 1,
layout: Layout,
/// An instance of the context structure.
context: Context,
/// Returns the memory layout for the given base offset and
/// desired capacity. The layout can be used by the caller to
/// determine how much memory to allocate, and the layout must
/// be used to initialize the set so that the set knows all
/// the offsets for the various buffers.
///
/// The capacity passed for cap will be used for the hash table,
/// which has a load factor of `0.8125` (13/16), so the number of
/// items which can actually be stored in the set will be smaller.
///
/// The laid out capacity will be at least `cap`, but may be higher,
/// since it is rounded up to the next power of 2 for efficiency.
///
/// The returned layout `cap` property will be 1 more than the number
/// of items that the set can actually store, since ID 0 is reserved.
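///
/// An illustrative sizing sketch (hypothetical names; `IntContext` is
/// the example context sketched above and `alloc` is any
/// `std.mem.Allocator`):
///
///     const Set = RefCountedSet(u32, u16, u16, IntContext);
///     const l = Set.layout(1024);
///     const buf = try alloc.alignedAlloc(u8, Set.base_align, l.total_size);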
pub fn layout(cap: usize) Layout {
// Experimentally, this load factor works quite well.
const load_factor = 0.8125;
assert(cap <= @as(usize, @intCast(std.math.maxInt(Id))) + 1);
// Zero-cap set is valid, return special case
if (cap == 0) return .{
.cap = 0,
.table_cap = 0,
.table_mask = 0,
.table_start = 0,
.items_start = 0,
.total_size = 0,
};
const table_cap: usize = std.math.ceilPowerOfTwoAssert(usize, cap);
const items_cap: usize = @intFromFloat(load_factor * @as(f64, @floatFromInt(table_cap)));
const table_mask: Id = @intCast((@as(usize, 1) << std.math.log2_int(usize, table_cap)) - 1);
const table_start = 0;
const table_end = table_start + table_cap * @sizeOf(Id);
const items_start = std.mem.alignForward(usize, table_end, @alignOf(Item));
const items_end = items_start + items_cap * @sizeOf(Item);
const total_size = items_end;
return .{
.cap = items_cap,
.table_cap = table_cap,
.table_mask = table_mask,
.table_start = table_start,
.items_start = items_start,
.total_size = total_size,
};
}
pub const Layout = struct {
cap: usize,
table_cap: usize,
table_mask: Id,
table_start: usize,
items_start: usize,
total_size: usize,
};
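/// Initialize the set in the buffer described by `base`, using a layout
/// previously computed by `layout` and an instance of the context.
///
/// A rough usage sketch (illustrative; it continues the hypothetical
/// example above, assumes the context is zero-sized, and assumes
/// `OffsetBuf.init` wraps the raw allocation):
///
///     var set = Set.init(OffsetBuf.init(buf), l, .{});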
pub fn init(base: OffsetBuf, l: Layout, context: Context) Self {
const table = base.member(Id, l.table_start);
const items = base.member(Item, l.items_start);
@memset(table.ptr(base)[0..l.table_cap], 0);
@memset(items.ptr(base)[0..l.cap], .{});
return .{
.table = table,
.items = items,
.layout = l,
.context = context,
};
}
/// Possible errors for `add` and `addWithId`.
pub const AddError = error{
/// There is not enough memory to add a new item.
/// Remove items or grow and reinitialize.
OutOfMemory,
/// The set needs to be rehashed, as there are many dead
/// items with lower IDs which are inaccessible for re-use.
NeedsRehash,
};
/// Add an item to the set if not present and increment its ref count.
///
/// Returns the item's ID.
///
/// If the set has no more room, an `OutOfMemory` error is returned. If
/// enough dead items could be reclaimed by rehashing instead, a
/// `NeedsRehash` error is returned.
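///
/// A sketch of call-site error handling (illustrative; `growAndRetry`
/// and `rehashAndRetry` stand in for caller-defined recovery paths):
///
///     const id = set.add(base, value) catch |err| switch (err) {
///         error.OutOfMemory => try growAndRetry(base, value),
///         error.NeedsRehash => try rehashAndRetry(base, value),
///     };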
pub fn add(self: *Self, base: anytype, value: T) AddError!Id {
return try self.addContext(base, value, self.context);
}
pub fn addContext(self: *Self, base: anytype, value: T, ctx: Context) AddError!Id {
const items = self.items.ptr(base);
// Trim dead items from the end of the list.
while (self.next_id > 1 and items[self.next_id - 1].meta.ref == 0) {
self.next_id -= 1;
self.deleteItem(base, self.next_id, ctx);
}
// If the item already exists, return it.
if (self.lookup(base, value, ctx)) |id| {
// Notify the context that the value is "deleted" because
// we're reusing the existing value in the set. This allows
// callers to clean up any resources associated with the value.
if (comptime @hasDecl(Context, "deleted")) ctx.deleted(value);
items[id].meta.ref += 1;
return id;
}
// If the item doesn't exist, we need an available ID.
if (self.next_id >= self.layout.cap) {
// Arbitrarily chosen, threshold for rehashing.
// If less than 90% of currently allocated IDs
// correspond to living items, we should rehash.
// Otherwise, claim we're out of memory because
// we assume that we'll end up running out of
// memory or rehashing again very soon if we
// rehash with only a few IDs left.
const rehash_threshold = 0.9;
if (self.living < @as(Id, @intFromFloat(@as(f64, @floatFromInt(self.layout.cap)) * rehash_threshold))) {
return AddError.NeedsRehash;
}
// If we don't have at least 10% dead items then
// we claim we're out of memory.
return AddError.OutOfMemory;
}
const id = self.insert(base, value, self.next_id, ctx);
items[id].meta.ref += 1;
assert(items[id].meta.ref == 1);
self.living += 1;
// It's possible that insert returns a different ID by reusing a
// dead item, so we only update next_id if our new ID was used.
if (id == self.next_id) self.next_id += 1;
return id;
}
/// Add an item to the set if not present and increment its
/// ref count. If possible, use the provided ID.
///
/// Returns the item's ID, or null if the provided ID was used.
///
/// If the set has no more room, an `OutOfMemory` or `NeedsRehash` error
/// is returned (see `AddError`).
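///
/// An illustrative call-site sketch (hypothetical names):
///
///     if (try set.addWithId(base, value, old_id)) |new_id| {
///         // old_id was not available; update any references to new_id.
///         remapReferences(old_id, new_id);
///     }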
pub fn addWithId(self: *Self, base: anytype, value: T, id: Id) AddError!?Id {
return try self.addWithIdContext(base, value, id, self.context);
}
pub fn addWithIdContext(self: *Self, base: anytype, value: T, id: Id, ctx: Context) AddError!?Id {
const items = self.items.ptr(base);
if (id < self.next_id) {
if (items[id].meta.ref == 0) {
self.deleteItem(base, id, ctx);
const added_id = self.upsert(base, value, id, ctx);
items[added_id].meta.ref += 1;
self.living += 1;
return if (added_id == id) null else added_id;
} else if (ctx.eql(value, items[id].value)) {
items[id].meta.ref += 1;
return null;
}
}
return try self.addContext(base, value, ctx);
}
/// Increment an item's reference count by 1.
///
/// Asserts that the item's reference count is greater than 0.
pub fn use(self: *const Self, base: anytype, id: Id) void {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
// If `use` is being called on an item with 0 references, then
// either someone forgot to call it before, released too early
// or lied about releasing. In any case something is wrong and
// shouldn't be allowed.
assert(item.meta.ref > 0);
item.meta.ref += 1;
}
/// Increment an item's reference count by a specified number.
///
/// Asserts that the item's reference count is greater than 0.
pub fn useMultiple(self: *const Self, base: anytype, id: Id, n: RefCountInt) void {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
// If `useMultiple` is being called on an item with 0 references, then
// either someone forgot to call it before, released too early
// or lied about releasing. In any case something is wrong and
// shouldn't be allowed.
assert(item.meta.ref > 0);
item.meta.ref += n;
}
/// Get an item by its ID without incrementing its reference count.
///
/// Asserts that the item's reference count is greater than 0.
pub fn get(self: *const Self, base: anytype, id: Id) *T {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
assert(item.meta.ref > 0);
return @ptrCast(&item.value);
}
/// Releases a reference to an item by its ID.
///
/// Asserts that the item's reference count is greater than 0.
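///
/// A typical reference-count lifecycle, as a sketch (illustrative only):
///
///     const id = try set.add(base, value); // ref = 1
///     set.use(base, id);                   // ref = 2
///     // ...
///     set.release(base, id);               // ref = 1
///     set.release(base, id);               // ref = 0; item may be reclaimed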
pub fn release(self: *Self, base: anytype, id: Id) void {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
assert(item.meta.ref > 0);
item.meta.ref -= 1;
if (item.meta.ref == 0) self.living -= 1;
}
/// Release a specified number of references to an item by its ID.
///
/// Asserts that the item's reference count is at least `n`.
pub fn releaseMultiple(self: *Self, base: anytype, id: Id, n: Id) void {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
assert(item.meta.ref >= n);
item.meta.ref -= n;
if (item.meta.ref == 0) {
self.living -= 1;
}
}
/// Get the ref count for an item by its ID.
pub fn refCount(self: *const Self, base: anytype, id: Id) RefCountInt {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
return item.meta.ref;
}
/// Get the current number of non-dead items in the set.
pub fn count(self: *const Self) usize {
return self.living;
}
/// Delete an item, removing any references from
/// the table, and freeing its ID to be re-used.
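///
/// Removal uses backward-shift deletion: entries that follow the removed
/// entry in its probe sequence are shifted back one bucket and have their
/// PSLs decremented, so lookups that bail out early on PSL stay correct.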
fn deleteItem(self: *Self, base: anytype, id: Id, ctx: Context) void {
const table = self.table.ptr(base);
const items = self.items.ptr(base);
const item = items[id];
if (item.meta.bucket > self.layout.table_cap) return;
if (table[item.meta.bucket] != id) return;
if (comptime @hasDecl(Context, "deleted")) {
// Inform the context struct that we're
// deleting the dead item's value for good.
ctx.deleted(item.value);
}
self.psl_stats[item.meta.psl] -= 1;
table[item.meta.bucket] = 0;
items[id] = .{};
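// Shift the entries that follow in the probe sequence back by one
// bucket and decrement their PSLs, stopping at an empty bucket or
// at an entry that is already in its home bucket (psl == 0).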
var p: Id = item.meta.bucket;
var n: Id = (p +% 1) & self.layout.table_mask;
while (table[n] != 0 and items[table[n]].meta.psl > 0) {
items[table[n]].meta.bucket = p;
self.psl_stats[items[table[n]].meta.psl] -= 1;
items[table[n]].meta.psl -= 1;
self.psl_stats[items[table[n]].meta.psl] += 1;
table[p] = table[n];
p = n;
n = (p +% 1) & self.layout.table_mask;
}
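// If the longest probe sequences are now gone, shrink max_psl
// so lookups can bail out earlier.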
while (self.max_psl > 0 and self.psl_stats[self.max_psl] == 0) {
self.max_psl -= 1;
}
table[p] = 0;
}
/// Find an item in the table and return its ID.
/// If the item does not exist in the table, null is returned.
fn lookup(self: *Self, base: anytype, value: T, ctx: Context) ?Id {
const table = self.table.ptr(base);
const items = self.items.ptr(base);
const hash: u64 = ctx.hash(value);
for (0..self.max_psl + 1) |i| {
const p: usize = @intCast((hash + i) & self.layout.table_mask);
const id = table[p];
// Empty bucket, our item cannot have probed to
// any point after this, meaning it's not present.
if (id == 0) {
return null;
}
const item = items[id];
// An item with a shorter probe sequence length would never
// end up in the middle of another sequence, since it would
// be swapped out if inserted before the new sequence, and
// would not be swapped in if inserted afterwards.
//
// As such, our item cannot be present.
if (item.meta.psl < i) {
return null;
}
// We don't bother checking dead items.
if (item.meta.ref == 0) {
continue;
}
// If the item is a part of the same probe sequence,
// we check if it matches the value we're looking for.
if (item.meta.psl == i and
ctx.eql(value, item.value))
{
return id;
}
}
return null;
}
/// Find the provided value in the hash table, or add a new item
/// for it if not present. If a new item is added, `new_id` will
/// be used as the ID. If an existing item is found, the `new_id`
/// is ignored and the existing item's ID is returned.
fn upsert(self: *Self, base: anytype, value: T, new_id: Id, ctx: Context) Id {
// If the item already exists, return it.
if (self.lookup(base, value, ctx)) |id| {
// Notify the context that the value is "deleted" because
// we're reusing the existing value in the set. This allows
// callers to clean up any resources associated with the value.
if (comptime @hasDecl(Context, "deleted")) ctx.deleted(value);
return id;
}
return self.insert(base, value, new_id, ctx);
}
/// Insert the given value into the hash table with the given ID.
/// Asserts that the value is not already present in the table.
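///
/// Insertion follows the Robin Hood scheme: while probing, if we reach a
/// bucket whose occupant has a shorter probe sequence length than the item
/// we're currently holding, the two are swapped and probing continues with
/// the displaced item. Dead items encountered along the way are reclaimed
/// so that their IDs can be re-used.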
fn insert(self: *Self, base: anytype, value: T, new_id: Id, ctx: Context) Id {
assert(self.lookup(base, value, ctx) == null);
const table = self.table.ptr(base);
const items = self.items.ptr(base);
// The new item that we'll put in to the table.
var new_item: Item = .{
.value = value,
.meta = .{ .psl = 0, .ref = 0 },
};
const hash: u64 = ctx.hash(value);
var held_id: Id = new_id;
var held_item: *Item = &new_item;
var chosen_p: ?Id = null;
var chosen_id: Id = new_id;
for (0..self.layout.table_cap - 1) |i| {
const p: Id = @intCast((hash + i) & self.layout.table_mask);
const id = table[p];
// Empty bucket, put our held item in to it and break.
if (id == 0) {
table[p] = held_id;
held_item.meta.bucket = p;
self.psl_stats[held_item.meta.psl] += 1;
self.max_psl = @max(self.max_psl, held_item.meta.psl);
break;
}
const item = &items[id];
// If there's a dead item then we resurrect it
// for our value so that we can re-use its ID.
if (item.meta.ref == 0) {
if (comptime @hasDecl(Context, "deleted")) {
// Inform the context struct that we're
// deleting the dead item's value for good.
ctx.deleted(item.value);
}
chosen_id = id;
held_item.meta.bucket = p;
self.psl_stats[item.meta.psl] -= 1;
self.psl_stats[held_item.meta.psl] += 1;
self.max_psl = @max(self.max_psl, held_item.meta.psl);
// If we're not still holding our new item then we
// need to make sure that we put the re-used ID in
// the right place, where we previously put new_id.
if (chosen_p) |c| {
table[c] = id;
table[p] = held_id;
} else {
// If we're still holding our new item then we
// don't actually have to do anything, because
// the table already has the correct ID here.
}
break;
}
// This item has a lower PSL, swap it out with our held item.
if (item.meta.psl < held_item.meta.psl) {
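// If we're still holding the new item, remember the bucket we're
// about to place it in so we can fix the table up later if we end
// up re-using a dead item's ID instead of new_id.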
if (held_id == new_id) {
chosen_p = p;
new_item.meta.bucket = p;
}
table[p] = held_id;
items[held_id].meta.bucket = p;
self.psl_stats[held_item.meta.psl] += 1;
self.max_psl = @max(self.max_psl, held_item.meta.psl);
held_id = id;
held_item = item;
self.psl_stats[item.meta.psl] -= 1;
}
// Advance to the next probe position for our held item.
held_item.meta.psl += 1;
}
items[chosen_id] = new_item;
return chosen_id;
}
};
}