ghostty/src/terminal/ref_counted_set.zig
Mitchell Hashimoto cb1caff018 terminal: refcountedset passes base memory to all context funcs
This enables these funcs to access memory offsets that may be present in
set items, which is possible since the set itself is in an offset-based
structure.
2024-07-05 21:39:07 -07:00

const std = @import("std");
const assert = std.debug.assert;
const size = @import("size.zig");
const Offset = size.Offset;
const OffsetBuf = size.OffsetBuf;
const fastmem = @import("../fastmem.zig");
/// A reference counted set.
///
/// This set is created with some capacity in mind. You can determine
/// the exact memory requirement of a given capacity by calling `layout`
/// and checking the total size.
///
/// When the set exceeds capacity, an `OutOfMemory` or `NeedsRehash` error
/// is returned from any memory-using methods. The caller is responsible
/// for determining a path forward.
///
/// This set is reference counted. Each item in the set has an associated
/// reference count. The caller is responsible for calling release for an
/// item when it is no longer being used. Items with 0 references will be
/// kept until another item is written to their bucket. This allows items
/// to be resurrected if they are re-added before they get overwritten.
///
/// The backing data structure of this set is an open addressed hash table
/// with linear probing and Robin Hood hashing, and a flat array of items.
///
/// The table maps values to item IDs, which are indices in the item array
/// which contain the item's value and its reference count. Item IDs can be
/// used to efficiently access an item and update its reference count after
/// it has been added to the table, to avoid having to use the hash map to
/// look the value back up.
///
/// ID 0 is reserved and will never be assigned.
///
/// Parameters:
///
/// `Context`
/// A type containing methods to define behaviors. Each method also
/// receives the set's base memory pointer (`base`), since items may
/// contain offsets relative to that base.
/// - `fn hash(*Context, base, T) u64` - Return a hash for an item.
/// - `fn eql(*Context, base, T, T) bool` - Check two items for equality.
/// - `fn deleted(*Context, base, T) void` - [OPTIONAL] Deletion callback.
/// If present, called whenever an item is finally deleted.
/// Useful if the item has memory that needs to be freed.
///
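///
/// A minimal usage sketch. `MyValue`, `MyContext`, `value`, `base`, and
/// `obuf` are hypothetical placeholders, and allocating the backing
/// buffer is elided; see `layout` and `init` below for the real
/// requirements:
///
///     const Set = RefCountedSet(MyValue, u32, u16, MyContext);
///
///     const l = Set.layout(1024);
///     // Allocate `l.total_size` bytes aligned to `Set.base_align`;
///     // `base` below is that memory and `obuf` is an `OffsetBuf`
///     // wrapping it (allocation details elided here).
///     var set = Set.init(obuf, l, .{});
///
///     const id = try set.add(base, value); // ref count is now 1
///     const v = set.get(base, id);         // access without ref'ing
///     set.release(base, id);               // drop the reference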
pub fn RefCountedSet(
comptime T: type,
comptime IdT: type,
comptime RefCountInt: type,
comptime ContextT: type,
) type {
return struct {
const Self = @This();
pub const base_align = @max(
@alignOf(Context),
@alignOf(Layout),
@alignOf(Item),
@alignOf(Id),
);
/// Set item
pub const Item = struct {
/// The value this item represents.
value: T = undefined,
/// Metadata for this item.
meta: Metadata = .{},
pub const Metadata = struct {
/// The bucket in the hash table where this item
/// is referenced.
bucket: Id = std.math.maxInt(Id),
/// The length of the probe sequence between this
/// item's starting bucket and the bucket it's in,
/// used for Robin Hood hashing.
psl: Id = 0,
/// The reference count for this item.
ref: RefCountInt = 0,
};
};
// Re-export these types so they can be referenced by the caller.
pub const Id = IdT;
pub const Context = ContextT;
/// A hash table of item indices
table: Offset(Id),
/// By keeping track of the max probe sequence length
/// we can bail out early when looking up values that
/// aren't present.
max_psl: Id = 0,
/// We keep track of how many items have a PSL of any
/// given length, so that we can shrink max_psl when
/// we delete items.
///
/// A probe sequence of length 32 or more is astronomically
/// unlikely. Roughly a (1/table_cap)^32 -- with any normal
/// table capacity that is so unlikely that it's not worth
/// handling.
psl_stats: [32]Id = [_]Id{0} ** 32,
/// The backing store of items
items: Offset(Item),
/// The number of living items currently stored in the set.
living: Id = 0,
/// The next index to store an item at.
/// Id 0 is reserved for unused items.
next_id: Id = 1,
/// The memory layout of this set's backing buffers, as computed
/// by `layout()` and passed to `init()`.
layout: Layout,
/// An instance of the context structure.
context: Context,
/// Returns the memory layout for the given base offset and
/// desired capacity. The layout can be used by the caller to
/// determine how much memory to allocate, and the layout must
/// be used to initialize the set so that the set knows all
/// the offsets for the various buffers.
///
/// The capacity passed for cap will be used for the hash table,
/// which has a load factor of `0.8125` (13/16), so the number of
/// items which can actually be stored in the set will be smaller.
///
/// The laid out capacity will be at least `cap`, but may be higher,
/// since it is rounded up to the next power of 2 for efficiency.
///
/// The returned layout `cap` property will be 1 more than the number
/// of items that the set can actually store, since ID 0 is reserved.
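///
/// As a worked example (derived from the constants below, not a
/// stability guarantee): `layout(1000)` rounds the table up to
/// `table_cap = 1024` and sets `items_cap = 1024 * 0.8125 = 832`,
/// so the returned `cap` is 832 and at most 831 distinct items can
/// be stored, since ID 0 is never assigned.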
pub fn layout(cap: usize) Layout {
// Experimentally, this load factor works quite well.
const load_factor = 0.8125;
assert(cap <= @as(usize, @intCast(std.math.maxInt(Id))) + 1);
const table_cap: usize = std.math.ceilPowerOfTwoAssert(usize, cap);
const items_cap: usize = @intFromFloat(load_factor * @as(f64, @floatFromInt(table_cap)));
const table_mask: Id = @intCast((@as(usize, 1) << std.math.log2_int(usize, table_cap)) - 1);
const table_start = 0;
const table_end = table_start + table_cap * @sizeOf(Id);
const items_start = std.mem.alignForward(usize, table_end, @alignOf(Item));
const items_end = items_start + items_cap * @sizeOf(Item);
const total_size = items_end;
return .{
.cap = items_cap,
.table_cap = table_cap,
.table_mask = table_mask,
.table_start = table_start,
.items_start = items_start,
.total_size = total_size,
};
}
pub const Layout = struct {
cap: usize,
table_cap: usize,
table_mask: Id,
table_start: usize,
items_start: usize,
total_size: usize,
};
pub fn init(base: OffsetBuf, l: Layout, context: Context) Self {
const table = base.member(Id, l.table_start);
const items = base.member(Item, l.items_start);
@memset(table.ptr(base)[0..l.table_cap], 0);
@memset(items.ptr(base)[0..l.cap], .{});
return .{
.table = table,
.items = items,
.layout = l,
.context = context,
};
}
/// Possible errors for `add` and `addWithId`.
pub const AddError = error{
/// There is not enough memory to add a new item.
/// Remove items or grow and reinitialize.
OutOfMemory,
/// The set needs to be rehashed, as there are many dead
/// items with lower IDs which are inaccessible for re-use.
NeedsRehash,
};
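// A hedged sketch of how a caller might handle these errors; the
// `rehashAndRetry` / `growAndRetry` helpers are hypothetical and the
// actual recovery strategy is entirely up to the caller:
//
//     const id = set.add(base, value) catch |err| switch (err) {
//         // Many dead IDs exist: rebuild the set at the same
//         // capacity to reclaim them, then retry the add.
//         error.NeedsRehash => try rehashAndRetry(&set, base, value),
//         // Genuinely full: compute a larger layout(), move the
//         // live items over, then retry the add.
//         error.OutOfMemory => try growAndRetry(&set, base, value),
//     };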
/// Add an item to the set if not present and increment its ref count.
///
/// Returns the item's ID.
///
/// If the set has no more room, an `OutOfMemory` or `NeedsRehash`
/// error is returned; see `AddError`.
pub fn add(self: *Self, base: anytype, value: T) AddError!Id {
const items = self.items.ptr(base);
// Trim dead items from the end of the list.
while (self.next_id > 1 and items[self.next_id - 1].meta.ref == 0) {
self.next_id -= 1;
self.deleteItem(base, self.next_id);
}
// If we still don't have an available ID, we can't continue.
if (self.next_id >= self.layout.cap) {
// Arbitrarily chosen, threshold for rehashing.
// If less than 90% of currently allocated IDs
// correspond to living items, we should rehash.
// Otherwise, claim we're out of memory because
// we assume that we'll end up running out of
// memory or rehashing again very soon if we
// rehash with only a few IDs left.
const rehash_threshold = 0.9;
if (self.living < @as(Id, @intFromFloat(@as(f64, @floatFromInt(self.layout.cap)) * rehash_threshold))) {
return AddError.NeedsRehash;
}
// If we don't have at least 10% dead items then
// we claim we're out of memory.
return AddError.OutOfMemory;
}
const id = self.upsert(base, value, self.next_id);
items[id].meta.ref += 1;
if (id == self.next_id) self.next_id += 1;
if (items[id].meta.ref == 1) {
self.living += 1;
}
return id;
}
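// A worked example of the threshold above, using the illustrative
// capacity from the `layout` doc comment (cap = 832): IDs run out once
// `next_id` reaches 832. If fewer than 748 items (90% of 832, rounded
// down) are still alive, `add` returns NeedsRehash so the caller can
// reclaim the dead IDs; otherwise it returns OutOfMemory, since a
// rehash would recover too few IDs to be worthwhile.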
/// Add an item to the set if not present and increment its
/// ref count. If possible, use the provided ID.
///
/// Returns the item's ID, or null if the provided ID was used.
///
/// If the set has no more room, an `OutOfMemory` or `NeedsRehash`
/// error is returned; see `AddError`.
pub fn addWithId(self: *Self, base: anytype, value: T, id: Id) AddError!?Id {
const items = self.items.ptr(base);
if (id < self.next_id) {
if (items[id].meta.ref == 0) {
self.deleteItem(base, id);
const added_id = self.upsert(base, value, id);
items[added_id].meta.ref += 1;
self.living += 1;
return if (added_id == id) null else added_id;
} else if (self.context.eql(base, value, items[id].value)) {
items[id].meta.ref += 1;
return null;
}
}
return try self.add(base, value);
}
/// Increment an item's reference count by 1.
///
/// Asserts that the item's reference count is greater than 0.
pub fn use(self: *const Self, base: anytype, id: Id) void {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
// If `use` is being called on an item with 0 references, then
// either someone forgot to call it before, released too early
// or lied about releasing. In any case something is wrong and
// shouldn't be allowed.
assert(item.meta.ref > 0);
item.meta.ref += 1;
}
/// Increment an item's reference count by a specified number.
///
/// Asserts that the item's reference count is greater than 0.
pub fn useMultiple(self: *const Self, base: anytype, id: Id, n: RefCountInt) void {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
// If `useMultiple` is being called on an item with 0 references, then
// either someone forgot to call it before, released too early
// or lied about releasing. In any case something is wrong and
// shouldn't be allowed.
assert(item.meta.ref > 0);
item.meta.ref += n;
}
/// Get an item by its ID without incrementing its reference count.
///
/// Asserts that the item's reference count is greater than 0.
pub fn get(self: *const Self, base: anytype, id: Id) *T {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
assert(item.meta.ref > 0);
return @ptrCast(&item.value);
}
/// Releases a reference to an item by its ID.
///
/// Asserts that the item's reference count is greater than 0.
pub fn release(self: *Self, base: anytype, id: Id) void {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
assert(item.meta.ref > 0);
item.meta.ref -= 1;
if (item.meta.ref == 0) self.living -= 1;
}
/// Release a specified number of references to an item by its ID.
///
/// Asserts that the item's reference count is at least `n`.
pub fn releaseMultiple(self: *Self, base: anytype, id: Id, n: Id) void {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
assert(item.meta.ref >= n);
item.meta.ref -= n;
if (item.meta.ref == 0) {
self.living -= 1;
}
}
/// Get the ref count for an item by its ID.
pub fn refCount(self: *const Self, base: anytype, id: Id) RefCountInt {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
return item.meta.ref;
}
/// Get the current number of non-dead items in the set.
pub fn count(self: *const Self) usize {
return self.living;
}
/// Delete an item, removing any references from
/// the table, and freeing its ID to be re-used.
fn deleteItem(self: *Self, base: anytype, id: Id) void {
const table = self.table.ptr(base);
const items = self.items.ptr(base);
const item = items[id];
if (item.meta.bucket > self.layout.table_cap) return;
if (table[item.meta.bucket] != id) return;
if (comptime @hasDecl(Context, "deleted")) {
// Inform the context struct that we're
// deleting the dead item's value for good.
self.context.deleted(base, item.value);
}
self.psl_stats[item.meta.psl] -= 1;
table[item.meta.bucket] = 0;
items[id] = .{};
var p: Id = item.meta.bucket;
var n: Id = (p +% 1) & self.layout.table_mask;
while (table[n] != 0 and items[table[n]].meta.psl > 0) {
items[table[n]].meta.bucket = p;
self.psl_stats[items[table[n]].meta.psl] -= 1;
items[table[n]].meta.psl -= 1;
self.psl_stats[items[table[n]].meta.psl] += 1;
table[p] = table[n];
p = n;
n = (p +% 1) & self.layout.table_mask;
}
while (self.max_psl > 0 and self.psl_stats[self.max_psl] == 0) {
self.max_psl -= 1;
}
table[p] = 0;
}
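// A small illustrative trace of the backward shift above (bucket
// indices and IDs are made up): suppose buckets [4, 5, 6] hold IDs
// [7, 9, 3] with PSLs [0, 1, 1] and ID 7 is deleted from bucket 4.
// ID 9 shifts from bucket 5 to 4 (PSL 1 -> 0) and ID 3 shifts from
// bucket 6 to 5 (PSL 1 -> 0); bucket 6 is then zeroed. Items with
// PSL 0 are never moved, since they already sit in their home bucket.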
/// Find an item in the table and return its ID.
/// If the item does not exist in the table, null is returned.
fn lookup(self: *Self, base: anytype, value: T) ?Id {
const table = self.table.ptr(base);
const items = self.items.ptr(base);
const hash: u64 = self.context.hash(base, value);
for (0..self.max_psl + 1) |i| {
const p: usize = @intCast((hash + i) & self.layout.table_mask);
const id = table[p];
// Empty bucket, our item cannot have probed to
// any point after this, meaning it's not present.
if (id == 0) {
return null;
}
const item = items[id];
// An item with a shorter probe sequence length would never
// end up in the middle of another sequence, since it would
// be swapped out if inserted before the new sequence, and
// would not be swapped in if inserted afterwards.
//
// As such, our item cannot be present.
if (item.meta.psl < i) {
return null;
}
// We don't bother checking dead items.
if (item.meta.ref == 0) {
continue;
}
// If the item is a part of the same probe sequence,
// we check if it matches the value we're looking for.
if (item.meta.psl == i and
self.context.eql(base, value, item.value))
{
return id;
}
}
return null;
}
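// Illustration of the early-exit above (made-up numbers): while probing
// for a value at distance i = 3 from its home bucket, we land on a
// bucket whose resident has PSL 1. By the Robin Hood invariant described
// above, the value being looked up cannot be stored at or past this
// bucket, so the lookup returns null without scanning further.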
/// Find the provided value in the hash table, or add a new item
/// for it if not present. If a new item is added, `new_id` will
/// be used as the ID. If an existing item is found, the `new_id`
/// is ignored and the existing item's ID is returned.
fn upsert(self: *Self, base: anytype, value: T, new_id: Id) Id {
// If the item already exists, return it.
if (self.lookup(base, value)) |id| return id;
const table = self.table.ptr(base);
const items = self.items.ptr(base);
// The new item that we'll put into the table.
var new_item: Item = .{
.value = value,
.meta = .{ .psl = 0, .ref = 0 },
};
const hash: u64 = self.context.hash(base, value);
var held_id: Id = new_id;
var held_item: *Item = &new_item;
var chosen_p: ?Id = null;
var chosen_id: Id = new_id;
for (0..self.layout.table_cap - 1) |i| {
const p: Id = @intCast((hash + i) & self.layout.table_mask);
const id = table[p];
// Empty bucket, put our held item into it and break.
if (id == 0) {
table[p] = held_id;
held_item.meta.bucket = p;
self.psl_stats[held_item.meta.psl] += 1;
self.max_psl = @max(self.max_psl, held_item.meta.psl);
break;
}
const item = &items[id];
// If there's a dead item then we resurrect it
// for our value so that we can re-use its ID.
if (item.meta.ref == 0) {
if (comptime @hasDecl(Context, "deleted")) {
// Inform the context struct that we're
// deleting the dead item's value for good.
self.context.deleted(base, item.value);
}
chosen_id = id;
held_item.meta.bucket = p;
self.psl_stats[item.meta.psl] -= 1;
self.psl_stats[held_item.meta.psl] += 1;
self.max_psl = @max(self.max_psl, held_item.meta.psl);
// If we're not still holding our new item then we
// need to make sure that we put the re-used ID in
// the right place, where we previously put new_id.
if (chosen_p) |c| {
table[c] = id;
table[p] = held_id;
} else {
// If we're still holding our new item then we
// don't actually have to do anything, because
// the table already has the correct ID here.
}
break;
}
// This item has a lower PSL, swap it out with our held item.
if (item.meta.psl < held_item.meta.psl) {
if (held_id == new_id) {
chosen_p = p;
new_item.meta.bucket = p;
}
table[p] = held_id;
items[held_id].meta.bucket = p;
self.psl_stats[held_item.meta.psl] += 1;
self.max_psl = @max(self.max_psl, held_item.meta.psl);
held_id = id;
held_item = item;
self.psl_stats[item.meta.psl] -= 1;
}
// Advance to the next probe position for our held item.
held_item.meta.psl += 1;
}
items[chosen_id] = new_item;
return chosen_id;
}
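// A small illustrative trace of the Robin Hood swap above (made-up
// buckets and IDs): a new value's home bucket is 4, where bucket 4
// holds an item with PSL 2 and bucket 5 holds one with PSL 0. The new
// item (PSL 0) cannot displace the PSL-2 item at bucket 4, so it
// probes on with PSL 1; at bucket 5 its PSL now exceeds the resident's
// PSL 0, so they swap: the new item settles in bucket 5 with PSL 1 and
// the displaced item continues probing from bucket 6 with PSL 1.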
};
}