// Mirror of https://github.com/ghostty-org/ghostty.git
// (synced 2025-07-16 08:46:08 +03:00; 613 lines, 23 KiB, Zig)
const std = @import("std");
|
|
const assert = std.debug.assert;
|
|
|
|
const size = @import("size.zig");
|
|
const Offset = size.Offset;
|
|
const OffsetBuf = size.OffsetBuf;
|
|
|
|
const fastmem = @import("../fastmem.zig");
|
|
|
|
/// A reference counted set.
|
|
///
|
|
/// This set is created with some capacity in mind. You can determine
|
|
/// the exact memory requirement of a given capacity by calling `layout`
|
|
/// and checking the total size.
|
|
///
|
|
/// When the set exceeds capacity, an `OutOfMemory` or `NeedsRehash` error
|
|
/// is returned from any memory-using methods. The caller is responsible
|
|
/// for determining a path forward.
|
|
///
|
|
/// This set is reference counted. Each item in the set has an associated
|
|
/// reference count. The caller is responsible for calling release for an
|
|
/// item when it is no longer being used. Items with 0 references will be
|
|
/// kept until another item is written to their bucket. This allows items
|
|
/// to be resurrected if they are re-added before they get overwritten.
|
|
///
|
|
/// The backing data structure of this set is an open addressed hash table
|
|
/// with linear probing and Robin Hood hashing, and a flat array of items.
|
|
///
|
|
/// The table maps values to item IDs, which are indices in the item array
|
|
/// which contain the item's value and its reference count. Item IDs can be
|
|
/// used to efficiently access an item and update its reference count after
|
|
/// it has been added to the table, to avoid having to use the hash map to
|
|
/// look the value back up.
|
|
///
|
|
/// ID 0 is reserved and will never be assigned.
|
|
///
|
|
/// Parameters:
|
|
///
|
|
/// `Context`
|
|
/// A type containing methods to define behaviors.
|
|
/// - `fn hash(*Context, T) u64` - Return a hash for an item.
|
|
/// - `fn eql(*Context, T, T) bool` - Check two items for equality.
|
|
/// - `fn deleted(*Context, T) void` - [OPTIONAL] Deletion callback.
|
|
/// If present, called whenever an item is finally deleted.
|
|
/// Useful if the item has memory that needs to be freed.
|
|
///
|
|
pub fn RefCountedSet(
|
|
comptime T: type,
|
|
comptime IdT: type,
|
|
comptime RefCountInt: type,
|
|
comptime ContextT: type,
|
|
) type {
|
|
return struct {
|
|
const Self = @This();
|
|
|
|
/// The largest alignment among the types this set works with:
/// IDs, items, the layout description and the context.
pub const base_align = @max(@alignOf(Id), @alignOf(Item), @alignOf(Layout), @alignOf(Context));
|
|
|
|
/// One slot of the item array: a stored value plus its bookkeeping.
pub const Item = struct {
    /// Bookkeeping kept next to every stored value.
    pub const Metadata = struct {
        /// Index of the hash table bucket that refers to this item.
        /// Defaults to the largest value `Id` can hold.
        bucket: Id = std.math.maxInt(Id),

        /// Probe sequence length: how far this item sits from the
        /// bucket its hash maps to. Drives Robin Hood hashing.
        psl: Id = 0,

        /// Number of references currently held on this item.
        ref: RefCountInt = 0,
    };

    /// The value stored in this slot; undefined until written.
    value: T = undefined,

    /// Bucket, probe length and reference count for `value`.
    meta: Metadata = .{},
};
|
|
|
|
// Expose the type parameters so callers can name them through the set type.
pub const Id = IdT;
pub const Context = ContextT;

/// The hash table. Each bucket holds an item ID; 0 marks an empty bucket.
table: Offset(Id),

/// The longest probe sequence length currently present in the table.
/// Lookups never need to probe further than this.
max_psl: Id = 0,

/// Histogram of probe sequence lengths: entry `n` counts the items
/// whose PSL is `n`. It lets `max_psl` shrink again when items are
/// deleted.
///
/// Only 32 lengths are tracked. A probe sequence of 32 or more is
/// astronomically unlikely -- roughly (1/table_cap)^32 -- so with any
/// normal table capacity it is not worth handling.
psl_stats: [32]Id = [_]Id{0} ** 32,

/// The flat array of items, indexed by item ID.
items: Offset(Item),

/// How many items currently have a non-zero reference count.
living: Id = 0,

/// The ID that will be given to the next brand new item.
/// Starts at 1 because ID 0 is reserved for "unused".
next_id: Id = 1,

/// The memory layout this set was initialized with.
layout: Layout,

/// The context instance used by the non-`Context` method variants.
context: Context,
|
|
|
|
/// Compute the memory layout for a set with the requested capacity.
///
/// Use the result to find out how many bytes to allocate
/// (`total_size`) and pass it to `init` so the set knows where each
/// of its buffers lives.
///
/// `cap` sizes the hash table, which is rounded up to the next power
/// of two. The item array is sized at a load factor of `0.8125`
/// (13/16) of the table, so fewer than `cap` items may fit.
///
/// The returned `cap` field is 1 more than the number of items the
/// set can actually store, because ID 0 is reserved.
///
/// Asserts that `cap` does not exceed `maxInt(Id) + 1`.
pub fn layout(cap: usize) Layout {
    assert(cap <= @as(usize, @intCast(std.math.maxInt(Id))) + 1);

    // An empty set is legal and needs no memory at all.
    if (cap == 0) {
        return .{
            .cap = 0,
            .table_cap = 0,
            .table_mask = 0,
            .table_start = 0,
            .items_start = 0,
            .total_size = 0,
        };
    }

    // Experimentally, this load factor works quite well.
    const load_factor = 0.8125;

    const buckets: usize = std.math.ceilPowerOfTwoAssert(usize, cap);
    const slots: usize = @intFromFloat(load_factor * @as(f64, @floatFromInt(buckets)));

    // `buckets` is a power of two, so `buckets - 1` has every lower bit set.
    const mask: Id = @intCast(buckets - 1);

    // The table comes first, followed by the suitably aligned item array.
    const table_offset = 0;
    const table_bytes = buckets * @sizeOf(Id);
    const items_offset = std.mem.alignForward(usize, table_offset + table_bytes, @alignOf(Item));
    const items_bytes = slots * @sizeOf(Item);

    return .{
        .cap = slots,
        .table_cap = buckets,
        .table_mask = mask,
        .table_start = table_offset,
        .items_start = items_offset,
        .total_size = items_offset + items_bytes,
    };
}
|
|
|
|
/// Describes where the set's buffers live and how large they are.
pub const Layout = struct {
    /// Number of slots in the item array (slot 0 is the reserved ID).
    cap: usize,

    /// Number of buckets in the hash table.
    table_cap: usize,

    /// Bit mask used to wrap a hash or bucket index into the table.
    table_mask: Id,

    /// Byte offset of the hash table from the base.
    table_start: usize,

    /// Byte offset of the item array from the base.
    items_start: usize,

    /// Total number of bytes the set needs.
    total_size: usize,
};
|
|
|
|
/// Create a set inside the memory at `base` using layout `l`.
///
/// Every hash table bucket is cleared to 0 (empty) and every item
/// slot is reset to its default state. `context` is stored for use
/// by the methods that do not take an explicit context.
pub fn init(base: OffsetBuf, l: Layout, context: Context) Self {
    const set: Self = .{
        .table = base.member(Id, l.table_start),
        .items = base.member(Item, l.items_start),
        .layout = l,
        .context = context,
    };

    const buckets = set.table.ptr(base)[0..l.table_cap];
    const slots = set.items.ptr(base)[0..l.cap];
    @memset(buckets, 0);
    @memset(slots, .{});

    return set;
}
|
|
|
|
/// The errors `add` and `addWithId` (and their `Context` variants)
/// can return.
pub const AddError = error{
    /// No ID is available for a new item. The caller should
    /// release items or grow and reinitialize the set.
    OutOfMemory,

    /// No ID is available, but many of the allocated IDs belong to
    /// dead items that cannot be reached for re-use. Rehashing the
    /// set would free them up.
    NeedsRehash,
};
|
|
|
|
/// Add `value` to the set if it is not already present and take one
/// reference on it, using the set's stored context.
///
/// Returns the ID of the item. Fails with an `AddError` when no ID
/// is available for a new item.
pub fn add(self: *Self, base: anytype, value: T) AddError!Id {
    const ctx = self.context;
    return self.addContext(base, value, ctx);
}
|
|
/// Same as `add`, but with an explicitly supplied context.
///
/// If an equal living item already exists, `value` is reported to
/// the context's optional `deleted` callback (the set keeps the
/// existing value instead) and the existing item gains a reference.
pub fn addContext(self: *Self, base: anytype, value: T, ctx: Context) AddError!Id {
    const slots = self.items.ptr(base);

    // Give back IDs at the tail of the item array that belong to
    // dead items, so they can be handed out again.
    while (self.next_id > 1) {
        const last = self.next_id - 1;
        if (slots[last].meta.ref != 0) break;
        self.next_id = last;
        self.deleteItem(base, last, ctx);
    }

    // Already present: reuse the stored item.
    if (self.lookup(base, value, ctx)) |existing| {
        // The caller's copy of the value is not going to be stored,
        // so let the context clean up anything it owns.
        if (comptime @hasDecl(Context, "deleted")) ctx.deleted(value);

        slots[existing].meta.ref += 1;
        return existing;
    }

    // Not present, so a free ID is required.
    if (self.next_id >= self.layout.cap) {
        // Arbitrarily chosen threshold. When fewer than 90% of the
        // IDs belong to living items a rehash is worthwhile.
        // Otherwise report out of memory, on the assumption that a
        // rehash with only a few IDs to gain would soon lead to
        // running out of memory or rehashing again.
        const rehash_threshold = 0.9;
        const cap_f: f64 = @floatFromInt(self.layout.cap);
        const min_living: Id = @intFromFloat(cap_f * rehash_threshold);

        return if (self.living < min_living)
            AddError.NeedsRehash
        else
            AddError.OutOfMemory;
    }

    const id = self.insert(base, value, self.next_id, ctx);
    const slot = &slots[id];
    slot.meta.ref += 1;
    assert(slot.meta.ref == 1);
    self.living += 1;

    // `insert` may have recycled a dead item's ID instead of using
    // the one we offered; only consume `next_id` if it was used.
    if (id == self.next_id) self.next_id += 1;

    return id;
}
|
|
|
|
/// Add `value` to the set if it is not already present and take one
/// reference on it, preferring the caller-provided `id`. Uses the
/// set's stored context.
///
/// Returns null when the provided ID ended up being used, otherwise
/// the ID the item actually received. Fails with an `AddError` when
/// no ID is available for a new item.
pub fn addWithId(self: *Self, base: anytype, value: T, id: Id) AddError!?Id {
    const ctx = self.context;
    return self.addWithIdContext(base, value, id, ctx);
}
|
|
/// Same as `addWithId`, but with an explicitly supplied context.
///
/// The provided `id` is used when it has been handed out before
/// (`id < next_id`) and either
///   - its item is dead, in which case the slot is recycled, or
///   - its item is alive and equal to `value`.
/// In every other case this falls back to `addContext`.
///
/// Returns null when `id` was used, otherwise the ID the item
/// actually received. Asserts that `id` is not the reserved ID 0.
pub fn addWithIdContext(self: *Self, base: anytype, value: T, id: Id, ctx: Context) AddError!?Id {
    const items = self.items.ptr(base);

    assert(id > 0);

    if (id < self.next_id) {
        if (items[id].meta.ref == 0) {
            // The slot is dead: clear it out so the ID can be reused.
            self.deleteItem(base, id, ctx);

            const added_id = self.upsert(base, value, id, ctx);
            const added = &items[added_id];

            // `upsert` returns either a freshly inserted item (ref 0)
            // or an equal item that is already alive. Only the former
            // adds to the number of living items; counting the latter
            // again would permanently inflate `living`.
            if (added.meta.ref == 0) self.living += 1;
            added.meta.ref += 1;

            return if (added_id == id) null else added_id;
        } else if (ctx.eql(value, items[id].value)) {
            // The caller's copy of the value is not going to be
            // stored, so let the context clean up anything it owns.
            if (comptime @hasDecl(Context, "deleted")) ctx.deleted(value);

            items[id].meta.ref += 1;

            return null;
        }
    }

    return try self.addContext(base, value, ctx);
}
|
|
|
|
/// Take one more reference on the item with the given ID.
///
/// Asserts that the ID is in range and that the item is alive
/// (reference count greater than 0).
pub fn use(self: *const Self, base: anytype, id: Id) void {
    assert(id > 0);
    assert(id < self.layout.cap);

    const meta = &self.items.ptr(base)[id].meta;

    // Taking a reference on a dead item means somebody forgot to
    // take one earlier, released too early, or lied about releasing.
    // Whatever the cause, it is a bug and must not be allowed.
    assert(meta.ref > 0);

    meta.ref += 1;
}
|
|
|
|
/// Take `n` more references on the item with the given ID.
///
/// Asserts that the ID is in range and that the item is alive
/// (reference count greater than 0).
pub fn useMultiple(self: *const Self, base: anytype, id: Id, n: RefCountInt) void {
    assert(id > 0);
    assert(id < self.layout.cap);

    const meta = &self.items.ptr(base)[id].meta;

    // Taking references on a dead item means somebody forgot to
    // take one earlier, released too early, or lied about releasing.
    // Whatever the cause, it is a bug and must not be allowed.
    assert(meta.ref > 0);

    meta.ref += n;
}
|
|
|
|
/// Return a pointer to the value stored under the given ID. The
/// reference count is left untouched.
///
/// Asserts that the ID is in range and that the item is alive
/// (reference count greater than 0).
pub fn get(self: *const Self, base: anytype, id: Id) *T {
    assert(id > 0);
    assert(id < self.layout.cap);

    const slot = &self.items.ptr(base)[id];
    assert(slot.meta.ref > 0);

    return @ptrCast(&slot.value);
}
|
|
|
|
/// Drop one reference on the item with the given ID. When the last
/// reference goes away the item no longer counts as living.
///
/// Asserts that the ID is in range and that the item is alive
/// (reference count greater than 0).
pub fn release(self: *Self, base: anytype, id: Id) void {
    assert(id > 0);
    assert(id < self.layout.cap);

    const meta = &self.items.ptr(base)[id].meta;
    assert(meta.ref > 0);

    meta.ref -= 1;
    if (meta.ref != 0) return;

    self.living -= 1;
}
|
|
|
|
/// Drop `n` references on the item with the given ID. When the last
/// reference goes away the item no longer counts as living.
///
/// Releasing zero references is a no-op.
///
/// Asserts that the ID is in range and that the item's reference
/// count is at least `n`.
///
/// NOTE(review): `n` is typed `Id` here while `useMultiple` takes a
/// `RefCountInt`; the signature is kept as-is for existing callers.
pub fn releaseMultiple(self: *Self, base: anytype, id: Id, n: Id) void {
    assert(id > 0);
    assert(id < self.layout.cap);

    const item = &self.items.ptr(base)[id];
    assert(item.meta.ref >= n);

    // Without this guard, releasing 0 references on an item that is
    // already dead would pass the assert above and then decrement
    // `living` for an item that was never alive.
    if (n == 0) return;

    item.meta.ref -= n;

    if (item.meta.ref == 0) self.living -= 1;
}
|
|
|
|
/// Return the current reference count of the item with the given ID.
///
/// Asserts that the ID is in range. A dead item reports 0.
pub fn refCount(self: *const Self, base: anytype, id: Id) RefCountInt {
    assert(id > 0);
    assert(id < self.layout.cap);

    return self.items.ptr(base)[id].meta.ref;
}
|
|
|
|
/// Return how many items in the set currently have at least one
/// reference.
pub fn count(self: *const Self) usize {
    const alive: usize = self.living;
    return alive;
}
|
|
|
|
/// Delete the item with the given ID: remove it from the hash table,
/// reset its slot and make its ID available for re-use.
///
/// Does nothing when the item is not referenced by the table, i.e.
/// its recorded bucket is out of range or that bucket holds a
/// different ID.
///
/// If the context has a `deleted` callback it is invoked with the
/// item's value before the slot is reset.
fn deleteItem(self: *Self, base: anytype, id: Id, ctx: Context) void {
    const table = self.table.ptr(base);
    const items = self.items.ptr(base);

    // Deliberately a copy: the slot is reset below, but its old
    // bucket and PSL are still needed afterwards.
    const item = items[id];

    // Valid bucket indices are `0..table_cap`, so an index equal to
    // `table_cap` is already out of range.
    if (item.meta.bucket >= self.layout.table_cap) return;

    if (table[item.meta.bucket] != id) return;

    if (comptime @hasDecl(Context, "deleted")) {
        // Inform the context struct that we're
        // deleting the dead item's value for good.
        ctx.deleted(item.value);
    }

    self.psl_stats[item.meta.psl] -= 1;
    table[item.meta.bucket] = 0;
    items[id] = .{};

    // Backward shift: every following item that is displaced from
    // its home bucket (PSL > 0) moves one bucket closer to it, which
    // closes the gap left by the deleted item.
    var p: Id = item.meta.bucket;
    var n: Id = (p +% 1) & self.layout.table_mask;

    while (table[n] != 0 and items[table[n]].meta.psl > 0) {
        const moved = &items[table[n]];

        moved.meta.bucket = p;
        self.psl_stats[moved.meta.psl] -= 1;
        moved.meta.psl -= 1;
        self.psl_stats[moved.meta.psl] += 1;

        table[p] = table[n];
        p = n;
        n = (p +% 1) & self.layout.table_mask;
    }

    // Shrink `max_psl` if no item has the old maximum any more.
    while (self.max_psl > 0 and self.psl_stats[self.max_psl] == 0) {
        self.max_psl -= 1;
    }

    // The last bucket that was shifted out of is now empty.
    table[p] = 0;
}
|
|
|
|
/// Find a living item equal to `value` and return its ID, or null
/// if there is none. Dead items (reference count 0) never match.
///
/// At most `max_psl + 1` buckets are probed, starting at the bucket
/// the value's hash maps to.
fn lookup(self: *Self, base: anytype, value: T, ctx: Context) ?Id {
    // A zero-capacity set has no buckets, so there is nothing that
    // could be probed without reading outside the table.
    if (self.layout.table_cap == 0) return null;

    const table = self.table.ptr(base);
    const items = self.items.ptr(base);

    const hash: u64 = ctx.hash(value);

    for (0..self.max_psl + 1) |i| {
        // Wrapping add: the hash may be close to the maximum u64 and
        // only the masked low bits matter.
        const p: usize = @intCast((hash +% i) & self.layout.table_mask);
        const id = table[p];

        // Empty bucket, our item cannot have probed to
        // any point after this, meaning it's not present.
        if (id == 0) return null;

        // Look at the slot in place rather than copying the item.
        const item = &items[id];

        // An item with a shorter probe sequence length would never
        // end up in the middle of another sequence, since it would
        // be swapped out if inserted before the new sequence, and
        // would not be swapped in if inserted afterwards.
        //
        // As such, our item cannot be present.
        if (item.meta.psl < i) return null;

        // We don't bother checking dead items.
        if (item.meta.ref == 0) continue;

        // If the item is a part of the same probe sequence,
        // we check if it matches the value we're looking for.
        if (item.meta.psl == i and ctx.eql(value, item.value)) {
            return id;
        }
    }

    return null;
}
|
|
|
|
/// Return the ID of the living item equal to `value`, inserting a
/// new item when there is none.
///
/// `new_id` is only offered to `insert` for a new item; it is
/// ignored when an existing item is found. When an existing item is
/// found, `value` is passed to the context's optional `deleted`
/// callback because the set keeps the stored value instead.
fn upsert(self: *Self, base: anytype, value: T, new_id: Id, ctx: Context) Id {
    const found = self.lookup(base, value, ctx) orelse
        return self.insert(base, value, new_id, ctx);

    // The caller's copy of the value is not going to be stored,
    // so let the context clean up anything it owns.
    if (comptime @hasDecl(Context, "deleted")) ctx.deleted(value);

    return found;
}
|
|
|
|
/// Insert `value` into the hash table, offering `new_id` as its ID.
///
/// Returns the ID the value was stored under. This is `new_id`
/// unless a dead item was found along the probe sequence, in which
/// case that item's ID is recycled instead.
///
/// The stored item starts with a reference count of 0; the caller
/// is expected to take the first reference.
///
/// Asserts that no living item equal to `value` is in the table.
fn insert(self: *Self, base: anytype, value: T, new_id: Id, ctx: Context) Id {
    assert(self.lookup(base, value, ctx) == null);

    const table = self.table.ptr(base);
    const items = self.items.ptr(base);

    // The new item is built locally and only written to the item
    // array once its final ID is known.
    var new_item: Item = .{
        .value = value,
        .meta = .{ .psl = 0, .ref = 0 },
    };

    const hash: u64 = ctx.hash(value);

    // The item currently being carried along the probe sequence.
    // It starts out as the new item and changes on every swap.
    var held_id: Id = new_id;
    var held_item: *Item = &new_item;

    // The bucket the new item was placed in (once it has been
    // swapped in), and the ID it will finally be stored under.
    var chosen_p: ?Id = null;
    var chosen_id: Id = new_id;

    for (0..self.layout.table_cap - 1) |i| {
        // Wrapping add: the hash may be close to the maximum u64 and
        // only the masked low bits matter.
        const p: Id = @intCast((hash +% i) & self.layout.table_mask);
        const id = table[p];

        // Empty bucket, put our held item in to it and break.
        if (id == 0) {
            table[p] = held_id;
            held_item.meta.bucket = p;
            self.psl_stats[held_item.meta.psl] += 1;
            self.max_psl = @max(self.max_psl, held_item.meta.psl);
            break;
        }

        const item = &items[id];

        // If there's a dead item then we resurrect it
        // for our value so that we can re-use its ID.
        if (item.meta.ref == 0) {
            if (comptime @hasDecl(Context, "deleted")) {
                // Inform the context struct that we're
                // deleting the dead item's value for good.
                ctx.deleted(item.value);
            }

            chosen_id = id;

            held_item.meta.bucket = p;
            self.psl_stats[item.meta.psl] -= 1;
            self.psl_stats[held_item.meta.psl] += 1;
            self.max_psl = @max(self.max_psl, held_item.meta.psl);

            // If we're not still holding our new item then we
            // need to make sure that we put the re-used ID in
            // the right place, where we previously put new_id.
            //
            // If we are still holding it, the table already has
            // the correct ID in this bucket and nothing changes.
            if (chosen_p) |c| {
                table[c] = id;
                table[p] = held_id;
            }

            break;
        }

        // This item has a lower PSL, swap it out with our held item.
        if (item.meta.psl < held_item.meta.psl) {
            if (held_id == new_id) {
                chosen_p = p;
                new_item.meta.bucket = p;
            }

            table[p] = held_id;
            items[held_id].meta.bucket = p;
            self.psl_stats[held_item.meta.psl] += 1;
            self.max_psl = @max(self.max_psl, held_item.meta.psl);

            held_id = id;
            held_item = item;
            self.psl_stats[item.meta.psl] -= 1;
        }

        // Advance to the next probe position for our held item.
        held_item.meta.psl += 1;
    }

    items[chosen_id] = new_item;
    return chosen_id;
}
|
|
};
|
|
}
|