ghostty/src/terminal/ref_counted_set.zig
Mitchell Hashimoto cb1caff018 terminal: refcountedset passes base memory to all context funcs
This enables these funcs to access memory offsets that may be present in
set items, which is possible since the set itself is in an offset-based
structure.
2024-07-05 21:39:07 -07:00

const std = @import("std");
const assert = std.debug.assert;
const size = @import("size.zig");
const Offset = size.Offset;
const OffsetBuf = size.OffsetBuf;
const fastmem = @import("../fastmem.zig");
/// A reference counted set.
///
/// This set is created with some capacity in mind. You can determine
/// the exact memory requirement of a given capacity by calling `layout`
/// and checking the total size.
///
/// When the set exceeds capacity, an `OutOfMemory` or `NeedsRehash` error
/// is returned from any memory-using methods. The caller is responsible
/// for determining a path forward.
///
/// This set is reference counted. Each item in the set has an associated
/// reference count. The caller is responsible for calling release for an
/// item when it is no longer being used. Items with 0 references will be
/// kept until another item is written to their bucket. This allows items
/// to be resurrected if they are re-added before they get overwritten.
///
/// The backing data structure of this set is an open addressed hash table
/// with linear probing and Robin Hood hashing, and a flat array of items.
///
/// The table maps values to item IDs, which are indices in the item array
/// which contain the item's value and its reference count. Item IDs can be
/// used to efficiently access an item and update its reference count after
/// it has been added to the table, to avoid having to use the hash map to
/// look the value back up.
///
/// ID 0 is reserved and will never be assigned.
///
/// Parameters:
///
/// `Context`
/// A type containing methods to define behaviors. Each method also
/// receives the set's base memory pointer (`base`), since items may
/// contain offsets relative to that base.
/// - `fn hash(*Context, base, T) u64` - Return a hash for an item.
/// - `fn eql(*Context, base, T, T) bool` - Check two items for equality.
/// - `fn deleted(*Context, base, T) void` - [OPTIONAL] Deletion callback.
/// If present, called whenever an item is finally deleted.
/// Useful if the item has memory that needs to be freed.
///
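///
/// A minimal usage sketch. `MyValue`, `MyContext`, `value`, `base`, and
/// `obuf` are hypothetical placeholders, and allocating the backing
/// buffer is elided; see `layout` and `init` below for the real
/// requirements:
///
///     const Set = RefCountedSet(MyValue, u32, u16, MyContext);
///
///     const l = Set.layout(1024);
///     // Allocate `l.total_size` bytes aligned to `Set.base_align`;
///     // `base` below is that memory and `obuf` is an `OffsetBuf`
///     // wrapping it (allocation details elided here).
///     var set = Set.init(obuf, l, .{});
///
///     const id = try set.add(base, value); // ref count is now 1
///     const v = set.get(base, id);         // access without ref'ing
///     set.release(base, id);               // drop the reference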
pub fn RefCountedSet(
comptime T: type,
comptime IdT: type,
comptime RefCountInt: type,
comptime ContextT: type,
) type {
return struct {
const Self = @This();
pub const base_align = @max(
@alignOf(Context),
@alignOf(Layout),
@alignOf(Item),
@alignOf(Id),
);
/// Set item
pub const Item = struct {
/// The value this item represents.
value: T = undefined,
/// Metadata for this item.
meta: Metadata = .{},
pub const Metadata = struct {
/// The bucket in the hash table where this item
/// is referenced.
bucket: Id = std.math.maxInt(Id),
/// The length of the probe sequence between this
/// item's starting bucket and the bucket it's in,
/// used for Robin Hood hashing.
psl: Id = 0,
/// The reference count for this item.
ref: RefCountInt = 0,
};
};
// Re-export these types so they can be referenced by the caller.
pub const Id = IdT;
pub const Context = ContextT;
/// A hash table of item indices
table: Offset(Id),
/// By keeping track of the max probe sequence length
/// we can bail out early when looking up values that
/// aren't present.
max_psl: Id = 0,
/// We keep track of how many items have a PSL of any
/// given length, so that we can shrink max_psl when
/// we delete items.
///
/// A probe sequence of length 32 or more is astronomically
/// unlikely. Roughly a (1/table_cap)^32 -- with any normal
/// table capacity that is so unlikely that it's not worth
/// handling.
psl_stats: [32]Id = [_]Id{0} ** 32,
/// The backing store of items
items: Offset(Item),
/// The number of living items currently stored in the set.
living: Id = 0,
/// The next index to store an item at.
/// Id 0 is reserved for unused items.
next_id: Id = 1,
/// The memory layout of this set's backing buffers, as computed
/// by `layout()` and passed to `init()`.
layout: Layout,
/// An instance of the context structure.
context: Context,
/// Returns the memory layout for the given base offset and
/// desired capacity. The layout can be used by the caller to
/// determine how much memory to allocate, and the layout must
/// be used to initialize the set so that the set knows all
/// the offsets for the various buffers.
///
/// The capacity passed for cap will be used for the hash table,
/// which has a load factor of `0.8125` (13/16), so the number of
/// items which can actually be stored in the set will be smaller.
///
/// The laid out capacity will be at least `cap`, but may be higher,
/// since it is rounded up to the next power of 2 for efficiency.
///
/// The returned layout `cap` property will be 1 more than the number
/// of items that the set can actually store, since ID 0 is reserved.
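///
/// As a worked example (derived from the constants below, not a
/// stability guarantee): `layout(1000)` rounds the table up to
/// `table_cap = 1024` and sets `items_cap = 1024 * 0.8125 = 832`,
/// so the returned `cap` is 832 and at most 831 distinct items can
/// be stored, since ID 0 is never assigned.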
pub fn layout(cap: usize) Layout {
// Experimentally, this load factor works quite well.
const load_factor = 0.8125;
assert(cap <= @as(usize, @intCast(std.math.maxInt(Id))) + 1);
const table_cap: usize = std.math.ceilPowerOfTwoAssert(usize, cap);
const items_cap: usize = @intFromFloat(load_factor * @as(f64, @floatFromInt(table_cap)));
const table_mask: Id = @intCast((@as(usize, 1) << std.math.log2_int(usize, table_cap)) - 1);
const table_start = 0;
const table_end = table_start + table_cap * @sizeOf(Id);
const items_start = std.mem.alignForward(usize, table_end, @alignOf(Item));
const items_end = items_start + items_cap * @sizeOf(Item);
const total_size = items_end;
return .{
.cap = items_cap,
.table_cap = table_cap,
.table_mask = table_mask,
.table_start = table_start,
.items_start = items_start,
.total_size = total_size,
};
}
pub const Layout = struct {
cap: usize,
table_cap: usize,
table_mask: Id,
table_start: usize,
items_start: usize,
total_size: usize,
};
pub fn init(base: OffsetBuf, l: Layout, context: Context) Self {
const table = base.member(Id, l.table_start);
const items = base.member(Item, l.items_start);
@memset(table.ptr(base)[0..l.table_cap], 0);
@memset(items.ptr(base)[0..l.cap], .{});
return .{
.table = table,
.items = items,
.layout = l,
.context = context,
};
}
/// Possible errors for `add` and `addWithId`.
pub const AddError = error{
/// There is not enough memory to add a new item.
/// Remove items or grow and reinitialize.
OutOfMemory,
/// The set needs to be rehashed, as there are many dead
/// items with lower IDs which are inaccessible for re-use.
NeedsRehash,
};
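// A hedged sketch of how a caller might handle these errors; the
// `rehashAndRetry` / `growAndRetry` helpers are hypothetical and the
// actual recovery strategy is entirely up to the caller:
//
//     const id = set.add(base, value) catch |err| switch (err) {
//         // Many dead IDs exist: rebuild the set at the same
//         // capacity to reclaim them, then retry the add.
//         error.NeedsRehash => try rehashAndRetry(&set, base, value),
//         // Genuinely full: compute a larger layout(), move the
//         // live items over, then retry the add.
//         error.OutOfMemory => try growAndRetry(&set, base, value),
//     };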
/// Add an item to the set if not present and increment its ref count.
///
/// Returns the item's ID.
///
/// If the set has no more room, an `OutOfMemory` or `NeedsRehash`
/// error is returned; see `AddError`.
pub fn add(self: *Self, base: anytype, value: T) AddError!Id {
const items = self.items.ptr(base);
// Trim dead items from the end of the list.
while (self.next_id > 1 and items[self.next_id - 1].meta.ref == 0) {
self.next_id -= 1;
self.deleteItem(base, self.next_id);
}
// If we still don't have an available ID, we can't continue.
if (self.next_id >= self.layout.cap) {
// Arbitrarily chosen, threshold for rehashing.
// If less than 90% of currently allocated IDs
// correspond to living items, we should rehash.
// Otherwise, claim we're out of memory because
// we assume that we'll end up running out of
// memory or rehashing again very soon if we
// rehash with only a few IDs left.
const rehash_threshold = 0.9;
if (self.living < @as(Id, @intFromFloat(@as(f64, @floatFromInt(self.layout.cap)) * rehash_threshold))) {
return AddError.NeedsRehash;
}
// If we don't have at least 10% dead items then
// we claim we're out of memory.
return AddError.OutOfMemory;
}
const id = self.upsert(base, value, self.next_id);
items[id].meta.ref += 1;
if (id == self.next_id) self.next_id += 1;
if (items[id].meta.ref == 1) {
self.living += 1;
}
return id;
}
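// A worked example of the threshold above, using the illustrative
// capacity from the `layout` doc comment (cap = 832): IDs run out once
// `next_id` reaches 832. If fewer than 748 items (90% of 832, rounded
// down) are still alive, `add` returns NeedsRehash so the caller can
// reclaim the dead IDs; otherwise it returns OutOfMemory, since a
// rehash would recover too few IDs to be worthwhile.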
/// Add an item to the set if not present and increment its
/// ref count. If possible, use the provided ID.
///
/// Returns the item's ID, or null if the provided ID was used.
///
/// If the set has no more room, an `OutOfMemory` or `NeedsRehash`
/// error is returned; see `AddError`.
pub fn addWithId(self: *Self, base: anytype, value: T, id: Id) AddError!?Id {
const items = self.items.ptr(base);
if (id < self.next_id) {
if (items[id].meta.ref == 0) {
self.deleteItem(base, id);
const added_id = self.upsert(base, value, id);
items[added_id].meta.ref += 1;
self.living += 1;
return if (added_id == id) null else added_id;
} else if (self.context.eql(base, value, items[id].value)) {
items[id].meta.ref += 1;
return null;
}
}
return try self.add(base, value);
}
/// Increment an item's reference count by 1.
///
/// Asserts that the item's reference count is greater than 0.
pub fn use(self: *const Self, base: anytype, id: Id) void {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
// If `use` is being called on an item with 0 references, then
// either someone forgot to call it before, released too early
// or lied about releasing. In any case something is wrong and
// shouldn't be allowed.
assert(item.meta.ref > 0);
item.meta.ref += 1;
}
/// Increment an item's reference count by a specified number.
///
/// Asserts that the item's reference count is greater than 0.
pub fn useMultiple(self: *const Self, base: anytype, id: Id, n: RefCountInt) void {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
// If `useMultiple` is being called on an item with 0 references, then
// either someone forgot to call it before, released too early
// or lied about releasing. In any case something is wrong and
// shouldn't be allowed.
assert(item.meta.ref > 0);
item.meta.ref += n;
}
/// Get an item by its ID without incrementing its reference count.
///
/// Asserts that the item's reference count is greater than 0.
pub fn get(self: *const Self, base: anytype, id: Id) *T {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
assert(item.meta.ref > 0);
return @ptrCast(&item.value);
}
/// Releases a reference to an item by its ID.
///
/// Asserts that the item's reference count is greater than 0.
pub fn release(self: *Self, base: anytype, id: Id) void {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
assert(item.meta.ref > 0);
item.meta.ref -= 1;
if (item.meta.ref == 0) self.living -= 1;
}
/// Release a specified number of references to an item by its ID.
///
/// Asserts that the item's reference count is at least `n`.
pub fn releaseMultiple(self: *Self, base: anytype, id: Id, n: Id) void {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
assert(item.meta.ref >= n);
item.meta.ref -= n;
if (item.meta.ref == 0) {
self.living -= 1;
}
}
/// Get the ref count for an item by its ID.
pub fn refCount(self: *const Self, base: anytype, id: Id) RefCountInt {
assert(id > 0);
assert(id < self.layout.cap);
const items = self.items.ptr(base);
const item = &items[id];
return item.meta.ref;
}
/// Get the current number of non-dead items in the set.
pub fn count(self: *const Self) usize {
return self.living;
}
/// Delete an item, removing any references from
/// the table, and freeing its ID to be re-used.
fn deleteItem(self: *Self, base: anytype, id: Id) void {
const table = self.table.ptr(base);
const items = self.items.ptr(base);
const item = items[id];
if (item.meta.bucket > self.layout.table_cap) return;
if (table[item.meta.bucket] != id) return;
if (comptime @hasDecl(Context, "deleted")) {
// Inform the context struct that we're
// deleting the dead item's value for good.
self.context.deleted(base, item.value);
}
self.psl_stats[item.meta.psl] -= 1;
table[item.meta.bucket] = 0;
items[id] = .{};
var p: Id = item.meta.bucket;
var n: Id = (p +% 1) & self.layout.table_mask;
while (table[n] != 0 and items[table[n]].meta.psl > 0) {
items[table[n]].meta.bucket = p;
self.psl_stats[items[table[n]].meta.psl] -= 1;
items[table[n]].meta.psl -= 1;
self.psl_stats[items[table[n]].meta.psl] += 1;
table[p] = table[n];
p = n;
n = (p +% 1) & self.layout.table_mask;
}
while (self.max_psl > 0 and self.psl_stats[self.max_psl] == 0) {
self.max_psl -= 1;
}
table[p] = 0;
}
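// A small illustrative trace of the backward shift above (bucket
// indices and IDs are made up): suppose buckets [4, 5, 6] hold IDs
// [7, 9, 3] with PSLs [0, 1, 1] and ID 7 is deleted from bucket 4.
// ID 9 shifts from bucket 5 to 4 (PSL 1 -> 0) and ID 3 shifts from
// bucket 6 to 5 (PSL 1 -> 0); bucket 6 is then zeroed. Items with
// PSL 0 are never moved, since they already sit in their home bucket.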
/// Find an item in the table and return its ID.
/// If the item does not exist in the table, null is returned.
fn lookup(self: *Self, base: anytype, value: T) ?Id {
const table = self.table.ptr(base);
const items = self.items.ptr(base);
const hash: u64 = self.context.hash(base, value);
for (0..self.max_psl + 1) |i| {
const p: usize = @intCast((hash + i) & self.layout.table_mask);
const id = table[p];
// Empty bucket, our item cannot have probed to
// any point after this, meaning it's not present.
if (id == 0) {
return null;
}
const item = items[id];
// An item with a shorter probe sequence length would never
// end up in the middle of another sequence, since it would
// be swapped out if inserted before the new sequence, and
// would not be swapped in if inserted afterwards.
//
// As such, our item cannot be present.
if (item.meta.psl < i) {
return null;
}
// We don't bother checking dead items.
if (item.meta.ref == 0) {
continue;
}
// If the item is a part of the same probe sequence,
// we check if it matches the value we're looking for.
if (item.meta.psl == i and
self.context.eql(base, value, item.value))
{
return id;
}
}
return null;
}
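// Illustration of the early-exit above (made-up numbers): while probing
// for a value at distance i = 3 from its home bucket, we land on a
// bucket whose resident has PSL 1. By the Robin Hood invariant described
// above, the value being looked up cannot be stored at or past this
// bucket, so the lookup returns null without scanning further.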
/// Find the provided value in the hash table, or add a new item
/// for it if not present. If a new item is added, `new_id` will
/// be used as the ID. If an existing item is found, the `new_id`
/// is ignored and the existing item's ID is returned.
fn upsert(self: *Self, base: anytype, value: T, new_id: Id) Id {
// If the item already exists, return it.
if (self.lookup(base, value)) |id| return id;
const table = self.table.ptr(base);
const items = self.items.ptr(base);
// The new item that we'll put into the table.
var new_item: Item = .{
.value = value,
.meta = .{ .psl = 0, .ref = 0 },
};
const hash: u64 = self.context.hash(base, value);
var held_id: Id = new_id;
var held_item: *Item = &new_item;
var chosen_p: ?Id = null;
var chosen_id: Id = new_id;
for (0..self.layout.table_cap - 1) |i| {
const p: Id = @intCast((hash + i) & self.layout.table_mask);
const id = table[p];
// Empty bucket, put our held item into it and break.
if (id == 0) {
table[p] = held_id;
held_item.meta.bucket = p;
self.psl_stats[held_item.meta.psl] += 1;
self.max_psl = @max(self.max_psl, held_item.meta.psl);
break;
}
const item = &items[id];
// If there's a dead item then we resurrect it
// for our value so that we can re-use its ID.
if (item.meta.ref == 0) {
if (comptime @hasDecl(Context, "deleted")) {
// Inform the context struct that we're
// deleting the dead item's value for good.
self.context.deleted(base, item.value);
}
chosen_id = id;
held_item.meta.bucket = p;
self.psl_stats[item.meta.psl] -= 1;
self.psl_stats[held_item.meta.psl] += 1;
self.max_psl = @max(self.max_psl, held_item.meta.psl);
// If we're not still holding our new item then we
// need to make sure that we put the re-used ID in
// the right place, where we previously put new_id.
if (chosen_p) |c| {
table[c] = id;
table[p] = held_id;
} else {
// If we're still holding our new item then we
// don't actually have to do anything, because
// the table already has the correct ID here.
}
break;
}
// This item has a lower PSL, swap it out with our held item.
if (item.meta.psl < held_item.meta.psl) {
if (held_id == new_id) {
chosen_p = p;
new_item.meta.bucket = p;
}
table[p] = held_id;
items[held_id].meta.bucket = p;
self.psl_stats[held_item.meta.psl] += 1;
self.max_psl = @max(self.max_psl, held_item.meta.psl);
held_id = id;
held_item = item;
self.psl_stats[item.meta.psl] -= 1;
}
// Advance to the next probe position for our held item.
held_item.meta.psl += 1;
}
items[chosen_id] = new_item;
return chosen_id;
}
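// A small illustrative trace of the Robin Hood swap above (made-up
// buckets and IDs): a new value's home bucket is 4, where bucket 4
// holds an item with PSL 2 and bucket 5 holds one with PSL 0. The new
// item (PSL 0) cannot displace the PSL-2 item at bucket 4, so it
// probes on with PSL 1; at bucket 5 its PSL now exceeds the resident's
// PSL 0, so they swap: the new item settles in bucket 5 with PSL 1 and
// the displaced item continues probing from bucket 6 with PSL 1.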
};
}