//! This file contains a fork of the Zig stdlib HashMap implementation tuned
//! for use with our terminal page representation.
//!
//! The main goal we need to achieve that wasn't possible with the stdlib
//! HashMap is to utilize offsets rather than full pointers so that we can
//! copy around the entire backing memory and keep the hash map working.
//!
//! Additionally, for serialization/deserialization purposes, we need to be
//! able to create a HashMap instance and manually set the offsets up. The
//! stdlib HashMap does not export Metadata so this isn't possible.
//!
//! Also, I want to be able to determine the possible capacity for a given
//! K,V type and a fixed amount of memory. The stdlib HashMap doesn't publish
//! its internal allocation size calculation.
//!
//! Finally, I removed many of the APIs that we'll never require for our
//! usage just so that this file is smaller, easier to understand, and has
//! less opportunity for bugs.
//!
//! Besides these shortcomings, the stdlib HashMap has some great qualities
//! that we want to keep, namely the fact that it is backed by a single large
//! allocation rather than pointers to separate allocations. This is important
//! because our terminal page representation is backed by a single large
//! allocation so we can give the HashMap a slice of memory to operate in.
//!
//! I haven't carefully benchmarked this implementation against other hash
//! map implementations. It's possible using some of the newer variants out
//! there would be better. However, I trust the built-in version is pretty good
//! and it's more important to get the terminal page representation working
//! first; then we can measure and improve this later if we find it to be a
//! bottleneck.
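//!
//! A minimal usage sketch (mirroring the tests at the bottom of this file;
//! `alloc` is assumed to be a `std.mem.Allocator`):
//!
//!     const Map = AutoOffsetHashMap(u32, u32);
//!     const layout = Map.layout(16);
//!     const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
//!     defer alloc.free(buf);
//!     var offset_map = Map.init(OffsetBuf.init(buf), layout);
//!     var map = offset_map.map(buf.ptr);
//!     try map.put(1, 2);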
const std = @import("std");
const builtin = @import("builtin");
const assert = std.debug.assert;
const autoHash = std.hash.autoHash;
const math = std.math;
const mem = std.mem;
const Allocator = mem.Allocator;
const Wyhash = std.hash.Wyhash;
const Offset = @import("size.zig").Offset;
const OffsetBuf = @import("size.zig").OffsetBuf;
const getOffset = @import("size.zig").getOffset;
pub fn AutoOffsetHashMap(comptime K: type, comptime V: type) type {
return OffsetHashMap(K, V, AutoContext(K));
}
fn AutoHashMapUnmanaged(comptime K: type, comptime V: type) type {
return HashMapUnmanaged(K, V, AutoContext(K));
}
fn AutoContext(comptime K: type) type {
return struct {
pub const hash = std.hash_map.getAutoHashFn(K, @This());
pub const eql = std.hash_map.getAutoEqlFn(K, @This());
};
}
/// A HashMap type that uses offsets rather than pointers, making it
/// possible to efficiently move around the backing memory without
/// invalidating the HashMap.
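///
/// Because only offsets are stored, a usable map can be rebuilt after the
/// backing memory moves with a single call (a sketch; `new_base` is
/// assumed to point at the relocated copy of the buffer):
///
///     var m = offset_map.map(new_base);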
pub fn OffsetHashMap(
comptime K: type,
comptime V: type,
comptime Context: type,
) type {
return struct {
const Self = @This();
/// This is the pointer-based map that we're wrapping.
pub const Unmanaged = HashMapUnmanaged(K, V, Context);
pub const Layout = Unmanaged.Layout;
/// This is the alignment that the base pointer must have.
pub const base_align = Unmanaged.base_align;
metadata: Offset(Unmanaged.Metadata) = .{},
/// Returns the total size of the backing memory required for a
/// HashMap with the given capacity. The base ptr must also be
/// aligned to base_align.
pub fn layout(cap: Unmanaged.Size) Layout {
return Unmanaged.layoutForCapacity(cap);
}
/// Initialize a new HashMap with the given capacity and backing
/// memory. The backing memory must be aligned to base_align.
pub fn init(buf: OffsetBuf, l: Layout) Self {
assert(@intFromPtr(buf.start()) % base_align == 0);
const m = Unmanaged.init(buf, l);
return .{ .metadata = getOffset(
Unmanaged.Metadata,
buf,
@ptrCast(m.metadata.?),
) };
}
/// Returns the pointer-based map from a base pointer.
pub fn map(self: Self, base: anytype) Unmanaged {
return .{ .metadata = self.metadata.ptr(base) };
}
};
}
/// Fork of stdlib.HashMap as of Zig 0.12 modified to use offsets
/// for the key/values pointer. The metadata is still a pointer to limit
/// the amount of arithmetic required to access it. See the file comment
/// for full details.
fn HashMapUnmanaged(
comptime K: type,
comptime V: type,
comptime Context: type,
) type {
return struct {
const Self = @This();
comptime {
std.hash_map.verifyContext(Context, K, K, u64, false);
assert(@alignOf(Metadata) == 1);
}
const header_align = @alignOf(Header);
const key_align = if (@sizeOf(K) == 0) 1 else @alignOf(K);
const val_align = if (@sizeOf(V) == 0) 1 else @alignOf(V);
const base_align = @max(header_align, key_align, val_align);
// This is actually a midway pointer to the single buffer containing
// a `Header` field, the `Metadata`s, and the key/value arrays.
// At `-@sizeOf(Header)` is the Header field.
// The keys and values arrays are found through the offsets stored in
// the header, which are relative to this metadata pointer.
// This means that the hashmap only holds one live allocation, to
// reduce memory fragmentation and struct size.
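//
// As laid out by `layoutForCapacity`, the single backing buffer looks
// like this (alignment padding between regions elided):
//
//   [Header][Metadata * capacity][K * capacity][V * capacity]
//           ^
//           self.metadata points here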
/// Pointer to the metadata.
metadata: ?[*]Metadata = null,
// This is purely empirical and not a /very smart magic constant™/.
/// Capacity of the first grow when bootstrapping the hashmap.
const minimal_capacity = 8;
// This hashmap is specially designed for sizes that fit in a u32.
pub const Size = u32;
// u64 hashes guarantee us that the fingerprint bits will never be used
// to compute the index of a slot, maximizing the use of entropy.
pub const Hash = u64;
pub const Entry = struct {
key_ptr: *K,
value_ptr: *V,
};
pub const KV = struct {
key: K,
value: V,
};
const Header = struct {
/// The keys/values offsets are relative to the metadata pointer.
values: Offset(V),
keys: Offset(K),
capacity: Size,
size: Size,
};
/// Metadata for a slot. It can be in three states: empty, used or
/// tombstone. Tombstones indicate that an entry was previously used,
/// they are a simple way to handle removal.
/// To this state, we add 7 bits from the slot's key hash. These are
/// used as a fast way to disambiguate between entries without
/// having to use the equality function. If two fingerprints are
/// different, we know that we don't have to compare the keys at all.
/// The 7 bits are the highest ones from a 64 bit hash. This way, not
/// only do we use the `log2(capacity)` lowest bits from the hash to
/// determine a slot index, but we also use 7 more bits to quickly
/// resolve collisions when multiple elements with different hashes end
/// up wanting to be in the same slot.
/// Not using the equality function means we don't have to read into
/// the entries array, likely avoiding a cache miss and a potentially
/// costly function call.
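///
/// For example, with capacity 8 the probe mask is `0b111`, so for a
/// 64-bit hash `h` the slot index is `h & 0b111` (the lowest bits) while
/// the fingerprint is the highest 7 bits, `@truncate(h >> 57)`; the two
/// never overlap.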
const Metadata = packed struct {
const FingerPrint = u7;
const free: FingerPrint = 0;
const tombstone: FingerPrint = 1;
fingerprint: FingerPrint = free,
used: u1 = 0,
const slot_free = @as(u8, @bitCast(Metadata{ .fingerprint = free }));
const slot_tombstone = @as(u8, @bitCast(Metadata{ .fingerprint = tombstone }));
pub fn isUsed(self: Metadata) bool {
return self.used == 1;
}
pub fn isTombstone(self: Metadata) bool {
return @as(u8, @bitCast(self)) == slot_tombstone;
}
pub fn isFree(self: Metadata) bool {
return @as(u8, @bitCast(self)) == slot_free;
}
pub fn takeFingerprint(hash: Hash) FingerPrint {
const hash_bits = @typeInfo(Hash).Int.bits;
const fp_bits = @typeInfo(FingerPrint).Int.bits;
return @as(FingerPrint, @truncate(hash >> (hash_bits - fp_bits)));
}
pub fn fill(self: *Metadata, fp: FingerPrint) void {
self.used = 1;
self.fingerprint = fp;
}
pub fn remove(self: *Metadata) void {
self.used = 0;
self.fingerprint = tombstone;
}
};
comptime {
assert(@sizeOf(Metadata) == 1);
assert(@alignOf(Metadata) == 1);
}
pub const Iterator = struct {
hm: *const Self,
index: Size = 0,
pub fn next(it: *Iterator) ?Entry {
assert(it.index <= it.hm.capacity());
if (it.hm.header().size == 0) return null;
const cap = it.hm.capacity();
const end = it.hm.metadata.? + cap;
var metadata = it.hm.metadata.? + it.index;
while (metadata != end) : ({
metadata += 1;
it.index += 1;
}) {
if (metadata[0].isUsed()) {
const key = &it.hm.keys()[it.index];
const value = &it.hm.values()[it.index];
it.index += 1;
return Entry{ .key_ptr = key, .value_ptr = value };
}
}
return null;
}
};
pub const KeyIterator = FieldIterator(K);
pub const ValueIterator = FieldIterator(V);
fn FieldIterator(comptime T: type) type {
return struct {
len: usize,
metadata: [*]const Metadata,
items: [*]T,
pub fn next(self: *@This()) ?*T {
while (self.len > 0) {
self.len -= 1;
const used = self.metadata[0].isUsed();
const item = &self.items[0];
self.metadata += 1;
self.items += 1;
if (used) {
return item;
}
}
return null;
}
};
}
pub const GetOrPutResult = struct {
key_ptr: *K,
value_ptr: *V,
found_existing: bool,
};
/// Initialize a hash map with a given capacity and a buffer. The
/// buffer must be at least `total_size` bytes as computed by
/// `layoutForCapacity` and must be aligned to `base_align`.
pub fn init(buf: OffsetBuf, layout: Layout) Self {
assert(@intFromPtr(buf.start()) % base_align == 0);
// Get all our main pointers
const metadata_buf = buf.rebase(@sizeOf(Header));
const metadata_ptr: [*]Metadata = @ptrCast(metadata_buf.start());
// Build our map
var map: Self = .{ .metadata = metadata_ptr };
const hdr = map.header();
hdr.capacity = layout.capacity;
hdr.size = 0;
if (@sizeOf([*]K) != 0) hdr.keys = metadata_buf.member(K, layout.keys_start);
if (@sizeOf([*]V) != 0) hdr.values = metadata_buf.member(V, layout.vals_start);
map.initMetadatas();
return map;
}
pub fn ensureTotalCapacity(self: *Self, new_size: Size) Allocator.Error!void {
if (new_size > self.header().size) {
try self.growIfNeeded(new_size - self.header().size);
}
}
pub fn ensureUnusedCapacity(self: *Self, additional_size: Size) Allocator.Error!void {
return ensureTotalCapacity(self, self.count() + additional_size);
}
pub fn clearRetainingCapacity(self: *Self) void {
if (self.metadata) |_| {
self.initMetadatas();
self.header().size = 0;
}
}
pub fn count(self: *const Self) Size {
return self.header().size;
}
fn header(self: *const Self) *Header {
return @ptrCast(@as([*]Header, @ptrCast(@alignCast(self.metadata.?))) - 1);
}
fn keys(self: *const Self) [*]K {
return self.header().keys.ptr(self.metadata.?);
}
fn values(self: *const Self) [*]V {
return self.header().values.ptr(self.metadata.?);
}
pub fn capacity(self: *const Self) Size {
if (self.metadata == null) return 0;
return self.header().capacity;
}
pub fn iterator(self: *const Self) Iterator {
return .{ .hm = self };
}
pub fn keyIterator(self: *const Self) KeyIterator {
if (self.metadata) |metadata| {
return .{
.len = self.capacity(),
.metadata = metadata,
.items = self.keys(),
};
} else {
return .{
.len = 0,
.metadata = undefined,
.items = undefined,
};
}
}
pub fn valueIterator(self: *const Self) ValueIterator {
if (self.metadata) |metadata| {
return .{
.len = self.capacity(),
.metadata = metadata,
.items = self.values(),
};
} else {
return .{
.len = 0,
.metadata = undefined,
.items = undefined,
};
}
}
/// Insert an entry in the map. Assumes it is not already present.
pub fn putNoClobber(self: *Self, key: K, value: V) Allocator.Error!void {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call putNoClobberContext instead.");
return self.putNoClobberContext(key, value, undefined);
}
pub fn putNoClobberContext(self: *Self, key: K, value: V, ctx: Context) Allocator.Error!void {
assert(!self.containsContext(key, ctx));
try self.growIfNeeded(1);
self.putAssumeCapacityNoClobberContext(key, value, ctx);
}
/// Asserts there is enough capacity to store the new key-value pair.
/// Clobbers any existing data. To detect if a put would clobber
/// existing data, see `getOrPutAssumeCapacity`.
pub fn putAssumeCapacity(self: *Self, key: K, value: V) void {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call putAssumeCapacityContext instead.");
return self.putAssumeCapacityContext(key, value, undefined);
}
pub fn putAssumeCapacityContext(self: *Self, key: K, value: V, ctx: Context) void {
const gop = self.getOrPutAssumeCapacityContext(key, ctx);
gop.value_ptr.* = value;
}
/// Insert an entry in the map. Assumes it is not already present,
/// and that no allocation is needed.
pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call putAssumeCapacityNoClobberContext instead.");
return self.putAssumeCapacityNoClobberContext(key, value, undefined);
}
pub fn putAssumeCapacityNoClobberContext(self: *Self, key: K, value: V, ctx: Context) void {
assert(!self.containsContext(key, ctx));
const hash = ctx.hash(key);
const mask = self.capacity() - 1;
var idx = @as(usize, @truncate(hash & mask));
var metadata = self.metadata.? + idx;
while (metadata[0].isUsed()) {
idx = (idx + 1) & mask;
metadata = self.metadata.? + idx;
}
const fingerprint = Metadata.takeFingerprint(hash);
metadata[0].fill(fingerprint);
self.keys()[idx] = key;
self.values()[idx] = value;
self.header().size += 1;
}
/// Inserts a new `Entry` into the hash map, returning the previous one, if any.
pub fn fetchPut(self: *Self, key: K, value: V) Allocator.Error!?KV {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call fetchPutContext instead.");
return self.fetchPutContext(key, value, undefined);
}
pub fn fetchPutContext(self: *Self, key: K, value: V, ctx: Context) Allocator.Error!?KV {
const gop = try self.getOrPutContext(key, ctx);
var result: ?KV = null;
if (gop.found_existing) {
result = KV{
.key = gop.key_ptr.*,
.value = gop.value_ptr.*,
};
}
gop.value_ptr.* = value;
return result;
}
/// Inserts a new `Entry` into the hash map, returning the previous one, if any.
/// If insertion happens, asserts there is enough capacity without allocating.
pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?KV {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call fetchPutAssumeCapacityContext instead.");
return self.fetchPutAssumeCapacityContext(key, value, undefined);
}
pub fn fetchPutAssumeCapacityContext(self: *Self, key: K, value: V, ctx: Context) ?KV {
const gop = self.getOrPutAssumeCapacityContext(key, ctx);
var result: ?KV = null;
if (gop.found_existing) {
result = KV{
.key = gop.key_ptr.*,
.value = gop.value_ptr.*,
};
}
gop.value_ptr.* = value;
return result;
}
/// If there is an `Entry` with a matching key, it is deleted from
/// the hash map, and then returned from this function.
pub fn fetchRemove(self: *Self, key: K) ?KV {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call fetchRemoveContext instead.");
return self.fetchRemoveContext(key, undefined);
}
pub fn fetchRemoveContext(self: *Self, key: K, ctx: Context) ?KV {
return self.fetchRemoveAdapted(key, ctx);
}
pub fn fetchRemoveAdapted(self: *Self, key: anytype, ctx: anytype) ?KV {
if (self.getIndex(key, ctx)) |idx| {
const old_key = &self.keys()[idx];
const old_val = &self.values()[idx];
const result = KV{
.key = old_key.*,
.value = old_val.*,
};
self.metadata.?[idx].remove();
old_key.* = undefined;
old_val.* = undefined;
self.header().size -= 1;
return result;
}
return null;
}
/// Find the index containing the data for the given key.
/// Whether this function returns null is almost always
/// branched on after this function returns, and this function
/// returns null/not null from separate code paths. We
/// want the optimizer to remove that branch and instead directly
/// fuse the basic blocks after the branch to the basic blocks
/// from this function. To encourage that, this function is
/// marked as inline.
inline fn getIndex(self: Self, key: anytype, ctx: anytype) ?usize {
comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(key), K, Hash, false);
if (self.header().size == 0) {
return null;
}
// If you get a compile error on this line, it means that your generic hash
// function is invalid for these parameters.
const hash = ctx.hash(key);
// verifyContext can't verify the return type of generic hash functions,
// so we need to double-check it here.
if (@TypeOf(hash) != Hash) {
@compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic hash function that returns the wrong type! " ++ @typeName(Hash) ++ " was expected, but found " ++ @typeName(@TypeOf(hash)));
}
const mask = self.capacity() - 1;
const fingerprint = Metadata.takeFingerprint(hash);
// Don't loop indefinitely when there are no empty slots.
var limit = self.capacity();
var idx = @as(usize, @truncate(hash & mask));
var metadata = self.metadata.? + idx;
while (!metadata[0].isFree() and limit != 0) {
if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) {
const test_key = &self.keys()[idx];
// If you get a compile error on this line, it means that your generic eql
// function is invalid for these parameters.
const eql = ctx.eql(key, test_key.*);
// verifyContext can't verify the return type of generic eql functions,
// so we need to double-check it here.
if (@TypeOf(eql) != bool) {
@compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic eql function that returns the wrong type! bool was expected, but found " ++ @typeName(@TypeOf(eql)));
}
if (eql) {
return idx;
}
}
limit -= 1;
idx = (idx + 1) & mask;
metadata = self.metadata.? + idx;
}
return null;
}
pub fn getEntry(self: Self, key: K) ?Entry {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call getEntryContext instead.");
return self.getEntryContext(key, undefined);
}
pub fn getEntryContext(self: Self, key: K, ctx: Context) ?Entry {
return self.getEntryAdapted(key, ctx);
}
pub fn getEntryAdapted(self: Self, key: anytype, ctx: anytype) ?Entry {
if (self.getIndex(key, ctx)) |idx| {
return Entry{
.key_ptr = &self.keys()[idx],
.value_ptr = &self.values()[idx],
};
}
return null;
}
/// Insert an entry if the associated key is not already present, otherwise update preexisting value.
pub fn put(self: *Self, key: K, value: V) Allocator.Error!void {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call putContext instead.");
return self.putContext(key, value, undefined);
}
pub fn putContext(self: *Self, key: K, value: V, ctx: Context) Allocator.Error!void {
const result = try self.getOrPutContext(key, ctx);
result.value_ptr.* = value;
}
/// Get an optional pointer to the actual key associated with adapted key, if present.
pub fn getKeyPtr(self: Self, key: K) ?*K {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call getKeyPtrContext instead.");
return self.getKeyPtrContext(key, undefined);
}
pub fn getKeyPtrContext(self: Self, key: K, ctx: Context) ?*K {
return self.getKeyPtrAdapted(key, ctx);
}
pub fn getKeyPtrAdapted(self: Self, key: anytype, ctx: anytype) ?*K {
if (self.getIndex(key, ctx)) |idx| {
return &self.keys()[idx];
}
return null;
}
/// Get a copy of the actual key associated with adapted key, if present.
pub fn getKey(self: Self, key: K) ?K {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call getKeyContext instead.");
return self.getKeyContext(key, undefined);
}
pub fn getKeyContext(self: Self, key: K, ctx: Context) ?K {
return self.getKeyAdapted(key, ctx);
}
pub fn getKeyAdapted(self: Self, key: anytype, ctx: anytype) ?K {
if (self.getIndex(key, ctx)) |idx| {
return self.keys()[idx];
}
return null;
}
/// Get an optional pointer to the value associated with key, if present.
pub fn getPtr(self: Self, key: K) ?*V {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call getPtrContext instead.");
return self.getPtrContext(key, undefined);
}
pub fn getPtrContext(self: Self, key: K, ctx: Context) ?*V {
return self.getPtrAdapted(key, ctx);
}
pub fn getPtrAdapted(self: Self, key: anytype, ctx: anytype) ?*V {
if (self.getIndex(key, ctx)) |idx| {
return &self.values()[idx];
}
return null;
}
/// Get a copy of the value associated with key, if present.
pub fn get(self: Self, key: K) ?V {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call getContext instead.");
return self.getContext(key, undefined);
}
pub fn getContext(self: Self, key: K, ctx: Context) ?V {
return self.getAdapted(key, ctx);
}
pub fn getAdapted(self: Self, key: anytype, ctx: anytype) ?V {
if (self.getIndex(key, ctx)) |idx| {
return self.values()[idx];
}
return null;
}
pub fn getOrPut(self: *Self, key: K) Allocator.Error!GetOrPutResult {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call getOrPutContext instead.");
return self.getOrPutContext(key, undefined);
}
pub fn getOrPutContext(self: *Self, key: K, ctx: Context) Allocator.Error!GetOrPutResult {
const gop = try self.getOrPutContextAdapted(key, ctx);
if (!gop.found_existing) {
gop.key_ptr.* = key;
}
return gop;
}
pub fn getOrPutAdapted(self: *Self, key: anytype, key_ctx: anytype) Allocator.Error!GetOrPutResult {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call getOrPutContextAdapted instead.");
return self.getOrPutContextAdapted(key, key_ctx);
}
pub fn getOrPutContextAdapted(self: *Self, key: anytype, key_ctx: anytype) Allocator.Error!GetOrPutResult {
self.growIfNeeded(1) catch |err| {
// If allocation fails, try to do the lookup anyway.
// If we find an existing item, we can return it.
// Otherwise return the error, we could not add another.
const index = self.getIndex(key, key_ctx) orelse return err;
return GetOrPutResult{
.key_ptr = &self.keys()[index],
.value_ptr = &self.values()[index],
.found_existing = true,
};
};
return self.getOrPutAssumeCapacityAdapted(key, key_ctx);
}
pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call getOrPutAssumeCapacityContext instead.");
return self.getOrPutAssumeCapacityContext(key, undefined);
}
pub fn getOrPutAssumeCapacityContext(self: *Self, key: K, ctx: Context) GetOrPutResult {
const result = self.getOrPutAssumeCapacityAdapted(key, ctx);
if (!result.found_existing) {
result.key_ptr.* = key;
}
return result;
}
pub fn getOrPutAssumeCapacityAdapted(self: *Self, key: anytype, ctx: anytype) GetOrPutResult {
comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(key), K, Hash, false);
// If you get a compile error on this line, it means that your generic hash
// function is invalid for these parameters.
const hash = ctx.hash(key);
// verifyContext can't verify the return type of generic hash functions,
// so we need to double-check it here.
if (@TypeOf(hash) != Hash) {
@compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic hash function that returns the wrong type! " ++ @typeName(Hash) ++ " was expected, but found " ++ @typeName(@TypeOf(hash)));
}
const mask = self.capacity() - 1;
const fingerprint = Metadata.takeFingerprint(hash);
var limit = self.capacity();
var idx = @as(usize, @truncate(hash & mask));
var first_tombstone_idx: usize = self.capacity(); // invalid index
var metadata = self.metadata.? + idx;
while (!metadata[0].isFree() and limit != 0) {
if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) {
const test_key = &self.keys()[idx];
// If you get a compile error on this line, it means that your generic eql
// function is invalid for these parameters.
const eql = ctx.eql(key, test_key.*);
// verifyContext can't verify the return type of generic eql functions,
// so we need to double-check it here.
if (@TypeOf(eql) != bool) {
@compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic eql function that returns the wrong type! bool was expected, but found " ++ @typeName(@TypeOf(eql)));
}
if (eql) {
return GetOrPutResult{
.key_ptr = test_key,
.value_ptr = &self.values()[idx],
.found_existing = true,
};
}
} else if (first_tombstone_idx == self.capacity() and metadata[0].isTombstone()) {
first_tombstone_idx = idx;
}
limit -= 1;
idx = (idx + 1) & mask;
metadata = self.metadata.? + idx;
}
if (first_tombstone_idx < self.capacity()) {
// Cheaply shorten future probe sequences after deletions by recycling a tombstone slot.
idx = first_tombstone_idx;
metadata = self.metadata.? + idx;
}
metadata[0].fill(fingerprint);
const new_key = &self.keys()[idx];
const new_value = &self.values()[idx];
new_key.* = undefined;
new_value.* = undefined;
self.header().size += 1;
return GetOrPutResult{
.key_ptr = new_key,
.value_ptr = new_value,
.found_existing = false,
};
}
pub fn getOrPutValue(self: *Self, key: K, value: V) Allocator.Error!Entry {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call getOrPutValueContext instead.");
return self.getOrPutValueContext(key, value, undefined);
}
pub fn getOrPutValueContext(self: *Self, key: K, value: V, ctx: Context) Allocator.Error!Entry {
const res = try self.getOrPutAdapted(key, ctx);
if (!res.found_existing) {
res.key_ptr.* = key;
res.value_ptr.* = value;
}
return Entry{ .key_ptr = res.key_ptr, .value_ptr = res.value_ptr };
}
/// Return true if there is a value associated with key in the map.
pub fn contains(self: *const Self, key: K) bool {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call containsContext instead.");
return self.containsContext(key, undefined);
}
pub fn containsContext(self: *const Self, key: K, ctx: Context) bool {
return self.containsAdapted(key, ctx);
}
pub fn containsAdapted(self: *const Self, key: anytype, ctx: anytype) bool {
return self.getIndex(key, ctx) != null;
}
fn removeByIndex(self: *Self, idx: usize) void {
self.metadata.?[idx].remove();
self.keys()[idx] = undefined;
self.values()[idx] = undefined;
self.header().size -= 1;
}
/// If there is an `Entry` with a matching key, it is deleted from
/// the hash map, and this function returns true. Otherwise this
/// function returns false.
pub fn remove(self: *Self, key: K) bool {
if (@sizeOf(Context) != 0)
@compileError("Cannot infer context " ++ @typeName(Context) ++ ", call removeContext instead.");
return self.removeContext(key, undefined);
}
pub fn removeContext(self: *Self, key: K, ctx: Context) bool {
return self.removeAdapted(key, ctx);
}
pub fn removeAdapted(self: *Self, key: anytype, ctx: anytype) bool {
if (self.getIndex(key, ctx)) |idx| {
self.removeByIndex(idx);
return true;
}
return false;
}
/// Delete the entry with key pointed to by key_ptr from the hash map.
/// key_ptr is assumed to be a valid pointer to a key that is present
/// in the hash map.
pub fn removeByPtr(self: *Self, key_ptr: *K) void {
// TODO: replace with pointer subtraction once supported by zig
// if @sizeOf(K) == 0 then there is at most one item in the hash
// map, which is assumed to exist as key_ptr must be valid. This
// item must be at index 0.
const idx = if (@sizeOf(K) > 0)
(@intFromPtr(key_ptr) - @intFromPtr(self.keys())) / @sizeOf(K)
else
0;
self.removeByIndex(idx);
}
fn initMetadatas(self: *Self) void {
@memset(@as([*]u8, @ptrCast(self.metadata.?))[0 .. @sizeOf(Metadata) * self.capacity()], 0);
}
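/// Unlike the stdlib HashMap, this fork never grows: the backing buffer
/// is fixed at init time. This only verifies that `new_count` additional
/// entries fit within the remaining capacity and errors otherwise; the
/// name is kept for parity with the stdlib implementation.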
fn growIfNeeded(self: *Self, new_count: Size) Allocator.Error!void {
const available = self.capacity() - self.header().size;
if (new_count > available) return error.OutOfMemory;
}
/// The memory layout for the underlying buffer for a given capacity.
const Layout = struct {
/// The total size of the buffer required. The buffer is expected
/// to be aligned to `base_align`.
total_size: usize,
/// The offset to the start of the keys data.
keys_start: usize,
/// The offset to the start of the values data.
vals_start: usize,
/// The capacity that was used to calculate this layout.
capacity: Size,
};
/// Returns the memory layout for the buffer for a given capacity.
/// The capacity must be zero or a power of two; the probe sequence
/// masks the hash with `capacity - 1`, so this is a design requirement
/// for this hash map implementation.
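///
/// For example, for `AutoOffsetHashMap(u32, u32)` with capacity 16, the
/// buffer packs `@sizeOf(Header)` bytes of header, 16 metadata bytes
/// (one per slot), then 16 keys and 16 values, with alignment padding
/// between the regions as needed.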
pub fn layoutForCapacity(new_capacity: Size) Layout {
assert(new_capacity == 0 or std.math.isPowerOfTwo(new_capacity));
// Pack our metadata, keys, and values.
const meta_start = @sizeOf(Header);
const meta_end = @sizeOf(Header) + new_capacity * @sizeOf(Metadata);
const keys_start = std.mem.alignForward(usize, meta_end, key_align);
const keys_end = keys_start + new_capacity * @sizeOf(K);
const vals_start = std.mem.alignForward(usize, keys_end, val_align);
const vals_end = vals_start + new_capacity * @sizeOf(V);
// Our total memory size required is the end of our values
// aligned to the base required alignment.
const total_size = std.mem.alignForward(usize, vals_end, base_align);
// The offsets we actually store in the map are from the
// metadata pointer so that we can use self.metadata as
// the base.
const keys_offset = keys_start - meta_start;
const vals_offset = vals_start - meta_start;
return .{
.total_size = total_size,
.keys_start = keys_offset,
.vals_start = vals_offset,
.capacity = new_capacity,
};
}
};
}
const testing = std.testing;
const expect = std.testing.expect;
const expectEqual = std.testing.expectEqual;
test "HashMap basic usage" {
const Map = AutoHashMapUnmanaged(u32, u32);
const alloc = testing.allocator;
const cap = 16;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
const count = 5;
var i: u32 = 0;
var total: u32 = 0;
while (i < count) : (i += 1) {
try map.put(i, i);
total += i;
}
var sum: u32 = 0;
var it = map.iterator();
while (it.next()) |kv| {
sum += kv.key_ptr.*;
}
try expectEqual(total, sum);
i = 0;
sum = 0;
while (i < count) : (i += 1) {
try expectEqual(i, map.get(i).?);
sum += map.get(i).?;
}
try expectEqual(total, sum);
}
test "HashMap ensureTotalCapacity" {
const Map = AutoHashMapUnmanaged(i32, i32);
const cap = 32;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
try map.ensureTotalCapacity(20);
const initial_capacity = map.capacity();
try testing.expect(initial_capacity >= 20);
var i: i32 = 0;
while (i < 20) : (i += 1) {
try testing.expect(map.fetchPutAssumeCapacity(i, i + 10) == null);
}
// assume-capacity puts should never resize the map
try testing.expect(initial_capacity == map.capacity());
}
test "HashMap ensureUnusedCapacity with tombstones" {
const Map = AutoHashMapUnmanaged(i32, i32);
const cap = 32;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
var i: i32 = 0;
while (i < 100) : (i += 1) {
try map.ensureUnusedCapacity(1);
map.putAssumeCapacity(i, i);
_ = map.remove(i);
}
}
test "HashMap clearRetainingCapacity" {
const Map = AutoHashMapUnmanaged(u32, u32);
const cap = 16;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
map.clearRetainingCapacity();
try map.put(1, 1);
try expectEqual(map.get(1).?, 1);
try expectEqual(map.count(), 1);
map.clearRetainingCapacity();
map.putAssumeCapacity(1, 1);
try expectEqual(map.get(1).?, 1);
try expectEqual(map.count(), 1);
const actual_cap = map.capacity();
try expect(actual_cap > 0);
map.clearRetainingCapacity();
map.clearRetainingCapacity();
try expectEqual(map.count(), 0);
try expectEqual(map.capacity(), actual_cap);
try expect(!map.contains(1));
}
test "HashMap ensureTotalCapacity with existing elements" {
const Map = AutoHashMapUnmanaged(u32, u32);
const cap = Map.minimal_capacity;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
try map.put(0, 0);
try expectEqual(map.count(), 1);
try expectEqual(map.capacity(), Map.minimal_capacity);
try testing.expectError(error.OutOfMemory, map.ensureTotalCapacity(65));
try expectEqual(map.count(), 1);
try expectEqual(map.capacity(), Map.minimal_capacity);
}
test "HashMap remove" {
const Map = AutoHashMapUnmanaged(u32, u32);
const cap = 32;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
var i: u32 = 0;
while (i < 16) : (i += 1) {
try map.put(i, i);
}
i = 0;
while (i < 16) : (i += 1) {
if (i % 3 == 0) {
_ = map.remove(i);
}
}
try expectEqual(map.count(), 10);
var it = map.iterator();
while (it.next()) |kv| {
try expectEqual(kv.key_ptr.*, kv.value_ptr.*);
try expect(kv.key_ptr.* % 3 != 0);
}
i = 0;
while (i < 16) : (i += 1) {
if (i % 3 == 0) {
try expect(!map.contains(i));
} else {
try expectEqual(map.get(i).?, i);
}
}
}
test "HashMap reverse removes" {
const Map = AutoHashMapUnmanaged(u32, u32);
const cap = 32;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
var i: u32 = 0;
while (i < 16) : (i += 1) {
try map.putNoClobber(i, i);
}
i = 16;
while (i > 0) : (i -= 1) {
_ = map.remove(i - 1);
try expect(!map.contains(i - 1));
var j: u32 = 0;
while (j < i - 1) : (j += 1) {
try expectEqual(map.get(j).?, j);
}
}
try expectEqual(map.count(), 0);
}
test "HashMap multiple removes on same metadata" {
const Map = AutoHashMapUnmanaged(u32, u32);
const cap = 32;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
var i: u32 = 0;
while (i < 16) : (i += 1) {
try map.put(i, i);
}
_ = map.remove(7);
_ = map.remove(15);
_ = map.remove(14);
_ = map.remove(13);
try expect(!map.contains(7));
try expect(!map.contains(15));
try expect(!map.contains(14));
try expect(!map.contains(13));
i = 0;
while (i < 13) : (i += 1) {
if (i == 7) {
try expect(!map.contains(i));
} else {
try expectEqual(map.get(i).?, i);
}
}
try map.put(15, 15);
try map.put(13, 13);
try map.put(14, 14);
try map.put(7, 7);
i = 0;
while (i < 16) : (i += 1) {
try expectEqual(map.get(i).?, i);
}
}
test "HashMap put and remove loop in random order" {
const Map = AutoHashMapUnmanaged(u32, u32);
const cap = 64;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
var keys = std.ArrayList(u32).init(alloc);
defer keys.deinit();
const size = 32;
const iterations = 100;
var i: u32 = 0;
while (i < size) : (i += 1) {
try keys.append(i);
}
var prng = std.Random.DefaultPrng.init(0);
const random = prng.random();
i = 0;
while (i < iterations) : (i += 1) {
random.shuffle(u32, keys.items);
for (keys.items) |key| {
try map.put(key, key);
}
try expectEqual(map.count(), size);
for (keys.items) |key| {
_ = map.remove(key);
}
try expectEqual(map.count(), 0);
}
}
test "HashMap put" {
const Map = AutoHashMapUnmanaged(u32, u32);
const cap = 32;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
var i: u32 = 0;
while (i < 16) : (i += 1) {
try map.put(i, i);
}
i = 0;
while (i < 16) : (i += 1) {
try expectEqual(map.get(i).?, i);
}
i = 0;
while (i < 16) : (i += 1) {
try map.put(i, i * 16 + 1);
}
i = 0;
while (i < 16) : (i += 1) {
try expectEqual(map.get(i).?, i * 16 + 1);
}
}
test "HashMap put full load" {
const Map = AutoHashMapUnmanaged(usize, usize);
const cap = 16;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
for (0..cap) |i| try map.put(i, i);
for (0..cap) |i| try expectEqual(map.get(i).?, i);
try testing.expectError(error.OutOfMemory, map.put(cap, cap));
}
test "HashMap putAssumeCapacity" {
const Map = AutoHashMapUnmanaged(u32, u32);
const cap = 32;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
var i: u32 = 0;
while (i < 20) : (i += 1) {
map.putAssumeCapacityNoClobber(i, i);
}
i = 0;
var sum = i;
while (i < 20) : (i += 1) {
sum += map.getPtr(i).?.*;
}
try expectEqual(sum, 190);
i = 0;
while (i < 20) : (i += 1) {
map.putAssumeCapacity(i, 1);
}
i = 0;
sum = i;
while (i < 20) : (i += 1) {
sum += map.get(i).?;
}
try expectEqual(sum, 20);
}
test "HashMap repeat putAssumeCapacity/remove" {
const Map = AutoHashMapUnmanaged(u32, u32);
const cap = 32;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
const limit = cap;
var i: u32 = 0;
while (i < limit) : (i += 1) {
map.putAssumeCapacityNoClobber(i, i);
}
// Repeatedly delete/insert an entry without resizing the map.
// Put to different keys so entries don't land in the just-freed slot.
i = 0;
while (i < 10 * limit) : (i += 1) {
try testing.expect(map.remove(i));
if (i % 2 == 0) {
map.putAssumeCapacityNoClobber(limit + i, i);
} else {
map.putAssumeCapacity(limit + i, i);
}
}
i = 9 * limit;
while (i < 10 * limit) : (i += 1) {
try expectEqual(map.get(limit + i), i);
}
try expectEqual(map.count(), limit);
}
test "HashMap getOrPut" {
const Map = AutoHashMapUnmanaged(u32, u32);
const cap = 32;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
var i: u32 = 0;
while (i < 10) : (i += 1) {
try map.put(i * 2, 2);
}
i = 0;
while (i < 20) : (i += 1) {
_ = try map.getOrPutValue(i, 1);
}
i = 0;
var sum = i;
while (i < 20) : (i += 1) {
sum += map.get(i).?;
}
try expectEqual(sum, 30);
}
test "HashMap basic hash map usage" {
const Map = AutoHashMapUnmanaged(i32, i32);
const cap = 32;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
try testing.expect((try map.fetchPut(1, 11)) == null);
try testing.expect((try map.fetchPut(2, 22)) == null);
try testing.expect((try map.fetchPut(3, 33)) == null);
try testing.expect((try map.fetchPut(4, 44)) == null);
try map.putNoClobber(5, 55);
try testing.expect((try map.fetchPut(5, 66)).?.value == 55);
try testing.expect((try map.fetchPut(5, 55)).?.value == 66);
const gop1 = try map.getOrPut(5);
try testing.expect(gop1.found_existing == true);
try testing.expect(gop1.value_ptr.* == 55);
gop1.value_ptr.* = 77;
try testing.expect(map.getEntry(5).?.value_ptr.* == 77);
const gop2 = try map.getOrPut(99);
try testing.expect(gop2.found_existing == false);
gop2.value_ptr.* = 42;
try testing.expect(map.getEntry(99).?.value_ptr.* == 42);
const gop3 = try map.getOrPutValue(5, 5);
try testing.expect(gop3.value_ptr.* == 77);
const gop4 = try map.getOrPutValue(100, 41);
try testing.expect(gop4.value_ptr.* == 41);
try testing.expect(map.contains(2));
try testing.expect(map.getEntry(2).?.value_ptr.* == 22);
try testing.expect(map.get(2).? == 22);
const rmv1 = map.fetchRemove(2);
try testing.expect(rmv1.?.key == 2);
try testing.expect(rmv1.?.value == 22);
try testing.expect(map.fetchRemove(2) == null);
try testing.expect(map.remove(2) == false);
try testing.expect(map.getEntry(2) == null);
try testing.expect(map.get(2) == null);
try testing.expect(map.remove(3) == true);
}
test "HashMap ensureUnusedCapacity" {
const Map = AutoHashMapUnmanaged(u64, u64);
const cap = 64;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
try map.ensureUnusedCapacity(32);
try testing.expectError(error.OutOfMemory, map.ensureUnusedCapacity(cap + 1));
}
test "HashMap removeByPtr" {
const Map = AutoHashMapUnmanaged(i32, u64);
const cap = 64;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
var i: i32 = 0;
while (i < 10) : (i += 1) {
try map.put(i, 0);
}
try testing.expect(map.count() == 10);
i = 0;
while (i < 10) : (i += 1) {
const key_ptr = map.getKeyPtr(i);
try testing.expect(key_ptr != null);
if (key_ptr) |ptr| {
map.removeByPtr(ptr);
}
}
try testing.expect(map.count() == 0);
}
test "HashMap removeByPtr 0 sized key" {
const Map = AutoHashMapUnmanaged(i32, u64);
const cap = 64;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
try map.put(0, 0);
try testing.expect(map.count() == 1);
const key_ptr = map.getKeyPtr(0);
try testing.expect(key_ptr != null);
if (key_ptr) |ptr| {
map.removeByPtr(ptr);
}
try testing.expect(map.count() == 0);
}
test "HashMap repeat fetchRemove" {
const Map = AutoHashMapUnmanaged(u64, void);
const cap = 64;
const alloc = testing.allocator;
const layout = Map.layoutForCapacity(cap);
const buf = try alloc.alignedAlloc(u8, Map.base_align, layout.total_size);
defer alloc.free(buf);
var map = Map.init(OffsetBuf.init(buf), layout);
map.putAssumeCapacity(0, {});
map.putAssumeCapacity(1, {});
map.putAssumeCapacity(2, {});
map.putAssumeCapacity(3, {});
// fetchRemove() should make slots available.
var i: usize = 0;
while (i < 10) : (i += 1) {
try testing.expect(map.fetchRemove(3) != null);
map.putAssumeCapacity(3, {});
}
try testing.expect(map.get(0) != null);
try testing.expect(map.get(1) != null);
try testing.expect(map.get(2) != null);
try testing.expect(map.get(3) != null);
}
test "OffsetHashMap basic usage" {
const OffsetMap = AutoOffsetHashMap(u32, u32);
const cap = 16;
const alloc = testing.allocator;
const layout = OffsetMap.layout(cap);
const buf = try alloc.alignedAlloc(u8, OffsetMap.base_align, layout.total_size);
defer alloc.free(buf);
var offset_map = OffsetMap.init(OffsetBuf.init(buf), layout);
var map = offset_map.map(buf.ptr);
const count = 5;
var i: u32 = 0;
var total: u32 = 0;
while (i < count) : (i += 1) {
try map.put(i, i);
total += i;
}
var sum: u32 = 0;
var it = map.iterator();
while (it.next()) |kv| {
sum += kv.key_ptr.*;
}
try expectEqual(total, sum);
i = 0;
sum = 0;
while (i < count) : (i += 1) {
try expectEqual(i, map.get(i).?);
sum += map.get(i).?;
}
try expectEqual(total, sum);
}
test "OffsetHashMap remake map" {
const OffsetMap = AutoOffsetHashMap(u32, u32);
const cap = 16;
const alloc = testing.allocator;
const layout = OffsetMap.layout(cap);
const buf = try alloc.alignedAlloc(u8, OffsetMap.base_align, layout.total_size);
defer alloc.free(buf);
var offset_map = OffsetMap.init(OffsetBuf.init(buf), layout);
{
var map = offset_map.map(buf.ptr);
try map.put(5, 5);
}
{
var map = offset_map.map(buf.ptr);
try expectEqual(5, map.get(5).?);
}
}
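// An illustrative test (not part of the stdlib fork) exercising the file's
// stated goal: the backing memory can be copied wholesale and the map keeps
// working, since only offsets are stored.
test "OffsetHashMap copied backing memory" {
const OffsetMap = AutoOffsetHashMap(u32, u32);
const cap = 16;
const alloc = testing.allocator;
const layout = OffsetMap.layout(cap);
const buf = try alloc.alignedAlloc(u8, OffsetMap.base_align, layout.total_size);
defer alloc.free(buf);
var offset_map = OffsetMap.init(OffsetBuf.init(buf), layout);
{
var map = offset_map.map(buf.ptr);
try map.put(5, 55);
}
// Copy the entire backing buffer and rebuild the map from the new base.
const buf2 = try alloc.alignedAlloc(u8, OffsetMap.base_align, layout.total_size);
defer alloc.free(buf2);
@memcpy(buf2, buf);
{
var map = offset_map.map(buf2.ptr);
try expectEqual(55, map.get(5).?);
}
}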