mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-08-02 14:57:31 +03:00
perf: introduce CacheTable structure, use it for shaper cache
This commit is contained in:
135
src/cache_table.zig
Normal file
135
src/cache_table.zig
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
const std = @import("std");
const assert = std.debug.assert;

const fastmem = @import("./fastmem.zig");

/// An associative data structure used for efficiently storing and
/// retrieving values which are able to be recomputed if necessary.
///
/// This structure is effectively a hash table with fixed-sized buckets.
///
/// When inserting an item in to a full bucket, the least recently used
/// item is replaced.
///
/// To achieve this, when an item is accessed, it's moved to the end of
/// the bucket, and the rest of the items are moved over to fill the gap.
///
/// This should provide very good query performance and keep frequently
/// accessed items cached indefinitely.
///
/// Parameters:
///
/// `Context`
///   A type containing methods to define CacheTable behaviors.
///   - `fn hash(*Context, K) u64` - Return a hash for a key.
///   - `fn eql(*Context, K, K) bool` - Check two keys for equality.
///
///   - `fn evicted(*Context, K, V) void` - [OPTIONAL] Eviction callback.
///     If present, called whenever an item is evicted from the cache.
///
/// `bucket_count`
///   Should ideally be close to the median number of important items that
///   you expect to be cached at any given point.
///
///   Performance will suffer if this is not a power of 2.
///
/// `bucket_size`
///   Should be larger if you expect a large number of unimportant items to
///   enter the cache at a time. Having larger buckets will avoid important
///   items being dropped from the cache prematurely.
pub fn CacheTable(
    comptime K: type,
    comptime V: type,
    comptime Context: type,
    comptime bucket_count: usize,
    comptime bucket_size: u8,
) type {
    return struct {
        const Self = @This();

        /// A single stored entry: the key alongside its cached value.
        const KV = struct {
            key: K,
            value: V,
        };

        /// `bucket_count` buckets containing `bucket_size` KV pairs each.
        ///
        /// We don't need to initialize this memory because we don't use it
        /// unless it's within a bucket's stored length, which will guarantee
        /// that we put actual items there.
        buckets: [bucket_count][bucket_size]KV = undefined,

        /// We use this array to keep track of how many slots in each bucket
        /// have actual items in them. Once all the buckets fill up this will
        /// become a pointless check, but hopefully branch prediction picks
        /// up on it at that point. The memory cost isn't too bad since it's
        /// just bytes, so should be a fraction the size of the main table.
        lengths: [bucket_count]u8 = [_]u8{0} ** bucket_count,

        /// An instance of the context structure.
        /// Must be initialized before calling any operations.
        context: Context,

        /// Adds an item to the cache table. If an old value was removed to
        /// make room then it is returned in a struct with its key and value.
        pub fn put(self: *Self, key: K, value: V) ?KV {
            // The hash is a u64; reduce it to a usize index so this also
            // compiles on targets where usize is narrower than u64. The
            // modulo guarantees the cast is in range.
            const idx: usize = @intCast(self.context.hash(key) % bucket_count);

            const kv: KV = .{
                .key = key,
                .value = value,
            };

            // Fast path: the bucket still has an unused slot; append there.
            if (self.lengths[idx] < bucket_size) {
                self.buckets[idx][self.lengths[idx]] = kv;
                self.lengths[idx] += 1;
                return null;
            }

            assert(self.lengths[idx] == bucket_size);

            // Bucket is full: evict the least recently used item (front of
            // the bucket) and place the new item at the back (MRU position).
            const evicted = fastmem.rotateIn(KV, &self.buckets[idx], kv);

            if (comptime @hasDecl(Context, "evicted")) {
                self.context.evicted(evicted.key, evicted.value);
            }

            return evicted;
        }

        /// Retrieves an item from the cache table.
        ///
        /// Returns null if no item is found with the provided key.
        pub fn get(self: *Self, key: K) ?V {
            const idx: usize = @intCast(self.context.hash(key) % bucket_count);

            // Scan back-to-front so the most recently used items are
            // checked first.
            const len = self.lengths[idx];
            var i: usize = len;
            while (i > 0) {
                i -= 1;
                if (self.context.eql(key, self.buckets[idx][i].key)) {
                    // On a hit, move the item to the back of the bucket
                    // (MRU position) after reading the value out.
                    defer fastmem.rotateOnce(KV, self.buckets[idx][i..len]);
                    return self.buckets[idx][i].value;
                }
            }

            return null;
        }

        /// Removes all items from the cache table.
        ///
        /// If your `Context` has an `evicted` method,
        /// it will be called with all removed items.
        pub fn clear(self: *Self) void {
            if (comptime @hasDecl(Context, "evicted")) {
                for (self.buckets, self.lengths) |b, l| {
                    for (b[0..l]) |kv| {
                        self.context.evicted(kv.key, kv.value);
                    }
                }
            }
            // Resetting the lengths is all that's needed; stale KV memory
            // beyond a bucket's length is never read.
            @memset(&self.lengths, 0);
        }
    };
}
|
@ -22,13 +22,58 @@ pub inline fn copy(comptime T: type, dest: []T, source: []const T) void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Moves the first item to the end.
/// For the reverse of this, use `fastmem.rotateOnceR`.
///
/// Equivalent to std.mem.rotate(T, items, 1), but cheaper: one memmove
/// plus a temporary for the single wrapped item instead of three reversals.
///
/// e.g. `0 1 2 3` -> `1 2 3 0`.
pub inline fn rotateOnce(comptime T: type, items: []T) void {
    const first = items[0];
    move(T, items[0 .. items.len - 1], items[1..]);
    items[items.len - 1] = first;
}
|
||||||
|
|
||||||
|
/// Moves the last item to the start.
/// Reverse operation of `fastmem.rotateOnce`.
///
/// Equivalent to std.mem.rotate(T, items, items.len - 1), but cheaper:
/// one memmove plus a temporary for the single wrapped item instead of
/// three reversals.
///
/// e.g. `0 1 2 3` -> `3 0 1 2`.
pub inline fn rotateOnceR(comptime T: type, items: []T) void {
    const last = items[items.len - 1];
    move(T, items[1..], items[0 .. items.len - 1]);
    items[0] = last;
}
|
||||||
|
|
||||||
|
/// Rotates a new item in to the end of a slice.
/// The first item from the slice is removed and returned.
///
/// e.g. rotating `4` in to `0 1 2 3` makes it `1 2 3 4` and returns `0`.
///
/// For the reverse of this, use `fastmem.rotateInR`.
pub inline fn rotateIn(comptime T: type, items: []T, item: T) T {
    const displaced = items[0];
    move(T, items[0 .. items.len - 1], items[1..]);
    items[items.len - 1] = item;
    return displaced;
}
|
||||||
|
|
||||||
|
/// Rotates a new item in to the start of a slice.
/// The last item from the slice is removed and returned.
///
/// e.g. rotating `4` in to `0 1 2 3` makes it `4 0 1 2` and returns `3`.
///
/// Reverse operation of `fastmem.rotateIn`.
pub inline fn rotateInR(comptime T: type, items: []T, item: T) T {
    const displaced = items[items.len - 1];
    move(T, items[1..], items[0 .. items.len - 1]);
    items[0] = item;
    return displaced;
}
|
||||||
|
|
||||||
extern "c" fn memcpy(*anyopaque, *const anyopaque, usize) *anyopaque;
|
extern "c" fn memcpy(*anyopaque, *const anyopaque, usize) *anyopaque;
|
||||||
extern "c" fn memmove(*anyopaque, *const anyopaque, usize) *anyopaque;
|
extern "c" fn memmove(*anyopaque, *const anyopaque, usize) *anyopaque;
|
||||||
|
@ -14,55 +14,57 @@ const std = @import("std");
|
|||||||
const assert = std.debug.assert;
|
const assert = std.debug.assert;
|
||||||
const Allocator = std.mem.Allocator;
|
const Allocator = std.mem.Allocator;
|
||||||
const font = @import("../main.zig");
|
const font = @import("../main.zig");
|
||||||
const lru = @import("../../lru.zig");
|
const CacheTable = @import("../../cache_table.zig").CacheTable;
|
||||||
|
|
||||||
const log = std.log.scoped(.font_shaper_cache);
|
const log = std.log.scoped(.font_shaper_cache);
|
||||||
|
|
||||||
/// Context for the shaped-cell cache table.
///
/// Keys are the text run's precomputed hash, so `hash` is the identity
/// function and `eql` is plain integer equality.
const CellCacheTableContext = struct {
    pub fn hash(self: *const CellCacheTableContext, key: u64) u64 {
        _ = self;
        return key;
    }

    pub fn eql(self: *const CellCacheTableContext, a: u64, b: u64) bool {
        _ = self;
        return a == b;
    }
};
|
||||||
|
|
||||||
/// Cache table mapping a run hash -> shaped cells.
const CellCacheTable = CacheTable(
    u64,
    []font.shape.Cell,
    CellCacheTableContext,

    // Capacity is slightly arbitrary. These numbers are guesses.
    //
    // An average of 256 frequently cached runs seems like a safe guess
    // for most terminal screens.
    256,
    // 8 items per bucket to give decent resiliency to important runs.
    8,
);
|
||||||
|
|
||||||
/// Keep track of the number of evictions. We use this to workaround
|
/// The cache table of shaped cells.
|
||||||
/// the issue that Zig stdlib hashmap gets slower over time
|
map: CellCacheTable,
|
||||||
/// (https://github.com/ziglang/zig/issues/17851). When evictions
|
|
||||||
/// reaches a certain threshold, we reset the LRU.
|
|
||||||
evictions: std.math.IntFittingRange(0, evictions_threshold) = 0,
|
|
||||||
|
|
||||||
/// Create an empty cache. No allocation happens until cells are inserted.
pub fn init() Cache {
    return .{ .map = .{ .context = .{} } };
}
|
||||||
|
|
||||||
/// Free all memory owned by the cache. The cache table itself is
/// value-typed, so clearing the stored cell slices is all that's needed.
pub fn deinit(self: *Cache, alloc: Allocator) void {
    self.clear(alloc);
}
|
||||||
|
|
||||||
/// Get the shaped cells for the given text run,
/// or null if they are not in the cache.
pub fn get(self: *Cache, run: font.shape.TextRun) ?[]const font.shape.Cell {
    return self.map.get(run.hash);
}
|
||||||
|
|
||||||
/// Insert the shaped cells for the given text run into the cache.
///
/// The cells will be duplicated.
pub fn put(
    self: *Cache,
    alloc: Allocator,
    // NOTE(review): this parameter line sits inside a diff hunk boundary in
    // the source; reconstructed from the surrounding usage of `run.hash`.
    run: font.shape.TextRun,
    cells: []const font.shape.Cell,
) Allocator.Error!void {
    const copy = try alloc.dupe(font.shape.Cell, cells);
    // If inserting displaced an older entry, free its duplicated cells.
    if (self.map.put(run.hash, copy)) |evicted| alloc.free(evicted.value);
}
|
|
||||||
|
|
||||||
pub fn count(self: *const Cache) usize {
|
|
||||||
return self.map.map.count();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Free every cached cell slice and empty the cache table.
///
/// NOTE(review): values are freed by walking the table's buckets directly
/// rather than via an `evicted` context callback, so this must stay in
/// sync with how `put` allocates values.
fn clear(self: *Cache, alloc: Allocator) void {
    for (self.map.buckets, self.map.lengths) |bucket, len| {
        for (bucket[0..len]) |kv| alloc.free(kv.value);
    }
    self.map.clear();
}
|
||||||
|
|
||||||
test Cache {
|
test Cache {
|
||||||
|
Reference in New Issue
Block a user