From 8d76b5f2835f428adaebd8f3498ebed0e4777956 Mon Sep 17 00:00:00 2001 From: Qwerasd Date: Mon, 10 Jun 2024 13:58:35 -0400 Subject: [PATCH] perf: introduce CacheTable structure, use it for shaper cache --- src/cache_table.zig | 135 ++++++++++++++++++++++++++++++++++++++ src/fastmem.zig | 45 +++++++++++++ src/font/shaper/Cache.zig | 100 +++++++++++++--------------- 3 files changed, 224 insertions(+), 56 deletions(-) create mode 100644 src/cache_table.zig diff --git a/src/cache_table.zig b/src/cache_table.zig new file mode 100644 index 000000000..bdc99cd14 --- /dev/null +++ b/src/cache_table.zig @@ -0,0 +1,135 @@ +const fastmem = @import("./fastmem.zig"); + +const std = @import("std"); +const assert = std.debug.assert; + +/// An associative data structure used for efficiently storing and +/// retrieving values which are able to be recomputed if necessary. +/// +/// This structure is effectively a hash table with fixed-sized buckets. +/// +/// When inserting an item into a full bucket, the least recently used +/// item is replaced. +/// +/// To achieve this, when an item is accessed, it's moved to the end of +/// the bucket, and the rest of the items are moved over to fill the gap. +/// +/// This should provide very good query performance and keep frequently +/// accessed items cached indefinitely. +/// +/// Parameters: +/// +/// `Context` +/// A type containing methods to define CacheTable behaviors. +/// - `fn hash(*Context, K) u64` - Return a hash for a key. +/// - `fn eql(*Context, K, K) bool` - Check two keys for equality. +/// +/// - `fn evicted(*Context, K, V) void` - [OPTIONAL] Eviction callback. +/// If present, called whenever an item is evicted from the cache. +/// +/// `bucket_count` +/// Should ideally be close to the median number of important items that +/// you expect to be cached at any given point. +/// +/// Performance will suffer if this is not a power of 2.
+/// +/// `bucket_size` +/// should be larger if you expect a large number of unimportant items to +/// enter the cache at a time. Having larger buckets will avoid important +/// items being dropped from the cache prematurely. +/// +pub fn CacheTable( + comptime K: type, + comptime V: type, + comptime Context: type, + comptime bucket_count: usize, + comptime bucket_size: u8, +) type { + return struct { + const Self = CacheTable(K, V, Context, bucket_count, bucket_size); + + const KV = struct { + key: K, + value: V, + }; + + /// `bucket_count` buckets containing `bucket_size` KV pairs each. + /// + /// We don't need to initialize this memory because we don't use it + /// unless it's within a bucket's stored length, which will guarantee + /// that we put actual items there. + buckets: [bucket_count][bucket_size]KV = undefined, + + /// We use this array to keep track of how many slots in each bucket + /// have actual items in them. Once all the buckets fill up this will + /// become a pointless check, but hopefully branch prediction picks + /// up on it at that point. The memory cost isn't too bad since it's + /// just bytes, so should be a fraction the size of the main table. + lengths: [bucket_count]u8 = [_]u8{0} ** bucket_count, + + /// An instance of the context structure. + /// Must be initialized before calling any operations. + context: Context, + + /// Adds an item to the cache table. If an old value was removed to + /// make room then it is returned in a struct with its key and value. 
+ pub fn put(self: *Self, key: K, value: V) ?KV { + const idx: u64 = self.context.hash(key) % bucket_count; + + const kv = .{ + .key = key, + .value = value, + }; + + if (self.lengths[idx] < bucket_size) { + self.buckets[idx][self.lengths[idx]] = kv; + self.lengths[idx] += 1; + return null; + } + + assert(self.lengths[idx] == bucket_size); + + const evicted = fastmem.rotateIn(KV, &self.buckets[idx], kv); + + if (comptime @hasDecl(Context, "evicted")) { + self.context.evicted(evicted.key, evicted.value); + } + + return evicted; + } + + /// Retrieves an item from the cache table. + /// + /// Returns null if no item is found with the provided key. + pub fn get(self: *Self, key: K) ?V { + const idx = self.context.hash(key) % bucket_count; + + const len = self.lengths[idx]; + var i: usize = len; + while (i > 0) { + i -= 1; + if (self.context.eql(key, self.buckets[idx][i].key)) { + defer fastmem.rotateOnce(KV, self.buckets[idx][i..len]); + return self.buckets[idx][i].value; + } + } + + return null; + } + + /// Removes all items from the cache table. + /// + /// If your `Context` has an `evicted` method, + /// it will be called with all removed items. + pub fn clear(self: *Self) void { + if (comptime @hasDecl(Context, "evicted")) { + for (self.buckets, self.lengths) |b, l| { + for (b[0..l]) |kv| { + self.context.evicted(kv.key, kv.value); + } + } + } + @memset(&self.lengths, 0); + } + }; +} diff --git a/src/fastmem.zig b/src/fastmem.zig index 53c9e1122..687c057af 100644 --- a/src/fastmem.zig +++ b/src/fastmem.zig @@ -22,13 +22,58 @@ pub inline fn copy(comptime T: type, dest: []T, source: []const T) void { } } +/// Moves the first item to the end. +/// For the reverse of this, use `fastmem.rotateOnceR`. +/// /// Same as std.mem.rotate(T, items, 1) but more efficient by using memmove /// and a tmp var for the single rotated item instead of 3 calls to reverse. +/// +/// e.g. `0 1 2 3` -> `1 2 3 0`. 
pub inline fn rotateOnce(comptime T: type, items: []T) void { const tmp = items[0]; move(T, items[0 .. items.len - 1], items[1..items.len]); items[items.len - 1] = tmp; } +/// Moves the last item to the start. +/// Reverse operation of `fastmem.rotateOnce`. +/// +/// Same as std.mem.rotate(T, items, items.len - 1) but more efficient by +/// using memmove and a tmp var for the single rotated item instead of 3 +/// calls to reverse. +/// +/// e.g. `0 1 2 3` -> `3 0 1 2`. +pub inline fn rotateOnceR(comptime T: type, items: []T) void { + const tmp = items[items.len - 1]; + move(T, items[1..items.len], items[0 .. items.len - 1]); + items[0] = tmp; +} + +/// Rotates a new item in to the end of a slice. +/// The first item from the slice is removed and returned. +/// +/// e.g. rotating `4` in to `0 1 2 3` makes it `1 2 3 4` and returns `0`. +/// +/// For the reverse of this, use `fastmem.rotateInR`. +pub inline fn rotateIn(comptime T: type, items: []T, item: T) T { + const removed = items[0]; + move(T, items[0 .. items.len - 1], items[1..items.len]); + items[items.len - 1] = item; + return removed; +} + +/// Rotates a new item in to the start of a slice. +/// The last item from the slice is removed and returned. +/// +/// e.g. rotating `4` in to `0 1 2 3` makes it `4 0 1 2` and returns `3`. +/// +/// Reverse operation of `fastmem.rotateIn`. +pub inline fn rotateInR(comptime T: type, items: []T, item: T) T { + const removed = items[items.len - 1]; + move(T, items[1..items.len], items[0 .. 
items.len - 1]); + items[0] = item; + return removed; +} + extern "c" fn memcpy(*anyopaque, *const anyopaque, usize) *anyopaque; extern "c" fn memmove(*anyopaque, *const anyopaque, usize) *anyopaque; diff --git a/src/font/shaper/Cache.zig b/src/font/shaper/Cache.zig index afb89ee9c..2a1424118 100644 --- a/src/font/shaper/Cache.zig +++ b/src/font/shaper/Cache.zig @@ -14,55 +14,57 @@ const std = @import("std"); const assert = std.debug.assert; const Allocator = std.mem.Allocator; const font = @import("../main.zig"); -const lru = @import("../../lru.zig"); +const CacheTable = @import("../../cache_table.zig").CacheTable; const log = std.log.scoped(.font_shaper_cache); -/// Our LRU is the run hash to the shaped cells. -const LRU = lru.AutoHashMap(u64, []font.shape.Cell); +/// Context for cache table. +const CellCacheTableContext = struct { + pub fn hash(self: *const CellCacheTableContext, key: u64) u64 { + _ = self; + return key; + } + pub fn eql(self: *const CellCacheTableContext, a: u64, b: u64) bool { + _ = self; + return a == b; + } +}; -/// This is the threshold of evictions at which point we reset -/// the LRU completely. This is a workaround for the issue that -/// Zig stdlib hashmap gets slower over time -/// (https://github.com/ziglang/zig/issues/17851). -/// -/// The value is based on naive measuring on my local machine. -/// If someone has a better idea of what this value should be, -/// please let me know. -const evictions_threshold = 8192; +/// Cache table for run hash -> shaped cells. +const CellCacheTable = CacheTable( + u64, + []font.shape.Cell, + CellCacheTableContext, -/// The cache of shaped cells. -map: LRU, + // Capacity is slightly arbitrary. These numbers are guesses. + // + // I'd expect an average of 256 frequently cached runs to be a + // safe guess for most terminal screens. + 256, + // 8 items per bucket to give decent resiliency to important runs. + 8, +); -/// Keep track of the number of evictions.
We use this to workaround -/// the issue that Zig stdlib hashmap gets slower over time -/// (https://github.com/ziglang/zig/issues/17851). When evictions -/// reaches a certain threshold, we reset the LRU. -evictions: std.math.IntFittingRange(0, evictions_threshold) = 0, +/// The cache table of shaped cells. +map: CellCacheTable, pub fn init() Cache { - // Note: this is very arbitrary. Increasing this number will increase - // the cache hit rate, but also increase the memory usage. We should do - // some more empirical testing to see what the best value is. - const capacity = 1024; - - return .{ .map = LRU.init(capacity) }; + return .{ .map = .{ .context = .{} } }; } pub fn deinit(self: *Cache, alloc: Allocator) void { - var it = self.map.map.iterator(); - while (it.next()) |entry| alloc.free(entry.value_ptr.*.data.value); - self.map.deinit(alloc); + self.clear(alloc); } -/// Get the shaped cells for the given text run or null if they are not -/// in the cache. -pub fn get(self: *const Cache, run: font.shape.TextRun) ?[]const font.shape.Cell { +/// Get the shaped cells for the given text run, +/// or null if they are not in the cache. +pub fn get(self: *Cache, run: font.shape.TextRun) ?[]const font.shape.Cell { return self.map.get(run.hash); } -/// Insert the shaped cells for the given text run into the cache. The -/// cells will be duplicated. +/// Insert the shaped cells for the given text run into the cache. +/// +/// The cells will be duplicated. pub fn put( self: *Cache, alloc: Allocator, @@ -70,33 +72,19 @@ pub fn put( cells: []const font.shape.Cell, ) Allocator.Error!void { const copy = try alloc.dupe(font.shape.Cell, cells); - const gop = try self.map.getOrPut(alloc, run.hash); - if (gop.evicted) |evicted| { - alloc.free(evicted.value); - - // See the doc comment on evictions_threshold for why we do this. 
- self.evictions += 1; - if (self.evictions >= evictions_threshold) { - log.debug("resetting cache due to too many evictions", .{}); - // We need to put our value here so deinit can free - gop.value_ptr.* = copy; - self.clear(alloc); - - // We need to call put again because self is now a - // different pointer value so our gop pointers are invalid. - return try self.put(alloc, run, cells); - } + const evicted = self.map.put(run.hash, copy); + if (evicted) |kv| { + alloc.free(kv.value); } - gop.value_ptr.* = copy; -} - -pub fn count(self: *const Cache) usize { - return self.map.map.count(); } fn clear(self: *Cache, alloc: Allocator) void { - self.deinit(alloc); - self.* = init(); + for (self.map.buckets, self.map.lengths) |b, l| { + for (b[0..l]) |kv| { + alloc.free(kv.value); + } + } + self.map.clear(); } test Cache {