mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-08-02 14:57:31 +03:00
perf: introduce CacheTable structure, use it for shaper cache
This commit is contained in:
135
src/cache_table.zig
Normal file
135
src/cache_table.zig
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
const std = @import("std");
const assert = std.debug.assert;

const fastmem = @import("./fastmem.zig");

/// An associative data structure used for efficiently storing and
/// retrieving values which are able to be recomputed if necessary.
///
/// This structure is effectively a hash table with fixed-sized buckets.
///
/// When inserting an item in to a full bucket, the least recently used
/// item is replaced.
///
/// To achieve this, when an item is accessed, it's moved to the end of
/// the bucket, and the rest of the items are moved over to fill the gap.
///
/// This should provide very good query performance and keep frequently
/// accessed items cached indefinitely.
///
/// Parameters:
///
/// `Context`
///   A type containing methods to define CacheTable behaviors.
///   - `fn hash(*Context, K) u64` - Return a hash for a key.
///   - `fn eql(*Context, K, K) bool` - Check two keys for equality.
///
///   - `fn evicted(*Context, K, V) void` - [OPTIONAL] Eviction callback.
///     If present, called whenever an item is evicted from the cache.
///
/// `bucket_count`
///   Should ideally be close to the median number of important items that
///   you expect to be cached at any given point.
///
///   Performance will suffer if this is not a power of 2.
///
/// `bucket_size`
///   Should be larger if you expect a large number of unimportant items to
///   enter the cache at a time. Having larger buckets will avoid important
///   items being dropped from the cache prematurely.
pub fn CacheTable(
    comptime K: type,
    comptime V: type,
    comptime Context: type,
    comptime bucket_count: usize,
    comptime bucket_size: u8,
) type {
    return struct {
        const Self = @This();

        /// A single stored entry: the key alongside its cached value.
        const KV = struct {
            key: K,
            value: V,
        };

        /// `bucket_count` buckets containing `bucket_size` KV pairs each.
        ///
        /// We don't need to initialize this memory because we don't use it
        /// unless it's within a bucket's stored length, which will guarantee
        /// that we put actual items there.
        buckets: [bucket_count][bucket_size]KV = undefined,

        /// We use this array to keep track of how many slots in each bucket
        /// have actual items in them. Once all the buckets fill up this will
        /// become a pointless check, but hopefully branch prediction picks
        /// up on it at that point. The memory cost isn't too bad since it's
        /// just bytes, so should be a fraction the size of the main table.
        lengths: [bucket_count]u8 = [_]u8{0} ** bucket_count,

        /// An instance of the context structure.
        /// Must be initialized before calling any operations.
        context: Context,

        /// Adds an item to the cache table. If an old value was removed to
        /// make room then it is returned in a struct with its key and value.
        pub fn put(self: *Self, key: K, value: V) ?KV {
            // The hash is a u64; reduce it to a usize index so this also
            // compiles on targets where usize is narrower than u64. The
            // modulo guarantees the cast is in range.
            const idx: usize = @intCast(self.context.hash(key) % bucket_count);

            const kv: KV = .{
                .key = key,
                .value = value,
            };

            // Fast path: the bucket still has an unused slot; append there.
            if (self.lengths[idx] < bucket_size) {
                self.buckets[idx][self.lengths[idx]] = kv;
                self.lengths[idx] += 1;
                return null;
            }

            assert(self.lengths[idx] == bucket_size);

            // Bucket is full: evict the least recently used item (front of
            // the bucket) and place the new item at the back (MRU position).
            const evicted = fastmem.rotateIn(KV, &self.buckets[idx], kv);

            if (comptime @hasDecl(Context, "evicted")) {
                self.context.evicted(evicted.key, evicted.value);
            }

            return evicted;
        }

        /// Retrieves an item from the cache table.
        ///
        /// Returns null if no item is found with the provided key.
        pub fn get(self: *Self, key: K) ?V {
            const idx: usize = @intCast(self.context.hash(key) % bucket_count);

            // Scan back-to-front so the most recently used items are
            // checked first.
            const len = self.lengths[idx];
            var i: usize = len;
            while (i > 0) {
                i -= 1;
                if (self.context.eql(key, self.buckets[idx][i].key)) {
                    // On a hit, move the item to the back of the bucket
                    // (MRU position) after reading the value out.
                    defer fastmem.rotateOnce(KV, self.buckets[idx][i..len]);
                    return self.buckets[idx][i].value;
                }
            }

            return null;
        }

        /// Removes all items from the cache table.
        ///
        /// If your `Context` has an `evicted` method,
        /// it will be called with all removed items.
        pub fn clear(self: *Self) void {
            if (comptime @hasDecl(Context, "evicted")) {
                for (self.buckets, self.lengths) |b, l| {
                    for (b[0..l]) |kv| {
                        self.context.evicted(kv.key, kv.value);
                    }
                }
            }
            // Resetting the lengths is all that's needed; stale KV memory
            // beyond a bucket's length is never read.
            @memset(&self.lengths, 0);
        }
    };
}
|
@ -22,13 +22,58 @@ pub inline fn copy(comptime T: type, dest: []T, source: []const T) void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Moves the first item to the end.
/// For the reverse of this, use `fastmem.rotateOnceR`.
///
/// Equivalent to std.mem.rotate(T, items, 1), but cheaper: one memmove
/// plus a temporary for the single wrapped item instead of three reversals.
///
/// e.g. `0 1 2 3` -> `1 2 3 0`.
pub inline fn rotateOnce(comptime T: type, items: []T) void {
    const first = items[0];
    move(T, items[0 .. items.len - 1], items[1..]);
    items[items.len - 1] = first;
}
|
||||||
|
|
||||||
|
/// Moves the last item to the start.
/// Reverse operation of `fastmem.rotateOnce`.
///
/// Equivalent to std.mem.rotate(T, items, items.len - 1), but cheaper:
/// one memmove plus a temporary for the single wrapped item instead of
/// three reversals.
///
/// e.g. `0 1 2 3` -> `3 0 1 2`.
pub inline fn rotateOnceR(comptime T: type, items: []T) void {
    const last = items[items.len - 1];
    move(T, items[1..], items[0 .. items.len - 1]);
    items[0] = last;
}
|
||||||
|
|
||||||
|
/// Rotates a new item in to the end of a slice.
/// The first item from the slice is removed and returned.
///
/// e.g. rotating `4` in to `0 1 2 3` makes it `1 2 3 4` and returns `0`.
///
/// For the reverse of this, use `fastmem.rotateInR`.
pub inline fn rotateIn(comptime T: type, items: []T, item: T) T {
    const displaced = items[0];
    move(T, items[0 .. items.len - 1], items[1..]);
    items[items.len - 1] = item;
    return displaced;
}
|
||||||
|
|
||||||
|
/// Rotates a new item in to the start of a slice.
/// The last item from the slice is removed and returned.
///
/// e.g. rotating `4` in to `0 1 2 3` makes it `4 0 1 2` and returns `3`.
///
/// Reverse operation of `fastmem.rotateIn`.
pub inline fn rotateInR(comptime T: type, items: []T, item: T) T {
    const displaced = items[items.len - 1];
    move(T, items[1..], items[0 .. items.len - 1]);
    items[0] = item;
    return displaced;
}
|
||||||
|
|
||||||
extern "c" fn memcpy(*anyopaque, *const anyopaque, usize) *anyopaque;
|
extern "c" fn memcpy(*anyopaque, *const anyopaque, usize) *anyopaque;
|
||||||
extern "c" fn memmove(*anyopaque, *const anyopaque, usize) *anyopaque;
|
extern "c" fn memmove(*anyopaque, *const anyopaque, usize) *anyopaque;
|
||||||
|
@ -14,55 +14,57 @@ const std = @import("std");
|
|||||||
const assert = std.debug.assert;
|
const assert = std.debug.assert;
|
||||||
const Allocator = std.mem.Allocator;
|
const Allocator = std.mem.Allocator;
|
||||||
const font = @import("../main.zig");
|
const font = @import("../main.zig");
|
||||||
const lru = @import("../../lru.zig");
|
const CacheTable = @import("../../cache_table.zig").CacheTable;
|
||||||
|
|
||||||
const log = std.log.scoped(.font_shaper_cache);
|
const log = std.log.scoped(.font_shaper_cache);
|
||||||
|
|
||||||
/// Context for the shaped-cell cache table.
///
/// Keys are the text run's precomputed hash, so `hash` is the identity
/// function and `eql` is plain integer equality.
const CellCacheTableContext = struct {
    pub fn hash(self: *const CellCacheTableContext, key: u64) u64 {
        _ = self;
        return key;
    }

    pub fn eql(self: *const CellCacheTableContext, a: u64, b: u64) bool {
        _ = self;
        return a == b;
    }
};
|
||||||
|
|
||||||
/// Cache table mapping a run hash -> shaped cells.
const CellCacheTable = CacheTable(
    u64,
    []font.shape.Cell,
    CellCacheTableContext,

    // Capacity is slightly arbitrary. These numbers are guesses.
    //
    // An average of 256 frequently cached runs seems like a safe guess
    // for most terminal screens.
    256,
    // 8 items per bucket to give decent resiliency to important runs.
    8,
);
|
||||||
|
|
||||||
/// Keep track of the number of evictions. We use this to workaround
|
/// The cache table of shaped cells.
|
||||||
/// the issue that Zig stdlib hashmap gets slower over time
|
map: CellCacheTable,
|
||||||
/// (https://github.com/ziglang/zig/issues/17851). When evictions
|
|
||||||
/// reaches a certain threshold, we reset the LRU.
|
|
||||||
evictions: std.math.IntFittingRange(0, evictions_threshold) = 0,
|
|
||||||
|
|
||||||
/// Create an empty cache. No allocation happens until cells are inserted.
pub fn init() Cache {
    return .{ .map = .{ .context = .{} } };
}
|
||||||
|
|
||||||
/// Free all memory owned by the cache. The cache table itself is
/// value-typed, so clearing the stored cell slices is all that's needed.
pub fn deinit(self: *Cache, alloc: Allocator) void {
    self.clear(alloc);
}
|
||||||
|
|
||||||
/// Get the shaped cells for the given text run,
/// or null if they are not in the cache.
pub fn get(self: *Cache, run: font.shape.TextRun) ?[]const font.shape.Cell {
    return self.map.get(run.hash);
}
|
||||||
|
|
||||||
/// Insert the shaped cells for the given text run into the cache.
///
/// The cells will be duplicated.
pub fn put(
    self: *Cache,
    alloc: Allocator,
    // NOTE(review): this parameter line sits inside a diff hunk boundary in
    // the source; reconstructed from the surrounding usage of `run.hash`.
    run: font.shape.TextRun,
    cells: []const font.shape.Cell,
) Allocator.Error!void {
    const copy = try alloc.dupe(font.shape.Cell, cells);
    // If inserting displaced an older entry, free its duplicated cells.
    if (self.map.put(run.hash, copy)) |evicted| alloc.free(evicted.value);
}
|
|
||||||
|
|
||||||
pub fn count(self: *const Cache) usize {
|
|
||||||
return self.map.map.count();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Free every cached cell slice and empty the cache table.
///
/// NOTE(review): values are freed by walking the table's buckets directly
/// rather than via an `evicted` context callback, so this must stay in
/// sync with how `put` allocates values.
fn clear(self: *Cache, alloc: Allocator) void {
    for (self.map.buckets, self.map.lengths) |bucket, len| {
        for (bucket[0..len]) |kv| alloc.free(kv.value);
    }
    self.map.clear();
}
|
||||||
|
|
||||||
test Cache {
|
test Cache {
|
||||||
|
Reference in New Issue
Block a user