perf: introduce CFReleaseThread for running CoreFoundation releases

Some CoreFoundation objects, such as those produced by CoreText, have
expensive callbacks that run when they are released. By offloading the
CFRelease calls to another thread, we can avoid important threads being
blocked by unexpectedly expensive callbacks.

This commit also changes the way that the coretext shaper's run iterator
builds its string. Rather than using a CFMutableString, an ArrayList of
unichars is built which is passed to CFStringCreateWithCharactersNoCopy,
which is a lot more efficient since it avoids all the CoreFoundation
overhead.
This commit is contained in:
Qwerasd
2024-06-14 01:43:02 -04:00
parent 04896a14b4
commit 626ec2b5ac
4 changed files with 307 additions and 25 deletions

View File

@ -19,6 +19,17 @@ pub const String = opaque {
)))) orelse Allocator.Error.OutOfMemory; )))) orelse Allocator.Error.OutOfMemory;
} }
/// Create a CFString over `unichars` via `CFStringCreateWithCharactersNoCopy`.
///
/// `kCFAllocatorNull` is passed as the contents deallocator, so
/// CoreFoundation will not free the buffer; per Apple's documentation the
/// caller keeps ownership of `unichars` and must keep it valid for as long
/// as the returned string is in use.
///
/// NOTE(review): a null result from CoreFoundation is not handled here; it
/// is converted straight into a non-optional `*String`.
pub fn createWithCharactersNoCopy(
    unichars: []const u16,
) *String {
    const raw = c.CFStringCreateWithCharactersNoCopy(
        null,
        @ptrCast(unichars.ptr),
        @intCast(unichars.len),
        foundation.c.kCFAllocatorNull,
    );
    return @ptrFromInt(@intFromPtr(raw));
}
pub fn release(self: *String) void { pub fn release(self: *String) void {
c.CFRelease(self); c.CFRelease(self);
} }

185
src/cf_release_thread.zig Normal file
View File

@ -0,0 +1,185 @@
//! Represents the CFRelease thread. Pools of CFTypeRefs are sent to
//! this thread to be released, so that their release callback logic
//! doesn't block the execution of a high throughput thread like the
//! renderer thread.
pub const Thread = @This();
const std = @import("std");
const builtin = @import("builtin");
const xev = @import("xev");
const macos = @import("macos");
const BlockingQueue = @import("./blocking_queue.zig").BlockingQueue;
const Allocator = std.mem.Allocator;
const log = std.log.scoped(.cf_release_thread);
/// Messages that can be delivered to the CFRelease thread via its mailbox.
pub const Message = union(enum) {
    /// Release a slice of CFTypeRefs. Uses `alloc` to
    /// free the slice after releasing all the refs.
    release: Release,

    /// Payload carried by a `release` message.
    pub const Release = struct {
        /// The CFTypeRefs to pass to `CFRelease`, one by one.
        refs: []*anyopaque,
        /// Allocator that owns `refs`; used to free the slice afterwards.
        alloc: Allocator,
    };
};
/// The type used for sending messages to the thread. For now this is
/// hardcoded with a capacity. We can make this a comptime parameter in
/// the future if we want it configurable.
pub const Mailbox = BlockingQueue(Message, 64);
/// Allocator used for some state. `init` uses it to create the mailbox
/// and `deinit` uses it to destroy the mailbox.
alloc: std.mem.Allocator,
/// The main event loop for the thread. It is created in `init`, run by
/// `threadMain_` and torn down in `deinit`.
loop: xev.Loop,
/// This can be used to wake up the thread. Mailbox producers should
/// notify this after publishing a message so the mailbox gets drained.
wakeup: xev.Async,
/// Completion registered with `wakeup.wait` when the thread starts.
wakeup_c: xev.Completion = .{},
/// This can be used to stop the thread on the next loop iteration.
stop: xev.Async,
/// Completion registered with `stop.wait` when the thread starts.
stop_c: xev.Completion = .{},
/// The mailbox that can be used to send this thread messages. Note
/// this is a blocking queue so if it is full you will get errors (or block).
mailbox: *Mailbox,
/// Internal state flags for the thread.
flags: packed struct {
/// This is set to true only when an abnormal exit is detected. It
/// tells our mailbox system to drain and ignore all messages.
/// It is set by `threadMain` before it re-runs the loop.
drain: bool = false,
} = .{},
/// Prepare all state the thread needs without starting it. The caller is
/// responsible for spawning a thread that runs `threadMain`, and for
/// calling `deinit` once that thread has finished.
///
/// Every resource acquired here is released again if a later step fails.
pub fn init(
    alloc: Allocator,
) !Thread {
    // Event loop that the thread will run.
    var event_loop = try xev.Loop.init(.{});
    errdefer event_loop.deinit();

    // Async handle used to "wake up" the thread so it collects objects.
    var wakeup_async = try xev.Async.init();
    errdefer wakeup_async.deinit();

    // Async handle used to stop the loop and force the thread to end.
    var stop_async = try xev.Async.init();
    errdefer stop_async.deinit();

    // Mailbox through which other threads send us messages.
    const queue = try Mailbox.create(alloc);
    errdefer queue.destroy(alloc);

    return .{
        .alloc = alloc,
        .loop = event_loop,
        .wakeup = wakeup_async,
        .stop = stop_async,
        .mailbox = queue,
    };
}
/// Clean up the thread. This is only safe to call once the thread
/// completes executing; the caller must join prior to this.
///
/// Releases everything `init` acquired: both async handles, the event
/// loop, and finally the mailbox (using the allocator stored in `alloc`).
pub fn deinit(self: *Thread) void {
// Tear down the async handles before the loop they were waited on.
self.stop.deinit();
self.wakeup.deinit();
self.loop.deinit();
// Nothing can possibly access the mailbox anymore, destroy it.
self.mailbox.destroy(self.alloc);
}
/// Entry point to run on the spawned thread.
///
/// Runs the event loop via `threadMain_`. If that returns while the loop
/// has not been stopped, the exit was abnormal: the drain flag is set and
/// the loop is run again so the mailbox keeps being emptied until a stop
/// is requested.
pub fn threadMain(self: *Thread) void {
    // Delegate to an error-returning helper so it can use `try`.
    self.threadMain_() catch |err| {
        log.warn("error in cf release thread err={}", .{err});
    };

    // A stopped loop means we were asked to shut down; nothing left to do.
    if (self.loop.flags.stopped) return;

    // Otherwise keep the loop alive so that messages are drained and we
    // can wait for a stop notification.
    log.warn("abrupt cf release thread exit detected, starting xev to drain mailbox", .{});
    defer log.debug("cf release thread fully exiting after abnormal failure", .{});

    self.flags.drain = true;
    self.loop.run(.until_done) catch |err| {
        log.err("failed to start xev loop for draining err={}", .{err});
    };
}
/// Error-returning body of `threadMain`: registers the wakeup and stop
/// handlers on the loop and then runs the loop until it is done.
/// Any error from running the loop is returned to the caller.
fn threadMain_(self: *Thread) !void {
// Registered first, so this is the last of the two deferred logs to run.
defer log.debug("cf release thread exited", .{});
// Start the async handlers. We start these first so that they're
// registered even if anything below fails so we can drain the mailbox.
self.wakeup.wait(&self.loop, &self.wakeup_c, Thread, self, wakeupCallback);
self.stop.wait(&self.loop, &self.stop_c, Thread, self, stopCallback);
// Run
log.debug("starting cf release thread", .{});
// Deferred, so this is logged when the loop run below returns (or fails).
defer log.debug("starting cf release thread shutdown", .{});
try self.loop.run(.until_done);
}
/// Drain the mailbox, handling every queued message.
///
/// For a `release` message each CFTypeRef in `refs` is passed to
/// `CFRelease` and the `refs` slice is then freed with the allocator that
/// came with the message.
///
/// Messages are handled the same way whether or not the drain flag is set.
/// A message owns both its slice and the references inside it, so popping
/// and discarding it (as is fine for messages that own nothing) would leak
/// the slice and every CoreFoundation object it refers to.
fn drainMailbox(self: *Thread) !void {
    while (self.mailbox.pop()) |message| {
        switch (message) {
            .release => |msg| {
                for (msg.refs) |ref| {
                    macos.foundation.CFRelease(ref);
                }

                // The sender handed ownership of the slice to us.
                msg.alloc.free(msg.refs);
            },
        }
    }
}
/// Loop callback for the `wakeup` async handle.
///
/// On a successful wait the mailbox is drained; a failure in either the
/// wait or the drain is only logged. The handler always re-arms itself so
/// later wakeups keep being delivered.
fn wakeupCallback(
    self_: ?*Thread,
    _: *xev.Loop,
    _: *xev.Completion,
    r: xev.Async.WaitError!void,
) xev.CallbackAction {
    if (r) |_| {
        // When we wake up, we check the mailbox. Mailbox producers should
        // wake up our thread after publishing.
        const thread = self_.?;
        thread.drainMailbox() catch |err| {
            log.err("error draining mailbox err={}", .{err});
        };
    } else |err| {
        log.err("error in wakeup err={}", .{err});
    }

    return .rearm;
}
/// Loop callback for the `stop` async handle.
///
/// Stops the event loop and disarms the handler. A failed wait is logged
/// instead of being asserted impossible: the wait result is an error union
/// that can carry an error, and `catch unreachable` on it would be
/// undefined behaviour in release builds. The loop is stopped either way
/// so that the thread can still be joined.
fn stopCallback(
    self_: ?*Thread,
    _: *xev.Loop,
    _: *xev.Completion,
    r: xev.Async.WaitError!void,
) xev.CallbackAction {
    r catch |err| log.err("error in stop wait err={}", .{err});
    self_.?.loop.stop();
    return .disarm;
}

View File

@ -64,24 +64,21 @@ pub const Shaper = struct {
}; };
const RunState = struct { const RunState = struct {
str: *macos.foundation.MutableString,
codepoints: CodepointList, codepoints: CodepointList,
unichars: std.ArrayListUnmanaged(u16),
fn init() !RunState { fn init() RunState {
var str = try macos.foundation.MutableString.create(0); return .{ .codepoints = .{}, .unichars = .{} };
errdefer str.release();
return .{ .str = str, .codepoints = .{} };
} }
fn deinit(self: *RunState, alloc: Allocator) void { fn deinit(self: *RunState, alloc: Allocator) void {
self.codepoints.deinit(alloc); self.codepoints.deinit(alloc);
self.str.release(); self.unichars.deinit(alloc);
} }
fn reset(self: *RunState) !void { fn reset(self: *RunState) !void {
self.codepoints.clearRetainingCapacity(); self.codepoints.clearRetainingCapacity();
self.str.release(); self.unichars.clearRetainingCapacity();
self.str = try macos.foundation.MutableString.create(0);
} }
}; };
@ -184,7 +181,7 @@ pub const Shaper = struct {
for (hardcoded_features) |name| try feats.append(name); for (hardcoded_features) |name| try feats.append(name);
for (opts.features) |name| try feats.append(name); for (opts.features) |name| try feats.append(name);
var run_state = try RunState.init(); var run_state = RunState.init();
errdefer run_state.deinit(alloc); errdefer run_state.deinit(alloc);
// For now we only support LTR text. If we shape RTL text then // For now we only support LTR text. If we shape RTL text then
@ -259,7 +256,14 @@ pub const Shaper = struct {
}; };
} }
pub fn shape(self: *Shaper, run: font.shape.TextRun) ![]const font.shape.Cell { /// Expects an ArrayList `cf_release_pool` in which `CFTypeRef`s
/// can be placed, which guarantees that they will be `CFRelease`d
/// eventually.
pub fn shape(
self: *Shaper,
run: font.shape.TextRun,
cf_release_pool: *std.ArrayList(*anyopaque),
) ![]const font.shape.Cell {
const state = &self.run_state; const state = &self.run_state;
// { // {
@ -290,18 +294,28 @@ pub const Shaper = struct {
defer arena.deinit(); defer arena.deinit();
const alloc = arena.allocator(); const alloc = arena.allocator();
const attr_dict: *macos.foundation.Dictionary = try self.getFont(run.grid, run.font_index); const attr_dict: *macos.foundation.Dictionary = try self.getFont(
run.grid,
run.font_index,
cf_release_pool,
);
// Make room for the attributed string and the CTLine.
try cf_release_pool.ensureUnusedCapacity(3);
const str = macos.foundation.String.createWithCharactersNoCopy(state.unichars.items);
cf_release_pool.appendAssumeCapacity(str);
// Create an attributed string from our string // Create an attributed string from our string
const attr_str = try macos.foundation.AttributedString.create( const attr_str = try macos.foundation.AttributedString.create(
state.str.string(), str,
attr_dict, attr_dict,
); );
defer attr_str.release(); cf_release_pool.appendAssumeCapacity(attr_str);
// We should always have one run because we do our own run splitting. // We should always have one run because we do our own run splitting.
const line = try macos.text.Line.createWithAttributedString(attr_str); const line = try macos.text.Line.createWithAttributedString(attr_str);
defer line.release(); cf_release_pool.appendAssumeCapacity(line);
// This keeps track of the current offsets within a single cell. // This keeps track of the current offsets within a single cell.
var cell_offset: struct { var cell_offset: struct {
@ -393,7 +407,12 @@ pub const Shaper = struct {
/// Get an attr dict for a font from a specific index. /// Get an attr dict for a font from a specific index.
/// These items are cached, do not retain or release them. /// These items are cached, do not retain or release them.
fn getFont(self: *Shaper, grid: *font.SharedGrid, index: font.Collection.Index) !*macos.foundation.Dictionary { fn getFont(
self: *Shaper,
grid: *font.SharedGrid,
index: font.Collection.Index,
cf_release_pool: *std.ArrayList(*anyopaque),
) !*macos.foundation.Dictionary {
const index_int = index.int(); const index_int = index.int();
if (self.cached_fonts.items.len <= index_int) { if (self.cached_fonts.items.len <= index_int) {
@ -407,6 +426,8 @@ pub const Shaper = struct {
return cached; return cached;
} }
try cf_release_pool.ensureUnusedCapacity(3);
const run_font = font: { const run_font = font: {
// The CoreText shaper relies on CoreText and CoreText claims // The CoreText shaper relies on CoreText and CoreText claims
// that CTFonts are threadsafe. See: // that CTFonts are threadsafe. See:
@ -429,17 +450,17 @@ pub const Shaper = struct {
const original = face.font; const original = face.font;
const attrs = try self.features.attrsDict(face.quirks_disable_default_font_features); const attrs = try self.features.attrsDict(face.quirks_disable_default_font_features);
defer attrs.release(); cf_release_pool.appendAssumeCapacity(attrs);
const desc = try macos.text.FontDescriptor.createWithAttributes(attrs); const desc = try macos.text.FontDescriptor.createWithAttributes(attrs);
defer desc.release(); cf_release_pool.appendAssumeCapacity(desc);
const copied = try original.copyWithAttributes(0, null, desc); const copied = try original.copyWithAttributes(0, null, desc);
errdefer copied.release(); errdefer copied.release();
break :font copied; break :font copied;
}; };
defer run_font.release(); cf_release_pool.appendAssumeCapacity(run_font);
// Get our font and use that get the attributes to set for the // Get our font and use that get the attributes to set for the
// attributed string so the whole string uses the same font. // attributed string so the whole string uses the same font.
@ -470,15 +491,20 @@ pub const Shaper = struct {
} }
pub fn addCodepoint(self: RunIteratorHook, cp: u32, cluster: u32) !void { pub fn addCodepoint(self: RunIteratorHook, cp: u32, cluster: u32) !void {
const state = &self.shaper.run_state;
// Build our UTF-16 string for CoreText // Build our UTF-16 string for CoreText
var unichars: [2]u16 = undefined; try state.unichars.ensureUnusedCapacity(self.shaper.alloc, 2);
state.unichars.appendNTimesAssumeCapacity(0, 2);
const pair = macos.foundation.stringGetSurrogatePairForLongCharacter( const pair = macos.foundation.stringGetSurrogatePairForLongCharacter(
cp, cp,
&unichars, state.unichars.items[state.unichars.items.len-2..][0..2],
); );
const len: usize = if (pair) 2 else 1; if (!pair) {
const state = &self.shaper.run_state; state.unichars.items.len -= 1;
state.str.appendCharacters(unichars[0..len]); }
// Build our reverse lookup table for codepoints to clusters // Build our reverse lookup table for codepoints to clusters
try state.codepoints.append(self.shaper.alloc, .{ try state.codepoints.append(self.shaper.alloc, .{

View File

@ -28,6 +28,8 @@ const ArenaAllocator = std.heap.ArenaAllocator;
const Terminal = terminal.Terminal; const Terminal = terminal.Terminal;
const Health = renderer.Health; const Health = renderer.Health;
const CFReleaseThread = @import("../cf_release_thread.zig");
const mtl = @import("metal/api.zig"); const mtl = @import("metal/api.zig");
const mtl_buffer = @import("metal/buffer.zig"); const mtl_buffer = @import("metal/buffer.zig");
const mtl_cell = @import("metal/cell.zig"); const mtl_cell = @import("metal/cell.zig");
@ -134,6 +136,12 @@ layer: objc.Object, // CAMetalLayer
/// a display link. /// a display link.
display_link: ?DisplayLink = null, display_link: ?DisplayLink = null,
/// Dedicated thread for releasing CoreFoundation objects. Some objects,
/// such as those produced by CoreText, have excessively slow release
/// callback logic.
cf_release_thread: CFReleaseThread,
cf_release_thr: std.Thread,
/// Custom shader state. This is only set if we have custom shaders. /// Custom shader state. This is only set if we have custom shaders.
custom_shader_state: ?CustomShaderState = null, custom_shader_state: ?CustomShaderState = null,
@ -590,6 +598,9 @@ pub fn init(alloc: Allocator, options: renderer.Options) !Metal {
.cursor_color = options.config.cursor_color, .cursor_color = options.config.cursor_color,
.current_background_color = options.config.background, .current_background_color = options.config.background,
.cf_release_thread = undefined,
.cf_release_thr = undefined,
// Render state // Render state
.cells = .{}, .cells = .{},
.uniforms = .{ .uniforms = .{
@ -687,10 +698,22 @@ pub fn loopEnter(self: *Metal, thr: *renderer.Thread) !void {
&thr.draw_now, &thr.draw_now,
); );
display_link.start() catch {}; display_link.start() catch {};
// Create the CF release thread.
self.cf_release_thread = try CFReleaseThread.init(self.alloc);
errdefer self.cf_release_thread.deinit();
// Start the CF release thread.
self.cf_release_thr = try std.Thread.spawn(
.{},
CFReleaseThread.threadMain,
.{&self.cf_release_thread},
);
self.cf_release_thr.setName("cf_release") catch {};
} }
/// Called by renderer.Thread when it exits the main loop. /// Called by renderer.Thread when it exits the main loop.
pub fn loopExit(self: *const Metal) void { pub fn loopExit(self: *Metal) void {
// If we don't support a display link we have no work to do. // If we don't support a display link we have no work to do.
if (comptime DisplayLink == void) return; if (comptime DisplayLink == void) return;
@ -699,6 +722,15 @@ pub fn loopExit(self: *const Metal) void {
// is gone which is fine. // is gone which is fine.
const display_link = self.display_link orelse return; const display_link = self.display_link orelse return;
display_link.stop() catch {}; display_link.stop() catch {};
// Stop the CF release thread
{
self.cf_release_thread.stop.notify() catch |err|
log.err("error notifying cf release thread to stop, may stall err={}", .{err});
self.cf_release_thr.join();
}
self.cf_release_thread.deinit();
} }
fn displayLinkCallback( fn displayLinkCallback(
@ -986,6 +1018,9 @@ pub fn updateFrame(
if (critical.preedit) |p| p.deinit(self.alloc); if (critical.preedit) |p| p.deinit(self.alloc);
} }
var cf_release_pool = std.ArrayList(*anyopaque).init(self.alloc);
try cf_release_pool.ensureTotalCapacity(state.terminal.rows * 8);
// Build our GPU cells // Build our GPU cells
try self.rebuildCells( try self.rebuildCells(
critical.full_rebuild, critical.full_rebuild,
@ -994,8 +1029,29 @@ pub fn updateFrame(
critical.preedit, critical.preedit,
critical.cursor_style, critical.cursor_style,
&critical.color_palette, &critical.color_palette,
&cf_release_pool,
); );
if (cf_release_pool.items.len > 0) {
const items = try cf_release_pool.toOwnedSlice();
if (self.cf_release_thread.mailbox.push(
.{ .release = .{
.refs = items,
.alloc = self.alloc,
} },
.{ .forever = {} },
) != 0) {
try self.cf_release_thread.wakeup.notify();
} else {
for (items) |ref| {
macos.foundation.CFRelease(ref);
}
self.alloc.free(items);
}
} else {
cf_release_pool.deinit();
}
// Update our viewport pin // Update our viewport pin
self.cells_viewport = critical.viewport_pin; self.cells_viewport = critical.viewport_pin;
@ -1875,6 +1931,7 @@ fn rebuildCells(
preedit: ?renderer.State.Preedit, preedit: ?renderer.State.Preedit,
cursor_style_: ?renderer.CursorStyle, cursor_style_: ?renderer.CursorStyle,
color_palette: *const terminal.color.Palette, color_palette: *const terminal.color.Palette,
cf_release_pool: *std.ArrayList(*anyopaque),
) !void { ) !void {
// const start = try std.time.Instant.now(); // const start = try std.time.Instant.now();
// const start_micro = std.time.microTimestamp(); // const start_micro = std.time.microTimestamp();
@ -1956,7 +2013,10 @@ fn rebuildCells(
while (try iter.next(self.alloc)) |run| { while (try iter.next(self.alloc)) |run| {
// Try to read the cells from the shaping cache if we can. // Try to read the cells from the shaping cache if we can.
const shaper_cells = self.font_shaper_cache.get(run) orelse cache: { const shaper_cells = self.font_shaper_cache.get(run) orelse cache: {
const cells = try self.font_shaper.shape(run); const cells = if (font.options.backend == .coretext)
try self.font_shaper.shape(run, cf_release_pool)
else
try self.font_shaper.shape(run);
// Try to cache them. If caching fails for any reason we continue // Try to cache them. If caching fails for any reason we continue
// because it is just a performance optimization, not a correctness // because it is just a performance optimization, not a correctness