From 626ec2b5ac705d1d382ce605415f073482d8cfd9 Mon Sep 17 00:00:00 2001 From: Qwerasd Date: Fri, 14 Jun 2024 01:43:02 -0400 Subject: [PATCH] perf: introduce CFReleaseThread for running CoreFoundation releases Some CoreFoundation objects, such as those produced by CoreText, have expensive callbacks that run when they are released. By offloading the CFRelease calls to another thread, we can avoid important threads being blocked by unexpectedly expensive callbacks. This commit also changes the way that the coretext shaper's run iterator builds its string. Rather than using a CFMutableString, an ArrayList of unichars is built which is passed to CFStringCreateWithCharactersNoCopy, which is a lot more efficient since it avoids all the CoreFoundation overhead. --- pkg/macos/foundation/string.zig | 11 ++ src/cf_release_thread.zig | 185 ++++++++++++++++++++++++++++++++ src/font/shaper/coretext.zig | 72 +++++++++---- src/renderer/Metal.zig | 64 ++++++++++- 4 files changed, 307 insertions(+), 25 deletions(-) create mode 100644 src/cf_release_thread.zig diff --git a/pkg/macos/foundation/string.zig b/pkg/macos/foundation/string.zig index b642201de..f1f437140 100644 --- a/pkg/macos/foundation/string.zig +++ b/pkg/macos/foundation/string.zig @@ -19,6 +19,17 @@ pub const String = opaque { )))) orelse Allocator.Error.OutOfMemory; } + pub fn createWithCharactersNoCopy( + unichars: []const u16, + ) *String { + return @as(*String, @ptrFromInt(@intFromPtr(c.CFStringCreateWithCharactersNoCopy( + null, + @ptrCast(unichars.ptr), + @intCast(unichars.len), + foundation.c.kCFAllocatorNull, + )))); + } + pub fn release(self: *String) void { c.CFRelease(self); } diff --git a/src/cf_release_thread.zig b/src/cf_release_thread.zig new file mode 100644 index 000000000..6a37ca692 --- /dev/null +++ b/src/cf_release_thread.zig @@ -0,0 +1,185 @@ +//! Represents the CFRelease thread. Pools of CFTypeRefs are sent to +//! this thread to be released, so that their release callback logic +//! 
doesn't block the execution of a high throughput thread like the +//! renderer thread. +pub const Thread = @This(); + +const std = @import("std"); +const builtin = @import("builtin"); +const xev = @import("xev"); +const macos = @import("macos"); + +const BlockingQueue = @import("./blocking_queue.zig").BlockingQueue; + +const Allocator = std.mem.Allocator; +const log = std.log.scoped(.cf_release_thread); + +pub const Message = union(enum) { + /// Release a slice of CFTypeRefs. Uses alloc to + /// free the slice after releasing all the refs. + release: struct{ + refs: []*anyopaque, + alloc: Allocator, + }, +}; + +/// The type used for sending messages to the thread. For now this is +/// hardcoded with a capacity. We can make this a comptime parameter in +/// the future if we want it configurable. +pub const Mailbox = BlockingQueue(Message, 64); + +/// Allocator used for some state +alloc: std.mem.Allocator, + +/// The main event loop for the thread. The user data of this loop +/// is always the allocator used to create the loop. This is a convenience +/// so that users of the loop always have an allocator. +loop: xev.Loop, + +/// This can be used to wake up the thread. +wakeup: xev.Async, +wakeup_c: xev.Completion = .{}, + +/// This can be used to stop the thread on the next loop iteration. +stop: xev.Async, +stop_c: xev.Completion = .{}, + +/// The mailbox that can be used to send this thread messages. Note +/// this is a blocking queue so if it is full you will get errors (or block). +mailbox: *Mailbox, + +flags: packed struct { + /// This is set to true only when an abnormal exit is detected. It + /// tells our mailbox system to drain and ignore all messages. + drain: bool = false, +} = .{}, + +/// Initialize the thread. This does not START the thread. This only sets +/// up all the internal state necessary prior to starting the thread. It +/// is up to the caller to start the thread with the threadMain entrypoint. 
+pub fn init( + alloc: Allocator, +) !Thread { + // Create our event loop. + var loop = try xev.Loop.init(.{}); + errdefer loop.deinit(); + + // This async handle is used to "wake up" the thread to collect objects. + var wakeup_h = try xev.Async.init(); + errdefer wakeup_h.deinit(); + + // This async handle is used to stop the loop and force the thread to end. + var stop_h = try xev.Async.init(); + errdefer stop_h.deinit(); + + // The mailbox for messaging this thread + var mailbox = try Mailbox.create(alloc); + errdefer mailbox.destroy(alloc); + + return Thread{ + .alloc = alloc, + .loop = loop, + .wakeup = wakeup_h, + .stop = stop_h, + .mailbox = mailbox, + }; +} + +/// Clean up the thread. This is only safe to call once the thread +/// completes executing; the caller must join prior to this. +pub fn deinit(self: *Thread) void { + self.stop.deinit(); + self.wakeup.deinit(); + self.loop.deinit(); + + // Nothing can possibly access the mailbox anymore, destroy it. + self.mailbox.destroy(self.alloc); +} + +/// The main entrypoint for the thread. +pub fn threadMain(self: *Thread) void { + // Call child function so we can use errors... + self.threadMain_() catch |err| { + log.warn("error in cf release thread err={}", .{err}); + }; + + // If our loop is not stopped, then we need to keep running so that + // messages are drained and we can wait for the surface to send a stop + // message. + if (!self.loop.flags.stopped) { + log.warn("abrupt cf release thread exit detected, starting xev to drain mailbox", .{}); + defer log.debug("cf release thread fully exiting after abnormal failure", .{}); + self.flags.drain = true; + self.loop.run(.until_done) catch |err| { + log.err("failed to start xev loop for draining err={}", .{err}); + }; + } +} + +fn threadMain_(self: *Thread) !void { + defer log.debug("cf release thread exited", .{}); + + // Start the async handlers. We start these first so that they're + // registered even if anything below fails so we can drain the mailbox. 
+ self.wakeup.wait(&self.loop, &self.wakeup_c, Thread, self, wakeupCallback); + self.stop.wait(&self.loop, &self.stop_c, Thread, self, stopCallback); + + // Run + log.debug("starting cf release thread", .{}); + defer log.debug("starting cf release thread shutdown", .{}); + try self.loop.run(.until_done); +} + +/// Drain the mailbox, handling each message by releasing the CFTypeRefs it carries. +fn drainMailbox(self: *Thread) !void { + // If we're draining, we just drain the mailbox and return. + if (self.flags.drain) { + while (self.mailbox.pop()) |_| {} + return; + } + + while (self.mailbox.pop()) |message| { + // log.debug("mailbox message={}", .{message}); + switch (message) { + .release => |msg| { + for (msg.refs) |ref| { + macos.foundation.CFRelease(ref); + } + // log.debug("Released {} CFTypeRefs.", .{ msg.refs.len }); + msg.alloc.free(msg.refs); + } + } + } +} + +fn wakeupCallback( + self_: ?*Thread, + _: *xev.Loop, + _: *xev.Completion, + r: xev.Async.WaitError!void, +) xev.CallbackAction { + _ = r catch |err| { + log.err("error in wakeup err={}", .{err}); + return .rearm; + }; + + const t = self_.?; + + // When we wake up, we check the mailbox. Mailbox producers should + // wake up our thread after publishing. 
+ t.drainMailbox() catch |err| + log.err("error draining mailbox err={}", .{err}); + + return .rearm; +} + +fn stopCallback( + self_: ?*Thread, + _: *xev.Loop, + _: *xev.Completion, + r: xev.Async.WaitError!void, +) xev.CallbackAction { + _ = r catch unreachable; + self_.?.loop.stop(); + return .disarm; +} diff --git a/src/font/shaper/coretext.zig b/src/font/shaper/coretext.zig index 4b4f2abad..c32145019 100644 --- a/src/font/shaper/coretext.zig +++ b/src/font/shaper/coretext.zig @@ -64,24 +64,21 @@ pub const Shaper = struct { }; const RunState = struct { - str: *macos.foundation.MutableString, codepoints: CodepointList, + unichars: std.ArrayListUnmanaged(u16), - fn init() !RunState { - var str = try macos.foundation.MutableString.create(0); - errdefer str.release(); - return .{ .str = str, .codepoints = .{} }; + fn init() RunState { + return .{ .codepoints = .{}, .unichars = .{} }; } fn deinit(self: *RunState, alloc: Allocator) void { self.codepoints.deinit(alloc); - self.str.release(); + self.unichars.deinit(alloc); } fn reset(self: *RunState) !void { self.codepoints.clearRetainingCapacity(); - self.str.release(); - self.str = try macos.foundation.MutableString.create(0); + self.unichars.clearRetainingCapacity(); } }; @@ -184,7 +181,7 @@ pub const Shaper = struct { for (hardcoded_features) |name| try feats.append(name); for (opts.features) |name| try feats.append(name); - var run_state = try RunState.init(); + var run_state = RunState.init(); errdefer run_state.deinit(alloc); // For now we only support LTR text. If we shape RTL text then @@ -259,7 +256,14 @@ pub const Shaper = struct { }; } - pub fn shape(self: *Shaper, run: font.shape.TextRun) ![]const font.shape.Cell { + /// Expects an ArrayList `cf_release_pool` in which `CFTypeRef`s + /// can be placed, which guarantees that they will be `CFRelease`d + /// eventually. 
+ pub fn shape( + self: *Shaper, + run: font.shape.TextRun, + cf_release_pool: *std.ArrayList(*anyopaque), + ) ![]const font.shape.Cell { const state = &self.run_state; // { @@ -290,18 +294,28 @@ pub const Shaper = struct { defer arena.deinit(); const alloc = arena.allocator(); - const attr_dict: *macos.foundation.Dictionary = try self.getFont(run.grid, run.font_index); + const attr_dict: *macos.foundation.Dictionary = try self.getFont( + run.grid, + run.font_index, + cf_release_pool, + ); + + // Make room for the string, the attributed string, and the CTLine. + try cf_release_pool.ensureUnusedCapacity(3); + + const str = macos.foundation.String.createWithCharactersNoCopy(state.unichars.items); + cf_release_pool.appendAssumeCapacity(str); // Create an attributed string from our string const attr_str = try macos.foundation.AttributedString.create( - state.str.string(), + str, attr_dict, ); - defer attr_str.release(); + cf_release_pool.appendAssumeCapacity(attr_str); // We should always have one run because we do our own run splitting. const line = try macos.text.Line.createWithAttributedString(attr_str); - defer line.release(); + cf_release_pool.appendAssumeCapacity(line); // This keeps track of the current offsets within a single cell. var cell_offset: struct { @@ -393,7 +407,12 @@ pub const Shaper = struct { /// Get an attr dict for a font from a specific index. /// These items are cached, do not retain or release them. 
- fn getFont(self: *Shaper, grid: *font.SharedGrid, index: font.Collection.Index) !*macos.foundation.Dictionary { + fn getFont( + self: *Shaper, + grid: *font.SharedGrid, + index: font.Collection.Index, + cf_release_pool: *std.ArrayList(*anyopaque), + ) !*macos.foundation.Dictionary { const index_int = index.int(); if (self.cached_fonts.items.len <= index_int) { @@ -407,6 +426,8 @@ pub const Shaper = struct { return cached; } + try cf_release_pool.ensureUnusedCapacity(3); + const run_font = font: { // The CoreText shaper relies on CoreText and CoreText claims // that CTFonts are threadsafe. See: @@ -429,17 +450,17 @@ pub const Shaper = struct { const original = face.font; const attrs = try self.features.attrsDict(face.quirks_disable_default_font_features); - defer attrs.release(); + cf_release_pool.appendAssumeCapacity(attrs); const desc = try macos.text.FontDescriptor.createWithAttributes(attrs); - defer desc.release(); + cf_release_pool.appendAssumeCapacity(desc); const copied = try original.copyWithAttributes(0, null, desc); errdefer copied.release(); break :font copied; }; - defer run_font.release(); + cf_release_pool.appendAssumeCapacity(run_font); // Get our font and use that get the attributes to set for the // attributed string so the whole string uses the same font. 
@@ -470,15 +491,20 @@ pub const Shaper = struct { } pub fn addCodepoint(self: RunIteratorHook, cp: u32, cluster: u32) !void { + const state = &self.shaper.run_state; + // Build our UTF-16 string for CoreText - var unichars: [2]u16 = undefined; + try state.unichars.ensureUnusedCapacity(self.shaper.alloc, 2); + + state.unichars.appendNTimesAssumeCapacity(0, 2); + const pair = macos.foundation.stringGetSurrogatePairForLongCharacter( cp, - &unichars, + state.unichars.items[state.unichars.items.len-2..][0..2], ); - const len: usize = if (pair) 2 else 1; - const state = &self.shaper.run_state; - state.str.appendCharacters(unichars[0..len]); + if (!pair) { + state.unichars.items.len -= 1; + } // Build our reverse lookup table for codepoints to clusters try state.codepoints.append(self.shaper.alloc, .{ diff --git a/src/renderer/Metal.zig b/src/renderer/Metal.zig index ca52ea362..5b85d9a4e 100644 --- a/src/renderer/Metal.zig +++ b/src/renderer/Metal.zig @@ -28,6 +28,8 @@ const ArenaAllocator = std.heap.ArenaAllocator; const Terminal = terminal.Terminal; const Health = renderer.Health; +const CFReleaseThread = @import("../cf_release_thread.zig"); + const mtl = @import("metal/api.zig"); const mtl_buffer = @import("metal/buffer.zig"); const mtl_cell = @import("metal/cell.zig"); @@ -134,6 +136,12 @@ layer: objc.Object, // CAMetalLayer /// a display link. display_link: ?DisplayLink = null, +/// Dedicated thread for releasing CoreFoundation objects. Some objects, +/// such as those produced by CoreText, have excessively slow release +/// callback logic. +cf_release_thread: CFReleaseThread, +cf_release_thr: std.Thread, + /// Custom shader state. This is only set if we have custom shaders. 
custom_shader_state: ?CustomShaderState = null, @@ -590,6 +598,9 @@ pub fn init(alloc: Allocator, options: renderer.Options) !Metal { .cursor_color = options.config.cursor_color, .current_background_color = options.config.background, + .cf_release_thread = undefined, + .cf_release_thr = undefined, + // Render state .cells = .{}, .uniforms = .{ @@ -687,10 +698,22 @@ pub fn loopEnter(self: *Metal, thr: *renderer.Thread) !void { &thr.draw_now, ); display_link.start() catch {}; + + // Create the CF release thread. + self.cf_release_thread = try CFReleaseThread.init(self.alloc); + errdefer self.cf_release_thread.deinit(); + + // Start the CF release thread. + self.cf_release_thr = try std.Thread.spawn( + .{}, + CFReleaseThread.threadMain, + .{&self.cf_release_thread}, + ); + self.cf_release_thr.setName("cf_release") catch {}; } /// Called by renderer.Thread when it exits the main loop. -pub fn loopExit(self: *const Metal) void { +pub fn loopExit(self: *Metal) void { // If we don't support a display link we have no work to do. if (comptime DisplayLink == void) return; @@ -699,6 +722,15 @@ pub fn loopExit(self: *const Metal) void { // is gone which is fine. 
const display_link = self.display_link orelse return; display_link.stop() catch {}; + + // Stop the CF release thread + { + self.cf_release_thread.stop.notify() catch |err| + log.err("error notifying cf release thread to stop, may stall err={}", .{err}); + self.cf_release_thr.join(); + } + + self.cf_release_thread.deinit(); } fn displayLinkCallback( @@ -986,6 +1018,9 @@ pub fn updateFrame( if (critical.preedit) |p| p.deinit(self.alloc); } + var cf_release_pool = std.ArrayList(*anyopaque).init(self.alloc); + try cf_release_pool.ensureTotalCapacity(state.terminal.rows * 8); + // Build our GPU cells try self.rebuildCells( critical.full_rebuild, @@ -994,8 +1029,29 @@ pub fn updateFrame( critical.preedit, critical.cursor_style, &critical.color_palette, + &cf_release_pool, ); + if (cf_release_pool.items.len > 0) { + const items = try cf_release_pool.toOwnedSlice(); + if (self.cf_release_thread.mailbox.push( + .{ .release = .{ + .refs = items, + .alloc = self.alloc, + } }, + .{ .forever = {} }, + ) != 0) { + try self.cf_release_thread.wakeup.notify(); + } else { + for (items) |ref| { + macos.foundation.CFRelease(ref); + } + self.alloc.free(items); + } + } else { + cf_release_pool.deinit(); + } + // Update our viewport pin self.cells_viewport = critical.viewport_pin; @@ -1875,6 +1931,7 @@ fn rebuildCells( preedit: ?renderer.State.Preedit, cursor_style_: ?renderer.CursorStyle, color_palette: *const terminal.color.Palette, + cf_release_pool: *std.ArrayList(*anyopaque), ) !void { // const start = try std.time.Instant.now(); // const start_micro = std.time.microTimestamp(); @@ -1956,7 +2013,10 @@ fn rebuildCells( while (try iter.next(self.alloc)) |run| { // Try to read the cells from the shaping cache if we can. 
const shaper_cells = self.font_shaper_cache.get(run) orelse cache: { - const cells = try self.font_shaper.shape(run); + const cells = if (font.options.backend == .coretext) + try self.font_shaper.shape(run, cf_release_pool) + else + try self.font_shaper.shape(run); // Try to cache them. If caching fails for any reason we continue // because it is just a performance optimization, not a correctness