diff --git a/src/renderer/Metal.zig b/src/renderer/Metal.zig index 866f9682d..ca13f87de 100644 --- a/src/renderer/Metal.zig +++ b/src/renderer/Metal.zig @@ -182,15 +182,34 @@ pub const GPUState = struct { /// This buffer is written exactly once so we can use it globally. instance: InstanceBuffer, // MTLBuffer + /// The default storage mode to use for resources created with our device. + /// + /// This is based on whether the device is a discrete GPU or not, since + /// discrete GPUs do not have unified memory and therefore do not support + /// the "shared" storage mode, instead we have to use the "managed" mode. + default_storage_mode: mtl.MTLResourceOptions.StorageMode, + pub fn init() !GPUState { const device = try chooseDevice(); const queue = device.msgSend(objc.Object, objc.sel("newCommandQueue"), .{}); errdefer queue.release(); + // We determine whether our device is a discrete GPU based on these: + // - We're on macOS (iOS, iPadOS, etc. are guaranteed to be integrated). + // - We're not on aarch64 (Apple Silicon, therefore integrated). + // - The device reports that it does not have unified memory. + const is_discrete = + builtin.target.os.tag == .macos and + builtin.target.cpu.arch != .aarch64 and + !device.getProperty(bool, "hasUnifiedMemory"); + + const default_storage_mode: mtl.MTLResourceOptions.StorageMode = + if (is_discrete) .managed else .shared; + var instance = try InstanceBuffer.initFill(device, &.{ 0, 1, 3, // Top-left triangle 1, 2, 3, // Bottom-right triangle - }); + }, .{ .storage_mode = default_storage_mode }); errdefer instance.deinit(); var result: GPUState = .{ @@ -198,11 +217,12 @@ pub const GPUState = struct { .queue = queue, .instance = instance, .frames = undefined, + .default_storage_mode = default_storage_mode, }; // Initialize all of our frame state. for (&result.frames) |*frame| { - frame.* = try FrameState.init(result.device); + frame.* = try FrameState.init(result.device, default_storage_mode); } return result; @@ -288,18 +308,47 @@ pub const FrameState = struct { const CellBgBuffer = mtl_buffer.Buffer(mtl_shaders.CellBg); const CellTextBuffer = mtl_buffer.Buffer(mtl_shaders.CellText); - pub fn init(device: objc.Object) !FrameState { + pub fn init( + device: objc.Object, + /// Storage mode for buffers and textures. + storage_mode: mtl.MTLResourceOptions.StorageMode, + ) !FrameState { // Uniform buffer contains exactly 1 uniform struct. The // uniform data will be undefined so this must be set before // a frame is drawn. - var uniforms = try UniformBuffer.init(device, 1); + var uniforms = try UniformBuffer.init( + device, + 1, + .{ + // Indicate that the CPU writes to this resource but never reads it. + .cpu_cache_mode = .write_combined, + .storage_mode = storage_mode, + }, + ); errdefer uniforms.deinit(); // Create the buffers for our vertex data. The preallocation size // is likely too small but our first frame update will resize it. - var cells = try CellTextBuffer.init(device, 10 * 10); + var cells = try CellTextBuffer.init( + device, + 10 * 10, + .{ + // Indicate that the CPU writes to this resource but never reads it. + .cpu_cache_mode = .write_combined, + .storage_mode = storage_mode, + }, + ); errdefer cells.deinit(); - var cells_bg = try CellBgBuffer.init(device, 10 * 10); + var cells_bg = try CellBgBuffer.init( + device, + 10 * 10, + .{ + // Indicate that the CPU writes to this resource but never reads it. + .cpu_cache_mode = .write_combined, + .storage_mode = storage_mode, + }, + ); + errdefer cells_bg.deinit(); // Initialize our textures for our font atlas. @@ -307,13 +356,13 @@ pub const FrameState = struct { .data = undefined, .size = 8, .format = .grayscale, - }); + }, storage_mode); errdefer grayscale.release(); const color = try initAtlasTexture(device, &.{ .data = undefined, .size = 8, .format = .rgba, - }); + }, storage_mode); errdefer color.release(); return .{ @@ -1215,7 +1264,11 @@ pub fn updateFrame( .replace_gray_alpha, .replace_rgb, .replace_rgba, - => try kv.value_ptr.image.upload(self.alloc, self.gpu_state.device), + => try kv.value_ptr.image.upload( + self.alloc, + self.gpu_state.device, + self.gpu_state.default_storage_mode, + ), .unload_pending, .unload_replace, @@ -1283,7 +1336,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void { self.font_grid.lock.lockShared(); defer self.font_grid.lock.unlockShared(); frame.grayscale_modified = self.font_grid.atlas_grayscale.modified.load(.monotonic); - try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_grayscale, &frame.grayscale); + try syncAtlasTexture( + self.gpu_state.device, + &self.font_grid.atlas_grayscale, + &frame.grayscale, + self.gpu_state.default_storage_mode, + ); } texture: { const modified = self.font_grid.atlas_color.modified.load(.monotonic); @@ -1291,7 +1349,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void { self.font_grid.lock.lockShared(); defer self.font_grid.lock.unlockShared(); frame.color_modified = self.font_grid.atlas_color.modified.load(.monotonic); - try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_color, &frame.color); + try syncAtlasTexture( + self.gpu_state.device, + &self.font_grid.atlas_color, + &frame.color, + self.gpu_state.default_storage_mode, + ); } // Command buffer (MTLCommandBuffer) @@ -1618,7 +1681,11 @@ fn drawImagePlacement( @as(f32, @floatFromInt(p.width)), @as(f32, @floatFromInt(p.height)), }, - }}); + }}, .{ + // Indicate that the CPU writes to this resource but never reads it. + .cpu_cache_mode = .write_combined, + .storage_mode = self.gpu_state.default_storage_mode, + }); defer buf.deinit(); // Set our buffer @@ -3217,14 +3284,20 @@ fn addPreeditCell( /// Sync the atlas data to the given texture. This copies the bytes /// associated with the atlas to the given texture. If the atlas no longer /// fits into the texture, the texture will be resized. -fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *objc.Object) !void { +fn syncAtlasTexture( + device: objc.Object, + atlas: *const font.Atlas, + texture: *objc.Object, + /// Storage mode for the MTLTexture object + storage_mode: mtl.MTLResourceOptions.StorageMode, +) !void { const width = texture.getProperty(c_ulong, "width"); if (atlas.size > width) { // Free our old texture texture.*.release(); // Reallocate - texture.* = try initAtlasTexture(device, atlas); + texture.* = try initAtlasTexture(device, atlas, storage_mode); } texture.msgSend( @@ -3247,7 +3320,12 @@ fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *obj } /// Initialize a MTLTexture object for the given atlas. -fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object { +fn initAtlasTexture( + device: objc.Object, + atlas: *const font.Atlas, + /// Storage mode for the MTLTexture object + storage_mode: mtl.MTLResourceOptions.StorageMode, +) !objc.Object { // Determine our pixel format const pixel_format: mtl.MTLPixelFormat = switch (atlas.format) { .grayscale => .r8unorm, @@ -3268,15 +3346,14 @@ fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object desc.setProperty("width", @as(c_ulong, @intCast(atlas.size))); desc.setProperty("height", @as(c_ulong, @intCast(atlas.size))); - // Xcode tells us that this texture should be shared mode on - // aarch64. This configuration is not supported on x86_64 so - // we only set it on aarch64. - if (comptime builtin.target.cpu.arch == .aarch64) { - desc.setProperty( - "storageMode", - @as(c_ulong, mtl.MTLResourceStorageModeShared), - ); - } + desc.setProperty( + "resourceOptions", + mtl.MTLResourceOptions{ + // Indicate that the CPU writes to this resource but never reads it. + .cpu_cache_mode = .write_combined, + .storage_mode = storage_mode, + }, + ); // Initialize const id = device.msgSend( diff --git a/src/renderer/metal/api.zig b/src/renderer/metal/api.zig index 6ab42bbd6..535a0b42b 100644 --- a/src/renderer/metal/api.zig +++ b/src/renderer/metal/api.zig @@ -24,12 +24,36 @@ pub const MTLStoreAction = enum(c_ulong) { store = 1, }; -/// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc -pub const MTLStorageMode = enum(c_ulong) { - shared = 0, - managed = 1, - private = 2, - memoryless = 3, +/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc +pub const MTLResourceOptions = packed struct(c_ulong) { + /// https://developer.apple.com/documentation/metal/mtlcpucachemode?language=objc + cpu_cache_mode: CPUCacheMode = .default, + /// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc + storage_mode: StorageMode, + /// https://developer.apple.com/documentation/metal/mtlhazardtrackingmode?language=objc + hazard_tracking_mode: HazardTrackingMode = .default, + + _pad: @Type(.{ + .Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(c_ulong) - 10 }, + }) = 0, + + pub const CPUCacheMode = enum(u4) { + default = 0, + write_combined = 1, + }; + + pub const StorageMode = enum(u4) { + shared = 0, + managed = 1, + private = 2, + memoryless = 3, + }; + + pub const HazardTrackingMode = enum(u2) { + default = 0, + untracked = 1, + tracked = 2, + }; }; /// https://developer.apple.com/documentation/metal/mtlprimitivetype?language=objc @@ -139,10 +163,6 @@ pub const MTLTextureUsage = enum(c_ulong) { pixel_format_view = 8, }; -/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc -/// (incomplete, we only use this mode so we just hardcode it) -pub const MTLResourceStorageModeShared: c_ulong = @intFromEnum(MTLStorageMode.shared) << 4; - pub const MTLClearColor = extern struct { red: f64, green: f64, diff --git a/src/renderer/metal/buffer.zig b/src/renderer/metal/buffer.zig index 55a207f03..4128e297b 100644 --- a/src/renderer/metal/buffer.zig +++ b/src/renderer/metal/buffer.zig @@ -2,6 +2,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; const objc = @import("objc"); +const macos = @import("macos"); const mtl = @import("api.zig"); @@ -14,35 +15,46 @@ pub fn Buffer(comptime T: type) type { return struct { const Self = @This(); + /// The resource options for this buffer. + options: mtl.MTLResourceOptions, + buffer: objc.Object, // MTLBuffer /// Initialize a buffer with the given length pre-allocated. - pub fn init(device: objc.Object, len: usize) !Self { + pub fn init( + device: objc.Object, + len: usize, + options: mtl.MTLResourceOptions, + ) !Self { const buffer = device.msgSend( objc.Object, objc.sel("newBufferWithLength:options:"), .{ @as(c_ulong, @intCast(len * @sizeOf(T))), - mtl.MTLResourceStorageModeShared, + options, }, ); - return .{ .buffer = buffer }; + return .{ .buffer = buffer, .options = options }; } /// Init the buffer filled with the given data. - pub fn initFill(device: objc.Object, data: []const T) !Self { + pub fn initFill( + device: objc.Object, + data: []const T, + options: mtl.MTLResourceOptions, + ) !Self { const buffer = device.msgSend( objc.Object, objc.sel("newBufferWithBytes:length:options:"), .{ @as(*const anyopaque, @ptrCast(data.ptr)), @as(c_ulong, @intCast(data.len * @sizeOf(T))), - mtl.MTLResourceStorageModeShared, + options, }, ); - return .{ .buffer = buffer }; + return .{ .buffer = buffer, .options = options }; } pub fn deinit(self: *Self) void { @@ -85,7 +97,7 @@ pub fn Buffer(comptime T: type) type { objc.sel("newBufferWithLength:options:"), .{ @as(c_ulong, @intCast(size * @sizeOf(T))), - mtl.MTLResourceStorageModeShared, + self.options, }, ); } @@ -106,6 +118,18 @@ pub fn Buffer(comptime T: type) type { }; @memcpy(dst, src); + + // If we're using the managed resource storage mode, then + // we need to signal Metal to synchronize the buffer data. + // + // Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc + if (self.options.storage_mode == .managed) { + self.buffer.msgSend( + void, + "didModifyRange:", + .{macos.foundation.Range.init(0, req_bytes)}, + ); + } } /// Like Buffer.sync but takes data from an array of ArrayLists, @@ -130,7 +154,7 @@ pub fn Buffer(comptime T: type) type { objc.sel("newBufferWithLength:options:"), .{ @as(c_ulong, @intCast(size * @sizeOf(T))), - mtl.MTLResourceStorageModeShared, + self.options, }, ); } @@ -153,6 +177,18 @@ pub fn Buffer(comptime T: type) type { i += list.items.len * @sizeOf(T); } + // If we're using the managed resource storage mode, then + // we need to signal Metal to synchronize the buffer data. + // + // Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc + if (self.options.storage_mode == .managed) { + self.buffer.msgSend( + void, + "didModifyRange:", + .{macos.foundation.Range.init(0, req_bytes)}, + ); + } + return total_len; } }; diff --git a/src/renderer/metal/image.zig b/src/renderer/metal/image.zig index 9d72cae96..835fbd672 100644 --- a/src/renderer/metal/image.zig +++ b/src/renderer/metal/image.zig @@ -358,6 +358,8 @@ pub const Image = union(enum) { self: *Image, alloc: Allocator, device: objc.Object, + /// Storage mode for the MTLTexture object + storage_mode: mtl.MTLResourceOptions.StorageMode, ) !void { // Convert our data if we have to try self.convert(alloc); @@ -366,7 +368,7 @@ pub const Image = union(enum) { const p = self.pending().?; // Create our texture - const texture = try initTexture(p, device); + const texture = try initTexture(p, device, storage_mode); errdefer texture.msgSend(void, objc.sel("release"), .{}); // Upload our data @@ -424,7 +426,12 @@ pub const Image = union(enum) { }; } - fn initTexture(p: Pending, device: objc.Object) !objc.Object { + fn initTexture( + p: Pending, + device: objc.Object, + /// Storage mode for the MTLTexture object + storage_mode: mtl.MTLResourceOptions.StorageMode, + ) !objc.Object { // Create our descriptor const desc = init: { const Class = objc.getClass("MTLTextureDescriptor").?; @@ -438,6 +445,15 @@ pub const Image = union(enum) { desc.setProperty("width", @as(c_ulong, @intCast(p.width))); desc.setProperty("height", @as(c_ulong, @intCast(p.height))); + desc.setProperty( + "resourceOptions", + mtl.MTLResourceOptions{ + // Indicate that the CPU writes to this resource but never reads it. + .cpu_cache_mode = .write_combined, + .storage_mode = storage_mode, + }, + ); + // Initialize const id = device.msgSend( ?*anyopaque,