Metal: use "Managed" resource storage mode on discrete GPUs (#5625)

Discrete GPUs do not have unified memory, so they cannot use the
"shared" storage mode. Using it anyway, as we do right now, is
undefined behavior, and I believe it is the cause of the "inverted"
cells problem on Intel systems with discrete GPUs (observed in
discussion #5597).

This commit also sets the CPU cache mode to "write combined" for our
resources: the CPU only writes to them and never reads them back, so
this hint lets Metal optimize them further.
This commit is contained in:
Qwerasd
2025-02-07 13:49:39 -05:00
committed by GitHub
4 changed files with 193 additions and 44 deletions

View File

@ -182,15 +182,34 @@ pub const GPUState = struct {
/// This buffer is written exactly once so we can use it globally. /// This buffer is written exactly once so we can use it globally.
instance: InstanceBuffer, // MTLBuffer instance: InstanceBuffer, // MTLBuffer
/// The default storage mode to use for resources created with our device.
///
/// This is based on whether the device is a discrete GPU or not, since
/// discrete GPUs do not have unified memory and therefore do not support
/// the "shared" storage mode, instead we have to use the "managed" mode.
default_storage_mode: mtl.MTLResourceOptions.StorageMode,
pub fn init() !GPUState { pub fn init() !GPUState {
const device = try chooseDevice(); const device = try chooseDevice();
const queue = device.msgSend(objc.Object, objc.sel("newCommandQueue"), .{}); const queue = device.msgSend(objc.Object, objc.sel("newCommandQueue"), .{});
errdefer queue.release(); errdefer queue.release();
// We determine whether our device is a discrete GPU based on these:
// - We're on macOS (iOS, iPadOS, etc. are guaranteed to be integrated).
// - We're not on aarch64 (Apple Silicon, therefore integrated).
// - The device reports that it does not have unified memory.
const is_discrete =
builtin.target.os.tag == .macos and
builtin.target.cpu.arch != .aarch64 and
!device.getProperty(bool, "hasUnifiedMemory");
const default_storage_mode: mtl.MTLResourceOptions.StorageMode =
if (is_discrete) .managed else .shared;
var instance = try InstanceBuffer.initFill(device, &.{ var instance = try InstanceBuffer.initFill(device, &.{
0, 1, 3, // Top-left triangle 0, 1, 3, // Top-left triangle
1, 2, 3, // Bottom-right triangle 1, 2, 3, // Bottom-right triangle
}); }, .{ .storage_mode = default_storage_mode });
errdefer instance.deinit(); errdefer instance.deinit();
var result: GPUState = .{ var result: GPUState = .{
@ -198,11 +217,12 @@ pub const GPUState = struct {
.queue = queue, .queue = queue,
.instance = instance, .instance = instance,
.frames = undefined, .frames = undefined,
.default_storage_mode = default_storage_mode,
}; };
// Initialize all of our frame state. // Initialize all of our frame state.
for (&result.frames) |*frame| { for (&result.frames) |*frame| {
frame.* = try FrameState.init(result.device); frame.* = try FrameState.init(result.device, default_storage_mode);
} }
return result; return result;
@ -288,18 +308,47 @@ pub const FrameState = struct {
const CellBgBuffer = mtl_buffer.Buffer(mtl_shaders.CellBg); const CellBgBuffer = mtl_buffer.Buffer(mtl_shaders.CellBg);
const CellTextBuffer = mtl_buffer.Buffer(mtl_shaders.CellText); const CellTextBuffer = mtl_buffer.Buffer(mtl_shaders.CellText);
pub fn init(device: objc.Object) !FrameState { pub fn init(
device: objc.Object,
/// Storage mode for buffers and textures.
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !FrameState {
// Uniform buffer contains exactly 1 uniform struct. The // Uniform buffer contains exactly 1 uniform struct. The
// uniform data will be undefined so this must be set before // uniform data will be undefined so this must be set before
// a frame is drawn. // a frame is drawn.
var uniforms = try UniformBuffer.init(device, 1); var uniforms = try UniformBuffer.init(
device,
1,
.{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
errdefer uniforms.deinit(); errdefer uniforms.deinit();
// Create the buffers for our vertex data. The preallocation size // Create the buffers for our vertex data. The preallocation size
// is likely too small but our first frame update will resize it. // is likely too small but our first frame update will resize it.
var cells = try CellTextBuffer.init(device, 10 * 10); var cells = try CellTextBuffer.init(
device,
10 * 10,
.{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
errdefer cells.deinit(); errdefer cells.deinit();
var cells_bg = try CellBgBuffer.init(device, 10 * 10); var cells_bg = try CellBgBuffer.init(
device,
10 * 10,
.{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
errdefer cells_bg.deinit(); errdefer cells_bg.deinit();
// Initialize our textures for our font atlas. // Initialize our textures for our font atlas.
@ -307,13 +356,13 @@ pub const FrameState = struct {
.data = undefined, .data = undefined,
.size = 8, .size = 8,
.format = .grayscale, .format = .grayscale,
}); }, storage_mode);
errdefer grayscale.release(); errdefer grayscale.release();
const color = try initAtlasTexture(device, &.{ const color = try initAtlasTexture(device, &.{
.data = undefined, .data = undefined,
.size = 8, .size = 8,
.format = .rgba, .format = .rgba,
}); }, storage_mode);
errdefer color.release(); errdefer color.release();
return .{ return .{
@ -1215,7 +1264,11 @@ pub fn updateFrame(
.replace_gray_alpha, .replace_gray_alpha,
.replace_rgb, .replace_rgb,
.replace_rgba, .replace_rgba,
=> try kv.value_ptr.image.upload(self.alloc, self.gpu_state.device), => try kv.value_ptr.image.upload(
self.alloc,
self.gpu_state.device,
self.gpu_state.default_storage_mode,
),
.unload_pending, .unload_pending,
.unload_replace, .unload_replace,
@ -1283,7 +1336,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void {
self.font_grid.lock.lockShared(); self.font_grid.lock.lockShared();
defer self.font_grid.lock.unlockShared(); defer self.font_grid.lock.unlockShared();
frame.grayscale_modified = self.font_grid.atlas_grayscale.modified.load(.monotonic); frame.grayscale_modified = self.font_grid.atlas_grayscale.modified.load(.monotonic);
try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_grayscale, &frame.grayscale); try syncAtlasTexture(
self.gpu_state.device,
&self.font_grid.atlas_grayscale,
&frame.grayscale,
self.gpu_state.default_storage_mode,
);
} }
texture: { texture: {
const modified = self.font_grid.atlas_color.modified.load(.monotonic); const modified = self.font_grid.atlas_color.modified.load(.monotonic);
@ -1291,7 +1349,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void {
self.font_grid.lock.lockShared(); self.font_grid.lock.lockShared();
defer self.font_grid.lock.unlockShared(); defer self.font_grid.lock.unlockShared();
frame.color_modified = self.font_grid.atlas_color.modified.load(.monotonic); frame.color_modified = self.font_grid.atlas_color.modified.load(.monotonic);
try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_color, &frame.color); try syncAtlasTexture(
self.gpu_state.device,
&self.font_grid.atlas_color,
&frame.color,
self.gpu_state.default_storage_mode,
);
} }
// Command buffer (MTLCommandBuffer) // Command buffer (MTLCommandBuffer)
@ -1618,7 +1681,11 @@ fn drawImagePlacement(
@as(f32, @floatFromInt(p.width)), @as(f32, @floatFromInt(p.width)),
@as(f32, @floatFromInt(p.height)), @as(f32, @floatFromInt(p.height)),
}, },
}}); }}, .{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = self.gpu_state.default_storage_mode,
});
defer buf.deinit(); defer buf.deinit();
// Set our buffer // Set our buffer
@ -3217,14 +3284,20 @@ fn addPreeditCell(
/// Sync the atlas data to the given texture. This copies the bytes /// Sync the atlas data to the given texture. This copies the bytes
/// associated with the atlas to the given texture. If the atlas no longer /// associated with the atlas to the given texture. If the atlas no longer
/// fits into the texture, the texture will be resized. /// fits into the texture, the texture will be resized.
fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *objc.Object) !void { fn syncAtlasTexture(
device: objc.Object,
atlas: *const font.Atlas,
texture: *objc.Object,
/// Storage mode for the MTLTexture object
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !void {
const width = texture.getProperty(c_ulong, "width"); const width = texture.getProperty(c_ulong, "width");
if (atlas.size > width) { if (atlas.size > width) {
// Free our old texture // Free our old texture
texture.*.release(); texture.*.release();
// Reallocate // Reallocate
texture.* = try initAtlasTexture(device, atlas); texture.* = try initAtlasTexture(device, atlas, storage_mode);
} }
texture.msgSend( texture.msgSend(
@ -3247,7 +3320,12 @@ fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *obj
} }
/// Initialize a MTLTexture object for the given atlas. /// Initialize a MTLTexture object for the given atlas.
fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object { fn initAtlasTexture(
device: objc.Object,
atlas: *const font.Atlas,
/// Storage mode for the MTLTexture object
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !objc.Object {
// Determine our pixel format // Determine our pixel format
const pixel_format: mtl.MTLPixelFormat = switch (atlas.format) { const pixel_format: mtl.MTLPixelFormat = switch (atlas.format) {
.grayscale => .r8unorm, .grayscale => .r8unorm,
@ -3268,15 +3346,14 @@ fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object
desc.setProperty("width", @as(c_ulong, @intCast(atlas.size))); desc.setProperty("width", @as(c_ulong, @intCast(atlas.size)));
desc.setProperty("height", @as(c_ulong, @intCast(atlas.size))); desc.setProperty("height", @as(c_ulong, @intCast(atlas.size)));
// Xcode tells us that this texture should be shared mode on
// aarch64. This configuration is not supported on x86_64 so
// we only set it on aarch64.
if (comptime builtin.target.cpu.arch == .aarch64) {
desc.setProperty( desc.setProperty(
"storageMode", "resourceOptions",
@as(c_ulong, mtl.MTLResourceStorageModeShared), mtl.MTLResourceOptions{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
); );
}
// Initialize // Initialize
const id = device.msgSend( const id = device.msgSend(

View File

@ -24,14 +24,38 @@ pub const MTLStoreAction = enum(c_ulong) {
store = 1, store = 1,
}; };
/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc
pub const MTLResourceOptions = packed struct(c_ulong) {
/// https://developer.apple.com/documentation/metal/mtlcpucachemode?language=objc
cpu_cache_mode: CPUCacheMode = .default,
/// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc /// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc
pub const MTLStorageMode = enum(c_ulong) { storage_mode: StorageMode,
/// https://developer.apple.com/documentation/metal/mtlhazardtrackingmode?language=objc
hazard_tracking_mode: HazardTrackingMode = .default,
_pad: @Type(.{
.Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(c_ulong) - 10 },
}) = 0,
pub const CPUCacheMode = enum(u4) {
default = 0,
write_combined = 1,
};
pub const StorageMode = enum(u4) {
shared = 0, shared = 0,
managed = 1, managed = 1,
private = 2, private = 2,
memoryless = 3, memoryless = 3,
}; };
pub const HazardTrackingMode = enum(u2) {
default = 0,
untracked = 1,
tracked = 2,
};
};
/// https://developer.apple.com/documentation/metal/mtlprimitivetype?language=objc /// https://developer.apple.com/documentation/metal/mtlprimitivetype?language=objc
pub const MTLPrimitiveType = enum(c_ulong) { pub const MTLPrimitiveType = enum(c_ulong) {
point = 0, point = 0,
@ -139,10 +163,6 @@ pub const MTLTextureUsage = enum(c_ulong) {
pixel_format_view = 8, pixel_format_view = 8,
}; };
/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc
/// (incomplete, we only use this mode so we just hardcode it)
pub const MTLResourceStorageModeShared: c_ulong = @intFromEnum(MTLStorageMode.shared) << 4;
pub const MTLClearColor = extern struct { pub const MTLClearColor = extern struct {
red: f64, red: f64,
green: f64, green: f64,

View File

@ -2,6 +2,7 @@ const std = @import("std");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const assert = std.debug.assert; const assert = std.debug.assert;
const objc = @import("objc"); const objc = @import("objc");
const macos = @import("macos");
const mtl = @import("api.zig"); const mtl = @import("api.zig");
@ -14,35 +15,46 @@ pub fn Buffer(comptime T: type) type {
return struct { return struct {
const Self = @This(); const Self = @This();
/// The resource options for this buffer.
options: mtl.MTLResourceOptions,
buffer: objc.Object, // MTLBuffer buffer: objc.Object, // MTLBuffer
/// Initialize a buffer with the given length pre-allocated. /// Initialize a buffer with the given length pre-allocated.
pub fn init(device: objc.Object, len: usize) !Self { pub fn init(
device: objc.Object,
len: usize,
options: mtl.MTLResourceOptions,
) !Self {
const buffer = device.msgSend( const buffer = device.msgSend(
objc.Object, objc.Object,
objc.sel("newBufferWithLength:options:"), objc.sel("newBufferWithLength:options:"),
.{ .{
@as(c_ulong, @intCast(len * @sizeOf(T))), @as(c_ulong, @intCast(len * @sizeOf(T))),
mtl.MTLResourceStorageModeShared, options,
}, },
); );
return .{ .buffer = buffer }; return .{ .buffer = buffer, .options = options };
} }
/// Init the buffer filled with the given data. /// Init the buffer filled with the given data.
pub fn initFill(device: objc.Object, data: []const T) !Self { pub fn initFill(
device: objc.Object,
data: []const T,
options: mtl.MTLResourceOptions,
) !Self {
const buffer = device.msgSend( const buffer = device.msgSend(
objc.Object, objc.Object,
objc.sel("newBufferWithBytes:length:options:"), objc.sel("newBufferWithBytes:length:options:"),
.{ .{
@as(*const anyopaque, @ptrCast(data.ptr)), @as(*const anyopaque, @ptrCast(data.ptr)),
@as(c_ulong, @intCast(data.len * @sizeOf(T))), @as(c_ulong, @intCast(data.len * @sizeOf(T))),
mtl.MTLResourceStorageModeShared, options,
}, },
); );
return .{ .buffer = buffer }; return .{ .buffer = buffer, .options = options };
} }
pub fn deinit(self: *Self) void { pub fn deinit(self: *Self) void {
@ -85,7 +97,7 @@ pub fn Buffer(comptime T: type) type {
objc.sel("newBufferWithLength:options:"), objc.sel("newBufferWithLength:options:"),
.{ .{
@as(c_ulong, @intCast(size * @sizeOf(T))), @as(c_ulong, @intCast(size * @sizeOf(T))),
mtl.MTLResourceStorageModeShared, self.options,
}, },
); );
} }
@ -106,6 +118,18 @@ pub fn Buffer(comptime T: type) type {
}; };
@memcpy(dst, src); @memcpy(dst, src);
// If we're using the managed resource storage mode, then
// we need to signal Metal to synchronize the buffer data.
//
// Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc
if (self.options.storage_mode == .managed) {
self.buffer.msgSend(
void,
"didModifyRange:",
.{macos.foundation.Range.init(0, req_bytes)},
);
}
} }
/// Like Buffer.sync but takes data from an array of ArrayLists, /// Like Buffer.sync but takes data from an array of ArrayLists,
@ -130,7 +154,7 @@ pub fn Buffer(comptime T: type) type {
objc.sel("newBufferWithLength:options:"), objc.sel("newBufferWithLength:options:"),
.{ .{
@as(c_ulong, @intCast(size * @sizeOf(T))), @as(c_ulong, @intCast(size * @sizeOf(T))),
mtl.MTLResourceStorageModeShared, self.options,
}, },
); );
} }
@ -153,6 +177,18 @@ pub fn Buffer(comptime T: type) type {
i += list.items.len * @sizeOf(T); i += list.items.len * @sizeOf(T);
} }
// If we're using the managed resource storage mode, then
// we need to signal Metal to synchronize the buffer data.
//
// Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc
if (self.options.storage_mode == .managed) {
self.buffer.msgSend(
void,
"didModifyRange:",
.{macos.foundation.Range.init(0, req_bytes)},
);
}
return total_len; return total_len;
} }
}; };

View File

@ -358,6 +358,8 @@ pub const Image = union(enum) {
self: *Image, self: *Image,
alloc: Allocator, alloc: Allocator,
device: objc.Object, device: objc.Object,
/// Storage mode for the MTLTexture object
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !void { ) !void {
// Convert our data if we have to // Convert our data if we have to
try self.convert(alloc); try self.convert(alloc);
@ -366,7 +368,7 @@ pub const Image = union(enum) {
const p = self.pending().?; const p = self.pending().?;
// Create our texture // Create our texture
const texture = try initTexture(p, device); const texture = try initTexture(p, device, storage_mode);
errdefer texture.msgSend(void, objc.sel("release"), .{}); errdefer texture.msgSend(void, objc.sel("release"), .{});
// Upload our data // Upload our data
@ -424,7 +426,12 @@ pub const Image = union(enum) {
}; };
} }
fn initTexture(p: Pending, device: objc.Object) !objc.Object { fn initTexture(
p: Pending,
device: objc.Object,
/// Storage mode for the MTLTexture object
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !objc.Object {
// Create our descriptor // Create our descriptor
const desc = init: { const desc = init: {
const Class = objc.getClass("MTLTextureDescriptor").?; const Class = objc.getClass("MTLTextureDescriptor").?;
@ -438,6 +445,15 @@ pub const Image = union(enum) {
desc.setProperty("width", @as(c_ulong, @intCast(p.width))); desc.setProperty("width", @as(c_ulong, @intCast(p.width)));
desc.setProperty("height", @as(c_ulong, @intCast(p.height))); desc.setProperty("height", @as(c_ulong, @intCast(p.height)));
desc.setProperty(
"resourceOptions",
mtl.MTLResourceOptions{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
// Initialize // Initialize
const id = device.msgSend( const id = device.msgSend(
?*anyopaque, ?*anyopaque,