Metal: use "Managed" resource storage mode on discrete GPUs (#5625)

Discrete GPUs cannot use the "shared" storage mode. This causes
undefined behavior right now, and I believe it's what's causing a
problem on Intel systems with discrete GPUs with "inverted" cells.
(Observed in discussion #5597)

This commit also sets the CPU cache mode to "write combined" for our
resources since we don't read them back so Metal can optimize them
further with this hint.
This commit is contained in:
Qwerasd
2025-02-07 13:49:39 -05:00
committed by GitHub
4 changed files with 193 additions and 44 deletions

View File

@ -182,15 +182,34 @@ pub const GPUState = struct {
/// This buffer is written exactly once so we can use it globally.
instance: InstanceBuffer, // MTLBuffer
/// The default storage mode to use for resources created with our device.
///
/// This is based on whether the device is a discrete GPU or not, since
/// discrete GPUs do not have unified memory and therefore do not support
/// the "shared" storage mode, instead we have to use the "managed" mode.
default_storage_mode: mtl.MTLResourceOptions.StorageMode,
pub fn init() !GPUState {
const device = try chooseDevice();
const queue = device.msgSend(objc.Object, objc.sel("newCommandQueue"), .{});
errdefer queue.release();
// We determine whether our device is a discrete GPU based on these:
// - We're on macOS (iOS, iPadOS, etc. are guaranteed to be integrated).
// - We're not on aarch64 (Apple Silicon, therefore integrated).
// - The device reports that it does not have unified memory.
const is_discrete =
builtin.target.os.tag == .macos and
builtin.target.cpu.arch != .aarch64 and
!device.getProperty(bool, "hasUnifiedMemory");
const default_storage_mode: mtl.MTLResourceOptions.StorageMode =
if (is_discrete) .managed else .shared;
var instance = try InstanceBuffer.initFill(device, &.{
0, 1, 3, // Top-left triangle
1, 2, 3, // Bottom-right triangle
});
}, .{ .storage_mode = default_storage_mode });
errdefer instance.deinit();
var result: GPUState = .{
@ -198,11 +217,12 @@ pub const GPUState = struct {
.queue = queue,
.instance = instance,
.frames = undefined,
.default_storage_mode = default_storage_mode,
};
// Initialize all of our frame state.
for (&result.frames) |*frame| {
frame.* = try FrameState.init(result.device);
frame.* = try FrameState.init(result.device, default_storage_mode);
}
return result;
@ -288,18 +308,47 @@ pub const FrameState = struct {
const CellBgBuffer = mtl_buffer.Buffer(mtl_shaders.CellBg);
const CellTextBuffer = mtl_buffer.Buffer(mtl_shaders.CellText);
pub fn init(device: objc.Object) !FrameState {
pub fn init(
device: objc.Object,
/// Storage mode for buffers and textures.
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !FrameState {
// Uniform buffer contains exactly 1 uniform struct. The
// uniform data will be undefined so this must be set before
// a frame is drawn.
var uniforms = try UniformBuffer.init(device, 1);
var uniforms = try UniformBuffer.init(
device,
1,
.{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
errdefer uniforms.deinit();
// Create the buffers for our vertex data. The preallocation size
// is likely too small but our first frame update will resize it.
var cells = try CellTextBuffer.init(device, 10 * 10);
var cells = try CellTextBuffer.init(
device,
10 * 10,
.{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
errdefer cells.deinit();
var cells_bg = try CellBgBuffer.init(device, 10 * 10);
var cells_bg = try CellBgBuffer.init(
device,
10 * 10,
.{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
errdefer cells_bg.deinit();
// Initialize our textures for our font atlas.
@ -307,13 +356,13 @@ pub const FrameState = struct {
.data = undefined,
.size = 8,
.format = .grayscale,
});
}, storage_mode);
errdefer grayscale.release();
const color = try initAtlasTexture(device, &.{
.data = undefined,
.size = 8,
.format = .rgba,
});
}, storage_mode);
errdefer color.release();
return .{
@ -1215,7 +1264,11 @@ pub fn updateFrame(
.replace_gray_alpha,
.replace_rgb,
.replace_rgba,
=> try kv.value_ptr.image.upload(self.alloc, self.gpu_state.device),
=> try kv.value_ptr.image.upload(
self.alloc,
self.gpu_state.device,
self.gpu_state.default_storage_mode,
),
.unload_pending,
.unload_replace,
@ -1283,7 +1336,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void {
self.font_grid.lock.lockShared();
defer self.font_grid.lock.unlockShared();
frame.grayscale_modified = self.font_grid.atlas_grayscale.modified.load(.monotonic);
try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_grayscale, &frame.grayscale);
try syncAtlasTexture(
self.gpu_state.device,
&self.font_grid.atlas_grayscale,
&frame.grayscale,
self.gpu_state.default_storage_mode,
);
}
texture: {
const modified = self.font_grid.atlas_color.modified.load(.monotonic);
@ -1291,7 +1349,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void {
self.font_grid.lock.lockShared();
defer self.font_grid.lock.unlockShared();
frame.color_modified = self.font_grid.atlas_color.modified.load(.monotonic);
try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_color, &frame.color);
try syncAtlasTexture(
self.gpu_state.device,
&self.font_grid.atlas_color,
&frame.color,
self.gpu_state.default_storage_mode,
);
}
// Command buffer (MTLCommandBuffer)
@ -1618,7 +1681,11 @@ fn drawImagePlacement(
@as(f32, @floatFromInt(p.width)),
@as(f32, @floatFromInt(p.height)),
},
}});
}}, .{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = self.gpu_state.default_storage_mode,
});
defer buf.deinit();
// Set our buffer
@ -3217,14 +3284,20 @@ fn addPreeditCell(
/// Sync the atlas data to the given texture. This copies the bytes
/// associated with the atlas to the given texture. If the atlas no longer
/// fits into the texture, the texture will be resized.
fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *objc.Object) !void {
fn syncAtlasTexture(
device: objc.Object,
atlas: *const font.Atlas,
texture: *objc.Object,
/// Storage mode for the MTLTexture object
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !void {
const width = texture.getProperty(c_ulong, "width");
if (atlas.size > width) {
// Free our old texture
texture.*.release();
// Reallocate
texture.* = try initAtlasTexture(device, atlas);
texture.* = try initAtlasTexture(device, atlas, storage_mode);
}
texture.msgSend(
@ -3247,7 +3320,12 @@ fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *obj
}
/// Initialize a MTLTexture object for the given atlas.
fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object {
fn initAtlasTexture(
device: objc.Object,
atlas: *const font.Atlas,
/// Storage mode for the MTLTexture object
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !objc.Object {
// Determine our pixel format
const pixel_format: mtl.MTLPixelFormat = switch (atlas.format) {
.grayscale => .r8unorm,
@ -3268,15 +3346,14 @@ fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object
desc.setProperty("width", @as(c_ulong, @intCast(atlas.size)));
desc.setProperty("height", @as(c_ulong, @intCast(atlas.size)));
// Xcode tells us that this texture should be shared mode on
// aarch64. This configuration is not supported on x86_64 so
// we only set it on aarch64.
if (comptime builtin.target.cpu.arch == .aarch64) {
desc.setProperty(
"storageMode",
@as(c_ulong, mtl.MTLResourceStorageModeShared),
);
}
desc.setProperty(
"resourceOptions",
mtl.MTLResourceOptions{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
// Initialize
const id = device.msgSend(

View File

@ -24,12 +24,36 @@ pub const MTLStoreAction = enum(c_ulong) {
store = 1,
};
/// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc
pub const MTLStorageMode = enum(c_ulong) {
shared = 0,
managed = 1,
private = 2,
memoryless = 3,
/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc
pub const MTLResourceOptions = packed struct(c_ulong) {
/// https://developer.apple.com/documentation/metal/mtlcpucachemode?language=objc
cpu_cache_mode: CPUCacheMode = .default,
/// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc
storage_mode: StorageMode,
/// https://developer.apple.com/documentation/metal/mtlhazardtrackingmode?language=objc
hazard_tracking_mode: HazardTrackingMode = .default,
_pad: @Type(.{
.Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(c_ulong) - 10 },
}) = 0,
pub const CPUCacheMode = enum(u4) {
default = 0,
write_combined = 1,
};
pub const StorageMode = enum(u4) {
shared = 0,
managed = 1,
private = 2,
memoryless = 3,
};
pub const HazardTrackingMode = enum(u2) {
default = 0,
untracked = 1,
tracked = 2,
};
};
/// https://developer.apple.com/documentation/metal/mtlprimitivetype?language=objc
@ -139,10 +163,6 @@ pub const MTLTextureUsage = enum(c_ulong) {
pixel_format_view = 8,
};
/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc
/// (incomplete, we only use this mode so we just hardcode it)
pub const MTLResourceStorageModeShared: c_ulong = @intFromEnum(MTLStorageMode.shared) << 4;
pub const MTLClearColor = extern struct {
red: f64,
green: f64,

View File

@ -2,6 +2,7 @@ const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const objc = @import("objc");
const macos = @import("macos");
const mtl = @import("api.zig");
@ -14,35 +15,46 @@ pub fn Buffer(comptime T: type) type {
return struct {
const Self = @This();
/// The resource options for this buffer.
options: mtl.MTLResourceOptions,
buffer: objc.Object, // MTLBuffer
/// Initialize a buffer with the given length pre-allocated.
pub fn init(device: objc.Object, len: usize) !Self {
pub fn init(
device: objc.Object,
len: usize,
options: mtl.MTLResourceOptions,
) !Self {
const buffer = device.msgSend(
objc.Object,
objc.sel("newBufferWithLength:options:"),
.{
@as(c_ulong, @intCast(len * @sizeOf(T))),
mtl.MTLResourceStorageModeShared,
options,
},
);
return .{ .buffer = buffer };
return .{ .buffer = buffer, .options = options };
}
/// Init the buffer filled with the given data.
pub fn initFill(device: objc.Object, data: []const T) !Self {
pub fn initFill(
device: objc.Object,
data: []const T,
options: mtl.MTLResourceOptions,
) !Self {
const buffer = device.msgSend(
objc.Object,
objc.sel("newBufferWithBytes:length:options:"),
.{
@as(*const anyopaque, @ptrCast(data.ptr)),
@as(c_ulong, @intCast(data.len * @sizeOf(T))),
mtl.MTLResourceStorageModeShared,
options,
},
);
return .{ .buffer = buffer };
return .{ .buffer = buffer, .options = options };
}
pub fn deinit(self: *Self) void {
@ -85,7 +97,7 @@ pub fn Buffer(comptime T: type) type {
objc.sel("newBufferWithLength:options:"),
.{
@as(c_ulong, @intCast(size * @sizeOf(T))),
mtl.MTLResourceStorageModeShared,
self.options,
},
);
}
@ -106,6 +118,18 @@ pub fn Buffer(comptime T: type) type {
};
@memcpy(dst, src);
// If we're using the managed resource storage mode, then
// we need to signal Metal to synchronize the buffer data.
//
// Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc
if (self.options.storage_mode == .managed) {
self.buffer.msgSend(
void,
"didModifyRange:",
.{macos.foundation.Range.init(0, req_bytes)},
);
}
}
/// Like Buffer.sync but takes data from an array of ArrayLists,
@ -130,7 +154,7 @@ pub fn Buffer(comptime T: type) type {
objc.sel("newBufferWithLength:options:"),
.{
@as(c_ulong, @intCast(size * @sizeOf(T))),
mtl.MTLResourceStorageModeShared,
self.options,
},
);
}
@ -153,6 +177,18 @@ pub fn Buffer(comptime T: type) type {
i += list.items.len * @sizeOf(T);
}
// If we're using the managed resource storage mode, then
// we need to signal Metal to synchronize the buffer data.
//
// Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc
if (self.options.storage_mode == .managed) {
self.buffer.msgSend(
void,
"didModifyRange:",
.{macos.foundation.Range.init(0, req_bytes)},
);
}
return total_len;
}
};

View File

@ -358,6 +358,8 @@ pub const Image = union(enum) {
self: *Image,
alloc: Allocator,
device: objc.Object,
/// Storage mode for the MTLTexture object
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !void {
// Convert our data if we have to
try self.convert(alloc);
@ -366,7 +368,7 @@ pub const Image = union(enum) {
const p = self.pending().?;
// Create our texture
const texture = try initTexture(p, device);
const texture = try initTexture(p, device, storage_mode);
errdefer texture.msgSend(void, objc.sel("release"), .{});
// Upload our data
@ -424,7 +426,12 @@ pub const Image = union(enum) {
};
}
fn initTexture(p: Pending, device: objc.Object) !objc.Object {
fn initTexture(
p: Pending,
device: objc.Object,
/// Storage mode for the MTLTexture object
storage_mode: mtl.MTLResourceOptions.StorageMode,
) !objc.Object {
// Create our descriptor
const desc = init: {
const Class = objc.getClass("MTLTextureDescriptor").?;
@ -438,6 +445,15 @@ pub const Image = union(enum) {
desc.setProperty("width", @as(c_ulong, @intCast(p.width)));
desc.setProperty("height", @as(c_ulong, @intCast(p.height)));
desc.setProperty(
"resourceOptions",
mtl.MTLResourceOptions{
// Indicate that the CPU writes to this resource but never reads it.
.cpu_cache_mode = .write_combined,
.storage_mode = storage_mode,
},
);
// Initialize
const id = device.msgSend(
?*anyopaque,