mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-16 16:56:09 +03:00
Metal: use "Managed" resource storage mode on discrete GPUs (#5625)
Discrete GPUs cannot use the "shared" storage mode. This causes undefined behavior right now, and I believe it is what is causing the "inverted" cells problem on Intel systems with discrete GPUs (observed in discussion #5597). This commit also sets the CPU cache mode to "write combined" for our resources: since we never read them back, this hint lets Metal optimize them further.
This commit is contained in:
@ -182,15 +182,34 @@ pub const GPUState = struct {
|
||||
/// This buffer is written exactly once so we can use it globally.
|
||||
instance: InstanceBuffer, // MTLBuffer
|
||||
|
||||
/// The default storage mode to use for resources created with our device.
|
||||
///
|
||||
/// This is based on whether the device is a discrete GPU or not, since
|
||||
/// discrete GPUs do not have unified memory and therefore do not support
|
||||
/// the "shared" storage mode, instead we have to use the "managed" mode.
|
||||
default_storage_mode: mtl.MTLResourceOptions.StorageMode,
|
||||
|
||||
pub fn init() !GPUState {
|
||||
const device = try chooseDevice();
|
||||
const queue = device.msgSend(objc.Object, objc.sel("newCommandQueue"), .{});
|
||||
errdefer queue.release();
|
||||
|
||||
// We determine whether our device is a discrete GPU based on these:
|
||||
// - We're on macOS (iOS, iPadOS, etc. are guaranteed to be integrated).
|
||||
// - We're not on aarch64 (Apple Silicon, therefore integrated).
|
||||
// - The device reports that it does not have unified memory.
|
||||
const is_discrete =
|
||||
builtin.target.os.tag == .macos and
|
||||
builtin.target.cpu.arch != .aarch64 and
|
||||
!device.getProperty(bool, "hasUnifiedMemory");
|
||||
|
||||
const default_storage_mode: mtl.MTLResourceOptions.StorageMode =
|
||||
if (is_discrete) .managed else .shared;
|
||||
|
||||
var instance = try InstanceBuffer.initFill(device, &.{
|
||||
0, 1, 3, // Top-left triangle
|
||||
1, 2, 3, // Bottom-right triangle
|
||||
});
|
||||
}, .{ .storage_mode = default_storage_mode });
|
||||
errdefer instance.deinit();
|
||||
|
||||
var result: GPUState = .{
|
||||
@ -198,11 +217,12 @@ pub const GPUState = struct {
|
||||
.queue = queue,
|
||||
.instance = instance,
|
||||
.frames = undefined,
|
||||
.default_storage_mode = default_storage_mode,
|
||||
};
|
||||
|
||||
// Initialize all of our frame state.
|
||||
for (&result.frames) |*frame| {
|
||||
frame.* = try FrameState.init(result.device);
|
||||
frame.* = try FrameState.init(result.device, default_storage_mode);
|
||||
}
|
||||
|
||||
return result;
|
||||
@ -288,18 +308,47 @@ pub const FrameState = struct {
|
||||
const CellBgBuffer = mtl_buffer.Buffer(mtl_shaders.CellBg);
|
||||
const CellTextBuffer = mtl_buffer.Buffer(mtl_shaders.CellText);
|
||||
|
||||
pub fn init(device: objc.Object) !FrameState {
|
||||
pub fn init(
|
||||
device: objc.Object,
|
||||
/// Storage mode for buffers and textures.
|
||||
storage_mode: mtl.MTLResourceOptions.StorageMode,
|
||||
) !FrameState {
|
||||
// Uniform buffer contains exactly 1 uniform struct. The
|
||||
// uniform data will be undefined so this must be set before
|
||||
// a frame is drawn.
|
||||
var uniforms = try UniformBuffer.init(device, 1);
|
||||
var uniforms = try UniformBuffer.init(
|
||||
device,
|
||||
1,
|
||||
.{
|
||||
// Indicate that the CPU writes to this resource but never reads it.
|
||||
.cpu_cache_mode = .write_combined,
|
||||
.storage_mode = storage_mode,
|
||||
},
|
||||
);
|
||||
errdefer uniforms.deinit();
|
||||
|
||||
// Create the buffers for our vertex data. The preallocation size
|
||||
// is likely too small but our first frame update will resize it.
|
||||
var cells = try CellTextBuffer.init(device, 10 * 10);
|
||||
var cells = try CellTextBuffer.init(
|
||||
device,
|
||||
10 * 10,
|
||||
.{
|
||||
// Indicate that the CPU writes to this resource but never reads it.
|
||||
.cpu_cache_mode = .write_combined,
|
||||
.storage_mode = storage_mode,
|
||||
},
|
||||
);
|
||||
errdefer cells.deinit();
|
||||
var cells_bg = try CellBgBuffer.init(device, 10 * 10);
|
||||
var cells_bg = try CellBgBuffer.init(
|
||||
device,
|
||||
10 * 10,
|
||||
.{
|
||||
// Indicate that the CPU writes to this resource but never reads it.
|
||||
.cpu_cache_mode = .write_combined,
|
||||
.storage_mode = storage_mode,
|
||||
},
|
||||
);
|
||||
|
||||
errdefer cells_bg.deinit();
|
||||
|
||||
// Initialize our textures for our font atlas.
|
||||
@ -307,13 +356,13 @@ pub const FrameState = struct {
|
||||
.data = undefined,
|
||||
.size = 8,
|
||||
.format = .grayscale,
|
||||
});
|
||||
}, storage_mode);
|
||||
errdefer grayscale.release();
|
||||
const color = try initAtlasTexture(device, &.{
|
||||
.data = undefined,
|
||||
.size = 8,
|
||||
.format = .rgba,
|
||||
});
|
||||
}, storage_mode);
|
||||
errdefer color.release();
|
||||
|
||||
return .{
|
||||
@ -1215,7 +1264,11 @@ pub fn updateFrame(
|
||||
.replace_gray_alpha,
|
||||
.replace_rgb,
|
||||
.replace_rgba,
|
||||
=> try kv.value_ptr.image.upload(self.alloc, self.gpu_state.device),
|
||||
=> try kv.value_ptr.image.upload(
|
||||
self.alloc,
|
||||
self.gpu_state.device,
|
||||
self.gpu_state.default_storage_mode,
|
||||
),
|
||||
|
||||
.unload_pending,
|
||||
.unload_replace,
|
||||
@ -1283,7 +1336,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void {
|
||||
self.font_grid.lock.lockShared();
|
||||
defer self.font_grid.lock.unlockShared();
|
||||
frame.grayscale_modified = self.font_grid.atlas_grayscale.modified.load(.monotonic);
|
||||
try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_grayscale, &frame.grayscale);
|
||||
try syncAtlasTexture(
|
||||
self.gpu_state.device,
|
||||
&self.font_grid.atlas_grayscale,
|
||||
&frame.grayscale,
|
||||
self.gpu_state.default_storage_mode,
|
||||
);
|
||||
}
|
||||
texture: {
|
||||
const modified = self.font_grid.atlas_color.modified.load(.monotonic);
|
||||
@ -1291,7 +1349,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void {
|
||||
self.font_grid.lock.lockShared();
|
||||
defer self.font_grid.lock.unlockShared();
|
||||
frame.color_modified = self.font_grid.atlas_color.modified.load(.monotonic);
|
||||
try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_color, &frame.color);
|
||||
try syncAtlasTexture(
|
||||
self.gpu_state.device,
|
||||
&self.font_grid.atlas_color,
|
||||
&frame.color,
|
||||
self.gpu_state.default_storage_mode,
|
||||
);
|
||||
}
|
||||
|
||||
// Command buffer (MTLCommandBuffer)
|
||||
@ -1618,7 +1681,11 @@ fn drawImagePlacement(
|
||||
@as(f32, @floatFromInt(p.width)),
|
||||
@as(f32, @floatFromInt(p.height)),
|
||||
},
|
||||
}});
|
||||
}}, .{
|
||||
// Indicate that the CPU writes to this resource but never reads it.
|
||||
.cpu_cache_mode = .write_combined,
|
||||
.storage_mode = self.gpu_state.default_storage_mode,
|
||||
});
|
||||
defer buf.deinit();
|
||||
|
||||
// Set our buffer
|
||||
@ -3217,14 +3284,20 @@ fn addPreeditCell(
|
||||
/// Sync the atlas data to the given texture. This copies the bytes
|
||||
/// associated with the atlas to the given texture. If the atlas no longer
|
||||
/// fits into the texture, the texture will be resized.
|
||||
fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *objc.Object) !void {
|
||||
fn syncAtlasTexture(
|
||||
device: objc.Object,
|
||||
atlas: *const font.Atlas,
|
||||
texture: *objc.Object,
|
||||
/// Storage mode for the MTLTexture object
|
||||
storage_mode: mtl.MTLResourceOptions.StorageMode,
|
||||
) !void {
|
||||
const width = texture.getProperty(c_ulong, "width");
|
||||
if (atlas.size > width) {
|
||||
// Free our old texture
|
||||
texture.*.release();
|
||||
|
||||
// Reallocate
|
||||
texture.* = try initAtlasTexture(device, atlas);
|
||||
texture.* = try initAtlasTexture(device, atlas, storage_mode);
|
||||
}
|
||||
|
||||
texture.msgSend(
|
||||
@ -3247,7 +3320,12 @@ fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *obj
|
||||
}
|
||||
|
||||
/// Initialize a MTLTexture object for the given atlas.
|
||||
fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object {
|
||||
fn initAtlasTexture(
|
||||
device: objc.Object,
|
||||
atlas: *const font.Atlas,
|
||||
/// Storage mode for the MTLTexture object
|
||||
storage_mode: mtl.MTLResourceOptions.StorageMode,
|
||||
) !objc.Object {
|
||||
// Determine our pixel format
|
||||
const pixel_format: mtl.MTLPixelFormat = switch (atlas.format) {
|
||||
.grayscale => .r8unorm,
|
||||
@ -3268,15 +3346,14 @@ fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object
|
||||
desc.setProperty("width", @as(c_ulong, @intCast(atlas.size)));
|
||||
desc.setProperty("height", @as(c_ulong, @intCast(atlas.size)));
|
||||
|
||||
// Xcode tells us that this texture should be shared mode on
|
||||
// aarch64. This configuration is not supported on x86_64 so
|
||||
// we only set it on aarch64.
|
||||
if (comptime builtin.target.cpu.arch == .aarch64) {
|
||||
desc.setProperty(
|
||||
"storageMode",
|
||||
@as(c_ulong, mtl.MTLResourceStorageModeShared),
|
||||
);
|
||||
}
|
||||
desc.setProperty(
|
||||
"resourceOptions",
|
||||
mtl.MTLResourceOptions{
|
||||
// Indicate that the CPU writes to this resource but never reads it.
|
||||
.cpu_cache_mode = .write_combined,
|
||||
.storage_mode = storage_mode,
|
||||
},
|
||||
);
|
||||
|
||||
// Initialize
|
||||
const id = device.msgSend(
|
||||
|
@ -24,12 +24,36 @@ pub const MTLStoreAction = enum(c_ulong) {
|
||||
store = 1,
|
||||
};
|
||||
|
||||
/// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc
|
||||
pub const MTLStorageMode = enum(c_ulong) {
|
||||
shared = 0,
|
||||
managed = 1,
|
||||
private = 2,
|
||||
memoryless = 3,
|
||||
/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc
|
||||
pub const MTLResourceOptions = packed struct(c_ulong) {
|
||||
/// https://developer.apple.com/documentation/metal/mtlcpucachemode?language=objc
|
||||
cpu_cache_mode: CPUCacheMode = .default,
|
||||
/// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc
|
||||
storage_mode: StorageMode,
|
||||
/// https://developer.apple.com/documentation/metal/mtlhazardtrackingmode?language=objc
|
||||
hazard_tracking_mode: HazardTrackingMode = .default,
|
||||
|
||||
_pad: @Type(.{
|
||||
.Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(c_ulong) - 10 },
|
||||
}) = 0,
|
||||
|
||||
pub const CPUCacheMode = enum(u4) {
|
||||
default = 0,
|
||||
write_combined = 1,
|
||||
};
|
||||
|
||||
pub const StorageMode = enum(u4) {
|
||||
shared = 0,
|
||||
managed = 1,
|
||||
private = 2,
|
||||
memoryless = 3,
|
||||
};
|
||||
|
||||
pub const HazardTrackingMode = enum(u2) {
|
||||
default = 0,
|
||||
untracked = 1,
|
||||
tracked = 2,
|
||||
};
|
||||
};
|
||||
|
||||
/// https://developer.apple.com/documentation/metal/mtlprimitivetype?language=objc
|
||||
@ -139,10 +163,6 @@ pub const MTLTextureUsage = enum(c_ulong) {
|
||||
pixel_format_view = 8,
|
||||
};
|
||||
|
||||
/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc
|
||||
/// (incomplete, we only use this mode so we just hardcode it)
|
||||
pub const MTLResourceStorageModeShared: c_ulong = @intFromEnum(MTLStorageMode.shared) << 4;
|
||||
|
||||
pub const MTLClearColor = extern struct {
|
||||
red: f64,
|
||||
green: f64,
|
||||
|
@ -2,6 +2,7 @@ const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const assert = std.debug.assert;
|
||||
const objc = @import("objc");
|
||||
const macos = @import("macos");
|
||||
|
||||
const mtl = @import("api.zig");
|
||||
|
||||
@ -14,35 +15,46 @@ pub fn Buffer(comptime T: type) type {
|
||||
return struct {
|
||||
const Self = @This();
|
||||
|
||||
/// The resource options for this buffer.
|
||||
options: mtl.MTLResourceOptions,
|
||||
|
||||
buffer: objc.Object, // MTLBuffer
|
||||
|
||||
/// Initialize a buffer with the given length pre-allocated.
|
||||
pub fn init(device: objc.Object, len: usize) !Self {
|
||||
pub fn init(
|
||||
device: objc.Object,
|
||||
len: usize,
|
||||
options: mtl.MTLResourceOptions,
|
||||
) !Self {
|
||||
const buffer = device.msgSend(
|
||||
objc.Object,
|
||||
objc.sel("newBufferWithLength:options:"),
|
||||
.{
|
||||
@as(c_ulong, @intCast(len * @sizeOf(T))),
|
||||
mtl.MTLResourceStorageModeShared,
|
||||
options,
|
||||
},
|
||||
);
|
||||
|
||||
return .{ .buffer = buffer };
|
||||
return .{ .buffer = buffer, .options = options };
|
||||
}
|
||||
|
||||
/// Init the buffer filled with the given data.
|
||||
pub fn initFill(device: objc.Object, data: []const T) !Self {
|
||||
pub fn initFill(
|
||||
device: objc.Object,
|
||||
data: []const T,
|
||||
options: mtl.MTLResourceOptions,
|
||||
) !Self {
|
||||
const buffer = device.msgSend(
|
||||
objc.Object,
|
||||
objc.sel("newBufferWithBytes:length:options:"),
|
||||
.{
|
||||
@as(*const anyopaque, @ptrCast(data.ptr)),
|
||||
@as(c_ulong, @intCast(data.len * @sizeOf(T))),
|
||||
mtl.MTLResourceStorageModeShared,
|
||||
options,
|
||||
},
|
||||
);
|
||||
|
||||
return .{ .buffer = buffer };
|
||||
return .{ .buffer = buffer, .options = options };
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Self) void {
|
||||
@ -85,7 +97,7 @@ pub fn Buffer(comptime T: type) type {
|
||||
objc.sel("newBufferWithLength:options:"),
|
||||
.{
|
||||
@as(c_ulong, @intCast(size * @sizeOf(T))),
|
||||
mtl.MTLResourceStorageModeShared,
|
||||
self.options,
|
||||
},
|
||||
);
|
||||
}
|
||||
@ -106,6 +118,18 @@ pub fn Buffer(comptime T: type) type {
|
||||
};
|
||||
|
||||
@memcpy(dst, src);
|
||||
|
||||
// If we're using the managed resource storage mode, then
|
||||
// we need to signal Metal to synchronize the buffer data.
|
||||
//
|
||||
// Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc
|
||||
if (self.options.storage_mode == .managed) {
|
||||
self.buffer.msgSend(
|
||||
void,
|
||||
"didModifyRange:",
|
||||
.{macos.foundation.Range.init(0, req_bytes)},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Like Buffer.sync but takes data from an array of ArrayLists,
|
||||
@ -130,7 +154,7 @@ pub fn Buffer(comptime T: type) type {
|
||||
objc.sel("newBufferWithLength:options:"),
|
||||
.{
|
||||
@as(c_ulong, @intCast(size * @sizeOf(T))),
|
||||
mtl.MTLResourceStorageModeShared,
|
||||
self.options,
|
||||
},
|
||||
);
|
||||
}
|
||||
@ -153,6 +177,18 @@ pub fn Buffer(comptime T: type) type {
|
||||
i += list.items.len * @sizeOf(T);
|
||||
}
|
||||
|
||||
// If we're using the managed resource storage mode, then
|
||||
// we need to signal Metal to synchronize the buffer data.
|
||||
//
|
||||
// Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc
|
||||
if (self.options.storage_mode == .managed) {
|
||||
self.buffer.msgSend(
|
||||
void,
|
||||
"didModifyRange:",
|
||||
.{macos.foundation.Range.init(0, req_bytes)},
|
||||
);
|
||||
}
|
||||
|
||||
return total_len;
|
||||
}
|
||||
};
|
||||
|
@ -358,6 +358,8 @@ pub const Image = union(enum) {
|
||||
self: *Image,
|
||||
alloc: Allocator,
|
||||
device: objc.Object,
|
||||
/// Storage mode for the MTLTexture object
|
||||
storage_mode: mtl.MTLResourceOptions.StorageMode,
|
||||
) !void {
|
||||
// Convert our data if we have to
|
||||
try self.convert(alloc);
|
||||
@ -366,7 +368,7 @@ pub const Image = union(enum) {
|
||||
const p = self.pending().?;
|
||||
|
||||
// Create our texture
|
||||
const texture = try initTexture(p, device);
|
||||
const texture = try initTexture(p, device, storage_mode);
|
||||
errdefer texture.msgSend(void, objc.sel("release"), .{});
|
||||
|
||||
// Upload our data
|
||||
@ -424,7 +426,12 @@ pub const Image = union(enum) {
|
||||
};
|
||||
}
|
||||
|
||||
fn initTexture(p: Pending, device: objc.Object) !objc.Object {
|
||||
fn initTexture(
|
||||
p: Pending,
|
||||
device: objc.Object,
|
||||
/// Storage mode for the MTLTexture object
|
||||
storage_mode: mtl.MTLResourceOptions.StorageMode,
|
||||
) !objc.Object {
|
||||
// Create our descriptor
|
||||
const desc = init: {
|
||||
const Class = objc.getClass("MTLTextureDescriptor").?;
|
||||
@ -438,6 +445,15 @@ pub const Image = union(enum) {
|
||||
desc.setProperty("width", @as(c_ulong, @intCast(p.width)));
|
||||
desc.setProperty("height", @as(c_ulong, @intCast(p.height)));
|
||||
|
||||
desc.setProperty(
|
||||
"resourceOptions",
|
||||
mtl.MTLResourceOptions{
|
||||
// Indicate that the CPU writes to this resource but never reads it.
|
||||
.cpu_cache_mode = .write_combined,
|
||||
.storage_mode = storage_mode,
|
||||
},
|
||||
);
|
||||
|
||||
// Initialize
|
||||
const id = device.msgSend(
|
||||
?*anyopaque,
|
||||
|
Reference in New Issue
Block a user