Metal: use "Managed" resource storage mode on discrete GPUs (#5625)

Discrete GPUs cannot use the "shared" storage mode. Using it anyway causes undefined behavior right now, and I believe it's what's causing the "inverted" cells problem on Intel systems with discrete GPUs (observed in discussion #5597). This commit also sets the CPU cache mode to "write combined" for our resources: since we never read them back, this hint lets Metal optimize them further.
2025-07-16 16:56:09 +03:00 · 2025-02-07 13:49:39 -05:00
parent 79d6d26784 ea16890fd3
commit f95f636f1f
4 changed files with 193 additions and 44 deletions
--- a/src/renderer/Metal.zig
+++ b/src/renderer/Metal.zig
@ -182,15 +182,34 @@ pub const GPUState = struct {
    /// This buffer is written exactly once so we can use it globally.
    instance: InstanceBuffer, // MTLBuffer

+    /// The default storage mode to use for resources created with our device.
+    ///
+    /// This is based on whether the device is a discrete GPU or not, since
+    /// discrete GPUs do not have unified memory and therefore do not support
+    /// the "shared" storage mode; instead we have to use the "managed" mode.
+    default_storage_mode: mtl.MTLResourceOptions.StorageMode,
+
    pub fn init() !GPUState {
        const device = try chooseDevice();
        const queue = device.msgSend(objc.Object, objc.sel("newCommandQueue"), .{});
        errdefer queue.release();

+        // We determine whether our device is a discrete GPU based on these:
+        // - We're on macOS (iOS, iPadOS, etc. are guaranteed to be integrated).
+        // - We're not on aarch64 (Apple Silicon, therefore integrated).
+        // - The device reports that it does not have unified memory.
+        const is_discrete =
+            builtin.target.os.tag == .macos and
+            builtin.target.cpu.arch != .aarch64 and
+            !device.getProperty(bool, "hasUnifiedMemory");
+
+        const default_storage_mode: mtl.MTLResourceOptions.StorageMode =
+            if (is_discrete) .managed else .shared;
+
        var instance = try InstanceBuffer.initFill(device, &.{
            0, 1, 3, // Top-left triangle
            1, 2, 3, // Bottom-right triangle
-        });
+        }, .{ .storage_mode = default_storage_mode });
        errdefer instance.deinit();

        var result: GPUState = .{
@ -198,11 +217,12 @@ pub const GPUState = struct {
            .queue = queue,
            .instance = instance,
            .frames = undefined,
+            .default_storage_mode = default_storage_mode,
        };

        // Initialize all of our frame state.
        for (&result.frames) |*frame| {
-            frame.* = try FrameState.init(result.device);
+            frame.* = try FrameState.init(result.device, default_storage_mode);
        }

        return result;
@ -288,18 +308,47 @@ pub const FrameState = struct {
    const CellBgBuffer = mtl_buffer.Buffer(mtl_shaders.CellBg);
    const CellTextBuffer = mtl_buffer.Buffer(mtl_shaders.CellText);

-    pub fn init(device: objc.Object) !FrameState {
+    pub fn init(
+        device: objc.Object,
+        /// Storage mode for buffers and textures.
+        storage_mode: mtl.MTLResourceOptions.StorageMode,
+    ) !FrameState {
        // Uniform buffer contains exactly 1 uniform struct. The
        // uniform data will be undefined so this must be set before
        // a frame is drawn.
-        var uniforms = try UniformBuffer.init(device, 1);
+        var uniforms = try UniformBuffer.init(
+            device,
+            1,
+            .{
+                // Indicate that the CPU writes to this resource but never reads it.
+                .cpu_cache_mode = .write_combined,
+                .storage_mode = storage_mode,
+            },
+        );
        errdefer uniforms.deinit();

        // Create the buffers for our vertex data. The preallocation size
        // is likely too small but our first frame update will resize it.
-        var cells = try CellTextBuffer.init(device, 10 * 10);
+        var cells = try CellTextBuffer.init(
+            device,
+            10 * 10,
+            .{
+                // Indicate that the CPU writes to this resource but never reads it.
+                .cpu_cache_mode = .write_combined,
+                .storage_mode = storage_mode,
+            },
+        );
        errdefer cells.deinit();
-        var cells_bg = try CellBgBuffer.init(device, 10 * 10);
+        var cells_bg = try CellBgBuffer.init(
+            device,
+            10 * 10,
+            .{
+                // Indicate that the CPU writes to this resource but never reads it.
+                .cpu_cache_mode = .write_combined,
+                .storage_mode = storage_mode,
+            },
+        );
+
        errdefer cells_bg.deinit();

        // Initialize our textures for our font atlas.
@ -307,13 +356,13 @@ pub const FrameState = struct {
            .data = undefined,
            .size = 8,
            .format = .grayscale,
-        });
+        }, storage_mode);
        errdefer grayscale.release();
        const color = try initAtlasTexture(device, &.{
            .data = undefined,
            .size = 8,
            .format = .rgba,
-        });
+        }, storage_mode);
        errdefer color.release();

        return .{
@ -1215,7 +1264,11 @@ pub fn updateFrame(
                .replace_gray_alpha,
                .replace_rgb,
                .replace_rgba,
-                => try kv.value_ptr.image.upload(self.alloc, self.gpu_state.device),
+                => try kv.value_ptr.image.upload(
+                    self.alloc,
+                    self.gpu_state.device,
+                    self.gpu_state.default_storage_mode,
+                ),

                .unload_pending,
                .unload_replace,
@ -1283,7 +1336,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void {
        self.font_grid.lock.lockShared();
        defer self.font_grid.lock.unlockShared();
        frame.grayscale_modified = self.font_grid.atlas_grayscale.modified.load(.monotonic);
-        try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_grayscale, &frame.grayscale);
+        try syncAtlasTexture(
+            self.gpu_state.device,
+            &self.font_grid.atlas_grayscale,
+            &frame.grayscale,
+            self.gpu_state.default_storage_mode,
+        );
    }
    texture: {
        const modified = self.font_grid.atlas_color.modified.load(.monotonic);
@ -1291,7 +1349,12 @@ pub fn drawFrame(self: *Metal, surface: *apprt.Surface) !void {
        self.font_grid.lock.lockShared();
        defer self.font_grid.lock.unlockShared();
        frame.color_modified = self.font_grid.atlas_color.modified.load(.monotonic);
-        try syncAtlasTexture(self.gpu_state.device, &self.font_grid.atlas_color, &frame.color);
+        try syncAtlasTexture(
+            self.gpu_state.device,
+            &self.font_grid.atlas_color,
+            &frame.color,
+            self.gpu_state.default_storage_mode,
+        );
    }

    // Command buffer (MTLCommandBuffer)
@ -1618,7 +1681,11 @@ fn drawImagePlacement(
            @as(f32, @floatFromInt(p.width)),
            @as(f32, @floatFromInt(p.height)),
        },
-    }});
+    }}, .{
+        // Indicate that the CPU writes to this resource but never reads it.
+        .cpu_cache_mode = .write_combined,
+        .storage_mode = self.gpu_state.default_storage_mode,
+    });
    defer buf.deinit();

    // Set our buffer
@ -3217,14 +3284,20 @@ fn addPreeditCell(
 /// Sync the atlas data to the given texture. This copies the bytes
 /// associated with the atlas to the given texture. If the atlas no longer
 /// fits into the texture, the texture will be resized.
-fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *objc.Object) !void {
+fn syncAtlasTexture(
+    device: objc.Object,
+    atlas: *const font.Atlas,
+    texture: *objc.Object,
+    /// Storage mode for the MTLTexture object
+    storage_mode: mtl.MTLResourceOptions.StorageMode,
+) !void {
    const width = texture.getProperty(c_ulong, "width");
    if (atlas.size > width) {
        // Free our old texture
        texture.*.release();

        // Reallocate
-        texture.* = try initAtlasTexture(device, atlas);
+        texture.* = try initAtlasTexture(device, atlas, storage_mode);
    }

    texture.msgSend(
@ -3247,7 +3320,12 @@ fn syncAtlasTexture(device: objc.Object, atlas: *const font.Atlas, texture: *obj
 }

 /// Initialize a MTLTexture object for the given atlas.
-fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object {
+fn initAtlasTexture(
+    device: objc.Object,
+    atlas: *const font.Atlas,
+    /// Storage mode for the MTLTexture object
+    storage_mode: mtl.MTLResourceOptions.StorageMode,
+) !objc.Object {
    // Determine our pixel format
    const pixel_format: mtl.MTLPixelFormat = switch (atlas.format) {
        .grayscale => .r8unorm,
@ -3268,15 +3346,14 @@ fn initAtlasTexture(device: objc.Object, atlas: *const font.Atlas) !objc.Object
    desc.setProperty("width", @as(c_ulong, @intCast(atlas.size)));
    desc.setProperty("height", @as(c_ulong, @intCast(atlas.size)));

-    // Xcode tells us that this texture should be shared mode on
-    // aarch64. This configuration is not supported on x86_64 so
-    // we only set it on aarch64.
-    if (comptime builtin.target.cpu.arch == .aarch64) {
-        desc.setProperty(
-            "storageMode",
-            @as(c_ulong, mtl.MTLResourceStorageModeShared),
-        );
-    }
+    desc.setProperty(
+        "resourceOptions",
+        mtl.MTLResourceOptions{
+            // Indicate that the CPU writes to this resource but never reads it.
+            .cpu_cache_mode = .write_combined,
+            .storage_mode = storage_mode,
+        },
+    );

    // Initialize
    const id = device.msgSend(
--- a/src/renderer/metal/api.zig
+++ b/src/renderer/metal/api.zig
@ -24,12 +24,36 @@ pub const MTLStoreAction = enum(c_ulong) {
    store = 1,
 };

-/// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc
-pub const MTLStorageMode = enum(c_ulong) {
-    shared = 0,
-    managed = 1,
-    private = 2,
-    memoryless = 3,
+/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc
+pub const MTLResourceOptions = packed struct(c_ulong) {
+    /// https://developer.apple.com/documentation/metal/mtlcpucachemode?language=objc
+    cpu_cache_mode: CPUCacheMode = .default,
+    /// https://developer.apple.com/documentation/metal/mtlstoragemode?language=objc
+    storage_mode: StorageMode,
+    /// https://developer.apple.com/documentation/metal/mtlhazardtrackingmode?language=objc
+    hazard_tracking_mode: HazardTrackingMode = .default,
+
+    _pad: @Type(.{
+        .Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(c_ulong) - 10 },
+    }) = 0,
+
+    pub const CPUCacheMode = enum(u4) {
+        default = 0,
+        write_combined = 1,
+    };
+
+    pub const StorageMode = enum(u4) {
+        shared = 0,
+        managed = 1,
+        private = 2,
+        memoryless = 3,
+    };
+
+    pub const HazardTrackingMode = enum(u2) {
+        default = 0,
+        untracked = 1,
+        tracked = 2,
+    };
 };

 /// https://developer.apple.com/documentation/metal/mtlprimitivetype?language=objc
@ -139,10 +163,6 @@ pub const MTLTextureUsage = enum(c_ulong) {
    pixel_format_view = 8,
 };

-/// https://developer.apple.com/documentation/metal/mtlresourceoptions?language=objc
-/// (incomplete, we only use this mode so we just hardcode it)
-pub const MTLResourceStorageModeShared: c_ulong = @intFromEnum(MTLStorageMode.shared) << 4;
-
 pub const MTLClearColor = extern struct {
    red: f64,
    green: f64,
--- a/src/renderer/metal/buffer.zig
+++ b/src/renderer/metal/buffer.zig
@ -2,6 +2,7 @@ const std = @import("std");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
 const objc = @import("objc");
+const macos = @import("macos");

 const mtl = @import("api.zig");

@ -14,35 +15,46 @@ pub fn Buffer(comptime T: type) type {
    return struct {
        const Self = @This();

+        /// The resource options for this buffer.
+        options: mtl.MTLResourceOptions,
+
        buffer: objc.Object, // MTLBuffer

        /// Initialize a buffer with the given length pre-allocated.
-        pub fn init(device: objc.Object, len: usize) !Self {
+        pub fn init(
+            device: objc.Object,
+            len: usize,
+            options: mtl.MTLResourceOptions,
+        ) !Self {
            const buffer = device.msgSend(
                objc.Object,
                objc.sel("newBufferWithLength:options:"),
                .{
                    @as(c_ulong, @intCast(len * @sizeOf(T))),
-                    mtl.MTLResourceStorageModeShared,
+                    options,
                },
            );

-            return .{ .buffer = buffer };
+            return .{ .buffer = buffer, .options = options };
        }

        /// Init the buffer filled with the given data.
-        pub fn initFill(device: objc.Object, data: []const T) !Self {
+        pub fn initFill(
+            device: objc.Object,
+            data: []const T,
+            options: mtl.MTLResourceOptions,
+        ) !Self {
            const buffer = device.msgSend(
                objc.Object,
                objc.sel("newBufferWithBytes:length:options:"),
                .{
                    @as(*const anyopaque, @ptrCast(data.ptr)),
                    @as(c_ulong, @intCast(data.len * @sizeOf(T))),
-                    mtl.MTLResourceStorageModeShared,
+                    options,
                },
            );

-            return .{ .buffer = buffer };
+            return .{ .buffer = buffer, .options = options };
        }

        pub fn deinit(self: *Self) void {
@ -85,7 +97,7 @@ pub fn Buffer(comptime T: type) type {
                    objc.sel("newBufferWithLength:options:"),
                    .{
                        @as(c_ulong, @intCast(size * @sizeOf(T))),
-                        mtl.MTLResourceStorageModeShared,
+                        self.options,
                    },
                );
            }
@ -106,6 +118,18 @@ pub fn Buffer(comptime T: type) type {
            };

            @memcpy(dst, src);
+
+            // If we're using the managed resource storage mode, then
+            // we need to signal Metal to synchronize the buffer data.
+            //
+            // Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc
+            if (self.options.storage_mode == .managed) {
+                self.buffer.msgSend(
+                    void,
+                    "didModifyRange:",
+                    .{macos.foundation.Range.init(0, req_bytes)},
+                );
+            }
        }

        /// Like Buffer.sync but takes data from an array of ArrayLists,
@ -130,7 +154,7 @@ pub fn Buffer(comptime T: type) type {
                    objc.sel("newBufferWithLength:options:"),
                    .{
                        @as(c_ulong, @intCast(size * @sizeOf(T))),
-                        mtl.MTLResourceStorageModeShared,
+                        self.options,
                    },
                );
            }
@ -153,6 +177,18 @@ pub fn Buffer(comptime T: type) type {
                i += list.items.len * @sizeOf(T);
            }

+            // If we're using the managed resource storage mode, then
+            // we need to signal Metal to synchronize the buffer data.
+            //
+            // Ref: https://developer.apple.com/documentation/metal/synchronizing-a-managed-resource-in-macos?language=objc
+            if (self.options.storage_mode == .managed) {
+                self.buffer.msgSend(
+                    void,
+                    "didModifyRange:",
+                    .{macos.foundation.Range.init(0, req_bytes)},
+                );
+            }
+
            return total_len;
        }
    };
--- a/src/renderer/metal/image.zig
+++ b/src/renderer/metal/image.zig
@ -358,6 +358,8 @@ pub const Image = union(enum) {
        self: *Image,
        alloc: Allocator,
        device: objc.Object,
+        /// Storage mode for the MTLTexture object
+        storage_mode: mtl.MTLResourceOptions.StorageMode,
    ) !void {
        // Convert our data if we have to
        try self.convert(alloc);
@ -366,7 +368,7 @@ pub const Image = union(enum) {
        const p = self.pending().?;

        // Create our texture
-        const texture = try initTexture(p, device);
+        const texture = try initTexture(p, device, storage_mode);
        errdefer texture.msgSend(void, objc.sel("release"), .{});

        // Upload our data
@ -424,7 +426,12 @@ pub const Image = union(enum) {
        };
    }

-    fn initTexture(p: Pending, device: objc.Object) !objc.Object {
+    fn initTexture(
+        p: Pending,
+        device: objc.Object,
+        /// Storage mode for the MTLTexture object
+        storage_mode: mtl.MTLResourceOptions.StorageMode,
+    ) !objc.Object {
        // Create our descriptor
        const desc = init: {
            const Class = objc.getClass("MTLTextureDescriptor").?;
@ -438,6 +445,15 @@ pub const Image = union(enum) {
        desc.setProperty("width", @as(c_ulong, @intCast(p.width)));
        desc.setProperty("height", @as(c_ulong, @intCast(p.height)));

+        desc.setProperty(
+            "resourceOptions",
+            mtl.MTLResourceOptions{
+                // Indicate that the CPU writes to this resource but never reads it.
+                .cpu_cache_mode = .write_combined,
+                .storage_mode = storage_mode,
+            },
+        );
+
        // Initialize
        const id = device.msgSend(
            ?*anyopaque,