From e6f97c28f8b3ff46b3685108539c47353c328d88 Mon Sep 17 00:00:00 2001 From: "Jeffrey C. Ollie" Date: Fri, 7 Jun 2024 23:48:03 -0600 Subject: [PATCH 1/4] Use clone3 / CLONE_INTO_CGROUP on Linux Use clone3 / CLONE_INTO_CGROUP to have the Linux kernel create the process in the correct cgroup rather than move the process into the cgroup after it is created. --- src/Command.zig | 12 +++++++-- src/os/cgroup.zig | 63 +++++++++++++++++++++++++++++++++++++++++++++ src/termio/Exec.zig | 8 +----- 3 files changed, 74 insertions(+), 9 deletions(-) diff --git a/src/Command.zig b/src/Command.zig index 505fe3f85..7c7eb85f1 100644 --- a/src/Command.zig +++ b/src/Command.zig @@ -19,6 +19,7 @@ const Command = @This(); const std = @import("std"); const builtin = @import("builtin"); const internal_os = @import("os/main.zig"); +const termio = @import("termio.zig"); const windows = internal_os.windows; const TempDir = internal_os.TempDir; const mem = std.mem; @@ -32,6 +33,8 @@ const EnvMap = std.process.EnvMap; const PreExecFn = fn (*Command) void; +const log = std.log.scoped(.command); + /// Path to the command to run. This must be an absolute path. This /// library does not do PATH lookup. path: []const u8, @@ -61,6 +64,8 @@ stderr: ?File = null, /// exec process takes over, such as signal handlers, setsid, setuid, etc. pre_exec: ?*const PreExecFn = null, +linux_cgroup: termio.Options.LinuxCgroup = termio.Options.linux_cgroup_default, + /// If set, then the process will be created attached to this pseudo console. /// `stdin`, `stdout`, and `stderr` will be ignored if set. pseudo_console: if (builtin.os.tag == .windows) ?windows.exp.HPCON else void = @@ -133,8 +138,11 @@ fn startPosix(self: *Command, arena: Allocator) !void { else @compileError("missing env vars"); - // Fork - const pid = try posix.fork(); + const pid: linux.pid_t = switch (builtin.os.tag) { + .linux => if (self.linux_cgroup) |cgroup| try internal_os.cgroup.cloneInto(cgroup) else try posix.fork(), + else => try posix.fork(), + }; + if (pid != 0) { // Parent, return immediately. self.pid = @intCast(pid); diff --git a/src/os/cgroup.zig b/src/os/cgroup.zig index aee772954..5afc8cdca 100644 --- a/src/os/cgroup.zig +++ b/src/os/cgroup.zig @@ -1,7 +1,11 @@ const std = @import("std"); const assert = std.debug.assert; +const linux = std.os.linux; +const posix = std.posix; const Allocator = std.mem.Allocator; +const log = std.log.scoped(.@"linux-cgroup"); + /// Returns the path to the cgroup for the given pid. pub fn current(alloc: Allocator, pid: std.os.linux.pid_t) !?[]const u8 { var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined; @@ -64,6 +68,65 @@ pub fn moveInto( try file.writer().print("{}", .{pid}); } +/// Use clone3 to have the kernel create a new process with the correct cgroup +/// rather than moving the process to the correct cgroup later. +pub fn cloneInto(cgroup: []const u8) !posix.pid_t { + var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const path = try std.fmt.bufPrintZ(&buf, "/sys/fs/cgroup{s}", .{cgroup}); + + // Get a file descriptor that refers to the cgroup directory in the cgroup + // sysfs to pass to the kernel in clone3. + const fd: linux.fd_t = fd: { + const rc = linux.open(path, linux.O{ .PATH = true, .DIRECTORY = true }, 0); + switch (posix.errno(rc)) { + .SUCCESS => { + break :fd @as(linux.fd_t, @intCast(rc)); + }, + else => |errno| { + log.err("unable to open cgroup dir {s}: {}", .{ path, errno }); + break :fd -1; + }, + } + }; + + const args: extern struct { + flags: u64, + pidfd: u64, + child_tid: u64, + parent_tid: u64, + exit_signal: u64, + stack: u64, + stack_size: u64, + tls: u64, + set_tid: u64, + set_tid_size: u64, + cgroup: u64, + } = .{ + .flags = if (fd >= 0) linux.CLONE.INTO_CGROUP else 0, + .pidfd = 0, + .child_tid = 0, + .parent_tid = 0, + .exit_signal = linux.SIG.CHLD, + .stack = 0, + .stack_size = 0, + .tls = 0, + .set_tid = 0, + .set_tid_size = 0, + .cgroup = if (fd >= 0) @intCast(fd) else 0, + }; + + const rc = linux.syscall2(linux.SYS.clone3, @intFromPtr(&args), @sizeOf(@TypeOf(args))); + switch (posix.errno(rc)) { + .SUCCESS => { + return @as(posix.pid_t, @intCast(rc)); + }, + else => |errno| { + log.err("unable to clone: {}", .{errno}); + return error.CloneError; + }, + } +} + /// Returns all available cgroup controllers for the given cgroup. /// The cgroup should have a '/'-prefix. /// diff --git a/src/termio/Exec.zig b/src/termio/Exec.zig index d6342deb3..619babb47 100644 --- a/src/termio/Exec.zig +++ b/src/termio/Exec.zig @@ -1315,6 +1315,7 @@ const Subprocess = struct { } }).callback, .data = self, + .linux_cgroup = self.linux_cgroup, }; try cmd.start(alloc); errdefer killCommand(&cmd) catch |err| { @@ -1345,13 +1346,6 @@ const Subprocess = struct { fn childPreExec(self: *Subprocess) !void { // Setup our pty try self.pty.?.childPreExec(); - - // If we have a cgroup set, then we want to move into that cgroup. - if (comptime builtin.os.tag == .linux) { - if (self.linux_cgroup) |cgroup| { - try internal_os.cgroup.moveInto(cgroup, 0); - } - } } /// Called to notify that we exited externally so we can unset our From af9efd4d93931143adec60c06e9cb9754c155424 Mon Sep 17 00:00:00 2001 From: "Jeffrey C. Ollie" Date: Sat, 8 Jun 2024 07:43:44 -0600 Subject: [PATCH 2/4] use consistent type for pid --- src/Command.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Command.zig b/src/Command.zig index 7c7eb85f1..6b6752e05 100644 --- a/src/Command.zig +++ b/src/Command.zig @@ -138,7 +138,7 @@ fn startPosix(self: *Command, arena: Allocator) !void { else @compileError("missing env vars"); - const pid: linux.pid_t = switch (builtin.os.tag) { + const pid: posix.pid_t = switch (builtin.os.tag) { .linux => if (self.linux_cgroup) |cgroup| try internal_os.cgroup.cloneInto(cgroup) else try posix.fork(), else => try posix.fork(), }; From 8f9cdff1f5908360fc4d86a98a3bff83fa675bbf Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sat, 8 Jun 2024 19:07:10 -0700 Subject: [PATCH 3/4] small stylistic tweaks --- src/Command.zig | 15 ++++++++++++--- src/os/cgroup.zig | 23 ++++++++++------------- src/termio/Exec.zig | 6 +++--- src/termio/Options.zig | 6 ++---- 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/src/Command.zig b/src/Command.zig index 6b6752e05..c8cf00758 100644 --- a/src/Command.zig +++ b/src/Command.zig @@ -19,7 +19,6 @@ const Command = @This(); const std = @import("std"); const builtin = @import("builtin"); const internal_os = @import("os/main.zig"); -const termio = @import("termio.zig"); const windows = internal_os.windows; const TempDir = internal_os.TempDir; const mem = std.mem; @@ -64,7 +63,7 @@ stderr: ?File = null, /// exec process takes over, such as signal handlers, setsid, setuid, etc. pre_exec: ?*const PreExecFn = null, -linux_cgroup: termio.Options.LinuxCgroup = termio.Options.linux_cgroup_default, +linux_cgroup: LinuxCgroup = linux_cgroup_default, /// If set, then the process will be created attached to this pseudo console. /// `stdin`, `stdout`, and `stderr` will be ignored if set. @@ -78,6 +77,11 @@ data: ?*anyopaque = null, /// Process ID is set after start is called. pid: ?posix.pid_t = null, +/// LinuxCGroup type depends on our target OS +pub const LinuxCgroup = if (builtin.os.tag == .linux) ?[]const u8 else void; +pub const linux_cgroup_default = if (LinuxCgroup == void) +{} else null; + /// The various methods a process may exit. pub const Exit = if (builtin.os.tag == .windows) union(enum) { Exited: u32, @@ -138,8 +142,13 @@ fn startPosix(self: *Command, arena: Allocator) !void { else @compileError("missing env vars"); + // Fork. If we have a cgroup specified on Linxu then we use clone const pid: posix.pid_t = switch (builtin.os.tag) { - .linux => if (self.linux_cgroup) |cgroup| try internal_os.cgroup.cloneInto(cgroup) else try posix.fork(), + .linux => if (self.linux_cgroup) |cgroup| + try internal_os.cgroup.cloneInto(cgroup) + else + try posix.fork(), + else => try posix.fork(), }; diff --git a/src/os/cgroup.zig b/src/os/cgroup.zig index 5afc8cdca..417f5641e 100644 --- a/src/os/cgroup.zig +++ b/src/os/cgroup.zig @@ -79,15 +79,14 @@ pub fn cloneInto(cgroup: []const u8) !posix.pid_t { const fd: linux.fd_t = fd: { const rc = linux.open(path, linux.O{ .PATH = true, .DIRECTORY = true }, 0); switch (posix.errno(rc)) { - .SUCCESS => { - break :fd @as(linux.fd_t, @intCast(rc)); - }, + .SUCCESS => break :fd @as(linux.fd_t, @intCast(rc)), else => |errno| { log.err("unable to open cgroup dir {s}: {}", .{ path, errno }); - break :fd -1; + return error.CloneError; }, } }; + assert(fd >= 0); const args: extern struct { flags: u64, @@ -102,7 +101,7 @@ pub fn cloneInto(cgroup: []const u8) !posix.pid_t { set_tid_size: u64, cgroup: u64, } = .{ - .flags = if (fd >= 0) linux.CLONE.INTO_CGROUP else 0, + .flags = linux.CLONE.INTO_CGROUP, .pidfd = 0, .child_tid = 0, .parent_tid = 0, @@ -112,19 +111,17 @@ pub fn cloneInto(cgroup: []const u8) !posix.pid_t { .tls = 0, .set_tid = 0, .set_tid_size = 0, - .cgroup = if (fd >= 0) @intCast(fd) else 0, + .cgroup = @intCast(fd), }; const rc = linux.syscall2(linux.SYS.clone3, @intFromPtr(&args), @sizeOf(@TypeOf(args))); - switch (posix.errno(rc)) { - .SUCCESS => { - return @as(posix.pid_t, @intCast(rc)); - }, - else => |errno| { + return switch (posix.errno(rc)) { + .SUCCESS => @as(posix.pid_t, @intCast(rc)), + else => |errno| err: { log.err("unable to clone: {}", .{errno}); - return error.CloneError; + break :err error.CloneError; }, - } + }; } /// Returns all available cgroup controllers for the given cgroup. diff --git a/src/termio/Exec.zig b/src/termio/Exec.zig index 619babb47..58f1a0b50 100644 --- a/src/termio/Exec.zig +++ b/src/termio/Exec.zig @@ -897,7 +897,7 @@ const Subprocess = struct { pty: ?Pty = null, command: ?Command = null, flatpak_command: ?FlatpakHostCommand = null, - linux_cgroup: termio.Options.LinuxCgroup = termio.Options.linux_cgroup_default, + linux_cgroup: Command.LinuxCgroup = Command.linux_cgroup_default, /// Initialize the subprocess. This will NOT start it, this only sets /// up the internal state necessary to start it later. @@ -1196,8 +1196,8 @@ const Subprocess = struct { // If we have a cgroup, then we copy that into our arena so the // memory remains valid when we start. - const linux_cgroup: termio.Options.LinuxCgroup = cgroup: { - const default = termio.Options.linux_cgroup_default; + const linux_cgroup: Command.LinuxCgroup = cgroup: { + const default = Command.linux_cgroup_default; if (comptime builtin.os.tag != .linux) break :cgroup default; const path = opts.linux_cgroup orelse break :cgroup default; break :cgroup try alloc.dupe(u8, path); diff --git a/src/termio/Options.zig b/src/termio/Options.zig index 079828f38..b4edb473a 100644 --- a/src/termio/Options.zig +++ b/src/termio/Options.zig @@ -4,6 +4,7 @@ const builtin = @import("builtin"); const xev = @import("xev"); const apprt = @import("../apprt.zig"); const renderer = @import("../renderer.zig"); +const Command = @import("../Command.zig"); const Config = @import("../config.zig").Config; const termio = @import("../termio.zig"); @@ -45,7 +46,4 @@ surface_mailbox: apprt.surface.Mailbox, /// The cgroup to apply to the started termio process, if able by /// the termio implementation. This only applies to Linux. -linux_cgroup: LinuxCgroup = linux_cgroup_default, - -pub const LinuxCgroup = if (builtin.os.tag == .linux) ?[]const u8 else void; -pub const linux_cgroup_default = if (LinuxCgroup == void) {} else null; +linux_cgroup: Command.LinuxCgroup = Command.linux_cgroup_default, From 6ae6e3ba11db2cf2f69e6cd54aae91421cc020ed Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sat, 8 Jun 2024 19:08:05 -0700 Subject: [PATCH 4/4] remove unused var --- src/Command.zig | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Command.zig b/src/Command.zig index c8cf00758..60a898309 100644 --- a/src/Command.zig +++ b/src/Command.zig @@ -32,8 +32,6 @@ const EnvMap = std.process.EnvMap; const PreExecFn = fn (*Command) void; -const log = std.log.scoped(.command); - /// Path to the command to run. This must be an absolute path. This /// library does not do PATH lookup. path: []const u8,