Merge pull request #1848 from jcollie/clone_into_cgroup

Use clone3 / CLONE_INTO_CGROUP on Linux
This commit is contained in:
Mitchell Hashimoto
2024-06-08 19:09:19 -07:00
committed by GitHub
4 changed files with 83 additions and 16 deletions

View File

@ -61,6 +61,8 @@ stderr: ?File = null,
/// exec process takes over, such as signal handlers, setsid, setuid, etc.
pre_exec: ?*const PreExecFn = null,
linux_cgroup: LinuxCgroup = linux_cgroup_default,
/// If set, then the process will be created attached to this pseudo console.
/// `stdin`, `stdout`, and `stderr` will be ignored if set.
pseudo_console: if (builtin.os.tag == .windows) ?windows.exp.HPCON else void =
@ -73,6 +75,11 @@ data: ?*anyopaque = null,
/// Process ID is set after start is called.
pid: ?posix.pid_t = null,
/// Type of the `linux_cgroup` field; it varies with the target OS. On Linux
/// it is an optional cgroup path, on every other target it is `void`.
pub const LinuxCgroup = switch (builtin.os.tag) {
    .linux => ?[]const u8,
    else => void,
};

/// Default for a `LinuxCgroup` value: `null` when the type is the optional
/// path (Linux), the void value otherwise.
pub const linux_cgroup_default = if (LinuxCgroup != void) null else {};
/// The various methods a process may exit.
pub const Exit = if (builtin.os.tag == .windows) union(enum) {
Exited: u32,
@ -133,8 +140,16 @@ fn startPosix(self: *Command, arena: Allocator) !void {
else
@compileError("missing env vars");
// Fork
const pid = try posix.fork();
// Fork. If we have a cgroup specified on Linux then we use clone
const pid: posix.pid_t = switch (builtin.os.tag) {
.linux => if (self.linux_cgroup) |cgroup|
try internal_os.cgroup.cloneInto(cgroup)
else
try posix.fork(),
else => try posix.fork(),
};
if (pid != 0) {
// Parent, return immediately.
self.pid = @intCast(pid);

View File

@ -1,7 +1,11 @@
const std = @import("std");
const assert = std.debug.assert;
const linux = std.os.linux;
const posix = std.posix;
const Allocator = std.mem.Allocator;
const log = std.log.scoped(.@"linux-cgroup");
/// Returns the path to the cgroup for the given pid.
pub fn current(alloc: Allocator, pid: std.os.linux.pid_t) !?[]const u8 {
var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
@ -64,6 +68,62 @@ pub fn moveInto(
try file.writer().print("{}", .{pid});
}
/// Use clone3 to have the kernel create a new process with the correct cgroup
/// rather than moving the process to the correct cgroup later.
///
/// `cgroup` is appended verbatim to "/sys/fs/cgroup" to form the directory
/// that is handed to the kernel, so it is expected to carry a leading '/'.
///
/// Returns the raw clone3 result as a pid: the caller treats a non-zero
/// value as "we are the parent, this is the child's pid" and zero as
/// "we are the child", exactly like fork.
///
/// Errors:
///   - `error.NoSpaceLeft` if the resulting path does not fit in the
///     path buffer.
///   - `error.CloneError` if the cgroup directory cannot be opened or if
///     the clone3 syscall fails (the errno is logged).
pub fn cloneInto(cgroup: []const u8) !posix.pid_t {
    var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    const path = try std.fmt.bufPrintZ(&buf, "/sys/fs/cgroup{s}", .{cgroup});

    // Get a file descriptor that refers to the cgroup directory in the cgroup
    // sysfs to pass to the kernel in clone3. CLOEXEC keeps the descriptor
    // from leaking into programs exec'd by any process that inherits it.
    const fd: linux.fd_t = fd: {
        const rc = linux.open(
            path,
            linux.O{ .PATH = true, .DIRECTORY = true, .CLOEXEC = true },
            0,
        );
        switch (posix.errno(rc)) {
            .SUCCESS => break :fd @as(linux.fd_t, @intCast(rc)),
            else => |errno| {
                log.err("unable to open cgroup dir {s}: {}", .{ path, errno });
                return error.CloneError;
            },
        }
    };
    assert(fd >= 0);

    // The descriptor is only needed for the duration of the clone3 call.
    // This defer runs on every return path, and after a successful clone it
    // runs in both the parent and the child (each has its own copy of the
    // descriptor), so neither process keeps the cgroup directory open.
    defer _ = linux.close(fd);

    // Mirrors the kernel's `struct clone_args`; every field is 64 bits wide.
    const args: extern struct {
        flags: u64,
        pidfd: u64,
        child_tid: u64,
        parent_tid: u64,
        exit_signal: u64,
        stack: u64,
        stack_size: u64,
        tls: u64,
        set_tid: u64,
        set_tid_size: u64,
        cgroup: u64,
    } = .{
        .flags = linux.CLONE.INTO_CGROUP,
        .pidfd = 0,
        .child_tid = 0,
        .parent_tid = 0,
        // Deliver SIGCHLD to the parent on exit, the same as fork does.
        .exit_signal = linux.SIG.CHLD,
        .stack = 0,
        .stack_size = 0,
        .tls = 0,
        .set_tid = 0,
        .set_tid_size = 0,
        .cgroup = @intCast(fd),
    };

    const rc = linux.syscall2(linux.SYS.clone3, @intFromPtr(&args), @sizeOf(@TypeOf(args)));
    return switch (posix.errno(rc)) {
        .SUCCESS => @as(posix.pid_t, @intCast(rc)),
        else => |errno| err: {
            log.err("unable to clone: {}", .{errno});
            break :err error.CloneError;
        },
    };
}
/// Returns all available cgroup controllers for the given cgroup.
/// The cgroup should have a '/'-prefix.
///

View File

@ -897,7 +897,7 @@ const Subprocess = struct {
pty: ?Pty = null,
command: ?Command = null,
flatpak_command: ?FlatpakHostCommand = null,
linux_cgroup: termio.Options.LinuxCgroup = termio.Options.linux_cgroup_default,
linux_cgroup: Command.LinuxCgroup = Command.linux_cgroup_default,
/// Initialize the subprocess. This will NOT start it, this only sets
/// up the internal state necessary to start it later.
@ -1196,8 +1196,8 @@ const Subprocess = struct {
// If we have a cgroup, then we copy that into our arena so the
// memory remains valid when we start.
const linux_cgroup: termio.Options.LinuxCgroup = cgroup: {
const default = termio.Options.linux_cgroup_default;
const linux_cgroup: Command.LinuxCgroup = cgroup: {
const default = Command.linux_cgroup_default;
if (comptime builtin.os.tag != .linux) break :cgroup default;
const path = opts.linux_cgroup orelse break :cgroup default;
break :cgroup try alloc.dupe(u8, path);
@ -1315,6 +1315,7 @@ const Subprocess = struct {
}
}).callback,
.data = self,
.linux_cgroup = self.linux_cgroup,
};
try cmd.start(alloc);
errdefer killCommand(&cmd) catch |err| {
@ -1345,13 +1346,6 @@ const Subprocess = struct {
fn childPreExec(self: *Subprocess) !void {
// Setup our pty
try self.pty.?.childPreExec();
// If we have a cgroup set, then we want to move into that cgroup.
if (comptime builtin.os.tag == .linux) {
if (self.linux_cgroup) |cgroup| {
try internal_os.cgroup.moveInto(cgroup, 0);
}
}
}
/// Called to notify that we exited externally so we can unset our

View File

@ -4,6 +4,7 @@ const builtin = @import("builtin");
const xev = @import("xev");
const apprt = @import("../apprt.zig");
const renderer = @import("../renderer.zig");
const Command = @import("../Command.zig");
const Config = @import("../config.zig").Config;
const termio = @import("../termio.zig");
@ -45,7 +46,4 @@ surface_mailbox: apprt.surface.Mailbox,
/// The cgroup to apply to the started termio process, if able by
/// the termio implementation. This only applies to Linux.
linux_cgroup: LinuxCgroup = linux_cgroup_default,
pub const LinuxCgroup = if (builtin.os.tag == .linux) ?[]const u8 else void;
pub const linux_cgroup_default = if (LinuxCgroup == void) {} else null;
linux_cgroup: Command.LinuxCgroup = Command.linux_cgroup_default,