Merge pull request #1168 from mitchellh/kill

termio: ensure we kill all the children without killing ourselves
This commit is contained in:
Mitchell Hashimoto
2023-12-27 22:01:40 -08:00
committed by GitHub

View File

@ -1102,10 +1102,10 @@ const Subprocess = struct {
pub fn stop(self: *Subprocess) void { pub fn stop(self: *Subprocess) void {
// Kill our command // Kill our command
if (self.command) |*cmd| { if (self.command) |*cmd| {
// Note: this will also wait for the command to exit, so
// DO NOT call cmd.wait
killCommand(cmd) catch |err| killCommand(cmd) catch |err|
log.err("error sending SIGHUP to command, may hang: {}", .{err}); log.err("error sending SIGHUP to command, may hang: {}", .{err});
_ = cmd.wait(false) catch |err|
log.err("error waiting for command to exit: {}", .{err});
self.command = null; self.command = null;
} }
@ -1141,7 +1141,8 @@ const Subprocess = struct {
} }
/// Kill the underlying subprocess. This sends a SIGHUP to the child /// Kill the underlying subprocess. This sends a SIGHUP to the child
/// process. This doesn't wait for the child process to be exited. /// process. This also waits for the command to exit and will return the
/// exit code.
fn killCommand(command: *Command) !void { fn killCommand(command: *Command) !void {
if (command.pid) |pid| { if (command.pid) |pid| {
switch (builtin.os.tag) { switch (builtin.os.tag) {
@ -1149,38 +1150,77 @@ const Subprocess = struct {
if (windows.kernel32.TerminateProcess(pid, 0) == 0) { if (windows.kernel32.TerminateProcess(pid, 0) == 0) {
return windows.unexpectedError(windows.kernel32.GetLastError()); return windows.unexpectedError(windows.kernel32.GetLastError());
} }
_ = try command.wait(false);
}, },
else => { else => if (getpgid(pid)) |pgid| {
const pgid_: ?c.pid_t = pgid: { log.warn(
const pgid = c.getpgid(pid); "state killpg pid={} pgid={} mypid={} mypgid={}",
.{ pid, pgid, c.getpid(), c.getpgid(0) },
);
// Don't know why it would be zero but its not a valid pid // It is possible to send a killpg between the time that
if (pgid == 0) break :pgid null; // our child process calls setsid but before or simultaneous
// to calling execve. In this case, the direct child dies
// If the pid doesn't exist then... okay. // but grandchildren survive. To work around this, we loop
if (pgid == c.ESRCH) break :pgid null; // and repeatedly kill the process group until all
// descendents are well and truly dead. We will not rest
// If we have an error... // until the entire family tree is obliterated.
if (pgid < 0) { while (true) {
log.warn("error getting pgid for kill", .{});
break :pgid null;
}
break :pgid pgid;
};
if (pgid_) |pgid| {
if (c.killpg(pgid, c.SIGHUP) < 0) { if (c.killpg(pgid, c.SIGHUP) < 0) {
log.warn("error killing process group pgid={}", .{pgid}); log.warn("error killing process group pgid={}", .{pgid});
return error.KillFailed; return error.KillFailed;
} }
// See Command.zig wait for why we specify WNOHANG.
// The gist is that it lets us detect when children
// are still alive without blocking so that we can
// kill them again.
const res = std.os.waitpid(pid, std.c.W.NOHANG);
if (res.pid != 0) break;
} }
}, },
} }
} }
} }
fn getpgid(pid: c.pid_t) ?c.pid_t {
// Get our process group ID. Before the child pid calls setsid
// the pgid will be ours because we forked it. Its possible that
// we may be calling this before setsid if we are killing a surface
// VERY quickly after starting it.
const my_pgid = c.getpgid(0);
// We loop while pgid == my_pgid. The expectation if we have a valid
// pid is that setsid will eventually be called because it is the
// FIRST thing the child process does and as far as I can tell,
// setsid cannot fail. I'm sure that's not true, but I'd rather
// have a bug reported than defensively program against it now.
while (true) {
const pgid = c.getpgid(pid);
if (pgid == my_pgid) {
log.warn("pgid is our own, retrying", .{});
std.time.sleep(10 * std.time.ns_per_ms);
continue;
}
// Don't know why it would be zero but its not a valid pid
if (pgid == 0) return null;
// If the pid doesn't exist then... we're done!
if (pgid == c.ESRCH) return null;
// If we have an error we're done.
if (pgid < 0) {
log.warn("error getting pgid for kill", .{});
return null;
}
return pgid;
}
}
/// Kill the underlying process started via Flatpak host command. /// Kill the underlying process started via Flatpak host command.
/// This sends a signal via the Flatpak API. /// This sends a signal via the Flatpak API.
fn killCommandFlatpak(command: *FlatpakHostCommand) !void { fn killCommandFlatpak(command: *FlatpakHostCommand) !void {