diff --git a/src/termio/Exec.zig b/src/termio/Exec.zig
index 51992a343..14bedb219 100644
--- a/src/termio/Exec.zig
+++ b/src/termio/Exec.zig
@@ -1102,10 +1102,10 @@ const Subprocess = struct {
     pub fn stop(self: *Subprocess) void {
         // Kill our command
         if (self.command) |*cmd| {
+            // Note: this will also wait for the command to exit, so
+            // DO NOT call cmd.wait
             killCommand(cmd) catch |err|
                 log.err("error sending SIGHUP to command, may hang: {}", .{err});
-            _ = cmd.wait(false) catch |err|
-                log.err("error waiting for command to exit: {}", .{err});
             self.command = null;
         }
 
@@ -1141,7 +1141,8 @@ const Subprocess = struct {
     }
 
     /// Kill the underlying subprocess. This sends a SIGHUP to the child
-    /// process. This doesn't wait for the child process to be exited.
+    /// process. This also waits for the command to exit and will return the
+    /// exit code.
     fn killCommand(command: *Command) !void {
         if (command.pid) |pid| {
             switch (builtin.os.tag) {
@@ -1149,38 +1150,77 @@ const Subprocess = struct {
                     if (windows.kernel32.TerminateProcess(pid, 0) == 0) {
                         return windows.unexpectedError(windows.kernel32.GetLastError());
                     }
+
+                    _ = try command.wait(false);
                 },
 
-                else => {
-                    const pgid_: ?c.pid_t = pgid: {
-                        const pgid = c.getpgid(pid);
+                else => if (getpgid(pid)) |pgid| {
+                    log.warn(
+                        "state killpg pid={} pgid={} mypid={} mypgid={}",
+                        .{ pid, pgid, c.getpid(), c.getpgid(0) },
+                    );
 
-                        // Don't know why it would be zero but its not a valid pid
-                        if (pgid == 0) break :pgid null;
-
-                        // If the pid doesn't exist then... okay.
-                        if (pgid == c.ESRCH) break :pgid null;
-
-                        // If we have an error...
-                        if (pgid < 0) {
-                            log.warn("error getting pgid for kill", .{});
-                            break :pgid null;
-                        }
-
-                        break :pgid pgid;
-                    };
-
-                    if (pgid_) |pgid| {
+                    // It is possible to send a killpg between the time that
+                    // our child process calls setsid but before or simultaneous
+                    // to calling execve. In this case, the direct child dies
+                    // but grandchildren survive. To work around this, we loop
+                    // and repeatedly kill the process group until all
+                    // descendents are well and truly dead. We will not rest
+                    // until the entire family tree is obliterated.
+                    while (true) {
                         if (c.killpg(pgid, c.SIGHUP) < 0) {
                             log.warn("error killing process group pgid={}", .{pgid});
                             return error.KillFailed;
                         }
+
+                        // See Command.zig wait for why we specify WNOHANG.
+                        // The gist is that it lets us detect when children
+                        // are still alive without blocking so that we can
+                        // kill them again.
+                        const res = std.os.waitpid(pid, std.c.W.NOHANG);
+                        if (res.pid != 0) break;
                     }
                 },
             }
         }
     }
 
+    fn getpgid(pid: c.pid_t) ?c.pid_t {
+        // Get our process group ID. Before the child pid calls setsid
+        // the pgid will be ours because we forked it. Its possible that
+        // we may be calling this before setsid if we are killing a surface
+        // VERY quickly after starting it.
+        const my_pgid = c.getpgid(0);
+
+        // We loop while pgid == my_pgid. The expectation if we have a valid
+        // pid is that setsid will eventually be called because it is the
+        // FIRST thing the child process does and as far as I can tell,
+        // setsid cannot fail. I'm sure that's not true, but I'd rather
+        // have a bug reported than defensively program against it now.
+        while (true) {
+            const pgid = c.getpgid(pid);
+            if (pgid == my_pgid) {
+                log.warn("pgid is our own, retrying", .{});
+                std.time.sleep(10 * std.time.ns_per_ms);
+                continue;
+            }
+
+            // Don't know why it would be zero but its not a valid pid
+            if (pgid == 0) return null;
+
+            // If the pid doesn't exist then... we're done!
+            if (pgid == c.ESRCH) return null;
+
+            // If we have an error we're done.
+            if (pgid < 0) {
+                log.warn("error getting pgid for kill", .{});
+                return null;
+            }
+
+            return pgid;
+        }
+    }
+
     /// Kill the underlying process started via Flatpak host command.
     /// This sends a signal via the Flatpak API.
     fn killCommandFlatpak(command: *FlatpakHostCommand) !void {