Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions src/remote/proxyCommandRetry.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import * as fs from "node:fs/promises";
import * as path from "node:path";

import { renameWithRetry, tempFilePath } from "../util";

import type { Logger } from "../logging/logger";

/**
 * POSIX shell script that wraps `coder ssh` with retry logic for the SSH
 * ProxyCommand. After sleep/wake, DNS failures cause `coder ssh` to exit
 * instantly, producing unparsable output that the Remote SSH extension
 * treats as a permanent error ("Reload Window"). Retrying with a delay
 * allows DNS to recover, including across system suspend/resume.
 *
 * The command to run is passed as the script's own arguments and executed
 * verbatim via `"$@"`, so argument boundaries are preserved.
 *
 * Optional environment variables:
 * - `CODER_RETRY_MAX_RETRIES` (default 10): maximum number of attempts.
 *   Non-integer values fall back to the default and values below 1 are
 *   raised to 1, so the wrapped command always runs at least once.
 * - `CODER_RETRY_SLEEP` (default 5): argument handed to `sleep` between
 *   attempts.
 * - `CODER_RETRY_MIN_RUNTIME` (default 10): whole seconds. Non-integer
 *   values fall back to the default.
 *
 * Only retries when the command exits quickly (< CODER_RETRY_MIN_RUNTIME)
 * with a non-zero status. Each failed attempt is logged to stderr. The
 * script exits with the status of the last attempt.
 */
const RETRY_SCRIPT = `#!/bin/sh
# Coder SSH ProxyCommand retry wrapper.
# Written by the Coder VS Code extension; do not edit.
max_retries=\${CODER_RETRY_MAX_RETRIES:-10}
retry_sleep=\${CODER_RETRY_SLEEP:-5}
min_runtime=\${CODER_RETRY_MIN_RUNTIME:-10}
# The integer tests below need plain non-negative integers; fall back to the
# defaults otherwise instead of failing inside the loop.
case $max_retries in
  ''|*[!0-9]*) max_retries=10 ;;
esac
case $min_runtime in
  ''|*[!0-9]*) min_runtime=10 ;;
esac
# Always run the wrapped command at least once, even when retries are disabled.
[ "$max_retries" -ge 1 ] || max_retries=1
n=0
while [ $n -lt $max_retries ]; do
  start=$(date +%s)
  "$@"
  rc=$?
  elapsed=$(($(date +%s) - start))
  # A long-running command was a real session, not an instant DNS failure.
  [ $elapsed -ge $min_runtime ] && exit $rc
  [ $rc -eq 0 ] && exit 0
  n=$((n + 1))
  echo "coder-retry: attempt $n/$max_retries failed (rc=$rc, elapsed=\${elapsed}s)" >&2
  [ $n -lt $max_retries ] && sleep $retry_sleep
done
exit "$rc"
`;

/** File name of the retry wrapper script; joined onto the `baseDir` passed to `ensureRetryScript`. */
const SCRIPT_NAME = "coder-ssh-retry.sh";

/**
 * Ensure the retry wrapper script exists on disk and return its path.
 *
 * The script is rewritten on every call: its contents go to a temporary
 * file next to the destination, which is then renamed over the final path.
 *
 * @param baseDir Directory that holds the script; created recursively if
 * it does not exist.
 * @param logger Receives a warning when the temporary file cannot be
 * removed after a failure.
 * @returns Path of the installed script inside `baseDir`.
 * @throws Error whose `cause` is the underlying failure when the script
 * cannot be written or moved into place. Failures creating `baseDir`
 * propagate unchanged.
 */
export async function ensureRetryScript(
  baseDir: string,
  logger: Logger,
): Promise<string> {
  await fs.mkdir(baseDir, { recursive: true });
  const scriptPath = path.join(baseDir, SCRIPT_NAME);

  // Atomic write: temp file + rename to avoid races between concurrent
  // VS Code windows writing the same script simultaneously.
  const tmpPath = tempFilePath(scriptPath, "tmp");
  try {
    // The write is inside the try so a failed or partial write (e.g. disk
    // full) is cleaned up the same way as a failed rename.
    await fs.writeFile(tmpPath, RETRY_SCRIPT, { mode: 0o755 });
    await renameWithRetry(
      (src, dest) => fs.rename(src, dest),
      tmpPath,
      scriptPath,
    );
  } catch (error) {
    // Best-effort cleanup; a missing temp file is expected when the write
    // never created it, so only other unlink failures are reported.
    await fs.unlink(tmpPath).catch((unlinkErr: NodeJS.ErrnoException) => {
      if (unlinkErr.code !== "ENOENT") {
        logger.warn("Failed to clean up temp retry script", tmpPath, unlinkErr);
      }
    });
    throw new Error(`Failed to write retry script to ${scriptPath}`, {
      cause: error,
    });
  }
  return scriptPath;
}
83 changes: 59 additions & 24 deletions src/remote/remote.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ import {
import { vscodeProposed } from "../vscodeProposed";
import { WorkspaceMonitor } from "../workspace/workspaceMonitor";

import { ensureRetryScript } from "./proxyCommandRetry";
import {
SshConfig,
type SSHValues,
Expand Down Expand Up @@ -660,8 +661,8 @@ export class Remote {

/**
* Builds the ProxyCommand for SSH connections to Coder workspaces.
* Uses `coder ssh` for modern deployments with wildcard support,
* or falls back to `coder vscodessh` for older deployments.
* On non-Windows platforms (e.g. macOS/Linux), the command is wrapped in a
* retry script so the connection survives transient DNS failures after
* sleep/wake (see proxyCommandRetry.ts).
*/
private async buildProxyCommand(
binaryPath: string,
Expand All @@ -670,6 +671,41 @@ export class Remote {
logDir: string,
useWildcardSSH: boolean,
cliAuth: CliAuth,
): Promise<string> {
const coderCommand = await this.buildCoderCommand(
binaryPath,
label,
hostPrefix,
logDir,
useWildcardSSH,
cliAuth,
);

if (os.platform() === "win32") {
return coderCommand;
}
try {
const retryScript = await ensureRetryScript(
this.pathResolver.getGlobalConfigDir(""),
this.logger,
);
return `${escapeCommandArg(retryScript)} ${coderCommand}`;
} catch (error) {
this.logger.warn("Failed to write retry wrapper, skipping", error);
return coderCommand;
}
}

/**
* Builds the raw `coder ssh` or `coder vscodessh` command string.
*/
private async buildCoderCommand(
binaryPath: string,
label: string,
hostPrefix: string,
logDir: string,
useWildcardSSH: boolean,
cliAuth: CliAuth,
): Promise<string> {
const vscodeConfig = vscode.workspace.getConfiguration();

Expand All @@ -678,8 +714,7 @@ export class Remote {
const logArgs = await this.getLogArgs(logDir);

if (useWildcardSSH) {
// User SSH flags are included first; internally-managed flags
// are appended last so they take precedence.
// User SSH flags first; internal flags last so they take precedence.
const userSshFlags = getSshFlags(vscodeConfig);
// Make sure to update the `coder.sshFlags` description if we add more internal flags here!
const internalFlags = [
Expand All @@ -695,28 +730,28 @@ export class Remote {

const allFlags = [...userSshFlags, ...internalFlags];
return `${escapedBinaryPath} ${globalConfig.join(" ")} ssh ${allFlags.join(" ")}`;
} else {
const networkInfoDir = escapeCommandArg(
this.pathResolver.getNetworkInfoPath(),
);
const sessionTokenFile = escapeCommandArg(
this.pathResolver.getSessionTokenPath(label),
);
const urlFile = escapeCommandArg(this.pathResolver.getUrlPath(label));
}

const sshFlags = [
"--network-info-dir",
networkInfoDir,
...logArgs,
"--session-token-file",
sessionTokenFile,
"--url-file",
urlFile,
"%h",
];
const networkInfoDir = escapeCommandArg(
this.pathResolver.getNetworkInfoPath(),
);
const sessionTokenFile = escapeCommandArg(
this.pathResolver.getSessionTokenPath(label),
);
const urlFile = escapeCommandArg(this.pathResolver.getUrlPath(label));

const sshFlags = [
"--network-info-dir",
networkInfoDir,
...logArgs,
"--session-token-file",
sessionTokenFile,
"--url-file",
urlFile,
"%h",
];

return `${escapedBinaryPath} ${globalConfig.join(" ")} vscodessh ${sshFlags.join(" ")}`;
}
return `${escapedBinaryPath} ${globalConfig.join(" ")} vscodessh ${sshFlags.join(" ")}`;
}

/**
Expand Down
109 changes: 109 additions & 0 deletions test/unit/remote/proxyCommandRetry.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import { execFile } from "node:child_process";
import * as fs from "node:fs/promises";
import * as os from "node:os";
import * as path from "node:path";
import { promisify } from "node:util";
import { afterEach, beforeEach, describe, expect, it } from "vitest";

import { ensureRetryScript } from "@/remote/proxyCommandRetry";

import { createMockLogger } from "../../mocks/testHelpers";
import { isWindows } from "../../utils/platform";

/** Promise-returning form of `execFile`, produced with `util.promisify`. */
const execFileAsync = promisify(execFile);

/**
 * Execute the retry script with test-friendly settings: no sleep between
 * attempts, up to 10 attempts, and a 10 second minimum runtime. Entries in
 * `env` override both these settings and the inherited process environment.
 */
function run(
  script: string,
  args: string[],
  env: Record<string, string> = {},
  timeout?: number,
) {
  const fastDefaults = {
    CODER_RETRY_SLEEP: "0",
    CODER_RETRY_MAX_RETRIES: "10",
    CODER_RETRY_MIN_RUNTIME: "10",
  };
  // Later spreads win: inherited environment < fast defaults < per-test overrides.
  const childEnv = { ...process.env, ...fastDefaults, ...env };
  return execFileAsync(script, args, { timeout, env: childEnv });
}

// Exercises the generated POSIX wrapper by actually executing it, so the
// whole suite is skipped on Windows.
describe.skipIf(isWindows())("proxyCommandRetry", () => {
  let tmpDir: string;
  let script: string;

  // Every test gets its own scratch directory holding a freshly written script.
  beforeEach(async () => {
    tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "coder-retry-test-"));
    script = await ensureRetryScript(tmpDir, createMockLogger());
  });

  afterEach(async () => {
    await fs.rm(tmpDir, { recursive: true, force: true });
  });

  it("passes through on success", async () => {
    const { stdout } = await run(script, ["echo", "hello"]);
    expect(stdout.trim()).toBe("hello");
  });

  it("retries on quick failure then succeeds", async () => {
    // Fails until marker file exists, then succeeds.
    const marker = path.join(tmpDir, "marker");
    const helper = path.join(tmpDir, "h.sh");
    await fs.writeFile(
      helper,
      `#!/bin/sh\n[ -f "${marker}" ] && echo ok && exit 0\ntouch "${marker}"\nexit 1\n`,
      { mode: 0o755 },
    );

    const { stdout } = await run(script, [helper]);
    expect(stdout.trim()).toBe("ok");
  });

  it("gives up after max retries and logs each attempt", async () => {
    const failed = run(script, ["sh", "-c", "exit 42"], {
      CODER_RETRY_MAX_RETRIES: "3",
    });

    // `rejects` fails with a clear message if the command unexpectedly
    // succeeds, instead of routing that failure through a catch block.
    await expect(failed).rejects.toMatchObject({
      code: 42,
      stderr: expect.stringContaining("attempt 1/3 failed"),
    });
    await expect(failed).rejects.toMatchObject({
      stderr: expect.stringContaining("attempt 3/3 failed"),
    });
  });

  it("skips retry when command ran longer than min runtime", async () => {
    // First run sleeps past the minimum runtime and exits 1; any later run
    // would see the marker and exit 99.
    const marker = path.join(tmpDir, "ran");
    const helper = path.join(tmpDir, "slow.sh");
    await fs.writeFile(
      helper,
      `#!/bin/sh\n[ -f "${marker}" ] && exit 99\ntouch "${marker}"\nsleep 2\nexit 1\n`,
      { mode: 0o755 },
    );

    // Exit code 1 (not 99) proves it ran once and didn't retry.
    await expect(
      run(script, [helper], { CODER_RETRY_MIN_RUNTIME: "1" }, 10000),
    ).rejects.toMatchObject({ code: 1 });
  });

  it("preserves arguments with spaces", async () => {
    const { stdout } = await run(script, [
      "sh",
      "-c",
      'echo "$1 $2"',
      "--",
      "hello world",
      "it's fine",
    ]);
    expect(stdout.trim()).toBe("hello world it's fine");
  });
});