Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,8 @@ jobs:
- name: Build
run: npm run build

- name: Packaging smoke
run: npm run smoke:packaged

- name: Deterministic end-to-end workflow validation
run: npm run ci:e2e
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"clean": "rm -rf dist",
"dev": "tsx src/cli.ts",
"prepack": "npm run build",
"smoke:packaged": "node scripts/run-packaging-smoke.mjs",
"smoke:live": "node scripts/run-live-codex-smoke.mjs",
"test": "vitest run",
"typecheck": "tsc -p tsconfig.json --noEmit"
Expand Down
41 changes: 41 additions & 0 deletions scripts/run-packaging-smoke.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env node

import assert from "node:assert/strict";
import { execFile } from "node:child_process";
import { promises as fs } from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { promisify } from "node:util";

// Promise-returning form of child_process.execFile so child processes can be awaited.
const execFileAsync = promisify(execFile);
// Parent of the directory containing this script; main() uses it as the base for file paths and as the child-process cwd.
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");

/**
 * Packaging smoke check for the built CLI.
 *
 * Runs three checks in order and rejects on the first one that fails:
 *  1. `dist/cli.js`, `bin/cstack.js` and `README.md` are accessible under the repo root;
 *  2. `bin/cstack.js --help` (run with the current Node executable) mentions `cstack loop <intent>`;
 *  3. `npm pack --dry-run --json` lists those same three paths among the packaged files.
 */
async function main() {
  const binEntry = path.join(repoRoot, "bin", "cstack.js");
  const requiredOnDisk = [path.join(repoRoot, "dist", "cli.js"), binEntry, path.join(repoRoot, "README.md")];

  // Checked one at a time, in this order, so the first inaccessible artifact is the one that rejects.
  for (const artifact of requiredOnDisk) {
    await fs.access(artifact);
  }

  // A fresh options object per spawn; both children run from the repo root with a 10 MiB output cap.
  const spawnOptions = () => ({ cwd: repoRoot, maxBuffer: 10 * 1024 * 1024 });

  const { stdout: helpText } = await execFileAsync(process.execPath, [binEntry, "--help"], spawnOptions());
  assert.match(helpText, /cstack loop <intent>/, "built CLI help is missing the loop command");

  const { stdout: packJson } = await execFileAsync("npm", ["pack", "--dry-run", "--json"], spawnOptions());
  const packOutput = JSON.parse(packJson);
  assert.ok(Array.isArray(packOutput) && packOutput.length > 0, "npm pack --dry-run returned no package metadata");

  // Keep only entries that carry a non-empty string `path`; anything else is ignored.
  const listedEntries = packOutput[0]?.files ?? [];
  const packagedPaths = new Set(
    listedEntries
      .filter((entry) => entry && typeof entry.path === "string" && entry.path !== "")
      .map((entry) => entry.path)
  );

  const requiredInPackage = ["README.md", "bin/cstack.js", "dist/cli.js"];
  requiredInPackage.forEach((requiredPath) => {
    assert.ok(packagedPaths.has(requiredPath), `packaged output is missing ${requiredPath}`);
  });
}

// Script entry point: run the checks with top-level await. There is no catch here, so a rejection from main() is not handled by this script.
await main();
215 changes: 195 additions & 20 deletions src/validation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,10 @@ function detectSurfaces(options: {
const hasDocker = options.manifests.includes("Dockerfile") || options.manifests.includes("docker-compose.yml") || options.manifests.includes("docker-compose.yaml");
const hasMobileIos = options.manifests.includes("Podfile") || options.manifests.includes("Package.swift");
const hasMobileAndroid = options.manifests.some((manifest) => manifest.includes("gradle"));
const webSignals = deps.some((dep) => /react|next|vite|playwright|cypress|@testing-library/.test(dep)) || scripts.some((name) => /dev|start|storybook/.test(name));
const webSignals =
deps.some((dep) => /react|next|vite|playwright|cypress|@testing-library/.test(dep)) ||
scripts.some((name) => /start|storybook|preview/.test(name)) ||
options.existingTests.some((entry) => /playwright|cypress|storybook/i.test(entry));
const serviceSignals = deps.some((dep) => /express|fastify|nest/.test(dep)) || scripts.some((name) => /serve|api/.test(name));

if (webSignals) {
Expand Down Expand Up @@ -943,9 +946,15 @@ function selectDefaultLocalCommands(profile: ValidationRepoProfile, buildVerific
if (scriptMap.has("test:e2e")) {
add("npm run test:e2e");
}
if (scriptMap.has("ci:e2e")) {
add("npm run ci:e2e");
}
if (scriptMap.has("build")) {
add("npm run build");
}
if (scriptMap.has("smoke:packaged")) {
add("npm run smoke:packaged");
}
if (profile.buildSystems.includes("cargo")) {
add("cargo test");
}
Expand Down Expand Up @@ -984,6 +993,8 @@ function selectDefaultCiJobs(profile: ValidationRepoProfile, localCommands: stri
function buildInitialValidationPlan(profile: ValidationRepoProfile, toolResearch: ValidationToolResearch, buildVerificationRecord: BuildVerificationRecord, selectedSpecialists: ValidationSpecialistSelection): DeliverValidationPlan {
const localCommands = selectDefaultLocalCommands(profile, buildVerificationRecord);
const ciJobs = selectDefaultCiJobs(profile, localCommands);
const e2eCommands = localCommands.filter((command) => /e2e/.test(command));
const packagingCommands = localCommands.filter((command) => /build|pack|smoke/.test(command) && !/live/.test(command));
const layers: DeliverValidationPlan["layers"] = [
{
name: "static",
Expand Down Expand Up @@ -1017,23 +1028,23 @@ function buildInitialValidationPlan(profile: ValidationRepoProfile, toolResearch
},
{
name: "e2e-system",
selected: profile.surfaces.some((surface) => ["web-app", "ios-app", "android-app"].includes(surface)),
status: profile.surfaces.includes("web-app") ? "partial" : "skipped",
rationale: "System flows should cover representative user journeys for interactive products.",
selected: profile.surfaces.some((surface) => ["web-app", "ios-app", "android-app", "cli-binary"].includes(surface)) && e2eCommands.length > 0,
status: e2eCommands.length > 0 ? "ready" : profile.surfaces.some((surface) => ["web-app", "ios-app", "android-app", "cli-binary"].includes(surface)) ? "partial" : "skipped",
rationale: "System flows should cover representative user journeys for interactive products, including deterministic CLI workflow paths.",
selectedTools: toolResearch.candidates.filter((candidate) => candidate.category === "e2e-system" && candidate.selected).map((candidate) => candidate.tool),
localCommands: localCommands.filter((command) => /e2e/.test(command)),
ciCommands: localCommands.filter((command) => /e2e/.test(command)),
coverageIntent: ["critical user journeys", "auth or session flows", "release-time regressions"]
localCommands: e2eCommands,
ciCommands: e2eCommands,
coverageIntent: ["critical user journeys", "auth or session flows", "release-time regressions", "deterministic CLI workflow execution"]
},
{
name: "packaging-smoke",
selected: true,
status: profile.surfaces.includes("container") || profile.surfaces.includes("cli-binary") ? "partial" : "ready",
rationale: "Packaging and runtime smoke checks ensure the produced artifact can actually boot or run.",
status: packagingCommands.length > 0 ? "ready" : profile.surfaces.includes("container") || profile.surfaces.includes("cli-binary") ? "partial" : "ready",
rationale: "Packaging and runtime smoke checks ensure the produced artifact can actually boot or run without depending on registry installs.",
selectedTools: toolResearch.candidates.filter((candidate) => candidate.category === "packaging-smoke" && candidate.selected).map((candidate) => candidate.tool),
localCommands: localCommands.filter((command) => /build/.test(command)),
ciCommands: localCommands.filter((command) => /build/.test(command)),
coverageIntent: ["build artifact readiness", "runtime smoke", "container image structure where relevant"]
localCommands: packagingCommands,
ciCommands: packagingCommands,
coverageIntent: ["build artifact readiness", "runtime smoke", "package contents", "container image structure where relevant"]
}
];

Expand Down Expand Up @@ -1438,6 +1449,152 @@ function finalizeValidationPlanStatus(plan: DeliverValidationPlan, localValidati
return plan.localValidation.commands.length > 0 ? "ready" : plan.status;
}

/**
 * Renders the markdown report for a recovered validation run.
 *
 * The report contains, in order: a title, the summary, the recovery reason, the plan's
 * status and outcome category, one bullet per local validation result (`command: status`),
 * one bullet per coverage gap, and a fixed note. Empty result or gap lists are rendered
 * as a single `- none` bullet.
 *
 * @param options.summary Summary paragraph placed under the title.
 * @param options.recoveryReason Text shown after "Recovery reason:".
 * @param options.localValidationRecord Source of the per-command result bullets.
 * @param options.validationPlan Source of the status line and the coverage gaps.
 * @returns The markdown document, terminated by a single trailing newline.
 */
function buildRecoveredValidationFinalBody(options: {
  summary: string;
  recoveryReason: string;
  localValidationRecord: DeliverValidationLocalRecord;
  validationPlan: DeliverValidationPlan;
}): string {
  const { summary, recoveryReason, localValidationRecord, validationPlan } = options;

  // An empty section still gets one bullet so the heading is never left bare.
  const orNone = (bullets: string[]): string[] => (bullets.length > 0 ? bullets : ["- none"]);

  const commandBullets = orNone(
    localValidationRecord.results.map((result) => `- \`${result.command}\`: ${result.status}`)
  );
  const gapBullets = orNone(validationPlan.coverage.gaps.map((gap) => `- ${gap}`));

  const lines = [
    "# Validation Summary",
    "",
    summary,
    "",
    `Recovery reason: ${recoveryReason}`,
    `Validation status: ${validationPlan.status} (${validationPlan.outcomeCategory})`,
    "",
    "## Local validation commands",
    ...commandBullets,
    "",
    "## Coverage gaps",
    ...gapBullets,
    "",
    "## Notes",
    "- Validation plan synthesized from the repo profile, selected specialists, and local validation evidence."
  ];

  return `${lines.join("\n")}\n`;
}

/**
 * Recovery path for a validation lead that produced no usable final output.
 *
 * Runs the initial plan's local validation commands, derives a plan from the initial
 * plan plus those results, renders a markdown report, and writes the final body and the
 * stage artifacts to disk.
 *
 * @param options.cwd Working directory passed to `runCommandSet`.
 * @param options.stageDir Stage directory; artifacts are written under `<stageDir>/artifacts`.
 * @param options.finalPath File that receives the rendered report.
 * @param options.initialPlan Inferred plan used as the base of the recovered plan.
 * @param options.selectedSpecialists Specialist selections; only `selected` entries are carried over.
 * @param options.specialistExecutions Used to look up each carried-over specialist's disposition and notes.
 * @param options.capabilityRecord Base for the written capabilities artifact.
 * @param options.leadFailure The error that triggered recovery; its message is recorded in the plan and report.
 * @returns The execution result built from the recovered plan and the local validation record.
 */
async function recoverValidationLeadFailure(options: {
  cwd: string;
  stageDir: string;
  finalPath: string;
  repoProfile: ValidationRepoProfile;
  toolResearch: ValidationToolResearch;
  initialPlan: DeliverValidationPlan;
  selectedSpecialists: ValidationSpecialistSelection;
  specialistExecutions: SpecialistExecution[];
  capabilityRecord: CapabilityUsageRecord;
  leadFailure: Error;
}): Promise<DeliverValidationExecutionResult> {
  const { stageDir, initialPlan, leadFailure } = options;
  const artifactPath = (fileName: string): string => path.join(stageDir, "artifacts", fileName);

  const localValidationRecord = await runCommandSet(options.cwd, stageDir, initialPlan.localValidation.commands);
  const commandsPassed = localValidationRecord.status === "passed";

  // Start from the inferred plan and overlay the recovery-specific wording and bookkeeping.
  const basePlan: DeliverValidationPlan = {
    ...initialPlan,
    summary: commandsPassed
      ? "Recovered validation after the validation lead failed to emit final output; inferred commands passed locally."
      : "Recovered validation after the validation lead failed to emit final output, but one or more inferred commands failed locally.",
    profileSummary: `${initialPlan.profileSummary} Recovery used the inferred validation plan because the validation lead artifact was unavailable.`,
    selectedSpecialists: options.selectedSpecialists
      .filter((entry) => entry.selected)
      .map((entry) => {
        // A selected specialist with no matching execution is recorded as accepted, with its selection reason.
        const execution = options.specialistExecutions.find((candidate) => candidate.name === entry.name);
        return {
          name: entry.name,
          disposition: (execution?.disposition ?? "accepted") as SpecialistDisposition,
          reason: execution?.notes ?? entry.reason
        };
      }),
    localValidation: {
      ...initialPlan.localValidation,
      notes: [...initialPlan.localValidation.notes, `Recovered from validation lead failure: ${leadFailure.message}`]
    },
    coverage: {
      ...initialPlan.coverage,
      summary: commandsPassed
        ? "Recovered validation used inferred commands and repo profiling to preserve progress after the lead artifact was missing."
        : "Recovered validation exposed failures in inferred commands after the lead artifact was missing.",
      signals: [...initialPlan.coverage.signals, "validation lead artifact recovery path executed"]
    },
    recommendedChanges: [
      ...initialPlan.recommendedChanges,
      "Inspect validation lead logs to determine why the final artifact was missing."
    ],
    reportMarkdown: ""
  };

  // Gaps contributed by this run: a generic failure line, then one line per blocker category.
  const recoveryGaps: string[] = [];
  if (localValidationRecord.status === "failed") {
    recoveryGaps.push("One or more selected validation commands failed.");
  }
  for (const blocker of localValidationRecord.blockerCategories ?? []) {
    recoveryGaps.push(`Validation blocked by ${blocker}.`);
  }

  const normalizedPlan: DeliverValidationPlan = {
    ...basePlan,
    status: finalizeValidationPlanStatus(basePlan, localValidationRecord),
    outcomeCategory: deriveValidationOutcomeCategory(basePlan, localValidationRecord),
    coverage: {
      ...basePlan.coverage,
      gaps: [...basePlan.coverage.gaps, ...recoveryGaps]
    }
  };

  // The report is rendered from the plan first and then stored on it.
  const finalBody = buildRecoveredValidationFinalBody({
    summary: normalizedPlan.summary,
    recoveryReason: leadFailure.message,
    localValidationRecord,
    validationPlan: normalizedPlan
  });
  normalizedPlan.reportMarkdown = finalBody;

  const observedCapabilities = inferUsedValidationCapabilities({
    localValidationRecord,
    validationPlan: normalizedPlan,
    availableCapabilities: options.capabilityRecord.available,
    selectedToolNames: options.toolResearch.candidates
      .filter((candidate) => candidate.selected)
      .map((candidate) => candidate.tool)
  });
  const capabilityArtifact: CapabilityUsageRecord = {
    ...options.capabilityRecord,
    used: observedCapabilities,
    notes: [
      ...(options.capabilityRecord.notes ?? []),
      "used capabilities are derived from executed local validation commands and observed CI validation job coverage.",
      "validation lead recovery synthesized the plan from inferred defaults."
    ]
  };
  const coverageSummary = buildCoverageSummary(normalizedPlan, localValidationRecord);

  // Writes are awaited one after another, in this order.
  await fs.writeFile(options.finalPath, finalBody, "utf8");
  await writeJson(artifactPath("capabilities.json"), capabilityArtifact);
  await writeJson(path.join(stageDir, "validation-plan.json"), normalizedPlan);
  await fs.writeFile(artifactPath("test-pyramid.md"), normalizedPlan.pyramidMarkdown, "utf8");
  await writeJson(artifactPath("coverage-summary.json"), coverageSummary);
  await fs.writeFile(
    artifactPath("coverage-gaps.md"),
    renderCoverageGapsMarkdown(normalizedPlan, localValidationRecord),
    "utf8"
  );
  await writeJson(artifactPath("local-validation.json"), localValidationRecord);
  await writeJson(artifactPath("ci-validation.json"), normalizedPlan.ciValidation);
  await fs.writeFile(artifactPath("github-actions-plan.md"), normalizedPlan.githubActionsPlanMarkdown, "utf8");

  return {
    repoProfile: options.repoProfile,
    toolResearch: options.toolResearch,
    validationPlan: normalizedPlan,
    localValidationRecord,
    coverageSummary,
    selectedSpecialists: options.selectedSpecialists,
    specialistExecutions: options.specialistExecutions,
    finalBody
  };
}

export async function runDeliverValidationExecution(options: DeliverValidationExecutionOptions): Promise<DeliverValidationExecutionResult> {
await fs.mkdir(path.join(options.paths.stageDir, "artifacts"), { recursive: true });
await fs.writeFile(options.paths.stdoutPath, "", "utf8");
Expand Down Expand Up @@ -1524,14 +1681,32 @@ export async function runDeliverValidationExecution(options: DeliverValidationEx
...(typeof options.timeoutSeconds === "number" ? { timeoutSeconds: options.timeoutSeconds } : {})
});

const finalBody = await readCodexFinalOutput({
context: "Validation lead",
finalPath: options.paths.finalPath,
stdoutPath: options.paths.stdoutPath,
stderrPath: options.paths.stderrPath,
result
});
const validationPlan = parseJson<DeliverValidationPlan>(finalBody, "Validation lead");
let finalBody = "";
let validationPlan: DeliverValidationPlan;
try {
finalBody = await readCodexFinalOutput({
context: "Validation lead",
finalPath: options.paths.finalPath,
stdoutPath: options.paths.stdoutPath,
stderrPath: options.paths.stderrPath,
result
});
validationPlan = parseJson<DeliverValidationPlan>(finalBody, "Validation lead");
} catch (error) {
const leadFailure = error instanceof Error ? error : new Error(String(error));
return recoverValidationLeadFailure({
cwd: options.cwd,
stageDir: options.paths.stageDir,
finalPath: options.paths.finalPath,
repoProfile,
toolResearch,
initialPlan,
selectedSpecialists,
specialistExecutions,
capabilityRecord,
leadFailure
});
}
const acceptedByName = new Map(validationPlan.selectedSpecialists.map((entry) => [entry.name, entry]));
for (let index = 0; index < specialistExecutions.length; index += 1) {
const execution = specialistExecutions[index]!;
Expand Down
20 changes: 13 additions & 7 deletions test/deliver.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ describe("runDeliver", () => {
}
}, 60_000);

it("fails closed when the validation lead exits without writing final output", async () => {
it("recovers when the validation lead exits without writing final output", async () => {
process.env.FAKE_CODEX_NO_FINAL_VALIDATION = "1";
await writeGitHubFixture({
repoView: {
Expand Down Expand Up @@ -549,17 +549,23 @@ describe("runDeliver", () => {
status: string;
outcomeCategory: string;
summary: string;
localValidation: { notes: string[] };
};
const localValidation = JSON.parse(
await fs.readFile(path.join(runDir, "stages", "validation", "artifacts", "local-validation.json"), "utf8")
) as { status: string };
const validationFinal = await fs.readFile(path.join(runDir, "stages", "validation", "final.md"), "utf8");
const deliverySummary = await fs.readFile(run.finalPath, "utf8");

expect(run.status).toBe("failed");
expect(validationPlan.status).toBe("blocked");
expect(validationPlan.outcomeCategory).toBe("blocked-by-validation");
expect(validationPlan.summary).toContain("Validation lead did not write final output");
expect(validationPlan.summary).not.toContain("ENOENT");
expect(validationFinal).toContain("Validation stage failed");
expect(deliverySummary).toContain("Validation lead did not write final output");
expect(validationPlan.status).toBe("ready");
expect(validationPlan.outcomeCategory).toBe("ready");
expect(validationPlan.summary).toContain("Recovered validation");
expect(validationPlan.localValidation.notes.join("\n")).toContain("Validation lead did not write final output");
expect(localValidation.status).toBe("passed");
expect(validationFinal).toContain("Recovered validation");
expect(validationFinal).toContain("Recovery reason: Validation lead did not write final output");
expect(deliverySummary).toContain("Recovered validation");
expect(deliverySummary).not.toContain("ENOENT");
}, 60_000);

Expand Down
Loading