Changes from all commits (21 commits)
640f7f2
feat: add /strands test command for TUI testing via MCP harness
Hweinstock Mar 28, 2026
a64e656
fix: handle fork workflow_dispatch repo context for cross-repo issue …
Hweinstock Apr 2, 2026
3be5a74
fix: use Object.defineProperty to override context.repo getter
Hweinstock Apr 2, 2026
870e1ad
fix: include upstream repo in agent prompt and SOP
Hweinstock Apr 2, 2026
344abd2
fix: use absolute path for MCP harness in workflow
Hweinstock Apr 2, 2026
6ac69d3
fix: avoid format() with JSON braces, use separate step for MCP config
Hweinstock Apr 2, 2026
7c1e252
debug: verify harness file exists after build
Hweinstock Apr 2, 2026
3c840b4
fix: copy harness to /tmp before strands action checkout wipes dist/
Hweinstock Apr 2, 2026
9c73cee
fix: install harness external deps to /tmp/mcp-harness alongside the …
Hweinstock Apr 2, 2026
cfe4454
feat: save SVG screenshots as artifacts on every TUI test flow
Hweinstock Apr 2, 2026
1801a18
fix: remove contradictory text-only constraint, strengthen SVG screen…
Hweinstock Apr 2, 2026
73028e0
feat: simplify to 2 focused TUI test flows (help + add agent/status)
Hweinstock Apr 2, 2026
4300ce5
feat: rewrite flows to drive interactive TUI wizard instead of CLI flags
Hweinstock Apr 2, 2026
568afe3
fix: instruct agent to take screenshots before process exits
Hweinstock Apr 3, 2026
f83d625
feat: support ad-hoc test flows via /strands test <description>
Hweinstock Apr 3, 2026
7350b24
fix: make SVG screenshot requirement unambiguous with exact tool call…
Hweinstock Apr 3, 2026
0950297
style: format strands-command.yml
Hweinstock Apr 3, 2026
3da9f34
refactor: clean SOP/flows separation, rewrite SOP, use mapping for mo…
Hweinstock Apr 3, 2026
f2419b2
fix: use tui_read_screen text in PR comments, add verification sectio…
Hweinstock Apr 3, 2026
9ea6d4a
fix: allow deploy in test flows when explicitly requested
Hweinstock Apr 3, 2026
726573b
fix: pass target_repo as explicit param instead of overriding context…
Hweinstock Apr 3, 2026
101 changes: 101 additions & 0 deletions .github/agent-sops/task-tester.sop.md
@@ -0,0 +1,101 @@
# Task Tester SOP

## Role

You are a CLI and TUI tester for the AgentCore CLI. You verify both interactive TUI behavior and non-interactive CLI
commands. You drive the CLI using TUI harness tools and shell commands, then post results as PR comments.

You MUST NOT modify any code, create branches, or push commits. Your only output is test result comments.

## Tools

- **TUI harness** (MCP tools): `tui_launch`, `tui_send_keys`, `tui_action`, `tui_wait_for`, `tui_screenshot`,
`tui_read_screen`, `tui_close`, `tui_list_sessions` — for interactive TUI testing
- **`shell`** — for non-interactive CLI commands, setup (temp dirs, project scaffolding), and verification
- **GitHub tools** — for posting PR comments. Always use `aws/agentcore-cli` as the repository, not the fork.

## What to Test

Check the command text in the prompt:

- `Run all predefined test flows` → read and execute every flow from `.github/agent-sops/tui-test-flows.md`
- `Run this ad-hoc test flow: <description>` → design and execute a single flow matching the description

## General Rules

- The CLI is installed globally as `agentcore`
- Use `tui_launch` with `command: "agentcore"` for interactive commands. Use `shell` for non-interactive ones.
- Terminal dimensions: `cols: 100, rows: 24` for all TUI sessions
- Use `timeoutMs: 10000` minimum for all `tui_wait_for` and `tui_action` calls
- If a wait times out, retry once before declaring failure
- Always `tui_close` sessions when done, even on failure
- Run `mkdir -p /tmp/tui-screenshots` via `shell` as your very first action
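
Put together, a typical session follows this shape (a sketch in the tool-call style used below; exact parameter names may differ from the harness API):

```
tui_launch(command="agentcore", cols=100, rows=24)            # returns sessionId
tui_wait_for(sessionId=<id>, text="Usage:", timeoutMs=10000)  # retry once on timeout
tui_read_screen(sessionId=<id>)                               # capture text for the PR comment
tui_close(sessionId=<id>)                                     # always close, even on failure
```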

## Screenshot Rules

**NEVER save .txt files. ONLY save .svg files.**

Use this exact tool call pattern for every flow:

```
tui_screenshot(sessionId=<id>, format="svg", savePath="/tmp/tui-screenshots/<flow-name>.svg")
```

- `format` MUST be `"svg"`, NEVER `"text"`
- Take the screenshot WHILE the session is still alive (before the process exits)
- If a session has already exited, skip the screenshot — do NOT save a text file as a substitute

## Post Results

Post a single PR comment:

````markdown
## 🧪 TUI Test Results

**X/Y flows passed**

### ✅ Passed

- Flow name 1
- Flow name 2

### ❌ Failed

#### Flow name 3

**Expected:** what should have happened
**Actual:** what happened

<details>
<summary>Terminal output</summary>

```
(paste tui_read_screen text output here)
```

</details>
````

If all flows pass, omit the Failed section.

For failures, use `tui_read_screen` to capture the terminal text and paste it in the comment. SVG screenshots are
uploaded as workflow artifacts separately — do not try to embed them in the comment.

## Verification

After each flow completes, verify the side effects — not just the TUI output:

- If a project was created: use `shell` to check the directory exists and contains expected files (e.g.
`agentcore.json`)
- If a resource was added: use `shell` to check the config file was updated
- If a command produced output: verify the output matches expectations

Do not rely solely on what the TUI displays. Confirm the CLI actually did what it claimed.
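
As an illustration, a side-effect check for a create flow might look like the following shell snippet (the fixture setup stands in for what the wizard would have created; the file contents are assumptions, not the CLI's actual output):

```shell
# Simulate the directory the create wizard would leave behind (illustrative).
project_dir="$(mktemp -d)/TuiTest"
mkdir -p "$project_dir"
printf '{"name":"TuiTest"}\n' > "$project_dir/agentcore.json"

# Verification: the config file must exist and reference the project name.
[ -f "$project_dir/agentcore.json" ] || { echo "FAIL: agentcore.json missing"; exit 1; }
grep -q '"name":"TuiTest"' "$project_dir/agentcore.json" && echo "PASS: config present"
```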

## Forbidden Actions

- Do NOT modify, create, or delete source files
- Do NOT run git commands (add, commit, push)
- Do NOT create or update branches
- Do NOT approve or merge the pull request
- Do NOT deploy or create AWS resources unless the test flow explicitly requires it
27 changes: 27 additions & 0 deletions .github/agent-sops/tui-test-flows.md
@@ -0,0 +1,27 @@
# TUI Test Flows

---

## Flow: Help text lists all commands

1. Launch: `agentcore --help`
2. Wait for "Usage:" on screen
3. Take SVG screenshot immediately (before the process exits)
4. Verify these commands are visible: `create`, `deploy`, `invoke`, `status`, `add`, `remove`, `dev`, `logs`
5. Close session

---

## Flow: Create project with agent via TUI wizard

1. Create a temp directory via `shell`: `mktemp -d`
2. Launch: `agentcore create` with `cwd` set to the temp directory
3. Wait for "Project name" prompt, type `TuiTest`, press Enter
4. Wait for "Would you like to add an agent" — expect "Yes, add an agent" visible, press Enter
5. Wait for "Agent name" prompt, accept the default, press Enter
6. Wait for "Select agent type" — expect "Create new agent" visible, press Enter
7. Wait for "Language" step — expect "Python" visible, press Enter
8. Continue pressing Enter through remaining steps (Build, Protocol, Framework, Model) accepting defaults
9. At the "Confirm" step, take SVG screenshot, then press Enter
10. Wait for the process to exit or a success message
11. Close session
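
The wizard flow above, expressed in the harness tool-call style used in the SOP (a sketch; keystroke names and parameters are assumptions about the harness API):

```
tmp = shell("mktemp -d")
tui_launch(command="agentcore create", cwd=<tmp>, cols=100, rows=24)
tui_wait_for(sessionId=<id>, text="Project name", timeoutMs=10000)
tui_send_keys(sessionId=<id>, keys="TuiTest")
tui_send_keys(sessionId=<id>, keys="Enter")
tui_wait_for(sessionId=<id>, text="Would you like to add an agent", timeoutMs=10000)
tui_send_keys(sessionId=<id>, keys="Enter")      # accept "Yes, add an agent"
...                                              # repeat wait + Enter through remaining steps
tui_screenshot(sessionId=<id>, format="svg", savePath="/tmp/tui-screenshots/create-project.svg")
tui_send_keys(sessionId=<id>, keys="Enter")      # confirm
tui_close(sessionId=<id>)
```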
75 changes: 50 additions & 25 deletions .github/scripts/javascript/process-inputs.cjs
@@ -4,48 +4,48 @@

const fs = require('fs');

async function getIssueInfo(github, context, inputs) {
async function getIssueInfo(github, repo, inputs, eventName, payload) {
let issueId;

if (context.eventName === 'workflow_dispatch') {
if (eventName === 'workflow_dispatch') {
issueId = inputs.issue_id;
} else {
// Handle both issue comments and PR comments
issueId = (context.payload.issue?.number || context.payload.pull_request?.number)?.toString();
issueId = (payload.issue?.number || payload.pull_request?.number)?.toString();
}

const command =
context.eventName === 'workflow_dispatch'
eventName === 'workflow_dispatch'
? inputs.command
: context.payload.comment.body.match(/^\/strands\s*(.*)$/)?.[1]?.trim() || '';
: payload.comment.body.match(/^\/strands\s*(.*)$/)?.[1]?.trim() || '';

console.log(`Event: ${context.eventName}, Issue ID: ${issueId}, Command: "${command}"`);
console.log(`Event: ${eventName}, Issue ID: ${issueId}, Command: "${command}"`);

const issue = await github.rest.issues.get({
owner: context.repo.owner,
repo: context.repo.repo,
owner: repo.owner,
repo: repo.repo,
issue_number: issueId,
});

return { issueId, command, issue };
}

async function determineBranch(github, context, issueId, mode, isPullRequest) {
async function determineBranch(github, repo, issueId, mode, isPullRequest) {
let branchName = 'main';

if (mode === 'implementer' && !isPullRequest) {
branchName = `agent-tasks/${issueId}`;

const mainRef = await github.rest.git.getRef({
owner: context.repo.owner,
repo: context.repo.repo,
owner: repo.owner,
repo: repo.repo,
ref: 'heads/main',
});

try {
await github.rest.git.createRef({
owner: context.repo.owner,
repo: context.repo.repo,
owner: repo.owner,
repo: repo.repo,
ref: `refs/heads/${branchName}`,
sha: mainRef.data.object.sha,
});
@@ -59,8 +59,8 @@ async function determineBranch(github, context, issueId, mode, isPullRequest) {
}
} else if (isPullRequest) {
const pr = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
owner: repo.owner,
repo: repo.repo,
pull_number: issueId,
});
branchName = pr.data.head.ref;
@@ -69,7 +69,7 @@ async function determineBranch(github, context, issueId, mode, isPullRequest) {
return branchName;
}

function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) {
function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs, repo) {
const sessionId =
inputs.session_id ||
(mode === 'implementer' ? `${mode}-${branchName}`.replace(/[\/\\]/g, '-') : `${mode}-${issueId}`);
@@ -78,33 +78,57 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs)
implementer: '.github/agent-sops/task-implementer.sop.md',
reviewer: '.github/agent-sops/task-reviewer.sop.md',
refiner: '.github/agent-sops/task-refiner.sop.md',
tester: '.github/agent-sops/task-tester.sop.md',
};
const scriptFile = sopFiles[mode] || sopFiles.refiner;

const systemPrompt = fs.readFileSync(scriptFile, 'utf8');

let prompt = isPullRequest ? 'The pull request id is:' : 'The issue id is:';
prompt += `${issueId}\n${command}\nreview and continue`;
prompt += `${issueId}\n`;
prompt += `The repository is: ${repo.owner}/${repo.repo}\n`;

if (mode === 'tester') {
const flowDescription = command.replace(/^test\s*/, '').trim();
if (flowDescription) {
prompt += `Run this ad-hoc test flow: ${flowDescription}\n`;
} else {
prompt += `Run all predefined test flows from .github/agent-sops/tui-test-flows.md\n`;
}
} else {
prompt += `${command}\n`;
}
prompt += 'review and continue';

return { sessionId, systemPrompt, prompt };
}

module.exports = async (context, github, core, inputs) => {
try {
const { issueId, command, issue } = await getIssueInfo(github, context, inputs);
const repo = inputs.target_repo || { owner: context.repo.owner, repo: context.repo.repo };

const { issueId, command, issue } = await getIssueInfo(github, repo, inputs, context.eventName, context.payload);

const isPullRequest = !!issue.data.pull_request;
const mode = command.startsWith('review')
? 'reviewer'
: isPullRequest || command.startsWith('implement')
? 'implementer'
: 'refiner';

const COMMAND_MODES = { test: 'tester', review: 'reviewer', implement: 'implementer' };
const mode =
Object.entries(COMMAND_MODES).find(([prefix]) => command.startsWith(prefix))?.[1] ??
(isPullRequest ? 'implementer' : 'refiner');
console.log(`Is PR: ${isPullRequest}, Mode: ${mode}`);

const branchName = await determineBranch(github, context, issueId, mode, isPullRequest);
const branchName = await determineBranch(github, repo, issueId, mode, isPullRequest);
console.log(`Building prompts - mode: ${mode}, issue: ${issueId}, is PR: ${isPullRequest}`);

const { sessionId, systemPrompt, prompt } = buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs);
const { sessionId, systemPrompt, prompt } = buildPrompts(
mode,
issueId,
isPullRequest,
command,
branchName,
inputs,
repo
);

console.log(`Session ID: ${sessionId}`);
console.log(`Task prompt: "${prompt}"`);
@@ -113,6 +137,7 @@ module.exports = async (context, github, core, inputs) => {
core.setOutput('session_id', sessionId);
core.setOutput('system_prompt', systemPrompt);
core.setOutput('prompt', prompt);
core.setOutput('mode', mode);
} catch (error) {
const errorMsg = `Failed: ${error.message}`;
console.error(errorMsg);
44 changes: 43 additions & 1 deletion .github/workflows/strands-command.yml
Expand Up @@ -70,6 +70,9 @@ jobs:
fetch-depth: 0

- name: Add strands-running label
# continue-on-error: workflow_dispatch from a fork targets the fork repo
# where the upstream issue/PR doesn't exist, causing a 404.
continue-on-error: true
Contributor: why this?

Contributor (author): The runner attempts to add a label to the issue/PR, which doesn't work when running from a fork. Since this step is non-critical, I think it's safe to continue if we hit that error.

uses: actions/github-script@v8
with:
script: |
@@ -90,10 +93,40 @@ jobs:
const inputs = {
issue_id: '${{ inputs.issue_id }}',
command: '${{ inputs.command }}',
session_id: '${{ inputs.session_id }}'
session_id: '${{ inputs.session_id }}',
// When dispatched from a fork, target the upstream repo for API calls
...(context.eventName === 'workflow_dispatch' && context.repo.owner !== 'aws'
? { target_repo: { owner: 'aws', repo: 'agentcore-cli' } }
: {}),
};
await processInputs(context, github, core, inputs);

- name: Setup Node.js (tester mode)
if: steps.process-inputs.outputs.mode == 'tester'
uses: actions/setup-node@v6
with:
node-version: 20.x
cache: 'npm'

- name: Build CLI and TUI harness (tester mode)
if: steps.process-inputs.outputs.mode == 'tester'
run: |
npm ci
npm run build
npm run build:harness
npm install -g "$(npm pack | tail -1)"
mkdir -p /tmp/mcp-harness
cp dist/mcp-harness/index.mjs /tmp/mcp-harness/index.mjs
cd /tmp/mcp-harness && npm init -y && npm install node-pty @xterm/headless express
Contributor: low risk but can we pin the versions here?

Contributor (author): Are we worried about a breaking change from those packages? I would argue the maintenance burden of pinning and updating isn't worth the tradeoff for consistency.


- name: Set MCP harness path
if: steps.process-inputs.outputs.mode == 'tester'
id: mcp-config
run:
echo
"mcp_servers={\"mcpServers\":{\"tui-harness\":{\"command\":\"node\",\"args\":[\"/tmp/mcp-harness/index.mjs\"]}}}"
>> "$GITHUB_OUTPUT"

- name: Run Strands Agent
uses: ./.github/actions/strands-action
with:
@@ -102,6 +135,7 @@ jobs:
provider: 'bedrock'
model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0'
tools: 'strands_tools:shell,retrieve'
mcp_servers: ${{ steps.mcp-config.outputs.mcp_servers || '' }}
aws_role_arn: ${{ secrets.AWS_ROLE_ARN }}
aws_region: 'us-west-2'
pat_token: ${{ secrets.GITHUB_TOKEN }}
@@ -110,6 +144,14 @@ jobs:
S3_SESSION_BUCKET: ${{ secrets.AGENT_SESSIONS_BUCKET }}
BRANCH_NAME: ${{ steps.process-inputs.outputs.branch_name }}

- name: Upload TUI screenshots
if: always() && steps.process-inputs.outputs.mode == 'tester'
uses: actions/upload-artifact@v4
with:
name: tui-screenshots
path: /tmp/tui-screenshots/
if-no-files-found: ignore

- name: Remove strands-running label
if: always()
uses: actions/github-script@v8