diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json new file mode 100644 index 0000000..76efdbc --- /dev/null +++ b/.claude-plugin/marketplace.json @@ -0,0 +1,21 @@ +{ + "$schema": "https://anthropic.com/claude-code/marketplace.schema.json", + "name": "shipwright-marketplace", + "description": "RAI's curated Claude Code plugin marketplace", + "owner": { + "name": "Owais Mohamed", + "email": "owais.mohamed@relational.ai" + }, + "plugins": [ + { + "name": "dockyard", + "description": "Standalone skills and commands for engineering workflows — brownfield analysis, observability, debugging, codebase profiling", + "source": "./plugins/dockyard" + }, + { + "name": "shipwright", + "description": "Orchestrated agentic development framework — TDD-enforced bug fix workflows with triage, implementation, review, and validation agents", + "source": "./plugins/shipwright" + } + ] +} diff --git a/.claude/commands/doc-digest.md b/.claude/commands/doc-digest.md deleted file mode 100644 index b421206..0000000 --- a/.claude/commands/doc-digest.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -description: Walk through any document section by section for interactive review -argument-hint: ---- - -You are the Doc Digest agent. Your job is to walk the user through a document one section at a time for interactive review. - -## Setup - -1. Read the document at `$ARGUMENTS`. -2. Parse it into sections by splitting on `##` headings. If the document has no `##` headings, split on `#` headings instead. If it has no headings at all, treat the whole document as one section. -3. Count the total number of sections. - -## Presenting sections - -For each section: -1. Show the section number and total (e.g., "**Section 3 of 12: The Tier System**") -2. Present the full content of that section -3. Ask: *"Does this look right, or do you have feedback?"* -4. 
Wait for the user's response before moving on - -## Handling responses - -- **Approval** ("looks good", "fine", "next", etc.): Mark the section as approved and move to the next one. -- **Feedback**: Discuss the feedback, propose changes if appropriate, then ask if they want to move on or keep iterating on this section. -- **Question**: Clarify in plain language. If the user is still confused after one clarification, flag it as a doc quality problem — the doc should be clearer, not the reader smarter. -- **"Punt for later"** or **"skip"**: Mark the section as punted with a note, move to the next one. - -## Rules - -- Present ONE section at a time. Never dump the whole document. -- Don't be defensive about the document. If something is confusing, that's the doc's fault. -- Don't summarize sections — show them in full so the user sees exactly what's written. -- Track status for each section: approved, has feedback, or punted. - -## Finishing up - -After the last section, show a summary: -- How many sections approved -- Which sections have open feedback -- Which sections were punted for later - -Ask if they want to revisit any section or if the review is complete. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..5e21609 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,45 @@ +# CLAUDE.md + +This is **shipwright-marketplace** — RAI's curated Claude Code plugin marketplace. 
+ +## Structure + +``` +plugins/ +├── dockyard/ ← standalone skills, commands, agents +└── shipwright/ ← orchestrated workflows (depends on dockyard) +``` + +- Marketplace registry: `.claude-plugin/marketplace.json` +- Plugin manifests: `plugins//.claude-plugin/plugin.json` +- Smoke tests: `plugins/dockyard/tests/smoke/` (validates both plugins) + +## Plugins + +**Dockyard** — standalone tools that work without orchestration: +- Skills: `brownfield-analysis`, `code-review`, `review-and-submit`, `observability` +- Commands: `codebase-analyze`, `code-review`, `doc-digest`, `investigate`, `review-and-submit`, `feedback` +- Agents: `doc-digest` + +**Shipwright** — orchestrated bug-fix pipeline (requires dockyard): +- Commands: `shipwright`, `feedback` +- Internal agents: `triage`, `implementer`, `reviewer`, `validator` +- Internal skills: `tdd`, `systematic-debugging`, `verification-before-completion`, `anti-rationalization`, `decision-categorization` +- Dependency enforced via SessionStart hook (`plugins/shipwright/hooks/check-dockyard.sh`) + +## Testing + +```bash +bash plugins/dockyard/tests/smoke/run-all.sh +``` + +Runs 4 suites: structure, skills, agents, commands. Validates both plugins. 
+ +## Key Conventions + +- Commands use YAML frontmatter with `description:` and optional `argument-hint:` +- Skills live in `skills//SKILL.md` +- Agents are markdown files in `agents/` (dockyard) or `internal/agents/` (shipwright) +- Cross-plugin skill references use `dockyard:` notation +- `${CLAUDE_PLUGIN_ROOT}` works in hooks.json but NOT in markdown files +- Bump `version` in plugin.json for users to receive updates (cache is keyed on version) diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..7163a75 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,6 @@ +# Marketplace registry -- plugin additions require CODEOWNERS approval +/.claude-plugin/marketplace.json @omohamed-rai + +# Plugin-level ownership +/plugins/dockyard/ @omohamed-rai +/plugins/shipwright/ @omohamed-rai diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..7729710 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,43 @@ +# Contributing to Shipwright Marketplace + +Shipwright Marketplace is RAI's curated Claude Code plugin marketplace. It hosts two plugins: + +- **dockyard** -- standalone skills and commands (codebase analysis, debugging, doc digest, etc.) +- **shipwright** -- orchestrated workflows that compose dockyard skills into multi-agent pipelines (triage, implement, review, validate) + +## Which Plugin Should I Contribute To? + +| If your contribution is... | Add it to | +|---|---| +| A standalone skill or command (usable on its own) | `plugins/dockyard/` | +| An agent, workflow, or orchestration that composes skills | `plugins/shipwright/` | + +## How to Contribute a Skill or Agent + +1. **Fork and branch** from `main`. +2. **Add your skill** under the appropriate plugin directory following the required structure: + - Skills: `plugins//skills//SKILL.md` + - Agents: `plugins//agents/.md` + - Commands: `plugins//commands/.md` +3. **Follow the templates** in `templates/SKILL_TEMPLATE.md` or `templates/AGENT_TEMPLATE.md`. +4. 
**Bump the version** in the plugin's `plugin.json`. Claude Code caches plugins by version string, so users will not receive your update unless the version is incremented. +5. **Open a PR** against `main`. + +## Quality Gates + +Every PR must pass: + +- **Template compliance** -- skill/agent files follow the required format +- **Smoke test** -- the skill or agent can be invoked without errors +- **CODEOWNERS review** -- at least one approval from a designated code owner + +## Requesting a New Plugin + +New plugins require CODEOWNERS approval. To request one: + +1. Open an issue describing the plugin's purpose and scope. +2. A code owner will review and, if approved, add the plugin entry to `marketplace.json`. + +## Version Bumps + +Claude Code caches installed plugins by version string. If you change any skill, agent, command, or configuration in a plugin, you **must** bump the `version` field in that plugin's `plugin.json`. Without this, users will not see the update until they manually reinstall. diff --git a/README.md b/README.md index 74d9154..a34c3f3 100644 --- a/README.md +++ b/README.md @@ -1,80 +1,46 @@ -# Shipwright +# Shipwright Marketplace -Adaptive agentic development framework for engineering teams. Claude Code plugin. +RAI's curated Claude Code plugin marketplace. -## What It Does +## Plugins -Shipwright orchestrates a disciplined bug fix workflow: triage, TDD implementation, code review, and validation -- with codebase context and crash recovery. +| Plugin | Description | Commands | +|--------|-------------|----------| +| **dockyard** | Standalone skills & commands | `codebase-analyze`, `code-review`, `doc-digest`, `investigate`, `review-and-submit`, `feedback` | +| **shipwright** | Orchestrated bug-fix workflows (requires dockyard) | `shipwright`, `feedback` | ## Installation ```bash -# Add the RAI marketplace (one-time setup) -/plugin marketplace add RelationalAI/claude-plugins +# 1. 
Add the marketplace (one-time) +/plugin marketplace add https://github.com/RelationalAI/shipwright -# Install the beta plugin -/plugin install shipwright-beta@rai-claude-plugins -``` - -> **Stable release** coming soon. For now, Shipwright is available as a beta plugin. - -## Usage +# 2. Install dockyard (standalone skills and commands) +/plugin install dockyard@shipwright-marketplace -### Orchestrated Workflow +# 3. (Optional) Install shipwright (orchestrated workflows -- requires dockyard) +/plugin install shipwright@shipwright-marketplace +# 4. Restart your Claude session to activate ``` -/shipwright # Start -- Triage asks what you're working on -/shipwright fix null pointer on details click # Start with context -/shipwright fix bug RAI-9874 # Start from Jira ticket (requires Atlassian MCP) -``` - -**Flow:** Triage -> Implementer -> Reviewer -> Validator - -### Standalone Commands -| Command | What it does | -|---------|-------------| -| `/shipwright:codebase-analyze` | Full codebase analysis -- writes 7 profile docs to `docs/codebase-profile/` | -| `/shipwright:doc-digest ` | Walk through any document section by section | -| `/shipwright:debug` | Standalone systematic debugging (4-phase) | -| `/shipwright:report [description]` | File bugs, feedback, and suggestions on Shipwright | -| `/promote [comment]` | Cherry-pick skills/agents/commands from beta to stable | +## Team Setup -## Agents +Add to your project's `.claude/settings.json`: -| Agent | Role | -|-------|------| -| **Triage** | Reads codebase profiles, brainstorms with user, categorizes decisions, confirms tier | -| **Implementer** | Root cause investigation, TDD, systematic debugging, fix verification | -| **Reviewer** | Spec compliance review, code quality, approve/challenge/escalate | -| **Validator** | Full regression testing, fix confirmation | -| **Doc Digest** | Interactive document walkthrough | - -## Skills - -| Skill | Purpose | Source | -|-------|---------|--------| -| TDD | Test-driven 
development discipline | [Superpowers](https://github.com/obra/superpowers) | -| Verification | Evidence before claims | [Superpowers](https://github.com/obra/superpowers) | -| Systematic Debugging | 4-phase root cause investigation | [Superpowers](https://github.com/obra/superpowers) | -| Anti-rationalization | Resist shortcuts and "LGTM" | [Superpowers](https://github.com/obra/superpowers) | -| Decision Categorization | LOCKED/DEFERRED/DISCRETION decisions | [GSD](https://github.com/gsd-build/get-shit-done) | -| Brownfield Analysis | 7-doc codebase profiling | [GSD](https://github.com/gsd-build/get-shit-done) | +```json +{ + "extraKnownMarketplaces": [ + "https://github.com/RelationalAI/shipwright" + ] +} +``` -## Design Docs +## Contributing -- [Design doc](docs/design/shipwright-design-v1.md) -- the full design -- [Comparison](docs/research/shipwright-vs-others-v1.md) -- how Shipwright compares to Superpowers, GSD, and Beads -- [Ideas from Beads/GSD](docs/research/shipwright-ideas-from-beads-gsd-v1.md) -- ideas reviewed, adopted, and deferred -- [M1 Milestone](docs/milestones/m1-tier1-bugfix.md) -- Tier 1 bug fix scope and plan +See [CONTRIBUTING.md](CONTRIBUTING.md). ## Attribution -Shipwright builds on the work of: - -- **[Superpowers](https://github.com/obra/superpowers)** by Jesse Vincent -- TDD, verification-before-completion, systematic debugging, and anti-rationalization skills. Licensed under Apache 2.0. -- **[GSD (Get Shit Done)](https://github.com/gsd-build/get-shit-done)** by gsd-build -- Decision categorization and brownfield codebase analysis patterns. - -## License - -[TBD] +- **[Superpowers](https://github.com/obra/superpowers)** by Jesse Vincent -- TDD, verification, systematic debugging, anti-rationalization skills. Apache 2.0. +- **[GSD](https://github.com/gsd-build/get-shit-done)** by gsd-build -- Decision categorization and brownfield analysis patterns. 
diff --git a/commands/debug.md b/commands/debug.md deleted file mode 100644 index 34cfc64..0000000 --- a/commands/debug.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -description: Systematic 4-phase debugging — root cause, pattern analysis, hypothesis testing, fix -argument-hint: [optional bug description] ---- - -You are running the Shipwright Debug command. This provides systematic debugging without the full orchestrated workflow. - -## Behavior -- This is a standalone command -- no orchestrator, no recovery, no .workflow/ files -- No Triage, Reviewer, or Validator agents are involved -- The user drives the process directly - -## Setup - -Load these skills from the Shipwright plugin root before starting: -1. `internal/skills/systematic-debugging.md` -- the core 4-phase debugging process -2. `internal/skills/tdd.md` -- needed for Phase 4 (creating a failing test before fixing) - -Read both files and internalize their rules. They are non-negotiable during this session. - -## Getting the Bug Description - -If `$ARGUMENTS` is provided, use it as the initial bug description and proceed directly to Phase 1. - -If no arguments are provided, ask the user to describe the bug, error, or unexpected behavior they are seeing. Do not proceed until you have a clear description of the problem. - -## The Four Phases - -Follow the systematic-debugging skill strictly. Complete each phase before moving to the next. 
- -### Phase 1: Root Cause Investigation -- Read error messages carefully and completely -- Reproduce the issue consistently -- Check recent changes (git diff, recent commits) -- Gather evidence at component boundaries if multi-component -- Trace data flow backward from the error to the source -- Do NOT propose any fix during this phase - -### Phase 2: Pattern Analysis -- Find working examples of similar code in the codebase -- Compare working vs broken code -- Identify every difference, however small -- Understand dependencies and assumptions - -### Phase 3: Hypothesis and Testing -- State a single, specific hypothesis: "I think X is the root cause because Y" -- Test with the smallest possible change -- One variable at a time -- If hypothesis is wrong, form a new one -- do not stack fixes -- If 3+ hypotheses fail, stop and discuss architecture with the user - -### Phase 4: Implementation -- Write a failing test that reproduces the bug (use the TDD skill) -- Watch the test fail for the expected reason -- Implement a single fix targeting the root cause -- Verify the test passes and no other tests break -- If the fix does not work, return to Phase 1 with new information - -## Rules -- Never skip phases. Never propose fixes before completing Phase 1. -- One fix at a time. No "while I'm here" changes. -- If you catch yourself guessing, stop and return to Phase 1. -- After 3 failed fix attempts, stop and question the architecture with the user. diff --git a/commands/promote.md b/commands/promote.md deleted file mode 100644 index 537a94e..0000000 --- a/commands/promote.md +++ /dev/null @@ -1,76 +0,0 @@ ---- -description: Promote skills, agents, or commands from beta to stable -argument-hint: "[optional comment]" ---- - -# Promote to Stable - -You are running the Shipwright promote command. Your job is to help the user cherry-pick skills, agents, and commands from the beta plugin into the stable plugin. 
- -## Directory Layout - -- **Beta** (active development): `skills/`, `agents/`, `commands/`, `internal/skills/`, `internal/agents/` -- **Stable** (promoted): `stable/skills/`, `stable/agents/`, `stable/commands/`, `stable/internal/skills/`, `stable/internal/agents/` -- **Beta manifest**: `.claude-plugin/plugin.json` (name: `shipwright`) -- **Stable manifest**: `internal/plugin.stable.json` (name: `shipwright`) — not active until copied to `.claude-plugin/` during release - -## Workflow - -### Step 1: Show current state - -List what exists in beta vs stable for each category (skills, internal skills, agents, internal agents, commands). - -Use a table like: - -| Type | File | Beta | Stable | Changed? | -|------|------|------|--------|----------| - -- **Beta**: exists in `skills/`, `internal/skills/`, `agents/`, `internal/agents/`, or `commands/` -- **Stable**: exists in `stable/skills/`, `stable/internal/skills/`, `stable/agents/`, `stable/internal/agents/`, or `stable/commands/` -- **Changed?**: if the file exists in both, compare contents. Show "yes" if they differ, "no" if identical, "-" if only in one place. - -### Step 2: Ask what to promote - -Ask the user to pick which items to promote. They can: -- Name specific files (e.g., "tdd.md and systematic-debugging.md") -- Say "all skills" or "all agents" or "all commands" -- Say "everything" - -If `$ARGUMENTS` contains a comment, note it for the commit message later. - -### Step 3: Confirm - -Show exactly what will be copied: -``` -Will copy to stable: - skills/tdd.md -> stable/skills/tdd.md - skills/systematic-debugging.md -> stable/skills/systematic-debugging.md -``` - -Ask for confirmation before proceeding. 
- -### Step 4: Copy - -For each selected file, copy from beta to stable: -- `skills/` -> `stable/skills/` -- `internal/skills/` -> `stable/internal/skills/` -- `agents/` -> `stable/agents/` -- `internal/agents/` -> `stable/internal/agents/` -- `commands/` -> `stable/commands/` - -Create the `stable/` subdirectories if they don't exist. - -This is a straight file copy — the stable version is an exact snapshot of beta at promotion time. - -### Step 5: Summary - -Show what was promoted and remind the user: -- The changes are local — commit and push when ready -- To publish to the marketplace, update `plugins/shipwright/` in `RelationalAI/claude-plugins` - -## Rules - -1. **Never auto-promote** — always confirm with the user -2. **Never modify beta files** — promote is a one-way copy from beta to stable -3. **Overwrite is OK** — if a file already exists in stable, overwrite it (the user confirmed) -4. **No partial file promotion** — each file is promoted as a whole diff --git a/commands/report.md b/commands/report.md deleted file mode 100644 index 5b3e7cf..0000000 --- a/commands/report.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -description: File bugs, feedback, and suggestions on Shipwright -argument-hint: [optional description] ---- - -You are running the Shipwright Report command. Your job is to help the user file an issue on the Shipwright repository. - -## Rules -- This is a standalone command -- no orchestrator, no recovery, no .workflow/ files -- The target repo is always `RelationalAI/shipwright` -- Valid issue labels: `bug`, `feature`, `suggestion`, `feedback` -- Always confirm details with the user before creating the issue -- Use `gh issue create` to file the issue - -## Mode 1: No arguments provided - -If `$ARGUMENTS` is empty: - -1. 
Ask the user to pick an issue type: - - **bug** -- something is broken - - **feature** -- a new capability request - - **suggestion** -- an improvement to something that already works - - **feedback** -- general feedback about the project -2. Ask for a short title (one line) -3. Ask for a description (can be multiple lines, or "none" to skip) -4. Show the assembled issue for confirmation: - - Type (label) - - Title - - Body -5. On confirmation, create the issue (see "Creating the issue" below) - -## Mode 2: Freeform text provided - -If `$ARGUMENTS` contains text (e.g., `/shipwright:report clicking more details throws null pointer`): - -1. Read the freeform text and auto-detect the most likely issue type: - - Text mentioning errors, crashes, broken behavior, exceptions -> `bug` - - Text requesting new functionality or capabilities -> `feature` - - Text proposing improvements to existing behavior -> `suggestion` - - General comments or opinions -> `feedback` -2. Draft a title from the text (clean it up, capitalize properly, keep it concise) -3. Use the original text as the issue body, adding any structure that helps (e.g., "Steps to reproduce" for bugs) -4. Show the assembled issue and your detected type to the user for confirmation -5. Let the user correct the type, title, or body before proceeding -6. On confirmation, create the issue (see "Creating the issue" below) - -## Creating the issue - -Run the following command: - -``` -gh issue create --repo RelationalAI/shipwright --title "" --body "<body>" --label <type> -``` - -Where `<type>` is one of: `bug`, `feature`, `suggestion`, `feedback`. - -After creation, show the user the issue URL returned by `gh`. - -## Error handling - -- If `gh` is not authenticated or the command fails, show the error and suggest the user run `gh auth login` first. -- If the repo is not accessible, say so clearly -- do not retry silently. 
diff --git a/docs/milestones/m1-tier1-bugfix.md b/docs/milestones/m1-tier1-bugfix.md deleted file mode 100644 index b104f5b..0000000 --- a/docs/milestones/m1-tier1-bugfix.md +++ /dev/null @@ -1,237 +0,0 @@ -# Milestone 1 — Tier 1 Bug Fix Workflow - -**Date:** 2026-02-24 -**Status:** Design complete. Ready for implementation. -**Goal:** A working Tier 1 bug fix workflow, installable via plugin marketplace. - ---- - -## What M1 Delivers - -A developer installs Shipwright, runs `/shipwright`, describes a bug (or passes a Jira ticket), and gets a disciplined fix: root-cause investigation, TDD, verification, code review — with codebase context and crash recovery. Three standalone commands are also available outside the orchestrated workflow. - ---- - -## Skills (6) - -All skills are adapted from existing open-source projects with attribution. - -| Skill | Source | Source file | Adaptation | -|-------|--------|------------|------------| -| TDD | [Superpowers](https://github.com/obra/superpowers) by Jesse Vincent | `skills/test-driven-development/SKILL.md` | Rename, add attribution header | -| Verification-before-completion | [Superpowers](https://github.com/obra/superpowers) by Jesse Vincent | `skills/verification-before-completion/SKILL.md` | Rename, add attribution | -| Systematic debugging | [Superpowers](https://github.com/obra/superpowers) by Jesse Vincent | `skills/systematic-debugging/SKILL.md` | Rename, add attribution | -| Anti-rationalization (standalone) | [Superpowers](https://github.com/obra/superpowers) by Jesse Vincent | Extracted from TDD + debugging skills | New lightweight file for Reviewer/Validator; anti-rationalization remains embedded in TDD and debugging for Implementer | -| Decision categorization | [GSD](https://github.com/gsd-build/get-shit-done) by gsd-build | `commands/gsd/discuss-phase.md` | Adapt CONTEXT.md output to Shipwright's decision log (LOCKED/DEFERRED/DISCRETION) | -| Brownfield analysis | 
[GSD](https://github.com/gsd-build/get-shit-done) by gsd-build | `agents/gsd-codebase-mapper.md` + `commands/gsd/map-codebase.md` | Adapt to 7-doc structure in `docs/codebase-profile/`, add staleness check logic. See `docs/skills/brownfield/mvp-requirements.md` | - -### Anti-rationalization approach - -Anti-rationalization is handled two ways: -- **Embedded** in TDD and systematic debugging skills (as Superpowers does) — these include "Red Flags" tables and rationalization counters. The Implementer gets this automatically. -- **Standalone** lightweight version for Reviewer and Validator, who don't get TDD or debugging skills but still need to resist shortcuts like "looks fine" without verification. - -### Brownfield analysis artifacts - -Adapted from GSD's 7-doc structure. Produced in `docs/codebase-profile/`: - -| File | Content | -|------|---------| -| `STACK.md` | Languages, frameworks, dependencies, build tools | -| `INTEGRATIONS.md` | External APIs, databases, services, auth providers | -| `ARCHITECTURE.md` | Module structure, key abstractions, data flow | -| `STRUCTURE.md` | Directory layout, file locations, where to add new code | -| `CONVENTIONS.md` | Naming, patterns, file organization, code style | -| `TESTING.md` | Test framework, commands, file organization, mocking | -| `CONCERNS.md` | Known debt, fragile areas, security-sensitive zones | -| `.last-analyzed` | JSON tracking last full and fast-path commit SHAs | - -Staleness check runs on every `/shipwright` invocation. See `docs/skills/brownfield/mvp-requirements.md` for full requirements. - ---- - -## Agents (5) - -Each agent is a markdown prompt template loaded into a fresh, ephemeral subagent. The orchestrator spawns it, injects the prompt, the subagent does its work and dies. 
- -| Agent | Prompt file | Skills injected | Responsibilities | -|-------|------------|-----------------|------------------| -| **Triage** | `agents/triage.md` | Brownfield analysis, Decision categorization | Read codebase profiles (run analysis if stale), do deeper code-level analysis as needed, brainstorm with user, categorize decisions, confirm Tier 1 | -| **Implementer** | `agents/implementer.md` | TDD, Verification, Systematic debugging (anti-rationalization embedded) | Investigate root cause, write failing test, fix, verify | -| **Reviewer** | `agents/reviewer.md` | Anti-rationalization (standalone) | Review for spec compliance then code quality, approve/challenge (once)/escalate to human | -| **Validator** | `agents/validator.md` | Verification, Anti-rationalization (standalone) | Run full regression, confirm fix | -| **Doc Digest** | `agents/doc-digest.md` | — | Walk user through documents section by section | - -### Validator test discovery - -The Validator needs to know how to run tests. It uses a cascading lookup: - -1. **State file** — if Triage recorded the test command during analysis -2. **Brownfield `TESTING.md`** — documents test framework and commands -3. **CLAUDE.md** — many repos already specify test commands here -4. **Ask the user** — fallback if none of the above resolve it - ---- - -## Orchestrator - -Single file: `commands/shipwright.md`. Pure dispatcher — never does work itself. - -### Entry point parsing - -`/shipwright` accepts optional inline context: - -| Input | Example | Behavior | -|-------|---------|----------| -| No args | `/shipwright` | Triage asks the user what they're working on | -| Natural language | `/shipwright fix null pointer when user clicks more details` | Pass description to Triage as initial context | -| Jira ticket | `/shipwright fix bug RAI-9874` | Detect `[A-Z]+-\d+` pattern. Check if Atlassian MCP is available. If yes, fetch ticket details (title, description, acceptance criteria) and pass to Triage. 
If no, warn user that Atlassian MCP is not configured and ask them to paste the ticket details manually. | - -### Flow - -``` -/shipwright [optional context] - → Parse input (no args / natural language / Jira ticket) - → If Jira ticket: check Atlassian MCP availability → fetch or warn - → Read recovery files (state.json + CONTEXT.md) — resume if exists - → Spawn Triage (brownfield staleness check → brainstorm → confirm Tier 1) - → Spawn Implementer (root cause → failing test → fix → verify) - → Spawn Reviewer (spec compliance → code quality → approve/challenge/escalate) - → Spawn Validator (full regression → confirm fix) - → Done -``` - -### Orchestrator responsibilities - -- Never does work itself — only routes -- Reads recovery files before every subagent spawn -- Writes `state.json` + `CONTEXT.md` after every step -- Detects Jira ticket patterns and checks for Atlassian MCP before attempting fetch - ---- - -## Recovery (Layer 1 + 4) - -Two files in `.workflow/` (gitignored): - -### Layer 1 — State file - -`.workflow/state.json` (~500 tokens, updated every step): - -```json -{ - "session_id": "uuid", - "tier": 1, - "phase": "implement", - "step": "tdd-fix", - "status": "in_progress", - "active_agent": "implementer", - "feature_branch": "fix/RAI-9874", - "test_command": "npm test", - "input_context": "RAI-9874: null pointer on more details click", - "artifacts": ["docs/codebase-profile/"] -} -``` - -### Layer 4 — Rolling context - -`.workflow/CONTEXT.md` (capped at 200 lines, rewritten not appended): - -- What we're fixing and why -- Current phase and what just happened -- What's next -- Key decisions made so far -- Open blockers - -### Recovery behavior - -- Orchestrator reads both files before every subagent spawn -- If session exists and status is `in_progress`, resume from last step -- If no session exists, start fresh - ---- - -## Standalone Commands - -Three commands usable outside the orchestrated workflow. 
Stateless — no `.workflow/`, no recovery, no orchestrator. - -| Command | Agent | Skill | What it does | -|---------|-------|-------|-------------| -| `/shipwright:codebase-analyze` | Triage | Brownfield analysis | Full codebase analysis regardless of staleness. Writes 7 profile docs to `docs/codebase-profile/`. | -| `/shipwright:doc-digest` | Doc Digest | — | Walk through any document section by section for interactive review. | -| `/shipwright:debug` | Implementer | Systematic debugging | Standalone 4-phase debugging: root cause → pattern analysis → hypothesis testing → fix. No Triage/Reviewer/Validator. | -| `/shipwright:report` | — | — | File bugs, enhancements, suggestions, and feedback as GitHub issues on `RelationalAI/shipwright`. | - -### `/shipwright:report` behavior - -| Input | Example | Behavior | -|-------|---------|----------| -| No args | `/shipwright:report` | Ask user to pick type (bug, feature, suggestion, feedback), then collect title and description | -| Free-form | `/shipwright:report clicking more details throws a null pointer` | Decipher the type from the text (this is a bug), confirm with the user, then collect any missing details | - -Creates a GitHub issue on `RelationalAI/shipwright` using `gh issue create` with the appropriate label (bug, feature, suggestion, feedback). 
- ---- - -## Plugin Structure - -``` -plugins/shipwright/ - .claude-plugin/plugin.json - skills/ - tdd.md - verification-before-completion.md - systematic-debugging.md - anti-rationalization.md - decision-categorization.md - brownfield-analysis.md - agents/ - triage.md - implementer.md - reviewer.md - validator.md - doc-digest.md - commands/ - shipwright.md # main orchestrated workflow - shipwright-codebase-analyze.md # standalone brownfield analysis - shipwright-doc-digest.md # standalone doc walkthrough - shipwright-debug.md # standalone systematic debugging - shipwright-report.md # file issues on Shipwright repo -``` - -**Install (beta):** -```bash -/plugin marketplace add RelationalAI/claude-plugins -/plugin install shipwright-beta@rai-claude-plugins -# restart session -/shipwright -``` - ---- - -## Not in M1 - -| What | Why deferred | -|------|-------------| -| Tiers 2 and 3 | M1 is Tier 1 only | -| Planner, Doc Writer, Security Assessor, Cost Analyzer, Researcher, Requirements agents | Tier 2/3 agents | -| Recovery Layers 2 and 3 (decision log compaction, checkpoints) | Tier 1 sessions are short — basic recovery is sufficient | -| Wave-based parallel execution | No planning phase in Tier 1 | -| Goal-backward verification | Deferred to M2 | -| Cost reporting | Tier 1 sessions are short and cheap — add when Tier 2 brings longer sessions | -| Standalone security-review, security-threat-model, code-review, pr-review | Tier 2/3 assessment commands | - ---- - -## Success Criteria - -- A developer can install Shipwright from the RAI plugin marketplace (beta) -- `/shipwright` starts a Tier 1 workflow for a bug fix -- `/shipwright fix bug RAI-XXXX` fetches Jira ticket details (if Atlassian MCP available) -- Triage reads codebase profiles (runs analysis if stale) and confirms tier -- Implementer uses TDD and systematic debugging to fix the bug -- Reviewer reviews the fix -- Validator discovers the test command and runs full regression -- If context is lost mid-session, 
the orchestrator recovers from state.json + CONTEXT.md -- `/shipwright:codebase-analyze`, `/shipwright:doc-digest`, `/shipwright:debug`, and `/shipwright:report` work standalone -- `/shipwright:report` creates a GitHub issue on `RelationalAI/shipwright` with the correct label diff --git a/docs/plans/2026-02-27-marketplace-conversion-design.md b/docs/plans/2026-02-27-marketplace-conversion-design.md new file mode 100644 index 0000000..945a400 --- /dev/null +++ b/docs/plans/2026-02-27-marketplace-conversion-design.md @@ -0,0 +1,498 @@ +# Shipwright Marketplace — Conversion Design + +**Date:** 2026-02-27 +**Author:** Owais Mohamed +**Status:** Draft + +--- + +## 1. What We're Building + +Convert the existing Shipwright repo (`RelationalAI/shipwright`) into **shipwright-marketplace** — RAI's curated Claude Code plugin marketplace. This sits alongside the existing community marketplace (`RelationalAI/claude-plugins`), serving as the official, quality-gated tier. + +### Relationship to claude-plugins + +| Marketplace | Purpose | Governance | +|-------------|---------|------------| +| `claude-plugins` | Community/personal plugins from anyone at RAI | Open | +| `shipwright-marketplace` | Curated, supported plugins | CODEOWNERS-gated | + +Both coexist. Neither replaces the other. + + +--- + +## 2. Marketplace Architecture + +### Approach: Hybrid Monorepo + +All plugins live in a `plugins/` directory within this repo. The marketplace also supports external GitHub-sourced plugins for future additions. + +``` +shipwright-marketplace/ +├── .claude-plugin/ +│ └── marketplace.json +├── README.md +├── CONTRIBUTING.md +├── THIRD_PARTY_NOTICES +├── templates/ +│ ├── SKILL_TEMPLATE.md +│ └── AGENT_TEMPLATE.md +└── plugins/ + ├── dockyard/ + └── shipwright/ +``` + +### Why hybrid? 
+ +- Local plugins (`./plugins/X`) are simple to manage in a monorepo +- External plugins (`github: owner/repo`) can be added later for third-party contributions +- External GitHub sources support `ref` and `sha` fields for version pinning — a post-launch mandatory requirement (see Section 11.1). The hybrid approach makes this migration straightforward. +- This is the same pattern used by `claude-plugins` today + + +--- + +## 3. Plugins at Launch + +Two plugins ship at launch. Shipwright depends on Dockyard. + +``` +┌────────────────────────────┐ +│ shipwright-marketplace │ +│ │ +│ ┌────────────────┐ │ +│ │ dockyard │ │ ← standalone skills + commands +│ └────────────────┘ │ +│ ▲ │ +│ │ depends │ +│ ┌────────────────┐ │ +│ │ shipwright │ │ ← orchestration + internal agents +│ └────────────────┘ │ +│ │ +└────────────────────────────┘ +``` + +### Dockyard — Standalone Skills & Commands + +Everything that works independently, without orchestration. + +**Skills:** +- `brownfield-analysis` — 7-doc codebase profiling +- `code-review` — Structured 3-pass code review with confidence scoring +- `review-and-submit` — Local dev flow from "done coding" to "draft PR ready" +- `observability` — Query logs, spans, metrics for incident investigation + +**Commands:** +- `/dockyard:codebase-analyze` — Generate codebase profile docs +- `/dockyard:code-review` — Standalone structured 3-pass code review +- `/dockyard:doc-digest` — Interactive section-by-section document review +- `/dockyard:investigate` — Observability-driven live service investigation +- `/dockyard:review-and-submit` — Code review, auto-fix, and draft PR creation + +**Agents:** +- `doc-digest` — Document walkthrough agent + +**Commands (shared pattern):** +- `/dockyard:feedback` — File bugs/feedback on the dockyard plugin + +### Shipwright — Orchestrated Workflows + +Everything that requires the multi-agent orchestration pipeline. 
+ +**Commands:** +- `/shipwright:shipwright` — Main orchestrator (Triage → Implement → Review → Validate) +- `/shipwright:feedback` — File bugs/feedback on the shipwright plugin + +**Internal agents** (not user-facing): +- `triage` — Understand bug and codebase +- `implementer` — Root cause, TDD, fix, verify +- `reviewer` — 2-pass code review +- `validator` — Regression and fix verification + +**Internal skills** (used by agents, not directly by users): +- `tdd` +- `systematic-debugging` +- `verification-before-completion` +- `anti-rationalization` +- `decision-categorization` + + +--- + +## 4. Directory Structure + +### Full tree + +``` +shipwright-marketplace/ +├── .claude-plugin/ +│ └── marketplace.json +├── README.md +├── CONTRIBUTING.md +├── CODEOWNERS +├── THIRD_PARTY_NOTICES +├── docs/ +│ └── plans/ +│ └── 2026-02-27-marketplace-conversion-design.md +├── templates/ +│ ├── SKILL_TEMPLATE.md +│ └── AGENT_TEMPLATE.md +│ +├── plugins/ +│ ├── dockyard/ +│ │ ├── .claude-plugin/ +│ │ │ └── plugin.json +│ │ ├── skills/ +│ │ │ ├── brownfield-analysis/ +│ │ │ │ └── SKILL.md +│ │ │ ├── code-review/ +│ │ │ │ └── SKILL.md +│ │ │ ├── review-and-submit/ +│ │ │ │ └── SKILL.md +│ │ │ └── observability/ +│ │ │ ├── SKILL.md +│ │ │ └── RESEARCH.md +│ │ ├── commands/ +│ │ │ ├── codebase-analyze.md +│ │ │ ├── code-review.md +│ │ │ ├── doc-digest.md +│ │ │ ├── investigate.md +│ │ │ ├── review-and-submit.md +│ │ │ └── feedback.md +│ │ ├── agents/ +│ │ │ └── doc-digest.md +│ │ ├── docs/ +│ │ │ └── skills/ +│ │ │ └── brownfield/ +│ │ │ └── mvp-requirements.md +│ │ └── tests/ +│ │ ├── fixtures/ +│ │ │ └── sample-app/ +│ │ └── smoke/ +│ │ +│ └── shipwright/ +│ ├── .claude-plugin/ +│ │ └── plugin.json +│ ├── hooks/ +│ │ ├── hooks.json +│ │ └── check-dockyard.sh +│ ├── commands/ +│ │ ├── shipwright.md +│ │ └── feedback.md +│ ├── internal/ +│ │ ├── agents/ +│ │ │ ├── triage.md +│ │ │ ├── implementer.md +│ │ │ ├── reviewer.md +│ │ │ └── validator.md +│ │ └── skills/ +│ │ ├── tdd/SKILL.md 
+│ │ ├── systematic-debugging/SKILL.md +│ │ ├── verification-before-completion/SKILL.md +│ │ ├── anti-rationalization/SKILL.md +│ │ └── decision-categorization/SKILL.md +│ ├── docs/ +│ │ ├── design/ +│ │ │ └── shipwright-design-v1.md +│ │ ├── milestones/ +│ │ │ └── m1-verification-report.md +│ │ └── research/ +│ │ ├── shipwright-vs-others-v1.md +│ │ └── shipwright-ideas-from-beads-gsd-v1.md +│ └── tests/ +``` + + +--- + +## 5. Marketplace Registry + +### marketplace.json + +```json +{ + "$schema": "https://anthropic.com/claude-code/marketplace.schema.json", + "name": "shipwright-marketplace", + "description": "RAI's curated Claude Code plugin marketplace", + "owner": { + "name": "Owais Mohamed", + "email": "owais.mohamed@relational.ai" + }, + "plugins": [ + { + "name": "dockyard", + "description": "Standalone skills and commands for engineering workflows — brownfield analysis, observability, debugging, codebase profiling", + "source": "./plugins/dockyard" + }, + { + "name": "shipwright", + "description": "Orchestrated agentic development framework — TDD-enforced bug fix workflows with triage, implementation, review, and validation agents", + "source": "./plugins/shipwright" + } + ] +} +``` + +### Adding external plugins later + +Future third-party plugins use GitHub sources: + +```json +{ + "name": "raicode", + "description": "Julia development tools for the RAI engine", + "source": { + "source": "github", + "repo": "RelationalAI/raicode-plugin", + "ref": "main" + } +} +``` + + +--- + +## 6. 
Plugin Manifests + +### Dockyard — `plugins/dockyard/.claude-plugin/plugin.json` + +```json +{ + "name": "dockyard", + "description": "Standalone skills and commands for engineering workflows — brownfield analysis, observability, debugging, codebase profiling", + "version": "0.1.0", + "author": { + "name": "Owais Mohamed", + "email": "owais.mohamed@relational.ai" + }, + "homepage": "https://github.com/RelationalAI/shipwright", + "repository": "https://github.com/RelationalAI/shipwright", + "license": "TBD", + "keywords": ["skills", "debugging", "observability", "brownfield-analysis", "codebase-profiling"] +} +``` + +### Shipwright — `plugins/shipwright/.claude-plugin/plugin.json` + +```json +{ + "name": "shipwright", + "description": "Orchestrated agentic development framework — TDD-enforced bug fix workflows with triage, implementation, review, and validation agents", + "version": "0.1.0", + "author": { + "name": "Owais Mohamed", + "email": "owais.mohamed@relational.ai" + }, + "homepage": "https://github.com/RelationalAI/shipwright", + "repository": "https://github.com/RelationalAI/shipwright", + "license": "TBD", + "keywords": ["workflow", "tdd", "code-review", "orchestration", "agents"] +} +``` + + +--- + +## 7. Dependency Enforcement + +Shipwright requires Dockyard. This is enforced at session startup via a hook. + +### How it works + +Shipwright ships a `SessionStart` hook that reads `~/.claude/plugins/installed_plugins.json` and checks for Dockyard. If missing, the hook exits with code 2, which hard-blocks the session. + +### hooks/hooks.json + +```json +{ + "hooks": { + "SessionStart": [ + { + "matcher": "startup|resume|clear|compact", + "hooks": [ + { + "type": "command", + "command": "${CLAUDE_PLUGIN_ROOT}/hooks/check-dockyard.sh" + } + ] + } + ] + } +} +``` + +### hooks/check-dockyard.sh + +```bash +#!/usr/bin/env bash +REGISTRY="$HOME/.claude/plugins/installed_plugins.json" + +if [ ! 
-f "$REGISTRY" ]; then + echo "ERROR: Shipwright requires the 'dockyard' plugin." >&2 + echo "Install it with: /plugin install dockyard@shipwright-marketplace" >&2 + exit 2 +fi + +if ! grep -q '"dockyard@' "$REGISTRY" 2>/dev/null; then + echo "ERROR: Shipwright requires the 'dockyard' plugin." >&2 + echo "Install it with: /plugin install dockyard@shipwright-marketplace" >&2 + exit 2 +fi +``` + +### User experience + +**Missing Dockyard:** +``` +> claude +⚠ Plugin error (shipwright): Missing required dependency. + The 'dockyard' plugin is required by shipwright but is not installed. + Install it with: /plugin install dockyard@shipwright-marketplace + Then restart your session. +``` + +**Dockyard already installed:** +``` +> claude +✓ All plugins loaded. ← hook passes silently +``` + + +--- + +## 8. Installation + +### For users + +```bash +# Step 1: Add the marketplace +/plugin marketplace add https://github.com/RelationalAI/shipwright + +# Step 2: Install Dockyard (standalone skills — works on its own) +/plugin install dockyard@shipwright-marketplace + +# Step 3 (optional): Install Shipwright (orchestration — requires Dockyard) +/plugin install shipwright@shipwright-marketplace + +# Step 4: Restart Claude session +``` + +### For teams (auto-configure via repo settings) + +Add to the project's `.claude/settings.json`: + +```json +{ + "extraKnownMarketplaces": ["https://github.com/RelationalAI/shipwright"], + "enabledPlugins": [ + "dockyard@shipwright-marketplace", + "shipwright@shipwright-marketplace" + ] +} +``` + + +--- + +## 9. Governance + +### Plugin curation + +New plugins are added to `marketplace.json` only with CODEOWNERS approval. This controls what enters the curated marketplace. + +### Skill and agent contributions + +Anyone at RAI can submit skills or agents to existing plugins via PR. + +**Quality gates (all three required):** + +1. **Template compliance** — Must follow `templates/SKILL_TEMPLATE.md` or `templates/AGENT_TEMPLATE.md` +2. 
**Smoke test** — Must include a basic test or demo showing the skill works +3. **PR review** — Approved by CODEOWNERS + +### Where to contribute + +| Contribution type | Target plugin | +|-------------------|---------------| +| Standalone skill (no orchestration needed) | `dockyard` | +| Standalone command (no orchestration needed) | `dockyard` | +| Orchestration-related agent or skill | `shipwright` | +| New plugin | Requires CODEOWNERS approval in `marketplace.json` | + +Each plugin ships its own feedback command (`/dockyard:feedback` and `/shipwright:feedback`) for users to file bugs and suggestions against that specific plugin. + +### Open gap: Smoke test definition + +The smoke test quality gate is referenced but not yet defined. What constitutes an acceptable smoke test (manual demo script, automated test, recording, etc.) needs to be specified in `templates/` and `CONTRIBUTING.md` in a later development cycle. + + +--- + +## 10. Naming Alternatives Considered + +The chosen naming is **marketplace = `shipwright-marketplace`, plugin = `shipwright`**. Other options were evaluated: + +| Option | Install command | Pros | Cons | +|--------|----------------|------|------| +| **`shipwright-marketplace` + `shipwright`** | `/plugin install shipwright@shipwright-marketplace` | Clear branding. Plugin keeps its name. Marketplace is obviously a marketplace. | Slightly verbose install command. | +| `shipwright` + `shipwright-core` | `/plugin install shipwright-core@shipwright` | Short marketplace name. | Plugin loses its name. `core` is generic. | +| `shipwright` + `shipwright` | `/plugin install shipwright@shipwright` | Shortest possible. | Awkward duplication. Confusing to users. | + + +--- + +## 11. Post-Launch Requirements + +These are mandatory follow-ups, not optional improvements. 
+ +### 11.1 Separate repos for version pinning + +**Ticket:** [RAI-47775](https://relationalai.atlassian.net/browse/RAI-47775) + +**Problem:** Local plugins (`"source": "./plugins/X"`) always resolve to HEAD. There is no way to pin a local plugin to a specific version or tag. + +**Solution:** Extract plugins into standalone repos. Reference them in marketplace.json as GitHub sources with `ref` and `sha` fields for pinnable versioning. + +```json +{ + "name": "shipwright", + "source": { + "source": "github", + "repo": "RelationalAI/shipwright-plugin", + "ref": "v1.0.0", + "sha": "abc123..." + } +} +``` + +### 11.2 Author/maintainer → team ownership + +**Ticket:** [RAI-47776](https://relationalai.atlassian.net/browse/RAI-47776) + +**Problem:** Plugin manifests currently list an individual author. + +**Solution:** Move ownership to a group or team identifier. Specifics depend on how Claude Code evolves its author schema, or can be handled via CODEOWNERS at the repo level. + +### 11.3 Contribution templates and smoke test definition + +**Ticket:** [RAI-47777](https://relationalai.atlassian.net/browse/RAI-47777) + +**Problem:** The quality gates require template compliance and smoke tests, but neither is defined yet. Contributors won't know what format to follow or what constitutes an acceptable smoke test (manual demo script, automated test, recording, etc.). + +**Solution:** Create `templates/SKILL_TEMPLATE.md` and `templates/AGENT_TEMPLATE.md` with required sections, naming conventions, and examples. Define smoke test expectations in `CONTRIBUTING.md` with concrete examples of passing vs failing submissions. + + +--- + +## 12. Technical Constraints + +Discovered during research, recorded for reference. 
+ +| Constraint | Impact | Mitigation | +|------------|--------|------------| +| No native plugin dependency field in Claude Code | Can't declaratively say "shipwright needs dockyard" | SessionStart hook with exit code 2 | +| Local plugins can't be version-pinned | Users always get HEAD | Post-launch: move to separate repos with GitHub source refs | +| `${CLAUDE_PLUGIN_ROOT}` doesn't work in markdown files | Can't use plugin-relative paths in skill/command content | Only use in hooks.json and MCP config files | +| Marketplace schema URL returns 404 | Schema reference is decorative | Known upstream bug (anthropics/claude-code#9686) | +| Plugin cache keyed on version string | Must bump version in plugin.json for users to receive updates | Document in CONTRIBUTING.md | diff --git a/docs/plans/2026-02-27-marketplace-conversion-plan.md b/docs/plans/2026-02-27-marketplace-conversion-plan.md new file mode 100644 index 0000000..bddb025 --- /dev/null +++ b/docs/plans/2026-02-27-marketplace-conversion-plan.md @@ -0,0 +1,622 @@ +# Marketplace Conversion Implementation Plan + +> **Note:** This plan was executed during the marketplace conversion. It references external skills that may no longer be available in this repo. + +**Goal:** Restructure the shipwright repo from a single plugin into a marketplace containing two plugins (dockyard + shipwright). + +**Architecture:** Hybrid monorepo marketplace. All content moves under `plugins/dockyard/` or `plugins/shipwright/`. Root level holds marketplace infrastructure (marketplace.json, README, CONTRIBUTING, CODEOWNERS, templates). Existing `.claude-plugin/plugin.json` is replaced with `marketplace.json`. + +**Tech Stack:** Claude Code plugin system, bash (hooks), JSON (manifests), Markdown (skills/commands/agents) + +**Design doc:** `docs/plans/2026-02-27-marketplace-conversion-design.md` + +--- + +## File Move Map + +This is the complete mapping from current location to target location. Reference this for every task. 
+ +### Root level (marketplace infrastructure) + +| Current | Target | Action | +|---------|--------|--------| +| `.claude-plugin/plugin.json` | `.claude-plugin/marketplace.json` | Replace with marketplace.json | +| `README.md` | `README.md` | Rewrite for marketplace | +| `THIRD_PARTY_NOTICES` | `THIRD_PARTY_NOTICES` | Stays | +| `.gitignore` | `.gitignore` | Stays | +| N/A | `CONTRIBUTING.md` | Create new | +| N/A | `CODEOWNERS` | Create new | +| N/A | `templates/SKILL_TEMPLATE.md` | Create placeholder | +| N/A | `templates/AGENT_TEMPLATE.md` | Create placeholder | +| `docs/plans/*` | `docs/plans/*` | Stays at marketplace root | + +### Dockyard plugin (`plugins/dockyard/`) + +| Current | Target | +|---------|--------| +| `skills/brownfield-analysis/SKILL.md` | `plugins/dockyard/skills/brownfield-analysis/SKILL.md` | +| `skills/observability/SKILL.md` | `plugins/dockyard/skills/observability/SKILL.md` | +| `skills/observability/RESEARCH.md` | `plugins/dockyard/skills/observability/RESEARCH.md` | +| `commands/debug.md` | `plugins/dockyard/commands/debug.md` | +| `commands/codebase-analyze.md` | `plugins/dockyard/commands/codebase-analyze.md` | +| `commands/doc-digest.md` | `plugins/dockyard/commands/doc-digest.md` | +| `commands/investigate.md` | `plugins/dockyard/commands/investigate.md` | +| `agents/doc-digest.md` | `plugins/dockyard/agents/doc-digest.md` | +| `docs/skills/brownfield/mvp-requirements.md` | `plugins/dockyard/docs/skills/brownfield/mvp-requirements.md` | +| `tests/fixtures/sample-app/*` | `plugins/dockyard/tests/fixtures/sample-app/*` | +| `tests/smoke/*` | `plugins/dockyard/tests/smoke/*` | +| N/A | `plugins/dockyard/.claude-plugin/plugin.json` (create) | +| N/A | `plugins/dockyard/commands/feedback.md` (create) | + +### Shipwright plugin (`plugins/shipwright/`) + +| Current | Target | +|---------|--------| +| `commands/shipwright.md` | `plugins/shipwright/commands/shipwright.md` | +| `internal/agents/triage.md` | 
`plugins/shipwright/internal/agents/triage.md` | +| `internal/agents/implementer.md` | `plugins/shipwright/internal/agents/implementer.md` | +| `internal/agents/reviewer.md` | `plugins/shipwright/internal/agents/reviewer.md` | +| `internal/agents/validator.md` | `plugins/shipwright/internal/agents/validator.md` | +| `internal/skills/tdd/SKILL.md` | `plugins/shipwright/internal/skills/tdd/SKILL.md` | +| `internal/skills/systematic-debugging/SKILL.md` | `plugins/shipwright/internal/skills/systematic-debugging/SKILL.md` | +| `internal/skills/verification-before-completion/SKILL.md` | `plugins/shipwright/internal/skills/verification-before-completion/SKILL.md` | +| `internal/skills/anti-rationalization/SKILL.md` | `plugins/shipwright/internal/skills/anti-rationalization/SKILL.md` | +| `internal/skills/decision-categorization/SKILL.md` | `plugins/shipwright/internal/skills/decision-categorization/SKILL.md` | +| `docs/design/shipwright-design-v1.md` | `plugins/shipwright/docs/design/shipwright-design-v1.md` | +| `docs/milestones/m1-tier1-bugfix.md` | `plugins/shipwright/docs/milestones/m1-tier1-bugfix.md` | +| `docs/milestones/m1-verification-report.md` | `plugins/shipwright/docs/milestones/m1-verification-report.md` | +| `docs/research/shipwright-vs-others-v1.md` | `plugins/shipwright/docs/research/shipwright-vs-others-v1.md` | +| `docs/research/shipwright-ideas-from-beads-gsd-v1.md` | `plugins/shipwright/docs/research/shipwright-ideas-from-beads-gsd-v1.md` | +| N/A | `plugins/shipwright/.claude-plugin/plugin.json` (create) | +| N/A | `plugins/shipwright/hooks/hooks.json` (create) | +| N/A | `plugins/shipwright/hooks/check-dockyard.sh` (create) | +| N/A | `plugins/shipwright/commands/feedback.md` (create) | + +### Files to delete after move + +| File | Reason | +|------|--------| +| `commands/report.md` | Replaced by `/feedback` in each plugin | +| `commands/promote.md` | Removed from design | +| `internal/plugin.stable.json` | Replaced by marketplace.json | +| 
`.claude/commands/doc-digest.md` | Local override, no longer needed | +| `.claude/commands/investigate.md` | Local override, no longer needed | + +--- + +## Tasks + +### Task 1: Create marketplace directory scaffold + +**Files:** +- Create: `plugins/dockyard/.claude-plugin/` (directory) +- Create: `plugins/dockyard/skills/` (directory) +- Create: `plugins/dockyard/commands/` (directory) +- Create: `plugins/dockyard/agents/` (directory) +- Create: `plugins/dockyard/docs/` (directory) +- Create: `plugins/dockyard/tests/` (directory) +- Create: `plugins/shipwright/.claude-plugin/` (directory) +- Create: `plugins/shipwright/hooks/` (directory) +- Create: `plugins/shipwright/commands/` (directory) +- Create: `plugins/shipwright/internal/agents/` (directory) +- Create: `plugins/shipwright/internal/skills/` (directory) +- Create: `plugins/shipwright/docs/` (directory) +- Create: `plugins/shipwright/tests/` (directory) +- Create: `templates/` (directory) + +**Step 1: Create all directories** + +```bash +mkdir -p plugins/dockyard/{.claude-plugin,skills,commands,agents,docs,tests} +mkdir -p plugins/shipwright/{.claude-plugin,hooks,commands,docs,tests} +mkdir -p plugins/shipwright/internal/{agents,skills} +mkdir -p templates +``` + +**Step 2: Commit** + +```bash +# Add .gitkeep files so empty dirs are tracked +touch plugins/dockyard/tests/.gitkeep plugins/shipwright/tests/.gitkeep +git add plugins/ templates/ +git commit -m "scaffold: Create marketplace directory structure" +``` + +--- + +### Task 2: Create marketplace.json (replace plugin.json) + +**Files:** +- Delete: `.claude-plugin/plugin.json` +- Create: `.claude-plugin/marketplace.json` + +**Step 1: Create marketplace.json** + +Write `.claude-plugin/marketplace.json` with the exact content from design doc Section 5. 
+ +**Step 2: Delete old plugin.json** + +```bash +git rm .claude-plugin/plugin.json +``` + +**Step 3: Commit** + +```bash +git add .claude-plugin/marketplace.json +git commit -m "feat: Replace plugin.json with marketplace.json registry" +``` + +--- + +### Task 3: Create Dockyard plugin manifest + +**Files:** +- Create: `plugins/dockyard/.claude-plugin/plugin.json` + +**Step 1: Write plugin.json** + +Write `plugins/dockyard/.claude-plugin/plugin.json` with exact content from design doc Section 6 (Dockyard manifest). + +**Step 2: Commit** + +```bash +git add plugins/dockyard/.claude-plugin/plugin.json +git commit -m "feat: Add dockyard plugin manifest" +``` + +--- + +### Task 4: Create Shipwright plugin manifest + +**Files:** +- Create: `plugins/shipwright/.claude-plugin/plugin.json` + +**Step 1: Write plugin.json** + +Write `plugins/shipwright/.claude-plugin/plugin.json` with exact content from design doc Section 6 (Shipwright manifest). + +**Step 2: Commit** + +```bash +git add plugins/shipwright/.claude-plugin/plugin.json +git commit -m "feat: Add shipwright plugin manifest" +``` + +--- + +### Task 5: Move Dockyard skills + +**Files:** +- Move: `skills/brownfield-analysis/` -> `plugins/dockyard/skills/brownfield-analysis/` +- Move: `skills/observability/` -> `plugins/dockyard/skills/observability/` + +**Step 1: Move files using git mv** + +```bash +git mv skills/brownfield-analysis plugins/dockyard/skills/brownfield-analysis +git mv skills/observability plugins/dockyard/skills/observability +``` + +**Step 2: Verify files exist at new location** + +```bash +ls plugins/dockyard/skills/brownfield-analysis/SKILL.md +ls plugins/dockyard/skills/observability/SKILL.md +ls plugins/dockyard/skills/observability/RESEARCH.md +``` + +**Step 3: Commit** + +```bash +git commit -m "refactor: Move skills to dockyard plugin" +``` + +--- + +### Task 6: Move Dockyard commands + +**Files:** +- Move: `commands/debug.md` -> `plugins/dockyard/commands/debug.md` +- Move: 
`commands/codebase-analyze.md` -> `plugins/dockyard/commands/codebase-analyze.md` +- Move: `commands/doc-digest.md` -> `plugins/dockyard/commands/doc-digest.md` +- Move: `commands/investigate.md` -> `plugins/dockyard/commands/investigate.md` + +**Step 1: Move files using git mv** + +```bash +git mv commands/debug.md plugins/dockyard/commands/debug.md +git mv commands/codebase-analyze.md plugins/dockyard/commands/codebase-analyze.md +git mv commands/doc-digest.md plugins/dockyard/commands/doc-digest.md +git mv commands/investigate.md plugins/dockyard/commands/investigate.md +``` + +**Step 2: Commit** + +```bash +git commit -m "refactor: Move standalone commands to dockyard plugin" +``` + +--- + +### Task 7: Move Dockyard agent + +**Files:** +- Move: `agents/doc-digest.md` -> `plugins/dockyard/agents/doc-digest.md` + +**Step 1: Move file** + +```bash +git mv agents/doc-digest.md plugins/dockyard/agents/doc-digest.md +``` + +**Step 2: Commit** + +```bash +git commit -m "refactor: Move doc-digest agent to dockyard plugin" +``` + +--- + +### Task 8: Move Dockyard docs and tests + +**Files:** +- Move: `docs/skills/brownfield/` -> `plugins/dockyard/docs/skills/brownfield/` +- Move: `tests/fixtures/` -> `plugins/dockyard/tests/fixtures/` +- Move: `tests/smoke/` -> `plugins/dockyard/tests/smoke/` + +**Step 1: Move files** + +```bash +git mv docs/skills plugins/dockyard/docs/skills +git mv tests/fixtures plugins/dockyard/tests/fixtures +git mv tests/smoke plugins/dockyard/tests/smoke +``` + +**Step 2: Remove empty parent dirs and leftover files** + +```bash +# tests/smoke/run-all.sh references may need path updates -- check in Task 14 +rm -f plugins/dockyard/tests/.gitkeep +``` + +**Step 3: Commit** + +```bash +git add -A +git commit -m "refactor: Move docs and tests to dockyard plugin" +``` + +--- + +### Task 9: Move Shipwright command + +**Files:** +- Move: `commands/shipwright.md` -> `plugins/shipwright/commands/shipwright.md` + +**Step 1: Move file** + +```bash +git mv 
commands/shipwright.md plugins/shipwright/commands/shipwright.md +``` + +**Step 2: Commit** + +```bash +git commit -m "refactor: Move orchestrator command to shipwright plugin" +``` + +--- + +### Task 10: Move Shipwright internal agents and skills + +**Files:** +- Move: `internal/agents/` -> `plugins/shipwright/internal/agents/` +- Move: `internal/skills/` -> `plugins/shipwright/internal/skills/` + +**Step 1: Move files** + +```bash +git mv internal/agents/triage.md plugins/shipwright/internal/agents/triage.md +git mv internal/agents/implementer.md plugins/shipwright/internal/agents/implementer.md +git mv internal/agents/reviewer.md plugins/shipwright/internal/agents/reviewer.md +git mv internal/agents/validator.md plugins/shipwright/internal/agents/validator.md +git mv internal/skills/tdd plugins/shipwright/internal/skills/tdd +git mv internal/skills/systematic-debugging plugins/shipwright/internal/skills/systematic-debugging +git mv internal/skills/verification-before-completion plugins/shipwright/internal/skills/verification-before-completion +git mv internal/skills/anti-rationalization plugins/shipwright/internal/skills/anti-rationalization +git mv internal/skills/decision-categorization plugins/shipwright/internal/skills/decision-categorization +``` + +**Step 2: Commit** + +```bash +git add -A +git commit -m "refactor: Move internal agents and skills to shipwright plugin" +``` + +--- + +### Task 11: Move Shipwright docs + +**Files:** +- Move: `docs/design/` -> `plugins/shipwright/docs/design/` +- Move: `docs/milestones/` -> `plugins/shipwright/docs/milestones/` +- Move: `docs/research/` -> `plugins/shipwright/docs/research/` + +**Step 1: Move files** + +```bash +git mv docs/design plugins/shipwright/docs/design +git mv docs/milestones plugins/shipwright/docs/milestones +git mv docs/research plugins/shipwright/docs/research +``` + +**Step 2: Commit** + +```bash +git commit -m "refactor: Move design docs to shipwright plugin" +``` + +--- + +### Task 12: Delete 
obsolete files + +**Files:** +- Delete: `commands/report.md` (replaced by /feedback) +- Delete: `commands/promote.md` (removed from design) +- Delete: `internal/plugin.stable.json` (replaced by marketplace.json) +- Delete: `.claude/commands/doc-digest.md` (local override, no longer needed) +- Delete: `.claude/commands/investigate.md` (local override, no longer needed) + +**Step 1: Remove files** + +```bash +git rm commands/report.md +git rm commands/promote.md +git rm internal/plugin.stable.json +git rm .claude/commands/doc-digest.md +git rm .claude/commands/investigate.md +``` + +**Step 2: Clean up any empty directories** + +```bash +# Remove empty dirs left behind +rmdir commands/ 2>/dev/null || true +rmdir internal/ 2>/dev/null || true +rmdir agents/ 2>/dev/null || true +rmdir skills/ 2>/dev/null || true +rmdir tests/ 2>/dev/null || true +rmdir docs/skills/ 2>/dev/null || true +``` + +**Step 3: Commit** + +```bash +git add -A +git commit -m "cleanup: Remove obsolete files (report, promote, stable manifest, local overrides)" +``` + +--- + +### Task 13: Create dependency enforcement hook + +**Files:** +- Create: `plugins/shipwright/hooks/hooks.json` +- Create: `plugins/shipwright/hooks/check-dockyard.sh` + +**Step 1: Write hooks.json** + +Write `plugins/shipwright/hooks/hooks.json` with exact content from design doc Section 7. + +**Step 2: Write check-dockyard.sh** + +Write `plugins/shipwright/hooks/check-dockyard.sh` with exact content from design doc Section 7. 
+ +**Step 3: Make script executable** + +```bash +chmod +x plugins/shipwright/hooks/check-dockyard.sh +``` + +**Step 4: Commit** + +```bash +git add plugins/shipwright/hooks/ +git commit -m "feat: Add SessionStart hook to enforce dockyard dependency" +``` + +--- + +### Task 14: Create feedback commands + +**Files:** +- Create: `plugins/dockyard/commands/feedback.md` +- Create: `plugins/shipwright/commands/feedback.md` + +**Step 1: Write Dockyard feedback command** + +Write `plugins/dockyard/commands/feedback.md` -- a command that files bugs/feedback against the dockyard plugin on the `RelationalAI/shipwright` repo. Base it on the existing `report.md` pattern but target dockyard-specific labels. + +**Step 2: Write Shipwright feedback command** + +Write `plugins/shipwright/commands/feedback.md` -- same pattern but for the shipwright plugin with shipwright-specific labels. + +**Step 3: Commit** + +```bash +git add plugins/dockyard/commands/feedback.md plugins/shipwright/commands/feedback.md +git commit -m "feat: Add /feedback command to both plugins" +``` + +--- + +### Task 15: Create marketplace root files + +**Files:** +- Create: `CODEOWNERS` +- Create: `CONTRIBUTING.md` +- Create: `templates/SKILL_TEMPLATE.md` (placeholder) +- Create: `templates/AGENT_TEMPLATE.md` (placeholder) +- Rewrite: `README.md` + +**Step 1: Write CODEOWNERS** + +``` +# Marketplace registry -- plugin additions require CODEOWNERS approval +.claude-plugin/marketplace.json @RelationalAI/eng-ai-agents + +# Plugin-level ownership +plugins/dockyard/ @RelationalAI/eng-ai-agents +plugins/shipwright/ @RelationalAI/eng-ai-agents +``` + +Note: Verify the correct GitHub team name. Use `@omohamed-rai` as fallback if team does not exist. 
+ +**Step 2: Write CONTRIBUTING.md** + +Cover: +- How to add skills/agents to existing plugins via PR +- Which plugin to contribute to (standalone -> dockyard, orchestration -> shipwright) +- Quality gates (template compliance, smoke test, PR review) +- How to request a new plugin (CODEOWNERS approval required) +- Version bump requirement for cache invalidation + +**Step 3: Write template placeholders** + +Create `templates/SKILL_TEMPLATE.md` and `templates/AGENT_TEMPLATE.md` with basic structure placeholders and a note that full templates are tracked in RAI-47777. + +**Step 4: Rewrite README.md** + +Cover: +- What shipwright-marketplace is +- Available plugins (dockyard, shipwright) with descriptions +- Installation instructions (individual + team settings.json) +- Link to CONTRIBUTING.md + +**Step 5: Commit** + +```bash +git add CODEOWNERS CONTRIBUTING.md templates/ README.md +git commit -m "feat: Add marketplace root files (CODEOWNERS, CONTRIBUTING, README, templates)" +``` + +--- + +### Task 16: Update smoke tests for new structure + +**Files:** +- Modify: `plugins/dockyard/tests/smoke/validate-structure.sh` +- Modify: `plugins/dockyard/tests/smoke/validate-commands.sh` +- Modify: `plugins/dockyard/tests/smoke/validate-skills.sh` +- Modify: `plugins/dockyard/tests/smoke/validate-agents.sh` +- Modify: `plugins/dockyard/tests/smoke/run-all.sh` + +**Step 1: Read each smoke test to understand current paths** + +Read all 5 smoke test files. + +**Step 2: Update all path references** + +Update file path assertions in each test to reflect the new `plugins/dockyard/` and `plugins/shipwright/` structure. 
The smoke tests should validate: +- marketplace.json exists at `.claude-plugin/marketplace.json` +- Both plugin.json files exist +- All skills, commands, agents are at their new paths +- hooks/ directory exists for shipwright +- CODEOWNERS and CONTRIBUTING.md exist + +**Step 3: Run smoke tests** + +```bash +cd /Users/omohamed/code/shipwright +bash plugins/dockyard/tests/smoke/run-all.sh +``` + +Expected: All checks pass. + +**Step 4: Commit** + +```bash +git add plugins/dockyard/tests/smoke/ +git commit -m "test: Update smoke tests for marketplace directory structure" +``` + +--- + +### Task 17: Final verification + +**Step 1: Verify directory structure matches design** + +```bash +find plugins/ -type f | sort +``` + +Compare output against the design doc Section 4 tree. + +**Step 2: Verify no orphaned files at root** + +```bash +ls -la commands/ agents/ skills/ internal/ 2>/dev/null +``` + +Expected: None of these directories should exist. + +**Step 3: Verify marketplace.json is valid JSON** + +```bash +jq . .claude-plugin/marketplace.json +``` + +Expected: Valid JSON output with two plugins listed. + +**Step 4: Verify both plugin.json files are valid JSON** + +```bash +jq . plugins/dockyard/.claude-plugin/plugin.json +jq . plugins/shipwright/.claude-plugin/plugin.json +``` + +Expected: Valid JSON for both. + +**Step 5: Verify hook script is executable** + +```bash +test -x plugins/shipwright/hooks/check-dockyard.sh && echo "OK" || echo "FAIL" +``` + +Expected: OK + +**Step 6: Run smoke tests one final time** + +```bash +bash plugins/dockyard/tests/smoke/run-all.sh +``` + +Expected: All checks pass. 
+ +**Step 7: Commit any fixes if needed, then done** + +--- + +## Task Summary + +| Task | Description | Type | +|------|-------------|------| +| 1 | Create directory scaffold | Setup | +| 2 | Create marketplace.json | Create | +| 3 | Create Dockyard plugin manifest | Create | +| 4 | Create Shipwright plugin manifest | Create | +| 5 | Move Dockyard skills | Move | +| 6 | Move Dockyard commands | Move | +| 7 | Move Dockyard agent | Move | +| 8 | Move Dockyard docs and tests | Move | +| 9 | Move Shipwright command | Move | +| 10 | Move Shipwright internal agents and skills | Move | +| 11 | Move Shipwright docs | Move | +| 12 | Delete obsolete files | Cleanup | +| 13 | Create dependency enforcement hook | Create | +| 14 | Create feedback commands | Create | +| 15 | Create marketplace root files | Create | +| 16 | Update smoke tests | Test | +| 17 | Final verification | Verify | diff --git a/internal/plugin.stable.json b/internal/plugin.stable.json deleted file mode 100644 index 4192fee..0000000 --- a/internal/plugin.stable.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "name": "shipwright", - "description": "Adaptive agentic development framework for RAI engineering teams", - "version": "0.1.0", - "author": { - "name": "Owais Mohamed", - "email": "owais.mohamed@relational.ai" - }, - "homepage": "https://github.com/RelationalAI/shipwright", - "repository": "https://github.com/RelationalAI/shipwright", - "license": "TBD", - "keywords": ["workflow", "tdd", "debugging", "code-review", "brownfield-analysis"] -} diff --git a/package.json b/package.json deleted file mode 100644 index b6b9a02..0000000 --- a/package.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "name": "shipwright", - "version": "0.1.0", - "description": "Adaptive agentic development framework for engineering teams", - "private": true, - "repository": { - "type": "git", - "url": "https://github.com/RelationalAI/shipwright.git" - } -} diff --git a/plugins/dockyard/.claude-plugin/plugin.json 
b/plugins/dockyard/.claude-plugin/plugin.json new file mode 100644 index 0000000..f61fb43 --- /dev/null +++ b/plugins/dockyard/.claude-plugin/plugin.json @@ -0,0 +1,13 @@ +{ + "name": "dockyard", + "description": "Standalone skills and commands for engineering workflows — brownfield analysis, observability, debugging, codebase profiling", + "version": "0.1.0", + "author": { + "name": "Owais Mohamed", + "email": "owais.mohamed@relational.ai" + }, + "homepage": "https://github.com/RelationalAI/shipwright", + "repository": "https://github.com/RelationalAI/shipwright", + "license": "TBD", + "keywords": ["skills", "debugging", "observability", "brownfield-analysis", "codebase-profiling"] +} diff --git a/agents/doc-digest.md b/plugins/dockyard/agents/doc-digest.md similarity index 57% rename from agents/doc-digest.md rename to plugins/dockyard/agents/doc-digest.md index 1545537..b0fd971 100644 --- a/agents/doc-digest.md +++ b/plugins/dockyard/agents/doc-digest.md @@ -1,24 +1,13 @@ # Doc Digest Agent -You are the Doc Digest agent for Shipwright. Your job is to walk a user through a document one section at a time for interactive review. You help teams review design docs, specs, and plans by presenting content incrementally, collecting feedback, and tracking the status of each section. +You are the Doc Digest agent for Dockyard. Your job is to walk a user through a document one section at a time for interactive review. You help teams review design docs, specs, and plans by presenting content incrementally, collecting feedback, and tracking the status of each section. You are self-contained and do not require any external skills or injections. -## Invocation Modes - -This agent can be invoked in two ways: - -### Standalone (via slash command) -When invoked directly by a user, the document path is provided as the command argument. Read the document at the path the user provides. 
- -### Orchestrator-spawned -When spawned by the Shipwright orchestrator, the document path is provided in the context below under `DOCUMENT_PATH`. Read the document at that path. When the review is complete, return a structured summary to the orchestrator (see "Returning Results to Orchestrator" below). - ## Setup 1. Determine the document path: - - If `DOCUMENT_PATH` is set in your context, use that. - - Otherwise, use the path provided by the user as the command argument. + - Use the path provided by the user as the command argument. - If no path is available, ask the user for the document path before proceeding. 2. Read the document at that path. 3. Parse it into sections by splitting on `##` headings. If the document has no `##` headings, split on `#` headings instead. If it has no headings at all, treat the whole document as one section. @@ -49,7 +38,7 @@ For each section: - If the user asks to jump to a specific section by number or name, go there. - If the user asks to see the current status tracker at any point, show it. -## Finishing Up +## Output After the last section, show a summary: @@ -59,27 +48,3 @@ After the last section, show a summary: - Which sections were punted for later (list section numbers and titles with any notes) Ask if the user wants to revisit any section or if the review is complete. - -## Returning Results to Orchestrator - -When the review is complete and this agent was spawned by the orchestrator, return a structured result in the following format so the orchestrator can record the outcome: - -``` -DOC_DIGEST_RESULT: - document: <path to the document> - total_sections: <number> - approved: <number> - has_feedback: <number> - punted: <number> - sections: - - number: <n> - title: "<heading text>" - status: approved | has-feedback | punted - notes: "<brief note if feedback or punted, empty otherwise>" - ... 
- review_complete: true | false -``` - -If the user ends the review early (before all sections are covered), set `review_complete: false` and mark unreviewed sections as `pending` in the sections list. - -When running standalone (not orchestrator-spawned), skip the structured result block and simply end with the human-readable summary described in "Finishing Up." diff --git a/plugins/dockyard/commands/code-review.md b/plugins/dockyard/commands/code-review.md new file mode 100644 index 0000000..2ebceea --- /dev/null +++ b/plugins/dockyard/commands/code-review.md @@ -0,0 +1,37 @@ +--- +description: Run a structured 3-pass code review on committed changes +argument-hint: "[optional: base branch, default main]" +--- + +# Code Review + +You are running the Dockyard Code Review command. This is a standalone review — no fix loop, no PR creation. Use `/dockyard:review-and-submit` if you want the full flow. + +## Setup + +Read the code-review skill from `skills/code-review/SKILL.md` in the Dockyard plugin directory. That skill defines the 3-pass review process, confidence scoring, and output format. Follow all of its rules. + +## Execution + +### Determine the base branch and diff + +```bash +BASE_BRANCH="${ARGUMENTS:-main}" +git diff "$BASE_BRANCH"...HEAD +``` + +If the diff is empty, stop: "No committed changes found relative to $BASE_BRANCH." + +### Gather context + +- Read `CLAUDE.md` from the project root (if it exists) +- Scan `docs/plans/` for recently modified files related to this work +- Collect commit messages: `git log "$BASE_BRANCH"..HEAD --format="%s%n%b"` + +### Run the review + +Invoke the code-review skill with the diff and gathered context. Present findings to the developer in the format defined by the skill. + +### After the review + +Present the findings and overall recommendation (APPROVE / NEEDS_CHANGES). Do not auto-fix anything — this is review only. If the developer wants fixes and a PR, suggest `/dockyard:review-and-submit`. 
diff --git a/commands/codebase-analyze.md b/plugins/dockyard/commands/codebase-analyze.md similarity index 91% rename from commands/codebase-analyze.md rename to plugins/dockyard/commands/codebase-analyze.md index 2dbe8aa..8be6ddd 100644 --- a/commands/codebase-analyze.md +++ b/plugins/dockyard/commands/codebase-analyze.md @@ -9,7 +9,7 @@ You are running as a **standalone command**. There is no orchestrator, no recove ## Setup -Read the brownfield analysis skill from `skills/brownfield-analysis.md` in the Shipwright plugin directory. That skill defines the 7 output documents, their templates, the exploration strategy, and the forbidden-files list. Follow all of its rules. +Read the brownfield analysis skill from `skills/brownfield-analysis/SKILL.md` in the Dockyard plugin directory. That skill defines the 7 output documents, their templates, the exploration strategy, and the forbidden-files list. Follow all of its rules. ## Execution diff --git a/commands/doc-digest.md b/plugins/dockyard/commands/doc-digest.md similarity index 78% rename from commands/doc-digest.md rename to plugins/dockyard/commands/doc-digest.md index 6aa68f4..679cc7f 100644 --- a/commands/doc-digest.md +++ b/plugins/dockyard/commands/doc-digest.md @@ -3,11 +3,11 @@ description: Walk through any document section by section for interactive review argument-hint: <doc-path> --- -You are running the Shipwright Doc Digest command. Spawn the Doc Digest agent to walk the user through the document at `$ARGUMENTS`. +You are running the Dockyard Doc Digest command. Spawn the Doc Digest agent to walk the user through the document at `$ARGUMENTS`. 
## Behavior - This is a standalone command -- no orchestrator, no recovery, no .workflow/ files -- Load the Doc Digest agent prompt from `agents/doc-digest.md` (relative to the Shipwright plugin root) +- Load the Doc Digest agent prompt from `agents/doc-digest.md` (relative to the Dockyard plugin root) - Pass `$ARGUMENTS` as the document path - The agent handles everything from there: reading the document, splitting into sections, presenting them one at a time, collecting feedback, and producing a final summary diff --git a/plugins/dockyard/commands/feedback.md b/plugins/dockyard/commands/feedback.md new file mode 100644 index 0000000..31c18dd --- /dev/null +++ b/plugins/dockyard/commands/feedback.md @@ -0,0 +1,68 @@ +--- +description: File a bug report, feature request, or feedback for the Dockyard plugin +argument-hint: "[optional: describe your bug, feature request, or feedback]" +--- + +# Dockyard Feedback + +You help the user file issues against the Dockyard plugin on the `RelationalAI/shipwright` repository. + +## Detect Input + +Parse `$ARGUMENTS`: + +| Input | Action | +|-------|--------| +| Empty | Ask the user what type of issue (bug / feature / suggestion / general feedback) and gather a description | +| Freeform text | Auto-detect issue type from keywords and draft a title (see below) | + +### Auto-Detection Rules + +- **bug** -- text mentions errors, crashes, broken, failing, wrong, unexpected +- **feature** -- text mentions "add", "support", "would be nice", "wish", "enable" +- **suggestion** -- text mentions "improve", "better", "consider", "could" +- **feedback** -- anything that does not match the above + +## Draft the Issue + +1. Generate a concise title (under 80 characters). +2. 
Write a body using this template: + +``` +## Description +<user's description, cleaned up> + +## Type +<bug | feature | suggestion | feedback> + +## Plugin +dockyard + +## Steps to Reproduce (bugs only) +<if applicable> + +## Expected vs Actual (bugs only) +<if applicable> +``` + +3. Show the draft to the user and ask for confirmation or edits. + +## Create the Issue + +Once the user confirms, run: + +```bash +gh issue create \ + --repo RelationalAI/shipwright \ + --title "<title>" \ + --body "<body>" \ + --label "plugin:dockyard" +``` + +Print the resulting issue URL so the user can track it. + +## Rules + +- Always add the `plugin:dockyard` label. +- Never create an issue without user confirmation. +- If `gh` CLI is not authenticated, tell the user to run `gh auth login` and stop. diff --git a/plugins/dockyard/commands/investigate.md b/plugins/dockyard/commands/investigate.md new file mode 100644 index 0000000..20a01a5 --- /dev/null +++ b/plugins/dockyard/commands/investigate.md @@ -0,0 +1,80 @@ +--- +description: Investigate live service issues using observability data (logs, spans, metrics) +argument-hint: [incident key, transaction ID, error description, or service name] +--- + +You are running the Dockyard Investigate command. This provides observability-driven investigation of live service issues. + +## Behavior +- This is a standalone command -- no orchestrator, no recovery, no .workflow/ files +- The user drives the process directly +- Focus is on querying, correlating, and interpreting observability data -- not code fixes + +## Setup + +Load this skill from the Dockyard plugin root before starting: +1. `skills/observability/SKILL.md` -- domain context, tools, workflow, and runbooks + +Read the file and internalize its rules, domain context, and runbooks. They are non-negotiable during this session. 
+ +## Getting the Investigation Context + +If `$ARGUMENTS` is provided, determine the input type and follow the matching path: + +### Path A: JIRA Incident Key (e.g., NCDNTS-12795, PROJ-123) + +If the argument matches a JIRA issue key pattern (`[A-Z]+-\d+`): + +1. **Read the incident** — Use `mcp__claude_ai_Atlassian__getJiraIssue` with `expand: "renderedFields"` to get the full issue including comments. Use `relationalai.atlassian.net` as the cloudId. +2. **Read remote links** — Use `mcp__claude_ai_Atlassian__getJiraIssueRemoteIssueLinks` to get linked issues and external URLs. +3. **Extract investigation anchors** from the description, comments, and links: + - Transaction IDs (`rai_transaction_id`) + - Engine names (`rai_engine_name`) + - Customer accounts (`org_alias`, `account_alias`) + - Observe dashboard/query URLs + - Confluence page links (read these with `mcp__claude_ai_Atlassian__getConfluencePage` if they look relevant) + - Error messages, stack traces, symptoms +4. **Summarize the incident** to the user: what is reported, who reported it, current status, and what anchors you found. +5. **Proceed to Runbook Selection** using the extracted anchors and symptoms. + +### Path B: Direct Input (transaction ID, symptom, service name) + +Use directly as the starting point for Runbook Selection. + +### No Arguments + +Ask the user what they want to investigate. Useful starting points: +- A JIRA incident key (e.g., `NCDNTS-12795`) +- A `rai_transaction_id` +- An engine name +- A customer account (`org_alias` / `account_alias`) +- A symptom ("data isn't loading", "transaction failed", "engine crashed") + +## Runbook Selection + +Based on the input (or extracted from JIRA), select the matching runbook from the skill: + +| Input | Runbook | +|-------|---------| +| Transaction ID or "what happened with transaction X?" 
| **Runbook 1: Transaction Investigation** | +| Engine crash, abort, OOM, hang | **Runbook 2: Engine Failure Investigation** | +| Stale data, CDC issues, slow sync | **Runbook 3: Data Pipeline Investigation** | +| Issue spanning SQL layer and ERP, or Snowflake query ID | **Runbook 4: Cross-Service Correlation** | + +If unclear, start with **Runbook 1** — it routes to the others based on findings. + +## Execution + +Follow the selected runbook step by step: +1. Run the prescribed queries (in parallel where indicated) +2. Interpret results using the runbook's guidance +3. Route to another runbook if the findings point there +4. Present a summary with timeline, root cause (or current hypothesis), and next steps + +## Rules +- Always present the Observe link returned by query tools +- Convert durations from nanoseconds to human-readable units +- Extract key data points from large results -- do not dump raw output +- Limit to 5 queries before pausing to analyze, unless the user asks for more +- If a query returns no data, use retry strategies from the skill before giving up +- When investigating from a JIRA incident, tie findings back to the reported symptoms diff --git a/plugins/dockyard/commands/review-and-submit.md b/plugins/dockyard/commands/review-and-submit.md new file mode 100644 index 0000000..6b70a15 --- /dev/null +++ b/plugins/dockyard/commands/review-and-submit.md @@ -0,0 +1,20 @@ +--- +description: Review code, auto-fix findings, generate PR description, and create a draft PR +argument-hint: "[optional: base branch, default main]" +--- + +# Review and Submit + +You are running the Dockyard Review and Submit command. This is the local developer flow from "done coding" to "draft PR ready." + +## Setup + +Read the review-and-submit skill from `skills/review-and-submit/SKILL.md` in the Dockyard plugin directory. That skill defines the full 5-step flow: gather context, run code review, fix loop, generate PR description, and create draft PR. 
Follow all of its rules. + +## Execution + +Follow the skill exactly. There are no overrides or shortcuts. + +### Base branch + +If `$ARGUMENTS` is provided, treat it as the base branch name (e.g., "develop", "release/v2"). Otherwise default to "main". diff --git a/docs/plans/2026-02-26-code-review-design.md b/plugins/dockyard/docs/plans/2026-02-26-code-review-design.md similarity index 93% rename from docs/plans/2026-02-26-code-review-design.md rename to plugins/dockyard/docs/plans/2026-02-26-code-review-design.md index a98aeb8..a48078c 100644 --- a/docs/plans/2026-02-26-code-review-design.md +++ b/plugins/dockyard/docs/plans/2026-02-26-code-review-design.md @@ -1,5 +1,7 @@ # Code Review System Design +> **Note:** This is a pre-marketplace-split design document (2026-02-26). Paths, command names, and plugin references reflect the original single-plugin structure. The code-review skill now lives in `plugins/dockyard/skills/code-review/SKILL.md` and the submit flow is `/dockyard:review-and-submit`. + ## Problem Two compounding problems: @@ -13,7 +15,7 @@ These compound: bad PRs waste reviewer time, and there are more PRs than ever. W A two-layer AI-assisted review system: -- **Local submit flow** (new Shipwright code) — `/shipwright:submit` bundles review + fix + PR description generation + draft PR creation. Raises the floor on what gets submitted. +- **Local submit flow** (new Shipwright code) — `/dockyard:review-and-submit` bundles review + fix + PR description generation + draft PR creation. Raises the floor on what gets submitted. - **CI review** (upgrade to [`dev-review-agent`](https://github.com/RealEstateAU/dev-review-agent)) — Runs automatically when a PR is marked ready for review (and on subsequent pushes). Posts inline comments + summary with an APPROVE/NEEDS_CHANGES recommendation. Makes the human reviewer dramatically faster. 
Both layers consume a shared code-review skill (`skills/code-review/SKILL.md`) that lives in Shipwright as the single source of truth for review logic. The local flow reads it natively as a Claude Code skill. The CI flow pulls it in as a versioned dependency (see [Shared Skill Dependency](#shared-skill-dependency)).
@@ -27,7 +29,7 @@ The core review logic, consumed by both the local and CI flows. Lives in Shipwri
 
 **How each layer consumes it:**
 
 - **Local (submit flow):** Claude Code reads the skill natively — it's a standard Shipwright skill file.
-- **CI (dev-review-agent):** The agent depends on Shipwright via a git-based npm dependency pinned to a specific version tag. A build-time script reads the skill markdown from `node_modules/shipwright/skills/code-review/SKILL.md` and embeds it as a string constant in the bundle. The `InstructionsBuilder` injects this content into the system prompt alongside CI-specific framing (tool instructions, comment formatting). See [Shared Skill Dependency](#shared-skill-dependency) for details.
+- **CI (dev-review-agent):** The agent depends on Shipwright via a git-based npm dependency pinned to a specific version tag. A build-time script reads the skill markdown from `node_modules/shipwright/plugins/dockyard/skills/code-review/SKILL.md` and embeds it as a string constant in the bundle. The `InstructionsBuilder` injects this content into the system prompt alongside CI-specific framing (tool instructions, comment formatting). See [Shared Skill Dependency](#shared-skill-dependency) for details.
**Review focus areas:**
@@ -153,7 +155,7 @@ The existing `dev-review-agent` is a TypeScript/LangChain GitHub Action that alr
 { "dependencies": { "shipwright": "github:RelationalAI/shipwright#v1.2.3" } }
 ```
 
-A build-time script reads `node_modules/shipwright/skills/code-review/SKILL.md` and generates a TypeScript constant:
+A build-time script reads `node_modules/shipwright/plugins/dockyard/skills/code-review/SKILL.md` and generates a TypeScript constant:
 
 ```typescript
 // src/generated/review-skill.ts (generated, not hand-edited)
@@ -259,7 +261,7 @@
 skills/code-review/SKILL.md
 │ (read natively) │ (git-based npm dep, pinned version)
 ▼ ▼
 Local (developer): CI (automated):
-/shipwright:submit dev-review-agent (GitHub Action)
+/dockyard:review-and-submit dev-review-agent (GitHub Action)
 │ │
 ├── code review (Opus) ├── code review (Sonnet)
 │ skill content + local framing │ skill content + CI framing
@@ -274,7 +276,7 @@ Local (developer): CI (automated):
 
 ## PR Lifecycle
 
 ```
-Developer runs /shipwright:submit
+Developer runs /dockyard:review-and-submit
 → Code reviewed locally (Opus), blockers fixed
 → PR description generated with rationale
 → Draft PR created
@@ -334,4 +336,4 @@ New files in the Shipwright repo.
 - **Notifications** — Results live on the PR only.
 - **Monorepo support** — No special handling for multi-package repos.
 - **Non-code file review** — Docs, config, IaC changes are out of scope.
-- **Reviewer-invoked local command** (`/shipwright:pr-review`) — Cut from v1. Reviewers use the CI bot's findings on the PR.
+- **Reviewer-invoked local command** (`/dockyard:pr-review`) — Cut from v1. Reviewers use the CI bot's findings on the PR.
diff --git a/docs/plans/2026-02-26-code-review-implementation.md b/plugins/dockyard/docs/plans/2026-02-26-code-review-implementation.md
similarity index 99%
rename from docs/plans/2026-02-26-code-review-implementation.md
rename to plugins/dockyard/docs/plans/2026-02-26-code-review-implementation.md
index c2b6cd7..e3ca157 100644
--- a/docs/plans/2026-02-26-code-review-implementation.md
+++ b/plugins/dockyard/docs/plans/2026-02-26-code-review-implementation.md
@@ -1,6 +1,6 @@
 # Code Review System Implementation Plan
 
-> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+> **Note:** This is a pre-marketplace-split implementation plan (2026-02-26). Paths and references reflect the original single-plugin structure. The code-review skill now lives in `plugins/dockyard/skills/code-review/SKILL.md`.
 
 **Goal:** Implement the two-layer AI-assisted code review system (local submit + CI automation) defined in `docs/plans/2026-02-26-code-review-design.md`.
 
@@ -269,7 +269,7 @@ git commit -m "feat: add code-review skill — three-pass review with confidence
 
 ### Task 3: Add root package.json for npm installability
 
-The dev-review-agent (Phase 2) will depend on Shipwright as a git-based npm dependency. For `npm install github:RelationalAI/shipwright#<tag>` to work and for the skill file to appear at `node_modules/shipwright/skills/code-review/SKILL.md`, Shipwright needs a root `package.json`.
+The dev-review-agent (Phase 2) will depend on Shipwright as a git-based npm dependency. For `npm install github:RelationalAI/shipwright#<tag>` to work and for the skill file to appear at `node_modules/shipwright/plugins/dockyard/skills/code-review/SKILL.md`, Shipwright needs a root `package.json`.
 
 **Files:**
 - Create: `package.json`
@@ -367,7 +367,7 @@ In `package.json`, add to `dependencies`:
 
 Run: `npm install`
 
-Verify: `ls node_modules/shipwright/skills/code-review/SKILL.md` — should exist.
+Verify: `ls node_modules/shipwright/plugins/dockyard/skills/code-review/SKILL.md` — should exist.
**Step 4: Create the generate-skill script**
@@ -379,7 +379,7 @@ import * as path from "node:path";
 
 const SKILL_PATH = path.resolve(
   __dirname,
-  "../node_modules/shipwright/skills/code-review/SKILL.md",
+  "../node_modules/shipwright/plugins/dockyard/skills/code-review/SKILL.md",
 );
 const OUTPUT_PATH = path.resolve(
   __dirname,
@@ -1877,7 +1877,7 @@ Fixes applied. Updated review: [updated findings, if any...]
 
 Options:
-1. Fix more manually and re-run /shipwright:submit
+1. Fix more manually and re-run /dockyard:review-and-submit
 2. Proceed to PR creation (remaining findings will be noted in the PR description)
 ```
diff --git a/docs/skills/brownfield/mvp-requirements.md b/plugins/dockyard/docs/skills/brownfield/mvp-requirements.md
similarity index 97%
rename from docs/skills/brownfield/mvp-requirements.md
rename to plugins/dockyard/docs/skills/brownfield/mvp-requirements.md
index 8e72715..d77b725 100644
--- a/docs/skills/brownfield/mvp-requirements.md
+++ b/plugins/dockyard/docs/skills/brownfield/mvp-requirements.md
@@ -45,7 +45,7 @@ Each file targets one aspect of the codebase. CLAUDE.md references the directory
 - Triggered by: fast-path threshold exceeded (10+ commits since last full), or manual re-run
 
 ### 4. Manual re-run
-- `/shipwright:codebase-analyze` runs a full analysis regardless of staleness
+- `/dockyard:codebase-analyze` runs a full analysis regardless of staleness
 - Uses the existing standalone assessment command from the design doc
 
 ### Tracking file
These profiles give every Shipwright agent baseline context about the repository they are working in. +Analyze an existing codebase and produce focused profile documents covering technology stack, architecture, conventions, and concerns. These profiles give every agent baseline context about the repository they are working in. All profile documents live in `docs/codebase-profile/` and are committed to git. They are human-readable reference material useful for both agents and human onboarding. @@ -69,7 +69,7 @@ Fast-path is cheap but can drift over many incremental runs. The 10-commit thres ### 3. Full Analysis -Triggered when 10 or more commits have accumulated since the last full analysis, when `.last-analyzed` is missing, or on manual re-run via `/shipwright:codebase-analyze`. +Triggered when 10 or more commits have accumulated since the last full analysis, when `.last-analyzed` is missing, or on manual re-run via `/dockyard:codebase-analyze`. - Analyze the entire repository across all 7 documents - Rewrite all profile files completely @@ -77,7 +77,7 @@ Triggered when 10 or more commits have accumulated since the last full analysis, ### Manual Re-Run -`/shipwright:codebase-analyze` forces a full analysis regardless of staleness state. +`/dockyard:codebase-analyze` forces a full analysis regardless of staleness state. 
--- diff --git a/skills/code-review/SKILL.md b/plugins/dockyard/skills/code-review/SKILL.md similarity index 100% rename from skills/code-review/SKILL.md rename to plugins/dockyard/skills/code-review/SKILL.md diff --git a/plugins/dockyard/skills/observability/RESEARCH.md b/plugins/dockyard/skills/observability/RESEARCH.md new file mode 100644 index 0000000..78a2aee --- /dev/null +++ b/plugins/dockyard/skills/observability/RESEARCH.md @@ -0,0 +1,488 @@ +# Observability Skill — Research Notes + +This document captures all research gathered from Confluence documentation, Observe knowledge graph exploration, and live querying during the design of the observability skill. It serves as the source of truth for playbook design. + +--- + +## 1. Transaction Terminal States + +Transaction status is NOT determined by `maxlevel`. The ERP sets terminal states explicitly: + +- **COMPLETED** — transaction succeeded +- **ABORTED** — transaction failed, with a specific abort reason + +`maxlevel` only indicates the highest severity log event within the transaction (info, warning, error). A transaction can have `maxlevel = error` but still COMPLETE, or `maxlevel = info` and be ABORTED (e.g., engine failed before logs were emitted). + +### Abort Reasons + +- `engine failed` — ERP didn't receive heartbeat for 20 minutes +- `engine is in DELETING state` — user deleted engine during transaction +- `engine is in SUSPENDING state` — engine was suspended during transaction +- `engine is in UPGRADING state` — engine was being upgraded +- `internal server error` — gracefully handled error (e.g., stack overflow) + +--- + +## 2. 
Transaction Types + +There are three distinct transaction contexts: + +### User-Initiated Transactions +- Triggered via `api.exec()` or `api.exec_into()` SQL procedures +- Run on user-created named engines +- Flow: SQL procedure → SPCS Control Plane → rai-server engine → results + +### CDC Transactions +- Background data sync triggered by `process_batches` task (runs every 1 minute) +- Always run on `CDC_MANAGED_ENGINE` (a single app-managed engine) +- Flow: Source SF Table → SF Stream → Data Stream Task (1min) → CSV to Stage → process_batches → CDC_MANAGED_ENGINE → RAI Database +- One transaction per target DB at a time; next queued item starts immediately after completion +- Generated Rel transactions avoid stdlib dependencies + +### Graph Index Transactions +- Automated resource management triggered by `use_index` (called internally by PyRel) +- Flow: use_index (sql-lib) → prepareIndex (ERP) → transaction execution +- Important: NO end-to-end trace propagation between sql-lib and ERP due to Snowflake constraints +- Must manually correlate via `pyrel_program_id`, `sf.query_id`/`request_id`, or hashed DB name + +--- + +## 3. Six Engine Failure Patterns + +From the Engine Oncall Runbook (Confluence page 806748161). 
+ +### Pattern A: Engine Crash (Segfault, Abort, Stack Overflow) + +**Signals:** +- "Engine last termination reason" card: constant value of 1 with label `Failed` or `Done` +- "Engine container restarts" card: shows restart via `spcs.container.restarts.total` +- Log messages containing `"segmentation fault"` at error level +- Core dumps generated on segfaults + +**Escalation:** +- Julia runtime segfaults → Julia team +- Storage/network stack segfaults → Storage team +- Stack overflows in metadata layer → Backend team + +### Pattern B: Out of Memory (OOM) + +**Signals:** +- "Engine last termination reason" card: constant value of 1 with label `FailedWithOOM` +- "Engine container restarts" card: shows restart +- Jemalloc profile dumps near OOM time: + - `"[Jemalloc] absolute profile"` — total allocations per function + - `"[Jemalloc] relative profile"` — delta between successive dumps +- If no Jemalloc dump: use CPU profile + +**Escalation:** Determined by Jemalloc/CPU profile analysis + +### Pattern C: Brownout (Interactive Thread Blocking) + +**Definition:** Engine's interactive threads blocked or executing non-yielding task. Engine can't accept new transactions, progress existing ones, or emit heartbeats. 
+ +**Signals:** +- "Server heartbeats per second" card: bars drop below 1 for extended period +- "ERP: Transaction heartbeats received" card: missing bars (when brownout > 30s) +- "Julia GC time" card: high values → long GC cycles +- "Julia compilation time" card: high values → compilation on interactive threads + +**Special case — PyRel XS engine brownouts:** +- Continuous brownout of 20+ minutes on XS engines named `pyrel_test_*` in spcs-int +- Engine stops emitting metrics entirely +- OTEL collector metrics show: increased blocked threads, increased CPU wait time, increased pending disk operations, increased close_wait connections +- When engine recovers, logs `"heartbeat was lost for XXXX seconds"` +- Tracked in NCDNTS-4522, RAI-28970 + +**Escalation:** +- Julia GC/compilation → Julia team +- External function calls (Solver/HiGHS) → respective library team +- All other → ERP team + +### Pattern D: Long Transaction Heartbeat Requests to the ERP + +**Signals:** +- "Transaction logs overview" card: thin purple vertical lines stop +- Trace span `bot_keepalive_write_to_kvs`: abnormally long duration (can take up to 1 hour, tracked in RAI-29423) +- When a periodic task hasn't finished within its scheduled period, no subsequent task runs — one slow heartbeat blocks all future heartbeats + +**Additional checks:** +- Non-transaction-specific logs from interactive tasks with long durations (project on `thread_id` attribute) +- "Inflight/queued transactions" card for concurrent transactions +- CPU profile from "Engine CPU continuous profiling" section + +**Escalation:** ERP team + +### Pattern E: Engine Lifecycle Events (False Positives) + +**Signals:** +- "Service lifecycle events" or "ERP-initiated lifecycle events" cards show deletion/creation/suspension/upgrade +- "Engine uptime" card: metric drops steeply then slowly recovers +- Newer ERP versions use specific abort reasons; older versions still report `engine failed` + +**Action:** Reach out to account owners to 
upgrade native app + +### Pattern F: Scheduled Snowflake Maintenance + +**Signals:** +- "Engine container status" card: "running" line drops and comes back up +- Timing: Monday-Thursday, 11 PM to 5 AM local time in the deployment region +- Gauge host metrics: `spcs.container.state.running` drops, `spcs.container.state.pending.reason` increases +- Two lines for "running" state indicate container migration + +**Action:** No escalation needed; announce in `#team-prod-snowflake-integration`. Linked repairs: SFRAI-139, RAI-29329. + +--- + +## 4. Diagnostic Lookup Table (Card → Root Cause) + +Quick-reference after filtering the Engine Failures dashboard by transaction ID, engine name, and account: + +| What You See | Root Cause | +|---|---| +| "Engine last termination reason" = `Failed` or `Done` | Engine crash (segfault, abort, stack overflow) | +| "Engine last termination reason" = `FailedWithOOM` | OOM | +| "Server heartbeats per second" bars don't reach 1 | Brownout | +| Above + missing bars in "ERP: Transaction heartbeats received" | Brownout affecting the transaction | +| Above + high "Julia GC time" or "Julia compilation time" | Memory pressure or compilation warmup | +| "Transaction logs overview" stops showing thin purple vertical lines | Long heartbeat requests to ERP | +| "Engine uptime" drops + events in lifecycle cards | Engine lifecycle event (likely false positive) | +| "Engine container status" running drops Mon-Thu 11PM-5AM local | Scheduled Snowflake maintenance | + +--- + +## 5. Heartbeat Mechanism + +- Periodic task `TxnKeepAlive - id: <transaction_id>` runs every **30 seconds** on the engine +- Communicates with ERP endpoint `/api/v1/transactionHeartbeat` +- Code: started in `Server.jl` line ~1894, implemented in `packages/RAI_KVStore/src/spcs.jl` line ~487 +- If ERP doesn't receive heartbeat for **20 minutes**, it aborts with "engine failed" + +--- + +## 6. 
Key Observe Dashboards + +| Dashboard | ID | Purpose | +|---|---|---| +| Engine Failures | 41949642 | Primary oncall dashboard for transaction aborts | +| CPU Profiling | 41782266 | Continuous CPU profiling | +| Performance Triage | 41786648 | Performance investigation | +| Distributed Workload Indicators | 41946298 | Workload distribution | +| Optimizer Dashboard | 41882895 | Optimizer analysis | +| SPCS Environments | 41872510 | Environment overview | +| O4S Pipeline Health | 42090551 | Telemetry pipeline health | +| CDC Investigations | 42469929 | Data stream/CDC issues | +| Telemetry Outages | Telemetry-Outages-42760073 | Telemetry pipeline outages | +| Account Health | SPCS-Account-Health-42358249 | Per-account health | + +All in workspace `41759331` at `https://171608476159.observeinc.com/`. + +Fallback: DataDog "Engine failures (SPCS version)" at `https://app.datadoghq.com/dashboard/5u7-367-vkv` + +--- + +## 7. Key Datasets in Observe + +| Dataset | ID | Type | Description | +|---|---|---|---| +| RelationalAI/Snowflake Logs | 41832558 | Event | Log events with content, severity, attributes | +| RelationalAI/Spans | 41867217 | Interval | Operation timing with parent/child traces | +| Long Running Spans | 42001379 | Interval | Spans exceeding 3 hours (separate for performance) | +| RelationalAI/Metrics | 41861990 | Metric | Time series metrics | +| RelationalAI/Transaction | 41838769 | Interval | Transaction overview (duration, engine, maxlevel) | +| RelationalAI/Engine | 41838774 | Resource | Engine metadata (version, instance family) | +| RelationalAI/Service | 41853352 | Resource | Service lifecycle tracking | +| ServiceExplorer/Service Metrics | 41862479 | Metric | Metrics derived from OTel spans | +| OpenTelemetry/Span | 41766875 | Interval | Generic OTel spans | + +--- + +## 8. 
Key Correlation Tags and Lookup Keys + +| Key | Description | Links to | +|---|---|---| +| `rai_transaction_id` | Transaction identifier | Logs, spans, transaction dataset | +| `rai_engine_name` | Engine name | All datasets | +| `account_alias` | Customer Snowflake account | All datasets | +| `org_alias` | Customer organization | All datasets | +| `sf.query.id` / `sf_query_id` | Snowflake query ID | Logs, spans | +| `trace_id` / `span_id` | Distributed tracing IDs | Spans, logs | +| `host` | Snowflake host FQDN | Logs | +| `phase` | Compiler phase (PhaseInlining, CompilePhase, etc.) | Logs | +| `rai.commit` | Engine commit hash | Logs | +| `pyrel_program_id` | Links use_index and prepareIndex spans | Cross-layer correlation | + +--- + +## 9. Key Metrics + +| Metric | Type | Description | +|---|---|---| +| `commit_duration_ms` | delta | How long commits take | +| `transactions_duration_total` | delta | Total transaction duration | +| `commit_txns_failure` | delta | Failed commit count | +| `commit_txns_start_commit` | delta | Commit start count | +| `exception_count_5m` | — | Exception rate (5-minute window, from ServiceExplorer) | +| `jm_local_threads_available` | gauge | Thread availability | + +--- + +## 10. 
Environments, Services, and Languages + +### Environments +`spcs-prod`, `spcs-int`, `spcs-latest`, `spcs-staging`, `spcs-expt`, `spcs-ea` + +### Services +- `rai-server` — RAI engine (executes Rel queries) +- `spcs-control-plane` — ERP (orchestrates engine lifecycle, handles API requests) +- `spcs-integration` — SQL integration layer (procedures, data stream tasks) +- `gnn-engine` — Graph Neural Network engine +- `observe-for-snowflake` — O4S telemetry forwarding +- `rai-solver` — Solver service +- `spcs-log-heartbeat` — Log pipeline heartbeat +- `spcs-event-sharing-heartbeat` — Event sharing heartbeat +- `provider-account-monitoring` — Provider account monitoring + +### Transaction Languages +- `rel` — RAI's internal query language (being deprecated) +- `lqp` — Logical Query Plan (replacing rel) +- Users write PyRel (Python DSL) which compiles to rel/lqp + +### Log Severity Levels +`info`, `warning`, `warn`, `error`, `fatal` + +### Transaction maxlevel Values +`info`, `warning`, `error` + +--- + +## 11. SPCS Architecture + +### Three Major Components +1. **Native App Package** — installable artifact with SQL scripts, SPCS container images, Streamlit UI, manifest +2. **SQL Integration Layer (sql-lib / spcs-sql-lib)** — SQL objects exposing RAI functionality (procedures, UDFs, tasks, streams) +3. **Engine Resource Provider (ERP)** — single-tenant coordinator service in SPCS for metadata and resource management + +### Two Service Layers (NO end-to-end tracing between them) +- `spcs-integration` (SQL layer) — procedures, data stream tasks +- `spcs-control-plane` (ERP) — engine/DB management, transaction coordination +- Correlation: `pyrel_program_id`, `sf.query_id`/`request_id`, or hashed DB name + +### Provider vs Consumer Account +- **Provider**: Hosts App Package, image repos, billing config. No running app code. +- **Consumer**: Where Native App is installed. All RAI services run here. +- **Events Account**: Dedicated per region for telemetry forwarding. 
+ +### Compute Pool Instance Types +| Pattern | Instance | Purpose | +|---|---|---| +| `*_COMPUTE` | STANDARD_2 | Control plane | +| `*_COMPUTE_XS` | HIGH_MEMORY_1 | XS engines | +| `*_COMPUTE_S` | HIGH_MEMORY_2 | S engines | +| `*_COMPUTE_XL` | HIGH_MEMORY_5 | XL engines | + +### Deployment Ring Order +Int → Staging → Prod Ring 0 → Prod Ring 1 (approval required) → Prod Ring 2 (approval required) + +--- + +## 12. Telemetry Pipeline + +``` +SPCS Services (rai-server, spcs-control-plane, etc.) + → Consumer OTel Collector (consumer-otelcol) + → Event Table (TELEMETRY.TELEMETRY.SHARED_EVENTS) + → Event Sharing (to Events Account) + → O4S Native App tasks + → Observe + → Datadog (via configured pipelines) +``` + +**Telemetry latency threshold:** If > 30 minutes, post in `#ext-relationalai-observe` + +### Key Telemetry Filters +| Filter | Purpose | +|---|---| +| `service = "spcs-integration"` | SQL layer telemetry | +| `service = "spcs-control-plane"` | ERP telemetry | +| `response_status = "Error"` | Failed procedures | +| `attributes['error.class'] = "user"` | User-caused failures | +| `span_name = "process_batches"` | CDC batch processing | +| `span_name = "use_index"` | Graph Index operations | +| `span_name = "emit_trace"` | Periodic app telemetry (every 6 hours) | +| `span_name = "app_trace"` | App activity telemetry (every 12 minutes) | + +### Key Log Search Patterns +| Pattern | What it indicates | +|---|---| +| `"segmentation fault"` | Engine segfault (error level) | +| `"[Jemalloc] absolute profile"` | Memory allocation profile | +| `"[Jemalloc] relative profile"` | Differential memory profile | +| `"heartbeat was lost for"` | Brownout recovery indicator | +| `"TransactionBegin"` | Transaction start | +| `"TransactionEnd"` | Transaction end | +| `"transaction X marked as COMPLETED"` | Transaction completion | +| `"KVStoreCommitWriteTransactions"` | DB version advancement (write transaction) | +| `"Estimated cardinality of the output relation"` | Query output 
size | + +--- + +## 13. Error Categorization (Graph Index) + +All errors follow a consistent structure: + +### Expected Errors (user-actionable, `expected_error: true`) +| Error | Source | Resolution | +|---|---|---| +| Change Tracking Not Enabled | Table name | Enable change tracking on source table | +| CDC Task Suspended | cdc | Investigate root cause, run `CALL app.resume_cdc()` | +| Data Stream Quarantined >15 min | Table name | Review quarantine reason | +| Invalid Object Type | Table name | Reference only tables or views | + +### Unexpected Errors (system failures, `expected_error: false`) +| Error | Source | +|---|---| +| Engine Failures | engine name | +| API Normalization Errors | api.normalize_fq_ids | +| Reference Validation Errors | Table name | +| Index Preparation Errors | prepareIndex | + +### SLO Calculation +``` +success_rate = (successful_unique_use_index_ids / total_unique_use_index_ids) * 100 +``` +"Successful" = no unexpected errors. Expected errors do NOT reduce SLO. + +--- + +## 14. CDC Pipeline Details + +### Data Stream Task (runs every 1 minute, serverless) +- Implementation: `api.base_write_changes` in `spcs-sql-lib/lib/integration/rai_cdc.sql` +- WHEN condition: `SYSTEM$STREAM_HAS_DATA(<stream>)` — only runs if changes exist +- Uses SF transactions for atomic metadata + CSV export +- Serverless pool is bursty at top of hour/day (can cause timeouts) + +### process_batches Task (singleton, runs every 1 minute) +- Implementation: `api.process_batches()` in `spcs-sql-lib/lib/integration/rai_cdc.sql` +- Two work item types: DB Preparation (model installation) and Batch Loading (data import) +- Transactions via CDC_MANAGED_ENGINE +- One transaction per target DB at a time +- Generated Rel avoids stdlib dependencies + +### Stream Stale State +- SF Stream holds an offset timestamp for last consumed position +- If stream falls behind data retention window → **stale** (unrecoverable) +- Data stream must be recreated + +--- + +## 15. 
Escalation Channels + +| Issue Type | Slack Channel | +|---|---| +| Engine failures (oncall) | #team-prod-engine-resource-providers-spcs | +| Native App integration | #team-prod-snowflake-integration | +| PyRel/UX issues | #team-prod-experience | +| Observability issues | #helpdesk-observability | +| Slow queries | #helpdesk-slow-queries | +| CI/CD failures | #project-prod-continuous-delivery | +| Observe vendor issues | #ext-relationalai-observe | +| Snowflake support | #ext_rai-snowflake | +| Billing | #project-prod-snowflake-billing | + +### Escalation by Failure Type +| Failure | Team | +|---|---| +| Julia runtime segfault | Julia team | +| Storage/network segfault | Storage team | +| Stack overflow in metadata | Backend team | +| OOM | Determine from Jemalloc/CPU profiles | +| Brownout from GC/compilation | Julia team | +| Brownout from external calls (Solver/HiGHS) | Respective library team | +| Brownout (other) | ERP team | +| Long heartbeat requests | ERP team | +| Snowflake maintenance | No escalation, announce in #team-prod-snowflake-integration | + +--- + +## 16. Playbook Design (Agreed Structure) + +### Playbook 1: Transaction Investigation (Entry Point) +- **Input:** transaction ID +- **Goal:** Status, customer, what it did (high-level), duration +- **Steps:** + 1. Query transaction dataset → status, customer (org + account), engine, duration, maxlevel, language + 2. Determine transaction type: CDC (CDC_MANAGED_ENGINE) vs user vs Graph Index + 3. If COMPLETED → summarize (query spans + logs in parallel for detail) + 4. If ABORTED → report abort reason, route to Playbook 2 or 3 +- **Customer identification:** `org_alias` + `account_alias` (e.g., "Western Union (account: wudev_wudatadev)") +- **"What it did" level:** Inference from language, DB version changes, span names. Business intent is future work. +- **User-level drill down:** Not MVP, future consideration. 
+ +### Playbook 2: Engine Failure Investigation +- **Input:** engine name + transaction ID (from Playbook 1 or alert) +- **Goal:** Root cause classification + escalation path +- **Steps:** Walk diagnostic lookup table: + 1. Check termination reason → crash or OOM? + 2. Check heartbeats → brownout? + 3. Check heartbeat span duration → long heartbeat requests? + 4. Check lifecycle events → user-initiated? + 5. Check container status timing → Snowflake maintenance? + +### Playbook 3: Data Pipeline Investigation +- **Input:** "data isn't loading" or slow CDC, or data stream errors +- **Goal:** Root cause + remediation +- **Steps:** + 1. Check CDC dashboard for the data stream / account + 2. Check process_batches spans for errors or slow execution + 3. Check for stale streams, quarantined streams, suspended tasks + 4. If slow: debug via slow data loading procedure + +--- + +## 17. Observed Transaction Example + +Transaction `6f6d1441-ef58-4986-9c15-3edb74a75a42` was investigated live: + +| Field | Value | +|---|---| +| Engine | CDC_MANAGED_ENGINE | +| Account | wudev_wudatadev | +| Org | western_union | +| Environment | spcs-prod | +| Service | spcs-control-plane | +| Language | rel | +| Duration | ~30.27 seconds | +| Max Log Level | info | + +### Timeline +| Phase | Duration | Notes | +|---|---|---| +| Queue wait | ~17s | Enqueued → dequeued | +| Execution | ~5s | Rel query, output cardinality = 0 | +| Commit | ~4.3s | group_commit, DB version 28→29 | +| Completion | ~4s | Cache/metrics, event stream close, auto-suspend | + +### Key Spans (heaviest) +| Span | Service | Duration | +|---|---|---| +| group_commit | rai-server | 4.3s | +| post_to_erp | rai-server | 4.07s | +| service.CompleteTransactions | spcs-control-plane | 4.07s | +| eot_tip_write | rai-server | 134ms | +| eot_tip_write_to_blob | rai-server | 101ms | + +### Key Log Messages (chronological) +- `"transaction request has been prepared"` +- `"[SERVER] Transaction request received"` +- `"TransactionBegin"` 
+- `"[TransactionQueue] Transaction enqueued"` +- `"[TransactionQueue] Transaction dequeued"` +- `"rel profiler root"` +- `"Estimated cardinality of the output relation: 0"` +- `"[COMMIT] Wrote tip for 28 => 29"` +- `"transaction X marked as COMPLETED"` +- `"TransactionEnd"` +- `"[service review] Transaction finished without internal error"` + +This was a CDC transaction (CDC_MANAGED_ENGINE, rel language, write transaction with DB version advancement). diff --git a/plugins/dockyard/skills/observability/SKILL.md b/plugins/dockyard/skills/observability/SKILL.md new file mode 100644 index 0000000..9308e3d --- /dev/null +++ b/plugins/dockyard/skills/observability/SKILL.md @@ -0,0 +1,197 @@ +# Observability + +## Purpose + +Query and analyze observability data (logs, metrics, traces) to investigate service health, errors, latency, and performance issues. + +--- + +## Domain Context + +The observability platform contains RAI (RelationalAI) data running on Snowflake. Understanding the data model helps you ask better questions. 
+ +### Datasets + +| Dataset | What it contains | Use when | +|---------|-----------------|----------| +| `RelationalAI/Transaction` | High-level transaction view: duration, engine, account, environment, max severity (`maxlevel`) | Getting an overview of a specific transaction or finding problematic transactions | +| `RelationalAI/Spans` | Detailed operation timing with parent/child trace trees, error status, span names | Understanding where time was spent within a transaction, finding errors | +| `RelationalAI/Snowflake Logs` | Narrative log events with content, severity level, and rich attributes | Reading what happened step-by-step, finding error messages | +| `RelationalAI/Engine` | Engine metadata: version, instance family, size, region, Snowflake database | Checking engine config, version, or correlating issues to instance type | +| `RelationalAI/Metrics` | Time series metrics (commit duration, thread availability, exception counts) | Trend analysis, alerting thresholds, aggregate health | + +### Lookup Keys + +When a user mentions any of these identifiers, use them directly in queries: + +- **`rai_transaction_id`** — Primary key for a specific transaction. Links across logs, spans, and the transaction dataset. +- **`rai_engine_name`** — Engine identifier. Use for engine-level investigation. +- **`account_alias`** / **`org_alias`** — Customer account and organization. Use for customer-level investigation. +- **`sf.query.id`** — Snowflake query ID. Correlates RAI activity to Snowflake-level operations. +- **`trace_id`** / **`span_id`** — Distributed tracing identifiers. Use to follow a request across services. 
+
+### Key Metrics
+
+| Metric | What it measures |
+|--------|-----------------|
+| `commit_duration_ms` | How long commits take |
+| `transactions_duration_total` | Total transaction duration |
+| `commit_txns_failure` | Failed commit count |
+| `exception_count_5m` | Exception rate (5-minute window) |
+
+### Common Environments and Services
+
+**Environments:** `spcs-prod`, `spcs-int`, `spcs-latest`, `spcs-staging`, `spcs-expt`, `spcs-ea`
+
+**Services:** `rai-server`, `spcs-control-plane`, `spcs-integration`, `gnn-engine`, `observe-for-snowflake`, `rai-solver`
+
+**Transaction languages:** `rel`, `lqp` — these are internal execution languages, not user-facing. Users write PyRel (a Python DSL) which compiles down to these. `rel` is being deprecated in favor of `lqp` (Logical Query Plan). During the migration period, errors may be caused by the rel→lqp transition.
+
+**Severity levels:** transaction `maxlevel` values are `info`, `warning`, `error`; individual log events may also use `warn` or `fatal`.
+
+This list is not exhaustive — use the knowledge graph to discover current values if needed.
+
+### Units
+
+Duration fields across all datasets are in **nanoseconds**. Always convert to human-readable units (ms, seconds, minutes) when presenting results.
+
+---
+
+## Tools Available
+
+- `mcp__observe__generate-query-card`: Query observability data from natural language prompts. This is the primary tool — it automatically fetches knowledge graph context internally.
+- `mcp__observe__generate-knowledge-graph-context`: Explore available correlation tags, datasets, and metrics. Use only for open-ended exploration when you don't yet know what data exists.
+
+## Workflow
+
+### Step 1: Query
+
+Use `mcp__observe__generate-query-card` directly. Pass the user's question as a simple, direct prompt — the tool handles complexity and context resolution internally.
+ +**Guidelines:** +- Keep prompts natural and concise +- When investigating a topic, run multiple queries in parallel (e.g., spans and logs simultaneously) +- Limit to 5 queries before doing analysis unless prompted to do more +- **Always request bounded results** — include "top 10", "top 20", or "limit N" in prompts that aggregate errors or events. Unbounded "grouped by error message" queries can return hundreds of repetitive rows that bloat the response and slow down analysis. +- **Prefer aggregated over raw** — ask for counts, groupings, and summaries rather than raw log lines. Raw lines are useful only when drilling into a specific event. + +**Do NOT** call `generate-knowledge-graph-context` before querying — the query card tool already does this internally. + +### Step 2: Analyze and Present + +The tool returns markdown tables with query results and optional chart visualizations. Results can be large. + +**Always include:** +- A timeline or summary table of key findings +- The Observe link returned by the tool (use the URL exactly as returned) + +**When results are large**, extract and highlight the most important data points rather than dumping raw output. + +### When to Use Knowledge Graph Directly + +Only use `mcp__observe__generate-knowledge-graph-context` when: +- The user asks "what data is available?" without a specific query +- A query returns no results and you need to discover valid service names, metric names, or dataset names +- You need to understand what dimensions are available for filtering + +**Parameters:** +- `kind`: One of `"correlation tag"`, `"dataset"`, or `"metric"` +- `prompt`: Natural language search + +## Query Retry Strategies + +If a query returns only a title (no data): +1. Rephrase with different groupings (e.g., "by service", "by message") +2. Add "over time" for time series +3. 
Fall back to `generate-knowledge-graph-context` to discover valid names, then re-query + +--- + +## Runbooks + +### Runbook 1: Transaction Investigation + +**Trigger:** User has a `rai_transaction_id` or asks "what happened with this transaction?" + +**Queries (run in parallel):** +1. Transaction dataset — filter by `rai_transaction_id` → get status, duration, engine, account, environment, language +2. Spans — filter by `rai_transaction_id` → top-level span timeline +3. Logs — filter by `rai_transaction_id`, severity ≥ warning → error context + +**Interpret:** +- **Terminal state:** COMPLETED = success. ABORTED = failed — check abort reason from ERP. +- **Customer:** `org_alias` + `account_alias` (e.g., "Western Union (account: wudev_wudatadev)") +- **What it did:** Infer from transaction type, language, span names. User-initiated (exec/load_data), CDC (process_batches on CDC_MANAGED_ENGINE), or Graph Index (prepareIndex). +- **Duration:** Convert nanoseconds. Note phases: request received → queue wait → execution → commit → completion. +- `maxlevel` is NOT status — it's the highest severity log event. A COMPLETED transaction can have `maxlevel = error`. + +**Route based on findings:** +- Abort reason mentions engine → **Runbook 2** +- Transaction type is CDC / data pipeline issue → **Runbook 3** +- Issue spans SQL ↔ ERP boundary → **Runbook 4** + +--- + +### Runbook 2: Engine Failure Investigation + +**Trigger:** Transaction aborted with engine-related reason, or user reports engine crash/hang. + +**Walk the diagnostic table in order** — query each signal, stop when you find a match: + +| Step | Query | Signal | Diagnosis | +|------|-------|--------|-----------| +| 1 | Engine dataset — termination reason | `Failed` or `Done` | **Crash** — segfault/abort/stack overflow. Check logs for "segmentation fault". 
Escalate to #team-prod-engine-resource-providers-spcs | +| 2 | Engine dataset — termination reason | `FailedWithOOM` | **OOM** — check Jemalloc profiles in logs ("absolute profile"). Escalate to #team-prod-engine-resource-providers-spcs | +| 3 | Metrics — "Server heartbeats per second" | < 1 | **Brownout** — interactive threads blocked. Check Julia GC time and compilation metrics for memory pressure | +| 4 | Spans — `bot_keepalive_write_to_kvs` | Abnormally long duration | **Long heartbeat requests** — KVS write latency. Transaction logs will show a gap where logging stops | +| 5 | Engine dataset — lifecycle events + uptime | Uptime drops + delete/suspend event | **Lifecycle event** — user deleted or suspended engine during transaction (false positive, not a bug) | +| 6 | Check time of day + container status | Mon-Thu 11PM-5AM local, container status drops then recovers | **Snowflake maintenance** — expected, self-resolving | + +**Heartbeat context:** TxnKeepAlive runs every 30s → ERP endpoint `/api/v1/transactionHeartbeat`. 20-minute timeout without heartbeat → abort with "engine failed". + +--- + +### Runbook 3: Data Pipeline Investigation + +**Trigger:** "Data isn't loading", stale CDC stream, slow sync. + +**CDC pipeline stages:** +``` +Source SF Table → SF Stream → Data Stream Task (1min) → CSV to Stage → process_batches Task (1min) → CDC_MANAGED_ENGINE → RAI Database +``` + +**Queries:** +1. Transaction dataset — filter by CDC_MANAGED_ENGINE + account → recent CDC transactions, check for ABORTED +2. Spans — filter by `process_batches` span name → timing of each batch +3. Logs — filter for CDC-related errors, quarantine messages + +**Interpret:** +- **Quarantined stream:** Stream disabled due to repeated failures. Look for `expected_error: true` (user-actionable — bad input, missing change tracking) vs `expected_error: false` (system failure — escalate). +- **Slow loading:** Check process_batches duration trend. Each cycle is ~1min. 
Large batches or engine contention cause lag. +- **Stale stream:** Check if Data Stream Task is running. SF Stream captures changes but the task must poll it. + +**Escalation:** #team-prod-snowflake-integration for Native App issues, #team-prod-engine-resource-providers-spcs for engine issues on CDC. + +--- + +### Runbook 4: Cross-Service Correlation + +**Trigger:** Issue spans both SQL layer (`spcs-integration`) and ERP (`spcs-control-plane`), or user has a Snowflake query ID needing RAI context. + +**Key constraint:** There is NO end-to-end tracing between these two service layers. + +**Correlation keys** (use whichever is available): +- `pyrel_program_id` — links SQL procedure call to ERP operation +- `sf.query_id` / `request_id` — Snowflake query context +- Hashed DB name — last resort, matches across layers + +**Queries (run in parallel):** +1. Spans/Logs — filter `service = "spcs-integration"` + correlation key → SQL layer timeline +2. Spans/Logs — filter `service = "spcs-control-plane"` + correlation key → ERP timeline + +**Interpret:** +- Build a unified timeline by aligning timestamps across both result sets +- `response_status = "Error"` → failed procedures on SQL side +- `attributes['error.class'] = "user"` → user-caused failure (not a system bug) + +**Escalation:** #team-prod-snowflake-integration (SQL layer), #team-prod-engine-resource-providers-spcs (ERP layer), #team-prod-experience (PyRel/UX) diff --git a/skills/review-and-submit/SKILL.md b/plugins/dockyard/skills/review-and-submit/SKILL.md similarity index 96% rename from skills/review-and-submit/SKILL.md rename to plugins/dockyard/skills/review-and-submit/SKILL.md index 57df72a..f6f7969 100644 --- a/skills/review-and-submit/SKILL.md +++ b/plugins/dockyard/skills/review-and-submit/SKILL.md @@ -5,7 +5,7 @@ description: Review code, auto-fix findings, generate PR description, and create # Submit -You are running the Shipwright Submit flow. 
This is the local developer flow from "done coding" to "draft PR ready." +You are running the Dockyard Submit flow. This is the local developer flow from "done coding" to "draft PR ready." **Review always runs.** There is no flag to skip it. After seeing results, the developer can choose to proceed past blockers, but the review itself is mandatory. @@ -56,7 +56,7 @@ Search for context that explains the intent behind the changes. This is optional ## Step 2: Run Code Review -Invoke the `shipwright:code-review` skill and follow its process exactly. +Invoke the `dockyard:code-review` skill and follow its process exactly. **Model selection for this step:** - Review passes: use Opus (higher quality, fewer false positives — developer is paying and waiting) @@ -148,7 +148,7 @@ Fixes applied. Updated review: [updated findings, if any...] Options: -1. Fix more manually and re-run /shipwright:submit +1. Fix more manually and re-run /dockyard:review-and-submit 2. Proceed to PR creation (remaining findings will be noted in the PR description) ``` diff --git a/tests/fixtures/sample-app/CLAUDE.md b/plugins/dockyard/tests/fixtures/sample-app/CLAUDE.md similarity index 100% rename from tests/fixtures/sample-app/CLAUDE.md rename to plugins/dockyard/tests/fixtures/sample-app/CLAUDE.md diff --git a/tests/fixtures/sample-app/package.json b/plugins/dockyard/tests/fixtures/sample-app/package.json similarity index 100% rename from tests/fixtures/sample-app/package.json rename to plugins/dockyard/tests/fixtures/sample-app/package.json diff --git a/tests/fixtures/sample-app/src/user-service.js b/plugins/dockyard/tests/fixtures/sample-app/src/user-service.js similarity index 100% rename from tests/fixtures/sample-app/src/user-service.js rename to plugins/dockyard/tests/fixtures/sample-app/src/user-service.js diff --git a/tests/fixtures/sample-app/src/utils.js b/plugins/dockyard/tests/fixtures/sample-app/src/utils.js similarity index 100% rename from tests/fixtures/sample-app/src/utils.js 
rename to plugins/dockyard/tests/fixtures/sample-app/src/utils.js diff --git a/tests/fixtures/sample-app/tests/user-service.test.js b/plugins/dockyard/tests/fixtures/sample-app/tests/user-service.test.js similarity index 100% rename from tests/fixtures/sample-app/tests/user-service.test.js rename to plugins/dockyard/tests/fixtures/sample-app/tests/user-service.test.js diff --git a/tests/smoke/run-all.sh b/plugins/dockyard/tests/smoke/run-all.sh old mode 100644 new mode 100755 similarity index 86% rename from tests/smoke/run-all.sh rename to plugins/dockyard/tests/smoke/run-all.sh index ca9b6be..0778100 --- a/tests/smoke/run-all.sh +++ b/plugins/dockyard/tests/smoke/run-all.sh @@ -2,6 +2,10 @@ # # run-all.sh — Run all smoke validation scripts and report summary. # +# These are marketplace-wide validation tests that verify the structure +# of both the dockyard and shipwright plugins. +# +# -e intentionally omitted: we want to run all suites and report failures at the end set -uo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" diff --git a/plugins/dockyard/tests/smoke/validate-agents.sh b/plugins/dockyard/tests/smoke/validate-agents.sh new file mode 100755 index 0000000..a8ecec1 --- /dev/null +++ b/plugins/dockyard/tests/smoke/validate-agents.sh @@ -0,0 +1,105 @@ +#!/usr/bin/env bash +# +# validate-agents.sh — Verify agent files in both plugins meet conventions. +# +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "$0")/../../../.." && pwd)" +PASS=0 +FAIL=0 + +pass() { echo " PASS $1"; PASS=$((PASS + 1)); } +fail() { echo " FAIL $1"; FAIL=$((FAIL + 1)); } + +validate_agent() { + local filepath="$1" + local label="$2" + local expect_skills="$3" # "yes" or "no" + + echo "" + echo "$label:" + + if [ ! 
-s "$filepath" ]; then + fail "$label is missing or empty" + return + fi + pass "$label exists and is non-empty" + + # Contains a role description (starts with "You are" somewhere in the file) + if grep -qi '^you are\|^- you are' "$filepath"; then + pass "$label contains role description" + else + fail "$label missing role description (expected line starting with 'You are')" + fi + + # References skills (if expected) + if [ "$expect_skills" = "yes" ]; then + if grep -qE 'skills/|dockyard:' "$filepath"; then + pass "$label references at least one skill" + else + fail "$label does not reference any skill" + fi + else + pass "$label is self-contained (no skill injection expected)" + fi + + # Contains output/return format section + if grep -qiE '## (Output|Return|Result|Returning)' "$filepath"; then + pass "$label contains output/return format section" + else + fail "$label missing output/return format section (expected ## Output, ## Return, or ## Result heading)" + fi +} + +echo "=== validate-agents ===" + +# --- Dockyard public agents --- +echo "" +echo "Dockyard public agents:" +validate_agent \ + "$REPO_ROOT/plugins/dockyard/agents/doc-digest.md" \ + "dockyard/doc-digest.md" \ + "no" + +# --- Shipwright internal agents --- +echo "" +echo "Shipwright internal agents:" +SHIPWRIGHT_AGENTS=( + triage.md + implementer.md + reviewer.md + validator.md +) + +for agent in "${SHIPWRIGHT_AGENTS[@]}"; do + validate_agent \ + "$REPO_ROOT/plugins/shipwright/internal/agents/$agent" \ + "shipwright/$agent" \ + "yes" +done + +# --- Cross-plugin skill reference validation --- +echo "" +echo "Cross-plugin skill references:" +# Verify that dockyard:X references in shipwright agents resolve to actual dockyard skills +for agent in "$REPO_ROOT/plugins/shipwright/internal/agents/"*.md "$REPO_ROOT/plugins/shipwright/commands/shipwright.md"; do + refs=$(grep -oE 'dockyard:[a-z-]+' "$agent" 2>/dev/null | sort -u || true) + for ref in $refs; do + skill_name="${ref#dockyard:}" + 
skill_path="$REPO_ROOT/plugins/dockyard/skills/$skill_name/SKILL.md" + label="$(basename "$agent"):$ref" + if [ -f "$skill_path" ]; then + pass "$label resolves to $skill_path" + else + fail "$label does not resolve (expected $skill_path)" + fi + done +done + +echo "" +TOTAL=$((PASS + FAIL)) +echo "validate-agents: $PASS/$TOTAL passed" +if [ "$FAIL" -gt 0 ]; then + exit 1 +fi +exit 0 diff --git a/plugins/dockyard/tests/smoke/validate-commands.sh b/plugins/dockyard/tests/smoke/validate-commands.sh new file mode 100755 index 0000000..2c3b865 --- /dev/null +++ b/plugins/dockyard/tests/smoke/validate-commands.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# +# validate-commands.sh — Verify command files in both plugins meet conventions. +# +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "$0")/../../../.." && pwd)" +PASS=0 +FAIL=0 + +pass() { echo " PASS $1"; PASS=$((PASS + 1)); } +fail() { echo " FAIL $1"; FAIL=$((FAIL + 1)); } + +validate_command() { + local plugin="$1" + local cmd="$2" + local filepath="$REPO_ROOT/plugins/$plugin/commands/$cmd" + local label="$plugin/$cmd" + + echo "" + echo "$label:" + + # File exists and is not empty + if [ ! 
-s "$filepath" ]; then + fail "$label is missing or empty" + return + fi + pass "$label exists and is non-empty" + + # Contains YAML frontmatter (starts with ---) + if head -1 "$filepath" | grep -q '^---'; then + pass "$label has YAML frontmatter opening" + else + fail "$label missing YAML frontmatter (first line should be '---')" + fi + + # Contains description: in frontmatter (between first and second ---) + frontmatter=$(awk 'NR==1 && /^---$/{found=1; next} found && /^---$/{exit} found{print}' "$filepath" | head -20) + if echo "$frontmatter" | grep -q 'description:'; then + pass "$label has description: in frontmatter" + else + fail "$label missing description: in frontmatter" + fi + + # Has content beyond frontmatter + body_lines=$(sed '1,/^---$/{ /^---$/!d; }' "$filepath" | sed '1d' | grep -c '[^[:space:]]' || true) + if [ "$body_lines" -gt 0 ]; then + pass "$label has content beyond frontmatter" + else + fail "$label is empty beyond frontmatter" + fi +} + +echo "=== validate-commands ===" + +# --- Dockyard commands --- +echo "" +echo "Dockyard commands:" +DOCKYARD_COMMANDS=( + codebase-analyze.md + code-review.md + doc-digest.md + investigate.md + review-and-submit.md + feedback.md +) + +for cmd in "${DOCKYARD_COMMANDS[@]}"; do + validate_command "dockyard" "$cmd" +done + +# --- Shipwright commands --- +echo "" +echo "Shipwright commands:" +SHIPWRIGHT_COMMANDS=( + shipwright.md + feedback.md +) + +for cmd in "${SHIPWRIGHT_COMMANDS[@]}"; do + validate_command "shipwright" "$cmd" +done + +echo "" +TOTAL=$((PASS + FAIL)) +echo "validate-commands: $PASS/$TOTAL passed" +if [ "$FAIL" -gt 0 ]; then + exit 1 +fi +exit 0 diff --git a/plugins/dockyard/tests/smoke/validate-skills.sh b/plugins/dockyard/tests/smoke/validate-skills.sh new file mode 100755 index 0000000..58155aa --- /dev/null +++ b/plugins/dockyard/tests/smoke/validate-skills.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# +# validate-skills.sh — Verify skill files in both plugins meet conventions. 
+# +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "$0")/../../../.." && pwd)" +PASS=0 +FAIL=0 + +pass() { echo " PASS $1"; PASS=$((PASS + 1)); } +fail() { echo " FAIL $1"; FAIL=$((FAIL + 1)); } + +validate_skill() { + local filepath="$1" + local label="$2" + + echo "" + echo "$label:" + + if [ ! -s "$filepath" ]; then + fail "$label is missing or empty" + return + fi + pass "$label exists and is non-empty" + + # Has a title heading or YAML frontmatter + if head -1 "$filepath" | grep -qE '^(#|---)'; then + pass "$label has title heading or frontmatter" + else + fail "$label missing title heading or YAML frontmatter" + fi + + if grep -qi 'superpowers:' "$filepath"; then + fail "$label references superpowers: namespace" + else + pass "$label does not reference superpowers: namespace" + fi + + if grep -q '\.planning/' "$filepath"; then + fail "$label references .planning/ (GSD internal)" + else + pass "$label does not reference .planning/" + fi +} + +echo "=== validate-skills ===" + +# --- Dockyard public skills --- +echo "" +echo "Dockyard public skills:" +DOCKYARD_SKILLS=( + brownfield-analysis + code-review + review-and-submit + observability +) + +for skill in "${DOCKYARD_SKILLS[@]}"; do + validate_skill \ + "$REPO_ROOT/plugins/dockyard/skills/$skill/SKILL.md" \ + "dockyard/$skill" +done + +# --- Shipwright internal skills --- +echo "" +echo "Shipwright internal skills:" +SHIPWRIGHT_SKILLS=( + tdd + verification-before-completion + systematic-debugging + anti-rationalization + decision-categorization +) + +for skill in "${SHIPWRIGHT_SKILLS[@]}"; do + validate_skill \ + "$REPO_ROOT/plugins/shipwright/internal/skills/$skill/SKILL.md" \ + "shipwright/$skill" +done + +echo "" +TOTAL=$((PASS + FAIL)) +echo "validate-skills: $PASS/$TOTAL passed" +if [ "$FAIL" -gt 0 ]; then + exit 1 +fi +exit 0 diff --git a/plugins/dockyard/tests/smoke/validate-structure.sh b/plugins/dockyard/tests/smoke/validate-structure.sh new file mode 100755 index 0000000..49cf2b0 --- /dev/null +++ 
b/plugins/dockyard/tests/smoke/validate-structure.sh @@ -0,0 +1,230 @@ +#!/usr/bin/env bash +# +# validate-structure.sh — Verify marketplace and plugin structure files exist. +# +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "$0")/../../../.." && pwd)" +DOCKYARD="$REPO_ROOT/plugins/dockyard" +SHIPWRIGHT="$REPO_ROOT/plugins/shipwright" +PASS=0 +FAIL=0 + +check() { + local label="$1" + local path="$2" + if [ -e "$path" ]; then + echo " PASS $label" + PASS=$((PASS + 1)) + else + echo " FAIL $label (missing: $path)" + FAIL=$((FAIL + 1)) + fi +} + +echo "=== validate-structure ===" + +# --- Marketplace-level files --- +echo "" +echo "Marketplace-level files:" +check "marketplace.json" "$REPO_ROOT/.claude-plugin/marketplace.json" +check "CODEOWNERS" "$REPO_ROOT/CODEOWNERS" +check "CONTRIBUTING.md" "$REPO_ROOT/CONTRIBUTING.md" +check "README.md" "$REPO_ROOT/README.md" +check "THIRD_PARTY_NOTICES" "$REPO_ROOT/THIRD_PARTY_NOTICES" +check "templates/SKILL_TEMPLATE.md" "$REPO_ROOT/templates/SKILL_TEMPLATE.md" +check "templates/AGENT_TEMPLATE.md" "$REPO_ROOT/templates/AGENT_TEMPLATE.md" + +# --- Dockyard plugin --- +echo "" +echo "Dockyard plugin structure:" +check "dockyard/plugin.json" "$DOCKYARD/.claude-plugin/plugin.json" + +# Dockyard skills +check "dockyard/skills/brownfield-analysis/SKILL.md" "$DOCKYARD/skills/brownfield-analysis/SKILL.md" +check "dockyard/skills/code-review/SKILL.md" "$DOCKYARD/skills/code-review/SKILL.md" +check "dockyard/skills/review-and-submit/SKILL.md" "$DOCKYARD/skills/review-and-submit/SKILL.md" +check "dockyard/skills/observability/SKILL.md" "$DOCKYARD/skills/observability/SKILL.md" + +# Dockyard agents +check "dockyard/agents/doc-digest.md" "$DOCKYARD/agents/doc-digest.md" + +# Dockyard commands +check "dockyard/commands/codebase-analyze.md" "$DOCKYARD/commands/codebase-analyze.md" +check "dockyard/commands/doc-digest.md" "$DOCKYARD/commands/doc-digest.md" +check "dockyard/commands/investigate.md" "$DOCKYARD/commands/investigate.md" +check 
"dockyard/commands/code-review.md" "$DOCKYARD/commands/code-review.md" +check "dockyard/commands/review-and-submit.md" "$DOCKYARD/commands/review-and-submit.md" +check "dockyard/commands/feedback.md" "$DOCKYARD/commands/feedback.md" + +# --- Shipwright plugin --- +echo "" +echo "Shipwright plugin structure:" +check "shipwright/plugin.json" "$SHIPWRIGHT/.claude-plugin/plugin.json" + +# Shipwright hooks +check "shipwright/hooks/hooks.json" "$SHIPWRIGHT/hooks/hooks.json" +check "shipwright/hooks/check-dockyard.sh" "$SHIPWRIGHT/hooks/check-dockyard.sh" + +# Shipwright commands +check "shipwright/commands/shipwright.md" "$SHIPWRIGHT/commands/shipwright.md" +check "shipwright/commands/feedback.md" "$SHIPWRIGHT/commands/feedback.md" + +# Shipwright internal agents +check "shipwright/internal/agents/triage.md" "$SHIPWRIGHT/internal/agents/triage.md" +check "shipwright/internal/agents/implementer.md" "$SHIPWRIGHT/internal/agents/implementer.md" +check "shipwright/internal/agents/reviewer.md" "$SHIPWRIGHT/internal/agents/reviewer.md" +check "shipwright/internal/agents/validator.md" "$SHIPWRIGHT/internal/agents/validator.md" + +# Shipwright internal skills +check "shipwright/internal/skills/tdd/SKILL.md" "$SHIPWRIGHT/internal/skills/tdd/SKILL.md" +check "shipwright/internal/skills/verification-before-completion/SKILL.md" "$SHIPWRIGHT/internal/skills/verification-before-completion/SKILL.md" +check "shipwright/internal/skills/systematic-debugging/SKILL.md" "$SHIPWRIGHT/internal/skills/systematic-debugging/SKILL.md" +check "shipwright/internal/skills/anti-rationalization/SKILL.md" "$SHIPWRIGHT/internal/skills/anti-rationalization/SKILL.md" +check "shipwright/internal/skills/decision-categorization/SKILL.md" "$SHIPWRIGHT/internal/skills/decision-categorization/SKILL.md" + +# --- Validate marketplace.json has required keys --- +echo "" +echo "Marketplace manifest validation:" +if [ -f "$REPO_ROOT/.claude-plugin/marketplace.json" ]; then + for key in name description plugins; do + 
# Match root-level keys (no leading whitespace before the key) + if grep -qE "^ \"$key\"" "$REPO_ROOT/.claude-plugin/marketplace.json"; then + echo " PASS marketplace.json contains root-level \"$key\"" + PASS=$((PASS + 1)) + else + echo " FAIL marketplace.json missing root-level \"$key\"" + FAIL=$((FAIL + 1)) + fi + done +else + echo " SKIP marketplace.json key checks (file missing)" + FAIL=$((FAIL + 3)) +fi + +# --- Validate each plugin.json has required keys --- +echo "" +echo "Plugin manifest validation:" +for plugin_name in dockyard shipwright; do + plugin_json="$REPO_ROOT/plugins/$plugin_name/.claude-plugin/plugin.json" + if [ -f "$plugin_json" ]; then + for key in name description version; do + if grep -qE "^ \"$key\"" "$plugin_json"; then + echo " PASS $plugin_name/plugin.json contains \"$key\"" + PASS=$((PASS + 1)) + else + echo " FAIL $plugin_name/plugin.json missing \"$key\"" + FAIL=$((FAIL + 1)) + fi + done + else + echo " SKIP $plugin_name/plugin.json key checks (file missing)" + FAIL=$((FAIL + 3)) + fi +done + +# --- .gitignore includes .workflow/ --- +echo "" +echo "Gitignore:" +if [ -f "$REPO_ROOT/.gitignore" ] && grep -q '\.workflow/' "$REPO_ROOT/.gitignore"; then + echo " PASS .gitignore includes .workflow/" + PASS=$((PASS + 1)) +else + echo " FAIL .gitignore missing .workflow/ entry" + FAIL=$((FAIL + 1)) +fi + +# --- Validate hooks.json content --- +echo "" +echo "Hooks validation:" +if [ -f "$SHIPWRIGHT/hooks/hooks.json" ]; then + if grep -q '"SessionStart"' "$SHIPWRIGHT/hooks/hooks.json" && \ + grep -q 'check-dockyard.sh' "$SHIPWRIGHT/hooks/hooks.json"; then + echo " PASS hooks.json references SessionStart and check-dockyard.sh" + PASS=$((PASS + 1)) + else + echo " FAIL hooks.json missing SessionStart hook or check-dockyard.sh reference" + FAIL=$((FAIL + 1)) + fi +else + echo " SKIP hooks.json content check (file missing)" + FAIL=$((FAIL + 1)) +fi + +# --- Validate check-dockyard.sh behavior --- +echo "" +echo "Hook script behavior:" 
+tmpdir=$(mktemp -d) +trap 'rm -rf "$tmpdir"' EXIT + +# Test 1: No registry file — should exit 2 +rc=0; HOME="$tmpdir" bash "$SHIPWRIGHT/hooks/check-dockyard.sh" >/dev/null 2>&1 || rc=$? +if [ "$rc" -eq 2 ]; then + echo " PASS check-dockyard.sh exits 2 when registry missing" + PASS=$((PASS + 1)) +else + echo " FAIL check-dockyard.sh should exit 2 when registry missing (got $rc)" + FAIL=$((FAIL + 1)) +fi + +# Test 2: Registry file exists but is empty — should exit 2 +mkdir -p "$tmpdir/.claude/plugins" +echo '' > "$tmpdir/.claude/plugins/installed_plugins.json" +rc=0; HOME="$tmpdir" bash "$SHIPWRIGHT/hooks/check-dockyard.sh" >/dev/null 2>&1 || rc=$? +if [ "$rc" -eq 2 ]; then + echo " PASS check-dockyard.sh exits 2 when registry is empty" + PASS=$((PASS + 1)) +else + echo " FAIL check-dockyard.sh should exit 2 when registry is empty (got $rc)" + FAIL=$((FAIL + 1)) +fi + +# Test 3: Registry exists but no dockyard — should exit 2 +echo '{"plugins":{}}' > "$tmpdir/.claude/plugins/installed_plugins.json" +rc=0; hook_output=$(HOME="$tmpdir" bash "$SHIPWRIGHT/hooks/check-dockyard.sh" 2>&1) || rc=$? +if [ "$rc" -eq 2 ]; then + echo " PASS check-dockyard.sh exits 2 when dockyard missing from registry" + PASS=$((PASS + 1)) +else + echo " FAIL check-dockyard.sh should exit 2 when dockyard missing (got $rc)" + FAIL=$((FAIL + 1)) +fi + +# Test 4: Error message includes install command +if echo "$hook_output" | grep -q '/plugin install dockyard@shipwright-marketplace'; then + echo " PASS check-dockyard.sh error includes install command" + PASS=$((PASS + 1)) +else + echo " FAIL check-dockyard.sh error missing install command" + FAIL=$((FAIL + 1)) +fi + +# Test 5: Registry has other plugins but not dockyard — should exit 2 +echo '{"dockyard-tools@other-marketplace":{},"another@plugin":{}}' > "$tmpdir/.claude/plugins/installed_plugins.json" +rc=0; HOME="$tmpdir" bash "$SHIPWRIGHT/hooks/check-dockyard.sh" >/dev/null 2>&1 || rc=$? 
+if [ "$rc" -eq 2 ]; then + echo " PASS check-dockyard.sh exits 2 when similar-named plugin present but not dockyard" + PASS=$((PASS + 1)) +else + echo " FAIL check-dockyard.sh should exit 2 when similar-named plugin present (got $rc)" + FAIL=$((FAIL + 1)) +fi + +# Test 6: Registry has dockyard — should exit 0 +echo '{"plugins":{"dockyard@shipwright-marketplace":{}}}' > "$tmpdir/.claude/plugins/installed_plugins.json" +if HOME="$tmpdir" bash "$SHIPWRIGHT/hooks/check-dockyard.sh" >/dev/null 2>&1; then + echo " PASS check-dockyard.sh exits zero when dockyard present" + PASS=$((PASS + 1)) +else + echo " FAIL check-dockyard.sh should exit zero when dockyard present" + FAIL=$((FAIL + 1)) +fi + +# --- Summary --- +echo "" +TOTAL=$((PASS + FAIL)) +echo "validate-structure: $PASS/$TOTAL passed" +if [ "$FAIL" -gt 0 ]; then + exit 1 +fi +exit 0 diff --git a/.claude-plugin/plugin.json b/plugins/shipwright/.claude-plugin/plugin.json similarity index 54% rename from .claude-plugin/plugin.json rename to plugins/shipwright/.claude-plugin/plugin.json index 4192fee..41999ad 100644 --- a/.claude-plugin/plugin.json +++ b/plugins/shipwright/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "shipwright", - "description": "Adaptive agentic development framework for RAI engineering teams", + "description": "Orchestrated agentic development framework — TDD-enforced bug fix workflows with triage, implementation, review, and validation agents", "version": "0.1.0", "author": { "name": "Owais Mohamed", @@ -9,5 +9,5 @@ "homepage": "https://github.com/RelationalAI/shipwright", "repository": "https://github.com/RelationalAI/shipwright", "license": "TBD", - "keywords": ["workflow", "tdd", "debugging", "code-review", "brownfield-analysis"] + "keywords": ["workflow", "tdd", "code-review", "orchestration", "agents"] } diff --git a/plugins/shipwright/commands/feedback.md b/plugins/shipwright/commands/feedback.md new file mode 100644 index 0000000..9a0645e --- /dev/null +++ 
b/plugins/shipwright/commands/feedback.md @@ -0,0 +1,68 @@ +--- +description: File a bug report, feature request, or feedback for the Shipwright plugin +argument-hint: "[optional: describe your bug, feature request, or feedback]" +--- + +# Shipwright Feedback + +You help the user file issues against the Shipwright plugin on the `RelationalAI/shipwright` repository. + +## Detect Input + +Parse `$ARGUMENTS`: + +| Input | Action | +|-------|--------| +| Empty | Ask the user what type of issue (bug / feature / suggestion / general feedback) and gather a description | +| Freeform text | Auto-detect issue type from keywords and draft a title (see below) | + +### Auto-Detection Rules + +- **bug** -- text mentions errors, crashes, broken, failing, wrong, unexpected +- **feature** -- text mentions "add", "support", "would be nice", "wish", "enable" +- **suggestion** -- text mentions "improve", "better", "consider", "could" +- **feedback** -- anything that does not match the above + +## Draft the Issue + +1. Generate a concise title (under 80 characters). +2. Write a body using this template: + +``` +## Description +<user's description, cleaned up> + +## Type +<bug | feature | suggestion | feedback> + +## Plugin +shipwright + +## Steps to Reproduce (bugs only) +<if applicable> + +## Expected vs Actual (bugs only) +<if applicable> +``` + +3. Show the draft to the user and ask for confirmation or edits. + +## Create the Issue + +Once the user confirms, run: + +```bash +gh issue create \ + --repo RelationalAI/shipwright \ + --title "<title>" \ + --body "<body>" \ + --label "plugin:shipwright" +``` + +Print the resulting issue URL so the user can track it. + +## Rules + +- Always add the `plugin:shipwright` label. +- Never create an issue without user confirmation. +- If `gh` CLI is not authenticated, tell the user to run `gh auth login` and stop. 
diff --git a/commands/shipwright.md b/plugins/shipwright/commands/shipwright.md similarity index 93% rename from commands/shipwright.md rename to plugins/shipwright/commands/shipwright.md index 253883b..bea4503 100644 --- a/commands/shipwright.md +++ b/plugins/shipwright/commands/shipwright.md @@ -83,7 +83,7 @@ Execute agents in this exact order. Each agent is an ephemeral subagent — spaw ### Step 1: Triage **Agent prompt:** `internal/agents/triage.md` -**Skills injected:** `skills/brownfield-analysis.md`, `internal/skills/decision-categorization.md` +**Skills injected:** `dockyard:brownfield-analysis` (cross-plugin), `internal/skills/decision-categorization/SKILL.md` **Pass to Triage:** - Parsed input context (bug description, Jira ticket details, or nothing) @@ -102,7 +102,7 @@ Execute agents in this exact order. Each agent is an ephemeral subagent — spaw ### Step 2: Implementer **Agent prompt:** `internal/agents/implementer.md` -**Skills injected:** `internal/skills/tdd.md`, `internal/skills/verification-before-completion.md`, `internal/skills/systematic-debugging.md` +**Skills injected:** `internal/skills/tdd/SKILL.md`, `internal/skills/verification-before-completion/SKILL.md`, `internal/skills/systematic-debugging/SKILL.md` **Pass to Implementer:** - Triage output (bug summary, decisions, key files) @@ -121,7 +121,7 @@ Execute agents in this exact order. Each agent is an ephemeral subagent — spaw ### Step 3: Reviewer **Agent prompt:** `internal/agents/reviewer.md` -**Skills injected:** `internal/skills/anti-rationalization.md` +**Skills injected:** `internal/skills/anti-rationalization/SKILL.md` **Pass to Reviewer:** - Implementer output (root cause, fix, tests, evidence) @@ -142,7 +142,7 @@ Execute agents in this exact order. 
Each agent is an ephemeral subagent — spaw ### Step 4: Validator **Agent prompt:** `internal/agents/validator.md` -**Skills injected:** `internal/skills/verification-before-completion.md`, `internal/skills/anti-rationalization.md` +**Skills injected:** `internal/skills/verification-before-completion/SKILL.md`, `internal/skills/anti-rationalization/SKILL.md` **Pass to Validator:** - Reviewer approval diff --git a/docs/design/shipwright-design-v1.md b/plugins/shipwright/docs/design/shipwright-design-v1.md similarity index 96% rename from docs/design/shipwright-design-v1.md rename to plugins/shipwright/docs/design/shipwright-design-v1.md index 9e23444..7e5153d 100644 --- a/docs/design/shipwright-design-v1.md +++ b/plugins/shipwright/docs/design/shipwright-design-v1.md @@ -26,8 +26,8 @@ Shipwright is a framework that: **Target audience:** RAI engineering teams. **Platform:** Claude Code (designed for future portability). -**Distribution:** RAI plugin marketplace (`RelationalAI/claude-plugins`). -**Entry point:** `/shipwright` (single command — auto-detects resume vs. new workflow). +**Distribution:** RAI plugin marketplace (`RelationalAI/shipwright`). +**Entry point:** `/shipwright:shipwright` (single command — auto-detects resume vs. new workflow). --- @@ -455,10 +455,10 @@ plugins/shipwright/ **Install (beta):** ```bash -/plugin marketplace add RelationalAI/claude-plugins -/plugin install shipwright-beta@rai-claude-plugins +/plugin marketplace add RelationalAI/shipwright +/plugin install shipwright@shipwright-marketplace # restart session -/shipwright +/shipwright:shipwright ``` ### Assessment commands (standalone) @@ -467,11 +467,10 @@ Not every use of Shipwright is a build workflow. 
Sometimes you just want an asse | Command | Agent | Skill | What it does | |---------|-------|-------|-------------| -| `/shipwright:security-review` | Security Assessor | Quick Security | OWASP-style review of current code | -| `/shipwright:security-threat-model` | Security Assessor | Threat Modeling | Full threat model of the repo | -| `/shipwright:code-review` | Reviewer | — | Review code changes (staged or specified files) | -| `/shipwright:pr-review` | Reviewer | — | Review a specific PR | -| `/shipwright:codebase-analyze` | Triage | Brownfield Analysis | Analyze existing codebase: stack, architecture, conventions, concerns | +| `/dockyard:codebase-analyze` | Triage | Brownfield Analysis | Analyze existing codebase: stack, architecture, conventions, concerns | +| `/dockyard:review-and-submit` | — | Code Review + Submit | Review code, auto-fix findings, create draft PR | +| `/dockyard:investigate` | — | Observability | Observability-driven live service investigation | +| `/dockyard:doc-digest` | Doc Digest | — | Walk through any document section by section | These are stateless — no `.workflow/` directory, no recovery layers, no cost tracking. Just the agent prompt + skill, run once, output results. @@ -498,7 +497,7 @@ Branching is decided (task branches → PR to feature → rollup to main). But h - **A) Committed docs are the handoff.** Triage reads committed docs, reconstructs context. - **B) Partially commit .workflow/.** Commit CONTEXT.md and decisions.md. Risk: noisy history. -- **C) Explicit handoff command.** `/shipwright handoff` generates a one-time summary doc. +- **C) Explicit handoff command.** `/shipwright:shipwright handoff` generates a one-time summary doc. --- @@ -521,7 +520,7 @@ Branching is decided (task branches → PR to feature → rollup to main). 
But h | 13 | Regression | All tiers, every time | Non-negotiable | | 14 | Distribution | Plugin marketplace | Versioned, namespaced | | 15 | Tier routing | Triage brainstorms with human | Human has final say | -| 16 | Entry point | `/shipwright` + 5 assessment commands | Auto-detect resume; standalone assessments are stateless | +| 16 | Entry point | `/shipwright:shipwright` + 5 assessment commands | Auto-detect resume; standalone assessments are stateless | | 17 | Tier upgrade | Agents recommend, human decides | Adapts to complexity | | 18 | Decisions | All recorded in log | Audit + recovery | | 19 | Name | Shipwright | Uncommon, craftsmanship | diff --git a/docs/milestones/m1-verification-report.md b/plugins/shipwright/docs/milestones/m1-verification-report.md similarity index 97% rename from docs/milestones/m1-verification-report.md rename to plugins/shipwright/docs/milestones/m1-verification-report.md index 1342b5f..5a0e820 100644 --- a/docs/milestones/m1-verification-report.md +++ b/plugins/shipwright/docs/milestones/m1-verification-report.md @@ -99,10 +99,10 @@ Every requirement in `docs/milestones/m1-tier1-bugfix.md` was checked against th | 55 | `debug` loads systematic-debugging + TDD skills | PASS | | 56 | `debug` follows 4-phase process | PASS | | 57 | `debug` has no Triage/Reviewer/Validator | PASS | -| 58 | `report` creates GitHub issue on RelationalAI/shipwright | PASS | -| 59 | `report` handles no-args (interactive) mode | PASS | -| 60 | `report` handles freeform text (auto-detect type) | PASS | -| 61 | `report` supports labels: bug, feature, suggestion, feedback | PASS | +| 58 | `feedback` creates GitHub issue on RelationalAI/shipwright | PASS | +| 59 | `feedback` handles no-args (interactive) mode | PASS | +| 60 | `feedback` handles freeform text (auto-detect type) | PASS | +| 61 | `feedback` supports labels: bug, feature, suggestion, feedback | PASS | | 62 | All standalone commands: no orchestrator, no recovery, no .workflow/ | PASS | ### 
Orchestrator (14 checks) diff --git a/docs/research/shipwright-ideas-from-beads-gsd-v1.md b/plugins/shipwright/docs/research/shipwright-ideas-from-beads-gsd-v1.md similarity index 100% rename from docs/research/shipwright-ideas-from-beads-gsd-v1.md rename to plugins/shipwright/docs/research/shipwright-ideas-from-beads-gsd-v1.md diff --git a/docs/research/shipwright-vs-others-v1.md b/plugins/shipwright/docs/research/shipwright-vs-others-v1.md similarity index 97% rename from docs/research/shipwright-vs-others-v1.md rename to plugins/shipwright/docs/research/shipwright-vs-others-v1.md index ce91ef4..a94c579 100644 --- a/docs/research/shipwright-vs-others-v1.md +++ b/plugins/shipwright/docs/research/shipwright-vs-others-v1.md @@ -66,7 +66,7 @@ Superpowers is Shipwright's discipline parent — TDD, brainstorming hard-gates, - **Cost visibility.** Superpowers doesn't track token usage. Shipwright logs every subagent call and reports costs. - **Document quality.** Superpowers generates plan files (task lists). Shipwright generates human-readable docs with interactive walkthrough. - **Tiered process.** Superpowers applies all skills equally regardless of task size. Shipwright right-sizes ceremony. -- **Standalone assessments.** Superpowers skills are standalone by nature. Shipwright's full workflow requires the orchestrator, but also offers 5 standalone assessment commands (`security-review`, `security-threat-model`, `code-review`, `pr-review`, `codebase-analyze`) that run without orchestration — stateless, single-shot. +- **Standalone assessments.** Superpowers skills are standalone by nature. Shipwright's full workflow requires the orchestrator. Standalone commands live in the Dockyard plugin: `/dockyard:codebase-analyze`, `/dockyard:doc-digest`, `/dockyard:investigate`, `/dockyard:review-and-submit` — stateless, single-shot. ### What Shipwright loses - **Cross-platform support.** Superpowers works on Claude Code, Cursor, Codex, and OpenCode today. 
Shipwright is Claude Code only (with future portability designed in). diff --git a/plugins/shipwright/hooks/check-dockyard.sh b/plugins/shipwright/hooks/check-dockyard.sh new file mode 100755 index 0000000..30851ba --- /dev/null +++ b/plugins/shipwright/hooks/check-dockyard.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# +# check-dockyard.sh — Verify the dockyard plugin is installed before allowing +# shipwright to start. Exits 2 (hard-block) if missing. +# +set -euo pipefail + +REGISTRY="$HOME/.claude/plugins/installed_plugins.json" + +if [ ! -f "$REGISTRY" ]; then + echo "ERROR: Shipwright requires the 'dockyard' plugin." + echo "Install it with: /plugin install dockyard@shipwright-marketplace" + exit 2 +fi + +if ! grep -q '"dockyard@' "$REGISTRY" 2>/dev/null; then + echo "ERROR: Shipwright requires the 'dockyard' plugin." + echo "Install it with: /plugin install dockyard@shipwright-marketplace" + exit 2 +fi diff --git a/plugins/shipwright/hooks/hooks.json b/plugins/shipwright/hooks/hooks.json new file mode 100644 index 0000000..1d9cb64 --- /dev/null +++ b/plugins/shipwright/hooks/hooks.json @@ -0,0 +1,15 @@ +{ + "hooks": { + "SessionStart": [ + { + "matcher": "startup|resume|clear|compact", + "hooks": [ + { + "type": "command", + "command": "${CLAUDE_PLUGIN_ROOT}/hooks/check-dockyard.sh" + } + ] + } + ] + } +} diff --git a/internal/agents/implementer.md b/plugins/shipwright/internal/agents/implementer.md similarity index 94% rename from internal/agents/implementer.md rename to plugins/shipwright/internal/agents/implementer.md index d889ac9..c156cc8 100644 --- a/internal/agents/implementer.md +++ b/plugins/shipwright/internal/agents/implementer.md @@ -13,9 +13,9 @@ You receive a handoff from the orchestrator containing the bug summary, categori ## Injected Skills -- `internal/skills/tdd.md` -- test-driven development (anti-rationalization embedded) -- `internal/skills/verification-before-completion.md` -- evidence before claims -- 
`internal/skills/systematic-debugging.md` -- 4-phase root cause investigation (anti-rationalization embedded) +- `internal/skills/tdd/SKILL.md` -- test-driven development (anti-rationalization embedded) +- `internal/skills/verification-before-completion/SKILL.md` -- evidence before claims +- `internal/skills/systematic-debugging/SKILL.md` -- 4-phase root cause investigation (anti-rationalization embedded) ## Input from Orchestrator @@ -34,7 +34,7 @@ Read all input thoroughly before starting. If any input is missing or unclear, e ## Phase 1: Root Cause Investigation -**Skill:** `internal/skills/systematic-debugging.md` +**Skill:** `internal/skills/systematic-debugging/SKILL.md` Follow the 4-phase debugging process. Do NOT propose fixes until root cause is understood. @@ -60,7 +60,7 @@ You can state: "The root cause is X because Y, as evidenced by Z." If you cannot ## Phase 2: Write Failing Test -**Skill:** `internal/skills/tdd.md` +**Skill:** `internal/skills/tdd/SKILL.md` Write a test that reproduces the bug. The test must fail before you write any fix code. @@ -105,7 +105,7 @@ Write a test that reproduces the bug. The test must fail before you write any fi ## Phase 4: Verify -**Skill:** `internal/skills/verification-before-completion.md` +**Skill:** `internal/skills/verification-before-completion/SKILL.md` Evidence before claims. Always. diff --git a/internal/agents/reviewer.md b/plugins/shipwright/internal/agents/reviewer.md similarity index 98% rename from internal/agents/reviewer.md rename to plugins/shipwright/internal/agents/reviewer.md index 9033a35..b32eea6 100644 --- a/internal/agents/reviewer.md +++ b/plugins/shipwright/internal/agents/reviewer.md @@ -4,7 +4,7 @@ You are the Reviewer agent for Shipwright. 
You review implementations for correc ## Injected Skills -- `internal/skills/anti-rationalization.md` -- resist shortcuts, require evidence +- `internal/skills/anti-rationalization/SKILL.md` -- resist shortcuts, require evidence ## Input diff --git a/internal/agents/triage.md b/plugins/shipwright/internal/agents/triage.md similarity index 93% rename from internal/agents/triage.md rename to plugins/shipwright/internal/agents/triage.md index 80808d1..c3609ab 100644 --- a/internal/agents/triage.md +++ b/plugins/shipwright/internal/agents/triage.md @@ -7,8 +7,8 @@ You are the Triage agent for Shipwright. You are the first agent in the Tier 1 b ## Injected Skills The following skills are loaded into this agent: -- `skills/brownfield-analysis.md` -- codebase profiling and staleness management -- `internal/skills/decision-categorization.md` -- decision categorization (LOCKED/DEFERRED/DISCRETION) +- `dockyard:brownfield-analysis` (cross-plugin — lives in the dockyard plugin at `skills/brownfield-analysis/SKILL.md`) — codebase profiling and staleness management +- `internal/skills/decision-categorization/SKILL.md` — decision categorization (LOCKED/DEFERRED/DISCRETION) --- @@ -30,7 +30,7 @@ Before investigating the bug, establish baseline understanding of the repository ### 1a. Staleness Check -Run the brownfield staleness check as defined in `skills/brownfield-analysis.md`: +Run the brownfield staleness check as defined in `dockyard:brownfield-analysis` (cross-plugin skill from dockyard): 1. Read `docs/codebase-profile/.last-analyzed`. 2. Compare HEAD against the reference SHA (the more recent of `last_full_sha` and `last_fastpath_sha`). @@ -74,7 +74,7 @@ Brownfield profiles give you the lay of the land. They are a starting point, not - Do not stop at profile summaries. Profiles describe the forest; you need to see the specific trees. - Do not guess at file contents. Read the actual files. 
-- Do not read files on the forbidden list (see `skills/brownfield-analysis.md`, Forbidden Files section). +- Do not read files on the forbidden list (see `dockyard:brownfield-analysis`, Forbidden Files section). --- @@ -111,7 +111,7 @@ Keep this practical. The goal is to surface decisions, not to produce a design d ## Phase 4: Decision Categorization -Use the decision categorization skill from `internal/skills/decision-categorization.md`. +Use the decision categorization skill from `internal/skills/decision-categorization/SKILL.md`. ### Identify Gray Areas @@ -135,7 +135,7 @@ After the brainstorm, categorize every decision: ### Record Decisions -Write all categorized decisions to `.workflow/CONTEXT.md` under a `## Decisions` section, using the format specified in `internal/skills/decision-categorization.md`: +Write all categorized decisions to `.workflow/CONTEXT.md` under a `## Decisions` section, using the format specified in `internal/skills/decision-categorization/SKILL.md`: ```markdown ## Decisions diff --git a/internal/agents/validator.md b/plugins/shipwright/internal/agents/validator.md similarity index 97% rename from internal/agents/validator.md rename to plugins/shipwright/internal/agents/validator.md index 3976ea7..e019005 100644 --- a/internal/agents/validator.md +++ b/plugins/shipwright/internal/agents/validator.md @@ -6,8 +6,8 @@ You run after the Reviewer has approved the implementation. 
Your job is to execu ## Injected Skills -- `internal/skills/verification-before-completion.md` -- evidence before claims -- `internal/skills/anti-rationalization.md` -- resist shortcuts, require evidence +- `internal/skills/verification-before-completion/SKILL.md` -- evidence before claims +- `internal/skills/anti-rationalization/SKILL.md` -- resist shortcuts, require evidence ## Input diff --git a/internal/skills/anti-rationalization/SKILL.md b/plugins/shipwright/internal/skills/anti-rationalization/SKILL.md similarity index 100% rename from internal/skills/anti-rationalization/SKILL.md rename to plugins/shipwright/internal/skills/anti-rationalization/SKILL.md diff --git a/internal/skills/decision-categorization/SKILL.md b/plugins/shipwright/internal/skills/decision-categorization/SKILL.md similarity index 100% rename from internal/skills/decision-categorization/SKILL.md rename to plugins/shipwright/internal/skills/decision-categorization/SKILL.md diff --git a/internal/skills/systematic-debugging/SKILL.md b/plugins/shipwright/internal/skills/systematic-debugging/SKILL.md similarity index 100% rename from internal/skills/systematic-debugging/SKILL.md rename to plugins/shipwright/internal/skills/systematic-debugging/SKILL.md diff --git a/internal/skills/tdd/SKILL.md b/plugins/shipwright/internal/skills/tdd/SKILL.md similarity index 100% rename from internal/skills/tdd/SKILL.md rename to plugins/shipwright/internal/skills/tdd/SKILL.md diff --git a/internal/skills/verification-before-completion/SKILL.md b/plugins/shipwright/internal/skills/verification-before-completion/SKILL.md similarity index 100% rename from internal/skills/verification-before-completion/SKILL.md rename to plugins/shipwright/internal/skills/verification-before-completion/SKILL.md diff --git a/plugins/shipwright/tests/.gitkeep b/plugins/shipwright/tests/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/templates/AGENT_TEMPLATE.md b/templates/AGENT_TEMPLATE.md new file mode 100644 
index 0000000..a72c6ff --- /dev/null +++ b/templates/AGENT_TEMPLATE.md @@ -0,0 +1,13 @@ +# Agent Template + +> Full template definition is tracked in [RAI-47777](https://relationalai.atlassian.net/browse/RAI-47777). + +## Required Structure + +``` +agents/<agent-name>.md +``` + +## Required Format + +Start with a heading describing the agent's role, followed by documentation of its invocation modes, phases, and output format. diff --git a/templates/SKILL_TEMPLATE.md b/templates/SKILL_TEMPLATE.md new file mode 100644 index 0000000..c8656d7 --- /dev/null +++ b/templates/SKILL_TEMPLATE.md @@ -0,0 +1,21 @@ +# Skill Template + +> Full template definition is tracked in [RAI-47777](https://relationalai.atlassian.net/browse/RAI-47777). + +## Required Structure + +``` +skills/<skill-name>/ +└── SKILL.md +``` + +## Required SKILL.md Format + +```yaml +--- +name: skill-name +description: What this skill does (one sentence) +--- +``` + +Followed by the full skill documentation. diff --git a/tests/smoke/validate-agents.sh b/tests/smoke/validate-agents.sh deleted file mode 100644 index ffa7178..0000000 --- a/tests/smoke/validate-agents.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env bash -# -# validate-agents.sh — Verify agent files meet M1 conventions. -# -set -euo pipefail - -REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" -PASS=0 -FAIL=0 - -USER_AGENTS=( - doc-digest.md -) - -INTERNAL_AGENTS=( - triage.md - implementer.md - reviewer.md - validator.md -) - -pass() { echo " PASS $1"; PASS=$((PASS + 1)); } -fail() { echo " FAIL $1"; FAIL=$((FAIL + 1)); } - -echo "=== validate-agents ===" - -# Validate user-facing agents -echo "" -echo "User-facing agents:" -for agent in "${USER_AGENTS[@]}"; do - filepath="$REPO_ROOT/agents/$agent" - - echo "" - echo "$agent:" - - if [ ! 
-s "$filepath" ]; then - fail "$agent is missing or empty" - continue - fi - pass "$agent exists and is non-empty" - - if grep -qi 'you are\|agent' "$filepath"; then - pass "$agent contains role description" - else - fail "$agent missing role description (expected 'You are' or 'agent')" - fi - - if [ "$agent" = "doc-digest.md" ]; then - pass "$agent is self-contained (no skill injection expected)" - else - if grep -q 'skills/' "$filepath"; then - pass "$agent references at least one skill" - else - fail "$agent does not reference any skill" - fi - fi - - if grep -qiE '## (Output|Return|Result|Returning)' "$filepath"; then - pass "$agent contains output/return format section" - else - fail "$agent missing output/return format section (expected ## Output, ## Return, or ## Result heading)" - fi -done - -# Validate internal agents -echo "" -echo "Internal agents:" -for agent in "${INTERNAL_AGENTS[@]}"; do - filepath="$REPO_ROOT/internal/agents/$agent" - echo "" - echo "$agent:" - - # File exists and is not empty - if [ ! 
-s "$filepath" ]; then - fail "$agent is missing or empty" - continue - fi - pass "$agent exists and is non-empty" - - # Contains a role description (first non-empty line after heading should describe the role) - # We check for "You are" or "agent" as a proxy for role description - if grep -qi 'you are\|agent' "$filepath"; then - pass "$agent contains role description" - else - fail "$agent missing role description (expected 'You are' or 'agent')" - fi - - # References at least one skill (except doc-digest which is self-contained) - if [ "$agent" = "doc-digest.md" ]; then - pass "$agent is self-contained (no skill injection expected)" - else - if grep -q 'skills/' "$filepath"; then - pass "$agent references at least one skill" - else - fail "$agent does not reference any skill" - fi - fi - - # Contains output/return format section - # Look for headings or sections about output, return, result - if grep -qiE '## (Output|Return|Result|Returning)' "$filepath"; then - pass "$agent contains output/return format section" - else - fail "$agent missing output/return format section (expected ## Output, ## Return, or ## Result heading)" - fi -done - -echo "" -TOTAL=$((PASS + FAIL)) -echo "validate-agents: $PASS/$TOTAL passed" -if [ "$FAIL" -gt 0 ]; then - exit 1 -fi -exit 0 diff --git a/tests/smoke/validate-commands.sh b/tests/smoke/validate-commands.sh deleted file mode 100644 index 901246e..0000000 --- a/tests/smoke/validate-commands.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash -# -# validate-commands.sh — Verify command files meet M1 conventions. -# -set -euo pipefail - -REPO_ROOT="$(cd "$(dirname "$0")/../.." 
&& pwd)" -PASS=0 -FAIL=0 - -COMMANDS=( - shipwright.md - codebase-analyze.md - doc-digest.md - debug.md - report.md -) - -pass() { echo " PASS $1"; PASS=$((PASS + 1)); } -fail() { echo " FAIL $1"; FAIL=$((FAIL + 1)); } - -echo "=== validate-commands ===" - -for cmd in "${COMMANDS[@]}"; do - filepath="$REPO_ROOT/commands/$cmd" - echo "" - echo "$cmd:" - - # File exists and is not empty - if [ ! -s "$filepath" ]; then - fail "$cmd is missing or empty" - continue - fi - pass "$cmd exists and is non-empty" - - # Contains YAML frontmatter (starts with ---) - if head -1 "$filepath" | grep -q '^---'; then - pass "$cmd has YAML frontmatter opening" - else - fail "$cmd missing YAML frontmatter (first line should be '---')" - fi - - # Contains description: in frontmatter - # Extract frontmatter (between first and second ---) and check for description: - frontmatter=$(sed -n '1,/^---$/{ /^---$/d; p; }' "$filepath" | head -20) - if echo "$frontmatter" | grep -q 'description:'; then - pass "$cmd has description: in frontmatter" - else - fail "$cmd missing description: in frontmatter" - fi - - # Has content beyond frontmatter - # Count lines after the closing --- of frontmatter - body_lines=$(sed '1,/^---$/{ /^---$/!d; }' "$filepath" | sed '1d' | grep -c '[^[:space:]]' || true) - if [ "$body_lines" -gt 0 ]; then - pass "$cmd has content beyond frontmatter" - else - fail "$cmd is empty beyond frontmatter" - fi -done - -echo "" -TOTAL=$((PASS + FAIL)) -echo "validate-commands: $PASS/$TOTAL passed" -if [ "$FAIL" -gt 0 ]; then - exit 1 -fi -exit 0 diff --git a/tests/smoke/validate-skills.sh b/tests/smoke/validate-skills.sh deleted file mode 100644 index 381ab00..0000000 --- a/tests/smoke/validate-skills.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env bash -# -# validate-skills.sh — Verify skill files meet M1 conventions. -# -set -euo pipefail - -REPO_ROOT="$(cd "$(dirname "$0")/../.." 
&& pwd)" -PASS=0 -FAIL=0 - -USER_SKILLS=( - brownfield-analysis - code-review - review-and-submit -) - -# Original Shipwright skills (no external attribution required) -ORIGINAL_SKILLS=(brownfield-analysis code-review review-and-submit) - -is_original() { - local skill="$1" - for s in "${ORIGINAL_SKILLS[@]}"; do - if [ "$s" = "$skill" ]; then return 0; fi - done - return 1 -} - -INTERNAL_SKILLS=( - tdd - verification-before-completion - systematic-debugging - anti-rationalization - decision-categorization -) - -pass() { echo " PASS $1"; PASS=$((PASS + 1)); } -fail() { echo " FAIL $1"; FAIL=$((FAIL + 1)); } - -echo "=== validate-skills ===" - -# Validate user-facing skills -echo "" -echo "User-facing skills:" -for skill in "${USER_SKILLS[@]}"; do - filepath="$REPO_ROOT/skills/$skill/SKILL.md" - - echo "" - echo "$skill:" - - if [ ! -s "$filepath" ]; then - fail "$skill is missing or empty" - continue - fi - pass "$skill exists and is non-empty" - - # Contains attribution header (skip for original skills) - if is_original "$skill"; then - pass "$skill is original (no attribution required)" - elif grep -q '> \*\*Attribution:\*\*' "$filepath"; then - pass "$skill has attribution header" - else - fail "$skill missing attribution header (expected '> **Attribution:**')" - fi - - if grep -qi 'superpowers:' "$filepath"; then - fail "$skill references superpowers: namespace" - else - pass "$skill does not reference superpowers: namespace" - fi - - if grep -q '\.planning/' "$filepath"; then - fail "$skill references .planning/ (GSD internal)" - else - pass "$skill does not reference .planning/" - fi -done - -# Validate internal skills -echo "" -echo "Internal skills:" -for skill in "${INTERNAL_SKILLS[@]}"; do - filepath="$REPO_ROOT/internal/skills/$skill/SKILL.md" - echo "" - echo "$skill:" - - # File exists and is not empty - if [ ! 
-s "$filepath" ]; then - fail "$skill is missing or empty" - continue - fi - pass "$skill exists and is non-empty" - - # No references to superpowers: namespace - if grep -qi 'superpowers:' "$filepath"; then - fail "$skill references superpowers: namespace" - else - pass "$skill does not reference superpowers: namespace" - fi - - # No references to .planning/ (GSD internal) - if grep -q '\.planning/' "$filepath"; then - fail "$skill references .planning/ (GSD internal)" - else - pass "$skill does not reference .planning/" - fi -done - -echo "" -TOTAL=$((PASS + FAIL)) -echo "validate-skills: $PASS/$TOTAL passed" -if [ "$FAIL" -gt 0 ]; then - exit 1 -fi -exit 0 diff --git a/tests/smoke/validate-structure.sh b/tests/smoke/validate-structure.sh deleted file mode 100644 index 737498b..0000000 --- a/tests/smoke/validate-structure.sh +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env bash -# -# validate-structure.sh — Verify all M1 plugin files exist. -# -set -euo pipefail - -REPO_ROOT="$(cd "$(dirname "$0")/../.." 
&& pwd)" -PASS=0 -FAIL=0 - -check() { - local label="$1" - local path="$2" - if [ -e "$path" ]; then - echo " PASS $label" - PASS=$((PASS + 1)) - else - echo " FAIL $label (missing: $path)" - FAIL=$((FAIL + 1)) - fi -} - -echo "=== validate-structure ===" - -# --- User-facing skills (3) --- -echo "" -echo "User-facing skills:" -check "skills/brownfield-analysis/SKILL.md" "$REPO_ROOT/skills/brownfield-analysis/SKILL.md" -check "skills/code-review/SKILL.md" "$REPO_ROOT/skills/code-review/SKILL.md" -check "skills/review-and-submit/SKILL.md" "$REPO_ROOT/skills/review-and-submit/SKILL.md" - -# --- Internal skills (5) --- -echo "" -echo "Internal skills:" -check "internal/skills/tdd/SKILL.md" "$REPO_ROOT/internal/skills/tdd/SKILL.md" -check "internal/skills/verification-before-completion/SKILL.md" "$REPO_ROOT/internal/skills/verification-before-completion/SKILL.md" -check "internal/skills/systematic-debugging/SKILL.md" "$REPO_ROOT/internal/skills/systematic-debugging/SKILL.md" -check "internal/skills/anti-rationalization/SKILL.md" "$REPO_ROOT/internal/skills/anti-rationalization/SKILL.md" -check "internal/skills/decision-categorization/SKILL.md" "$REPO_ROOT/internal/skills/decision-categorization/SKILL.md" - -# --- User-facing agents (1) --- -echo "" -echo "User-facing agents:" -check "agents/doc-digest.md" "$REPO_ROOT/agents/doc-digest.md" - -# --- Internal agents (4) --- -echo "" -echo "Internal agents:" -check "internal/agents/triage.md" "$REPO_ROOT/internal/agents/triage.md" -check "internal/agents/implementer.md" "$REPO_ROOT/internal/agents/implementer.md" -check "internal/agents/reviewer.md" "$REPO_ROOT/internal/agents/reviewer.md" -check "internal/agents/validator.md" "$REPO_ROOT/internal/agents/validator.md" - -# --- Commands (5) --- -echo "" -echo "Commands:" -check "commands/shipwright.md" "$REPO_ROOT/commands/shipwright.md" -check "commands/codebase-analyze.md" "$REPO_ROOT/commands/codebase-analyze.md" -check "commands/doc-digest.md" 
"$REPO_ROOT/commands/doc-digest.md" -check "commands/debug.md" "$REPO_ROOT/commands/debug.md" -check "commands/report.md" "$REPO_ROOT/commands/report.md" - -# --- plugin.json --- -echo "" -echo "Plugin manifest:" -check "plugin.json exists" "$REPO_ROOT/.claude-plugin/plugin.json" - -# Validate plugin.json has required keys -if [ -f "$REPO_ROOT/.claude-plugin/plugin.json" ]; then - for key in name description version author; do - if grep -q "\"$key\"" "$REPO_ROOT/.claude-plugin/plugin.json"; then - echo " PASS plugin.json contains \"$key\"" - PASS=$((PASS + 1)) - else - echo " FAIL plugin.json missing \"$key\"" - FAIL=$((FAIL + 1)) - fi - done -else - echo " SKIP plugin.json key checks (file missing)" - FAIL=$((FAIL + 6)) -fi - -# --- .gitignore includes .workflow/ --- -echo "" -echo "Gitignore:" -if [ -f "$REPO_ROOT/.gitignore" ] && grep -q '\.workflow/' "$REPO_ROOT/.gitignore"; then - echo " PASS .gitignore includes .workflow/" - PASS=$((PASS + 1)) -else - echo " FAIL .gitignore missing .workflow/ entry" - FAIL=$((FAIL + 1)) -fi - -# --- README.md --- -echo "" -echo "Docs:" -check "README.md" "$REPO_ROOT/README.md" - -# --- Summary --- -echo "" -TOTAL=$((PASS + FAIL)) -echo "validate-structure: $PASS/$TOTAL passed" -if [ "$FAIL" -gt 0 ]; then - exit 1 -fi -exit 0