diff --git a/.mcp.json b/.mcp.json index 79f9ddc8..c72d476e 100644 --- a/.mcp.json +++ b/.mcp.json @@ -5,7 +5,9 @@ "args": [ "serve", "--path", - "." + ".", + "--external-runner", + "claude" ] } } diff --git a/src/deepwork/cli/serve.py b/src/deepwork/cli/serve.py index 5625056a..91db64fa 100644 --- a/src/deepwork/cli/serve.py +++ b/src/deepwork/cli/serve.py @@ -64,25 +64,39 @@ def _load_config(project_path: Path) -> dict: default=8000, help="Port for SSE transport (default: 8000)", ) +@click.option( + "--external-runner", + type=click.Choice(["claude"]), + default=None, + help="External runner for quality gate reviews. 'claude' uses Claude CLI subprocess. Default: None (agent self-review).", +) def serve( path: Path, no_quality_gate: bool, transport: str, port: int, + external_runner: str | None, ) -> None: """Start the DeepWork MCP server. Exposes workflow management tools to AI agents via MCP protocol. By default uses stdio transport for local integration with Claude Code. - Quality gate is enabled by default and uses Claude Code to evaluate - step outputs against quality criteria. + Quality gate is enabled by default. Use --external-runner to specify + how quality reviews are executed: + + \b + - No flag (default): Agent self-review via instructions file + - --external-runner claude: Claude CLI subprocess review Examples: - # Start server for current directory + # Start server for current directory (agent self-review) deepwork serve + # Start with Claude CLI as quality gate reviewer + deepwork serve --external-runner claude + # Start with quality gate disabled deepwork serve --no-quality-gate @@ -90,7 +104,7 @@ def serve( deepwork serve --path /path/to/project """ try: - _serve_mcp(path, not no_quality_gate, transport, port) + _serve_mcp(path, not no_quality_gate, transport, port, external_runner) except ServeError as e: console.print(f"[red]Error:[/red] {e}") raise click.Abort() from e @@ -104,6 +118,7 @@ def _serve_mcp( enable_quality_gate: bool, transport: str, port: int, + external_runner: str | None = None, ) -> None: """Start the MCP server. @@ -112,6 +127,8 @@ def _serve_mcp( enable_quality_gate: Whether to enable quality gate evaluation transport: Transport protocol (stdio or sse) port: Port for SSE transport + external_runner: External runner for quality gate reviews. + "claude" uses Claude CLI subprocess. None means agent self-review. Raises: ServeError: If server fails to start @@ -125,6 +142,7 @@ def _serve_mcp( server = create_server( project_root=project_path, enable_quality_gate=enable_quality_gate, + external_runner=external_runner, ) if transport == "stdio": diff --git a/src/deepwork/core/adapters.py b/src/deepwork/core/adapters.py index 9225455f..8bdd8321 100644 --- a/src/deepwork/core/adapters.py +++ b/src/deepwork/core/adapters.py @@ -562,9 +562,10 @@ def register_mcp_server(self, project_path: Path) -> bool: # Build the new MCP server config # Assume deepwork is available in PATH + # Include --external-runner claude so quality gate reviews use Claude CLI subprocess new_server_config = { "command": "deepwork", - "args": ["serve", "--path", "."], + "args": ["serve", "--path", ".", "--external-runner", "claude"], } # Check if already registered with same config diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index 7b749a38..8b055de3 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -58,16 +58,32 @@ class QualityGate: """Evaluates step outputs against quality criteria. 
Uses ClaudeCLI to invoke a review agent that evaluates outputs - and returns structured feedback. + and returns structured feedback. Can also build review instructions + files for agent self-review when no external runner is configured. """ - def __init__(self, cli: ClaudeCLI | None = None): + # Default maximum number of files to include inline in the review payload. + # Beyond this threshold, only file paths are listed. + DEFAULT_MAX_INLINE_FILES = 5 + + def __init__( + self, + cli: ClaudeCLI | None = None, + max_inline_files: int | None = None, + ): """Initialize quality gate. Args: - cli: ClaudeCLI instance. If not provided, a default one is created. + cli: ClaudeCLI instance. If None, evaluate() cannot be called + but instruction-building methods still work. + max_inline_files: Maximum number of files to embed inline in + review payloads. Beyond this, only file paths are listed. + Defaults to DEFAULT_MAX_INLINE_FILES (5). """ - self._cli = cli or ClaudeCLI() + self._cli = cli + self.max_inline_files = ( + max_inline_files if max_inline_files is not None else self.DEFAULT_MAX_INLINE_FILES + ) def _build_instructions( self, @@ -202,7 +218,8 @@ async def _read_file_sections( # WARNING: REVIEW PERFORMANCE IS SENSITIVE TO PAYLOAD SIZE # # The payload builder below sends file contents to the review agent (Claude - # CLI subprocess). Reviews can get REALLY SLOW if the content gets too big: + # CLI subprocess or self-review file). Reviews can get REALLY SLOW if the + # content gets too big: # # - Each file's full content is read and embedded in the prompt # - The review agent must process ALL of this content to evaluate criteria @@ -211,18 +228,18 @@ async def _read_file_sections( # - Per-file reviews (run_each: with type: files) multiply # the problem — each file gets its own review subprocess # - # To mitigate this, when more than MAX_INLINE_FILES files are present, - # the payload switches to a path-listing mode that only shows file paths - # instead of dumping all contents inline. The reviewer can then use its - # own tools to read specific files as needed. + # To mitigate this, when more than self.max_inline_files files are + # present, the payload switches to a path-listing mode that only shows + # file paths instead of dumping all contents inline. The reviewer can + # then use its own tools to read specific files as needed. + # + # max_inline_files is configurable per instance: + # - external_runner="claude": 5 (embed small sets, list large ones) + # - external_runner=None (self-review): 0 (always list paths) # # If you're changing the payload builder, keep payload size in mind. # ========================================================================= - # Maximum number of files to include inline in the review payload. - # Beyond this threshold, only file paths are listed. - MAX_INLINE_FILES = 5 - @staticmethod def _build_path_listing(file_paths: dict[str, str | list[str]]) -> list[str]: """Build a path-only listing for large file sets. 
@@ -262,7 +279,7 @@ async def _build_payload( parts: list[str] = [] total_files = len(self._flatten_output_paths(outputs)) - if total_files > self.MAX_INLINE_FILES: + if total_files > self.max_inline_files: # Too many files — list paths only so the reviewer reads selectively path_lines = self._build_path_listing(outputs) parts.append(f"{SECTION_SEPARATOR} BEGIN OUTPUTS {SECTION_SEPARATOR}") @@ -318,6 +335,112 @@ def _parse_result(self, data: dict[str, Any]) -> QualityGateResult: f"Failed to interpret quality gate result: {e}\nData was: {data}" ) from e + async def build_review_instructions_file( + self, + reviews: list[dict[str, Any]], + outputs: dict[str, str | list[str]], + output_specs: dict[str, str], + project_root: Path, + notes: str | None = None, + ) -> str: + """Build complete review instructions content for writing to a file. + + Used in self-review mode (no external runner) to generate a file that + a subagent can read and follow to evaluate quality criteria. + + Args: + reviews: List of review dicts with run_each, quality_criteria, + and optional additional_review_guidance + outputs: Map of output names to file path(s) + output_specs: Map of output names to their type ("file" or "files") + project_root: Project root path + notes: Optional notes from the agent about work done + + Returns: + Complete review instructions as a string + """ + parts: list[str] = [] + + parts.append("# Quality Review Instructions") + parts.append("") + parts.append( + "You are an editor responsible for reviewing the outputs of a workflow step. " + "Your job is to evaluate whether the outputs meet the specified quality criteria." + ) + parts.append("") + + # Build outputs listing (uses self.max_inline_files to decide inline vs path-only) + payload = await self._build_payload(outputs, project_root) + parts.append(payload) + parts.append("") + + # Build review sections + for i, review in enumerate(reviews, 1): + run_each = review["run_each"] + quality_criteria = review["quality_criteria"] + guidance = review.get("additional_review_guidance") + + if len(reviews) > 1: + scope = "all outputs together" if run_each == "step" else f"output '{run_each}'" + parts.append(f"## Review {i} (scope: {scope})") + else: + parts.append("## Criteria to Evaluate") + parts.append("") + + criteria_list = "\n".join( + f"- **{name}**: {question}" for name, question in quality_criteria.items() + ) + parts.append(criteria_list) + parts.append("") + + if run_each != "step" and run_each in outputs: + output_type = output_specs.get(run_each, "file") + output_value = outputs[run_each] + if output_type == "files" and isinstance(output_value, list): + parts.append( + f"Evaluate the above criteria for **each file** in output '{run_each}':" + ) + for fp in output_value: + parts.append(f"- {fp}") + parts.append("") + + if guidance: + parts.append("### Additional Context") + parts.append("") + parts.append(guidance) + parts.append("") + + if notes: + parts.append("## Author Notes") + parts.append("") + parts.append(notes) + parts.append("") + + parts.append("## Guidelines") + parts.append("") + parts.append("- Be strict but fair") + parts.append( + "- Apply criteria pragmatically. If a criterion is not applicable " + "to this step's purpose, pass it." + ) + parts.append("- Only mark a criterion as passed if it is clearly met or not applicable.") + parts.append("- Provide specific, actionable feedback for failed criteria.") + parts.append( + "- The overall review should PASS only if ALL criteria across all reviews pass." 
+ ) + parts.append("") + parts.append("## Your Task") + parts.append("") + parts.append("1. Read each output file listed above") + parts.append("2. Evaluate every criterion in every review section") + parts.append("3. For each criterion, report **PASS** or **FAIL** with specific feedback") + parts.append("4. At the end, clearly state the overall result: **PASSED** or **FAILED**") + parts.append( + "5. If any criteria failed, provide clear actionable feedback on what needs to change" + ) + + return "\n".join(parts) + @staticmethod def compute_timeout(file_count: int) -> int: """Compute dynamic timeout based on number of files. @@ -368,6 +491,12 @@ async def evaluate( criteria_results=[], ) + if self._cli is None: + raise QualityGateError( + "Cannot evaluate quality gate without a CLI runner. " + "Use build_review_instructions_file() for self-review mode." + ) + instructions = self._build_instructions( quality_criteria, notes=notes, diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py index 4a227ed1..ccf1de6e 100644 --- a/src/deepwork/mcp/server.py +++ b/src/deepwork/mcp/server.py @@ -38,6 +38,7 @@ def create_server( enable_quality_gate: bool = True, quality_gate_timeout: int = 120, quality_gate_max_attempts: int = 3, + external_runner: str | None = None, ) -> FastMCP: """Create and configure the MCP server. @@ -46,6 +47,9 @@ def create_server( enable_quality_gate: Whether to enable quality gate evaluation (default: True) quality_gate_timeout: Timeout in seconds for quality gate (default: 120) quality_gate_max_attempts: Max attempts before failing quality gate (default: 3) + external_runner: External runner for quality gate reviews. + "claude" uses Claude CLI subprocess. None means agent self-review + via instructions file. (default: None) Returns: Configured FastMCP server instance @@ -57,14 +61,20 @@ def create_server( quality_gate: QualityGate | None = None if enable_quality_gate: - cli = ClaudeCLI(timeout=quality_gate_timeout) - quality_gate = QualityGate(cli=cli) + if external_runner == "claude": + # Claude CLI subprocess mode: embed up to 5 files inline + cli = ClaudeCLI(timeout=quality_gate_timeout) + quality_gate = QualityGate(cli=cli, max_inline_files=5) + else: + # Self-review mode: no CLI, always reference files by path (0 inline) + quality_gate = QualityGate(cli=None, max_inline_files=0) tools = WorkflowTools( project_root=project_path, state_manager=state_manager, quality_gate=quality_gate, max_quality_attempts=quality_gate_max_attempts, + external_runner=external_runner, ) # Create MCP server diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index 6bc5c046..cddcf80d 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -12,6 +12,8 @@ from pathlib import Path from typing import TYPE_CHECKING +import aiofiles + from deepwork.core.parser import ( JobDefinition, OutputSpec, @@ -57,6 +59,7 @@ def __init__( state_manager: StateManager, quality_gate: QualityGate | None = None, max_quality_attempts: int = 3, + external_runner: str | None = None, ): """Initialize workflow tools. @@ -65,12 +68,15 @@ def __init__( state_manager: State manager instance quality_gate: Optional quality gate for step validation max_quality_attempts: Maximum attempts before failing quality gate + external_runner: External runner for quality gate reviews. + "claude" uses Claude CLI subprocess. None means agent self-review. 
""" self.project_root = project_root self.jobs_dir = project_root / ".deepwork" / "jobs" self.state_manager = state_manager self.quality_gate = quality_gate self.max_quality_attempts = max_quality_attempts + self.external_runner = external_runner def _load_all_jobs(self) -> list[JobDefinition]: """Load all job definitions from the jobs directory. @@ -391,45 +397,89 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp and current_step.reviews and not input_data.quality_review_override_reason ): - attempts = await self.state_manager.record_quality_attempt( - current_step_id, session_id=sid - ) - - # Build output specs map for evaluate_reviews + # Build review dicts and output specs used by both paths + review_dicts = [ + { + "run_each": r.run_each, + "quality_criteria": r.quality_criteria, + "additional_review_guidance": r.additional_review_guidance, + } + for r in current_step.reviews + ] output_specs = {out.name: out.type for out in current_step.outputs} - failed_reviews = await self.quality_gate.evaluate_reviews( - reviews=[ - { - "run_each": r.run_each, - "quality_criteria": r.quality_criteria, - "additional_review_guidance": r.additional_review_guidance, - } - for r in current_step.reviews - ], - outputs=input_data.outputs, - output_specs=output_specs, - project_root=self.project_root, - notes=input_data.notes, - ) + if self.external_runner is None: + # Self-review mode: build instructions file and return guidance + # to the agent to verify its own work via a subagent. + review_content = await self.quality_gate.build_review_instructions_file( + reviews=review_dicts, + outputs=input_data.outputs, + output_specs=output_specs, + project_root=self.project_root, + notes=input_data.notes, + ) - if failed_reviews: - # Check max attempts - if attempts >= self.max_quality_attempts: - feedback_parts = [r.feedback for r in failed_reviews] - raise ToolError( - f"Quality gate failed after {self.max_quality_attempts} attempts. " - f"Feedback: {'; '.join(feedback_parts)}" - ) + # Write instructions to .deepwork/tmp/ + tmp_dir = self.project_root / ".deepwork" / "tmp" + tmp_dir.mkdir(parents=True, exist_ok=True) + review_filename = f"quality_review_{sid}_{current_step_id}.md" + review_file_path = tmp_dir / review_filename + async with aiofiles.open(review_file_path, "w", encoding="utf-8") as f: + await f.write(review_content) + + relative_path = f".deepwork/tmp/{review_filename}" + feedback = ( + f"Quality review required. Review instructions have been written to: " + f"{relative_path}\n\n" + f"Verify the quality of your work:\n" + f'1. Spawn a subagent with the prompt: "Read the file at ' + f"{relative_path} and follow the instructions in it. " + f"Review the referenced output files and evaluate them against " + f'the criteria specified. Report your detailed findings."\n' + f"2. Review the subagent's findings\n" + f"3. Fix any issues identified by the subagent\n" + f"4. Repeat steps 1-3 until all criteria pass\n" + f"5. Once all criteria pass, call finished_step again with " + f"quality_review_override_reason set to describe the " + f"review outcome (e.g. 
'Self-review passed: all criteria met')" + ) - # Return needs_work status - combined_feedback = "; ".join(r.feedback for r in failed_reviews) return FinishedStepResponse( status=StepStatus.NEEDS_WORK, - feedback=combined_feedback, - failed_reviews=failed_reviews, + feedback=feedback, stack=self.state_manager.get_stack(), ) + else: + # External runner mode: use quality gate subprocess evaluation + attempts = await self.state_manager.record_quality_attempt( + current_step_id, session_id=sid + ) + + failed_reviews = await self.quality_gate.evaluate_reviews( + reviews=review_dicts, + outputs=input_data.outputs, + output_specs=output_specs, + project_root=self.project_root, + notes=input_data.notes, + ) + + if failed_reviews: + # Check max attempts + if attempts >= self.max_quality_attempts: + feedback_parts = [r.feedback for r in failed_reviews] + raise ToolError( + f"Quality gate failed after {self.max_quality_attempts} attempts. " + f"Feedback: {'; '.join(feedback_parts)}" + ) + + # Return needs_work status + combined_feedback = "; ".join(r.feedback for r in failed_reviews) + return FinishedStepResponse( + status=StepStatus.NEEDS_WORK, + feedback=combined_feedback, + failed_reviews=failed_reviews, + stack=self.state_manager.get_stack(), + ) # Mark step as completed await self.state_manager.complete_step( diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py index c6ab3c43..b2c56c6a 100644 --- a/tests/unit/mcp/test_quality_gate.py +++ b/tests/unit/mcp/test_quality_gate.py @@ -46,10 +46,11 @@ def output_file(project_root: Path) -> Path: class TestQualityGate: """Tests for QualityGate class.""" - def test_init_default_cli(self) -> None: - """Test QualityGate creates a default ClaudeCLI if none provided.""" + def test_init_no_cli(self) -> None: + """Test QualityGate with no CLI provided has _cli=None and default max_inline_files.""" gate = QualityGate() - assert isinstance(gate._cli, ClaudeCLI) + assert gate._cli is None + assert gate.max_inline_files == QualityGate.DEFAULT_MAX_INLINE_FILES def test_init_custom_cli(self, mock_cli: ClaudeCLI) -> None: """Test QualityGate uses provided ClaudeCLI.""" @@ -509,7 +510,7 @@ async def test_per_file_review_passes_guidance_to_each( class TestBuildPayloadLargeFileSet: - """Tests for _build_payload behavior when file count exceeds MAX_INLINE_FILES.""" + """Tests for _build_payload behavior when file count exceeds max_inline_files.""" async def test_payload_lists_paths_when_over_threshold( self, quality_gate: QualityGate, project_root: Path @@ -756,3 +757,299 @@ async def test_mock_records_none_guidance_when_omitted(self, project_root: Path) ) assert gate.evaluations[0]["additional_review_guidance"] is None + + +class TestConfigurableMaxInlineFiles: + """Tests for configurable max_inline_files on QualityGate.""" + + def test_default_max_inline_files(self) -> None: + """Test QualityGate defaults to DEFAULT_MAX_INLINE_FILES.""" + gate = QualityGate() + assert gate.max_inline_files == QualityGate.DEFAULT_MAX_INLINE_FILES + assert gate.max_inline_files == 5 + + def test_custom_max_inline_files(self) -> None: + """Test QualityGate respects explicit max_inline_files.""" + gate = QualityGate(max_inline_files=10) + assert gate.max_inline_files == 10 + + def test_zero_max_inline_files(self) -> None: + """Test QualityGate with max_inline_files=0 always lists paths.""" + gate = QualityGate(max_inline_files=0) + assert gate.max_inline_files == 0 + + def test_max_inline_files_none_uses_default(self) -> None: + """Test that passing None 
explicitly uses the default.""" + gate = QualityGate(max_inline_files=None) + assert gate.max_inline_files == QualityGate.DEFAULT_MAX_INLINE_FILES + + def test_cli_and_max_inline_files_independent(self, mock_cli: ClaudeCLI) -> None: + """Test that cli and max_inline_files are independent parameters.""" + gate = QualityGate(cli=mock_cli, max_inline_files=3) + assert gate._cli is mock_cli + assert gate.max_inline_files == 3 + + async def test_zero_max_inline_always_lists_paths(self, project_root: Path) -> None: + """Test that max_inline_files=0 uses path listing even for 1 file.""" + gate = QualityGate(max_inline_files=0) + (project_root / "single.md").write_text("Single file content") + + payload = await gate._build_payload( + outputs={"report": "single.md"}, + project_root=project_root, + ) + + assert "too many to include inline" in payload + assert "single.md" in payload + assert "Single file content" not in payload + + async def test_high_max_inline_embeds_many_files(self, project_root: Path) -> None: + """Test that a high max_inline_files embeds content for many files.""" + gate = QualityGate(max_inline_files=100) + for i in range(10): + (project_root / f"f{i}.md").write_text(f"Embedded content {i}") + + payload = await gate._build_payload( + outputs={"files": [f"f{i}.md" for i in range(10)]}, + project_root=project_root, + ) + + assert "too many to include inline" not in payload + for i in range(10): + assert f"Embedded content {i}" in payload + + +class TestEvaluateWithoutCli: + """Tests that evaluate() raises when no CLI is configured.""" + + async def test_evaluate_raises_without_cli(self, project_root: Path) -> None: + """Test that evaluate raises QualityGateError when _cli is None.""" + gate = QualityGate(cli=None) + (project_root / "output.md").write_text("content") + + with pytest.raises(QualityGateError, match="Cannot evaluate.*without a CLI runner"): + await gate.evaluate( + quality_criteria={"Valid": "Is it valid?"}, + outputs={"report": "output.md"}, + project_root=project_root, + ) + + async def test_evaluate_no_criteria_still_passes_without_cli(self, project_root: Path) -> None: + """Test that empty criteria auto-passes even without CLI.""" + gate = QualityGate(cli=None) + + result = await gate.evaluate( + quality_criteria={}, + outputs={"report": "output.md"}, + project_root=project_root, + ) + + assert result.passed is True + assert "auto-passing" in result.feedback.lower() + + +class TestBuildReviewInstructionsFile: + """Tests for QualityGate.build_review_instructions_file method.""" + + async def test_basic_structure(self, project_root: Path) -> None: + """Test that the instructions file has the expected structure.""" + gate = QualityGate(max_inline_files=0) + (project_root / "output.md").write_text("content") + + content = await gate.build_review_instructions_file( + reviews=[ + { + "run_each": "step", + "quality_criteria": {"Complete": "Is it complete?"}, + } + ], + outputs={"report": "output.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + + assert "# Quality Review Instructions" in content + assert "editor" in content.lower() + assert "BEGIN OUTPUTS" in content + assert "END OUTPUTS" in content + assert "Complete" in content + assert "Is it complete?" 
in content + assert "## Guidelines" in content + assert "## Your Task" in content + assert "PASS" in content + assert "FAIL" in content + + async def test_contains_all_criteria(self, project_root: Path) -> None: + """Test that all criteria from all reviews appear in the file.""" + gate = QualityGate(max_inline_files=0) + (project_root / "out.md").write_text("x") + + content = await gate.build_review_instructions_file( + reviews=[ + { + "run_each": "step", + "quality_criteria": { + "Accuracy": "Are the facts correct?", + "Completeness": "Is all data present?", + }, + } + ], + outputs={"report": "out.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + + assert "**Accuracy**" in content + assert "Are the facts correct?" in content + assert "**Completeness**" in content + assert "Is all data present?" in content + + async def test_multiple_reviews_numbered(self, project_root: Path) -> None: + """Test that multiple reviews get numbered sections.""" + gate = QualityGate(max_inline_files=0) + (project_root / "out.md").write_text("x") + + content = await gate.build_review_instructions_file( + reviews=[ + { + "run_each": "step", + "quality_criteria": {"First": "First check?"}, + }, + { + "run_each": "report", + "quality_criteria": {"Second": "Second check?"}, + }, + ], + outputs={"report": "out.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + + assert "## Review 1" in content + assert "## Review 2" in content + assert "scope: all outputs together" in content + assert "scope: output 'report'" in content + + async def test_single_review_not_numbered(self, project_root: Path) -> None: + """Test that a single review uses 'Criteria to Evaluate' heading.""" + gate = QualityGate(max_inline_files=0) + (project_root / "out.md").write_text("x") + + content = await gate.build_review_instructions_file( + reviews=[ + { + "run_each": "step", + "quality_criteria": {"Only": "Only check?"}, + } + ], + outputs={"report": "out.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + + assert "## Criteria to Evaluate" in content + assert "Review 1" not in content + + async def test_includes_author_notes(self, project_root: Path) -> None: + """Test that notes are included when provided.""" + gate = QualityGate(max_inline_files=0) + (project_root / "out.md").write_text("x") + + content = await gate.build_review_instructions_file( + reviews=[ + { + "run_each": "step", + "quality_criteria": {"Check": "Is it ok?"}, + } + ], + outputs={"report": "out.md"}, + output_specs={"report": "file"}, + project_root=project_root, + notes="I focused on section 3 the most.", + ) + + assert "## Author Notes" in content + assert "I focused on section 3 the most." 
in content + + async def test_excludes_notes_when_none(self, project_root: Path) -> None: + """Test that notes section is absent when not provided.""" + gate = QualityGate(max_inline_files=0) + (project_root / "out.md").write_text("x") + + content = await gate.build_review_instructions_file( + reviews=[ + { + "run_each": "step", + "quality_criteria": {"Check": "Is it ok?"}, + } + ], + outputs={"report": "out.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + + assert "## Author Notes" not in content + + async def test_includes_guidance(self, project_root: Path) -> None: + """Test that additional_review_guidance is included.""" + gate = QualityGate(max_inline_files=0) + (project_root / "out.md").write_text("x") + + content = await gate.build_review_instructions_file( + reviews=[ + { + "run_each": "step", + "quality_criteria": {"Check": "Is it ok?"}, + "additional_review_guidance": "Also read config.yml for context.", + } + ], + outputs={"report": "out.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + + assert "### Additional Context" in content + assert "Also read config.yml for context." in content + + async def test_per_file_review_lists_files(self, project_root: Path) -> None: + """Test that per-file reviews list each file to evaluate.""" + gate = QualityGate(max_inline_files=0) + (project_root / "a.md").write_text("x") + (project_root / "b.md").write_text("x") + + content = await gate.build_review_instructions_file( + reviews=[ + { + "run_each": "pages", + "quality_criteria": {"Valid": "Is it valid?"}, + } + ], + outputs={"pages": ["a.md", "b.md"]}, + output_specs={"pages": "files"}, + project_root=project_root, + ) + + assert "each file" in content.lower() + assert "a.md" in content + assert "b.md" in content + + async def test_output_paths_listed_not_inlined_at_zero(self, project_root: Path) -> None: + """Test that with max_inline_files=0, file contents are NOT embedded.""" + gate = QualityGate(max_inline_files=0) + (project_root / "report.md").write_text("SECRET_CONTENT_MARKER") + + content = await gate.build_review_instructions_file( + reviews=[ + { + "run_each": "step", + "quality_criteria": {"Check": "Is it ok?"}, + } + ], + outputs={"report": "report.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + + assert "report.md" in content + assert "SECRET_CONTENT_MARKER" not in content + assert "too many to include inline" in content diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py index f5b7adce..2cdd26c5 100644 --- a/tests/unit/mcp/test_tools.py +++ b/tests/unit/mcp/test_tools.py @@ -104,6 +104,7 @@ def tools_with_quality(project_root: Path, state_manager: StateManager) -> Workf project_root=project_root, state_manager=state_manager, quality_gate=MockQualityGate(should_pass=True), + external_runner="claude", ) @@ -347,6 +348,7 @@ async def test_finished_step_with_quality_gate_fail( project_root=project_root, state_manager=state_manager, quality_gate=MockQualityGate(should_pass=False, feedback="Needs improvement"), + external_runner="claude", ) # Start workflow @@ -375,6 +377,7 @@ async def test_finished_step_quality_gate_max_attempts( project_root=project_root, state_manager=state_manager, quality_gate=MockQualityGate(should_pass=False, feedback="Always fails"), + external_runner="claude", ) # Start workflow @@ -409,6 +412,7 @@ async def test_finished_step_quality_gate_override( project_root=project_root, state_manager=state_manager, quality_gate=failing_gate, + 
external_runner="claude", ) # Start workflow @@ -1046,6 +1050,7 @@ async def test_quality_reviewer_receives_only_current_step_outputs( project_root=project_root, state_manager=state_manager, quality_gate=mock_gate, + external_runner="claude", ) # Start workflow @@ -1129,6 +1134,7 @@ async def test_additional_review_guidance_reaches_reviewer( project_root=project_root, state_manager=state_manager, quality_gate=mock_gate, + external_runner="claude", ) await tools.start_workflow( @@ -1378,3 +1384,337 @@ async def test_abort_workflow_with_session_id( assert tools.state_manager.get_stack_depth() == 1 assert tools.state_manager.get_active_session() is not None assert tools.state_manager.get_active_session().session_id == session_b_id + + +class TestExternalRunnerSelfReview: + """Tests for self-review mode (external_runner=None) in finished_step.""" + + @pytest.fixture + def tools_self_review(self, project_root: Path, state_manager: StateManager) -> WorkflowTools: + """Create WorkflowTools with quality gate but no external runner (self-review mode).""" + from deepwork.mcp.quality_gate import QualityGate + + return WorkflowTools( + project_root=project_root, + state_manager=state_manager, + quality_gate=QualityGate(cli=None, max_inline_files=0), + external_runner=None, + ) + + async def test_self_review_returns_needs_work( + self, tools_self_review: WorkflowTools, project_root: Path + ) -> None: + """Test that self-review mode returns NEEDS_WORK with instructions.""" + await tools_self_review.start_workflow( + StartWorkflowInput(goal="Test", job_name="test_job", workflow_name="main") + ) + (project_root / "output1.md").write_text("Some output") + + response = await tools_self_review.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) + + assert response.status == StepStatus.NEEDS_WORK + assert response.failed_reviews is None # No actual review results yet + + async def test_self_review_feedback_contains_instructions( + self, tools_self_review: WorkflowTools, project_root: Path + ) -> None: + """Test that feedback contains subagent and override instructions.""" + await tools_self_review.start_workflow( + StartWorkflowInput(goal="Test", job_name="test_job", workflow_name="main") + ) + (project_root / "output1.md").write_text("Some output") + + response = await tools_self_review.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) + + assert "Quality review required" in response.feedback + assert "subagent" in response.feedback.lower() + assert "quality_review_override_reason" in response.feedback + assert ".deepwork/tmp/quality_review_" in response.feedback + + async def test_self_review_writes_instructions_file( + self, tools_self_review: WorkflowTools, project_root: Path + ) -> None: + """Test that an instructions file is written to .deepwork/tmp/.""" + await tools_self_review.start_workflow( + StartWorkflowInput(goal="Test", job_name="test_job", workflow_name="main") + ) + (project_root / "output1.md").write_text("Some output") + + await tools_self_review.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) + + review_files = list((project_root / ".deepwork" / "tmp").glob("quality_review_*.md")) + assert len(review_files) == 1 + + async def test_self_review_file_contains_criteria( + self, tools_self_review: WorkflowTools, project_root: Path + ) -> None: + """Test that the instructions file contains the quality criteria from the job.""" + await tools_self_review.start_workflow( + StartWorkflowInput(goal="Test", 
job_name="test_job", workflow_name="main") + ) + (project_root / "output1.md").write_text("Some output") + + await tools_self_review.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) + + review_files = list((project_root / ".deepwork" / "tmp").glob("quality_review_*.md")) + content = review_files[0].read_text() + + # step1 has review criteria "Output Valid": "Is the output valid?" + assert "Output Valid" in content + assert "Is the output valid?" in content + + async def test_self_review_file_references_outputs_not_inline( + self, tools_self_review: WorkflowTools, project_root: Path + ) -> None: + """Test that the instructions file lists output paths, not inline content.""" + await tools_self_review.start_workflow( + StartWorkflowInput(goal="Test", job_name="test_job", workflow_name="main") + ) + (project_root / "output1.md").write_text("UNIQUE_CONTENT_MARKER_12345") + + await tools_self_review.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) + + review_files = list((project_root / ".deepwork" / "tmp").glob("quality_review_*.md")) + content = review_files[0].read_text() + + assert "output1.md" in content + assert "UNIQUE_CONTENT_MARKER_12345" not in content + + async def test_self_review_file_named_with_session_and_step( + self, tools_self_review: WorkflowTools, project_root: Path + ) -> None: + """Test that review file name includes session and step IDs.""" + resp = await tools_self_review.start_workflow( + StartWorkflowInput(goal="Test", job_name="test_job", workflow_name="main") + ) + session_id = resp.begin_step.session_id + (project_root / "output1.md").write_text("output") + + await tools_self_review.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) + + expected_file = project_root / ".deepwork" / "tmp" / f"quality_review_{session_id}_step1.md" + assert expected_file.exists() + + async def test_self_review_then_override_completes_workflow( + self, tools_self_review: WorkflowTools, project_root: Path + ) -> None: + """Test that calling finished_step with override after self-review advances the workflow.""" + await tools_self_review.start_workflow( + StartWorkflowInput(goal="Test", job_name="test_job", workflow_name="main") + ) + (project_root / "output1.md").write_text("output") + + # First call: self-review + resp1 = await tools_self_review.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) + assert resp1.status == StepStatus.NEEDS_WORK + + # Second call: override, should advance to step2 + resp2 = await tools_self_review.finished_step( + FinishedStepInput( + outputs={"output1.md": "output1.md"}, + quality_review_override_reason="Self-review passed: all criteria met", + ) + ) + assert resp2.status == StepStatus.NEXT_STEP + assert resp2.begin_step.step_id == "step2" + + async def test_self_review_skipped_for_steps_without_reviews( + self, tools_self_review: WorkflowTools, project_root: Path + ) -> None: + """Test that steps without reviews skip self-review entirely.""" + await tools_self_review.start_workflow( + StartWorkflowInput(goal="Test", job_name="test_job", workflow_name="main") + ) + (project_root / "output1.md").write_text("output") + + # Override step1 to advance to step2 (which has no reviews) + await tools_self_review.finished_step( + FinishedStepInput( + outputs={"output1.md": "output1.md"}, + quality_review_override_reason="Skip", + ) + ) + + # step2 has no reviews, so it should complete without self-review + (project_root / "output2.md").write_text("step2 output") 
+ resp = await tools_self_review.finished_step( + FinishedStepInput(outputs={"output2.md": "output2.md"}) + ) + assert resp.status == StepStatus.WORKFLOW_COMPLETE + + async def test_self_review_includes_notes_in_file( + self, tools_self_review: WorkflowTools, project_root: Path + ) -> None: + """Test that agent notes are included in the review instructions file.""" + await tools_self_review.start_workflow( + StartWorkflowInput(goal="Test", job_name="test_job", workflow_name="main") + ) + (project_root / "output1.md").write_text("output") + + await tools_self_review.finished_step( + FinishedStepInput( + outputs={"output1.md": "output1.md"}, + notes="I used the XYZ library for this step.", + ) + ) + + review_files = list((project_root / ".deepwork" / "tmp").glob("quality_review_*.md")) + content = review_files[0].read_text() + assert "I used the XYZ library for this step." in content + + +class TestExternalRunnerClaude: + """Tests that external_runner='claude' uses subprocess evaluation (existing behavior).""" + + async def test_claude_runner_calls_quality_gate_evaluate( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that claude runner mode invokes evaluate_reviews on the quality gate.""" + mock_gate = MockQualityGate(should_pass=True) + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + quality_gate=mock_gate, + external_runner="claude", + ) + + await tools.start_workflow( + StartWorkflowInput(goal="Test", job_name="test_job", workflow_name="main") + ) + (project_root / "output1.md").write_text("output") + + response = await tools.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) + + # Should have called evaluate_reviews and advanced + assert response.status == StepStatus.NEXT_STEP + assert len(mock_gate.evaluations) > 0 + + async def test_claude_runner_does_not_write_instructions_file( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that claude runner mode does NOT write an instructions file.""" + mock_gate = MockQualityGate(should_pass=True) + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + quality_gate=mock_gate, + external_runner="claude", + ) + + await tools.start_workflow( + StartWorkflowInput(goal="Test", job_name="test_job", workflow_name="main") + ) + (project_root / "output1.md").write_text("output") + + await tools.finished_step(FinishedStepInput(outputs={"output1.md": "output1.md"})) + + review_files = list((project_root / ".deepwork" / "tmp").glob("quality_review_*.md")) + assert len(review_files) == 0 + + async def test_claude_runner_failing_gate_returns_feedback( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that claude runner with failing gate returns NEEDS_WORK with review feedback.""" + mock_gate = MockQualityGate(should_pass=False, feedback="Missing detail in section 2") + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + quality_gate=mock_gate, + external_runner="claude", + ) + + await tools.start_workflow( + StartWorkflowInput(goal="Test", job_name="test_job", workflow_name="main") + ) + (project_root / "output1.md").write_text("output") + + response = await tools.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) + + assert response.status == StepStatus.NEEDS_WORK + assert response.feedback == "Missing detail in section 2" + assert response.failed_reviews is not None + assert len(response.failed_reviews) == 1 + + async 
def test_claude_runner_records_quality_attempts( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that claude runner mode tracks quality attempt count.""" + mock_gate = MockQualityGate(should_pass=False, feedback="Fail") + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + quality_gate=mock_gate, + external_runner="claude", + ) + + await tools.start_workflow( + StartWorkflowInput(goal="Test", job_name="test_job", workflow_name="main") + ) + (project_root / "output1.md").write_text("output") + + # First two attempts: NEEDS_WORK + for _ in range(2): + resp = await tools.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) + assert resp.status == StepStatus.NEEDS_WORK + + # Third attempt: raises ToolError + with pytest.raises(ToolError, match="Quality gate failed after.*attempts"): + await tools.finished_step(FinishedStepInput(outputs={"output1.md": "output1.md"})) + + +class TestExternalRunnerInit: + """Tests for external_runner parameter on WorkflowTools initialization.""" + + def test_default_external_runner_is_none( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that external_runner defaults to None.""" + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + ) + assert tools.external_runner is None + + def test_explicit_external_runner( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that external_runner is stored correctly.""" + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + external_runner="claude", + ) + assert tools.external_runner == "claude" + + def test_no_quality_gate_no_external_runner_skips_review( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that without quality gate, external_runner is irrelevant.""" + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + quality_gate=None, + external_runner=None, + ) + assert tools.quality_gate is None + assert tools.external_runner is None diff --git a/tests/unit/test_adapters.py b/tests/unit/test_adapters.py new file mode 100644 index 00000000..49c1e5bd --- /dev/null +++ b/tests/unit/test_adapters.py @@ -0,0 +1,112 @@ +"""Tests for adapter MCP server registration.""" + +import json +from pathlib import Path + +import pytest + +from deepwork.core.adapters import ClaudeAdapter + + +@pytest.fixture +def project_root(tmp_path: Path) -> Path: + """Create a temporary project root with .claude dir.""" + claude_dir = tmp_path / ".claude" + claude_dir.mkdir() + return tmp_path + + +class TestClaudeAdapterMCPRegistration: + """Tests for ClaudeAdapter.register_mcp_server.""" + + def test_register_creates_mcp_json(self, project_root: Path) -> None: + """Test that register_mcp_server creates .mcp.json.""" + adapter = ClaudeAdapter(project_root=project_root) + result = adapter.register_mcp_server(project_root) + + assert result is True + assert (project_root / ".mcp.json").exists() + + def test_register_includes_external_runner_claude(self, project_root: Path) -> None: + """Test that .mcp.json args include --external-runner claude.""" + adapter = ClaudeAdapter(project_root=project_root) + adapter.register_mcp_server(project_root) + + config = json.loads((project_root / ".mcp.json").read_text()) + args = config["mcpServers"]["deepwork"]["args"] + + assert "--external-runner" in args + idx = args.index("--external-runner") + assert args[idx + 1] == "claude" + + def 
test_register_full_args(self, project_root: Path) -> None: + """Test the complete args list in .mcp.json.""" + adapter = ClaudeAdapter(project_root=project_root) + adapter.register_mcp_server(project_root) + + config = json.loads((project_root / ".mcp.json").read_text()) + args = config["mcpServers"]["deepwork"]["args"] + + assert args == ["serve", "--path", ".", "--external-runner", "claude"] + + def test_register_command_is_deepwork(self, project_root: Path) -> None: + """Test that the command in .mcp.json is 'deepwork'.""" + adapter = ClaudeAdapter(project_root=project_root) + adapter.register_mcp_server(project_root) + + config = json.loads((project_root / ".mcp.json").read_text()) + assert config["mcpServers"]["deepwork"]["command"] == "deepwork" + + def test_register_is_idempotent(self, project_root: Path) -> None: + """Test that registering twice with same config returns False.""" + adapter = ClaudeAdapter(project_root=project_root) + + assert adapter.register_mcp_server(project_root) is True + assert adapter.register_mcp_server(project_root) is False + + def test_register_updates_old_config(self, project_root: Path) -> None: + """Test that registering updates an existing .mcp.json with old args.""" + # Write an old-style config without --external-runner + old_config = { + "mcpServers": { + "deepwork": { + "command": "deepwork", + "args": ["serve", "--path", "."], + } + } + } + (project_root / ".mcp.json").write_text(json.dumps(old_config)) + + adapter = ClaudeAdapter(project_root=project_root) + result = adapter.register_mcp_server(project_root) + + # Should detect the difference and update + assert result is True + config = json.loads((project_root / ".mcp.json").read_text()) + assert config["mcpServers"]["deepwork"]["args"] == [ + "serve", + "--path", + ".", + "--external-runner", + "claude", + ] + + def test_register_preserves_other_servers(self, project_root: Path) -> None: + """Test that registering deepwork preserves other MCP servers.""" + existing_config = { + "mcpServers": { + "other_server": { + "command": "other", + "args": ["--flag"], + } + } + } + (project_root / ".mcp.json").write_text(json.dumps(existing_config)) + + adapter = ClaudeAdapter(project_root=project_root) + adapter.register_mcp_server(project_root) + + config = json.loads((project_root / ".mcp.json").read_text()) + assert "other_server" in config["mcpServers"] + assert config["mcpServers"]["other_server"]["command"] == "other" + assert "deepwork" in config["mcpServers"] diff --git a/tests/unit/test_serve_cli.py b/tests/unit/test_serve_cli.py new file mode 100644 index 00000000..651ea89d --- /dev/null +++ b/tests/unit/test_serve_cli.py @@ -0,0 +1,62 @@ +"""Tests for serve CLI command --external-runner option.""" + +from unittest.mock import MagicMock, patch + +from click.testing import CliRunner + +from deepwork.cli.serve import serve + + +class TestServeExternalRunnerOption: + """Tests for --external-runner CLI option on the serve command.""" + + @patch("deepwork.cli.serve._serve_mcp") + @patch("deepwork.cli.serve._load_config") + def test_default_external_runner_is_none( + self, mock_load: MagicMock, mock_serve: MagicMock, tmp_path: str + ) -> None: + """Test that --external-runner defaults to None when not specified.""" + runner = CliRunner() + with runner.isolated_filesystem(temp_dir=tmp_path) as td: + result = runner.invoke(serve, ["--path", td]) + if result.exit_code != 0 and result.exception: + raise result.exception + + # _serve_mcp should be called with external_runner=None + 
mock_serve.assert_called_once() + call_args = mock_serve.call_args + assert call_args[0][4] is None or call_args.kwargs.get("external_runner") is None + + @patch("deepwork.cli.serve._serve_mcp") + @patch("deepwork.cli.serve._load_config") + def test_external_runner_claude( + self, mock_load: MagicMock, mock_serve: MagicMock, tmp_path: str + ) -> None: + """Test that --external-runner claude passes 'claude' through.""" + runner = CliRunner() + with runner.isolated_filesystem(temp_dir=tmp_path) as td: + result = runner.invoke(serve, ["--path", td, "--external-runner", "claude"]) + if result.exit_code != 0 and result.exception: + raise result.exception + + mock_serve.assert_called_once() + # external_runner is the 5th positional arg (index 4) + call_args = mock_serve.call_args[0] + assert call_args[4] == "claude" + + def test_external_runner_invalid_choice(self, tmp_path: str) -> None: + """Test that invalid --external-runner values are rejected.""" + runner = CliRunner() + result = runner.invoke(serve, ["--path", ".", "--external-runner", "invalid"]) + + assert result.exit_code != 0 + assert "Invalid value" in result.output or "invalid" in result.output.lower() + + def test_help_shows_external_runner(self) -> None: + """Test that --help shows the --external-runner option.""" + runner = CliRunner() + result = runner.invoke(serve, ["--help"]) + + assert result.exit_code == 0 + assert "--external-runner" in result.output + assert "claude" in result.output
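
Usage sketch (reviewer reference, not part of the patch): the snippet below exercises only QualityGate and build_review_instructions_file as introduced above, mirroring the two configurations that create_server selects between (external_runner=None versus "claude"). The temporary project directory, the report.md file name, and the single criterion are illustrative assumptions, not values taken from the repository.

import asyncio
import tempfile
from pathlib import Path

from deepwork.mcp.quality_gate import QualityGate


async def main() -> None:
    with tempfile.TemporaryDirectory() as tmp:
        project = Path(tmp)
        # Illustrative output file; in a real run this is a step output path.
        (project / "report.md").write_text("Draft report body")

        # Self-review configuration (what server.py builds when external_runner is None):
        # no CLI and max_inline_files=0, so outputs are referenced by path only.
        gate = QualityGate(cli=None, max_inline_files=0)
        instructions = await gate.build_review_instructions_file(
            reviews=[
                {
                    "run_each": "step",
                    "quality_criteria": {"Complete": "Is the report complete?"},
                }
            ],
            outputs={"report": "report.md"},
            output_specs={"report": "file"},
            project_root=project,
            notes="Focused on the summary section.",
        )
        # Same kind of content that finished_step writes to
        # .deepwork/tmp/quality_review_<session>_<step>.md in self-review mode.
        print(instructions)

        # External-runner configuration (external_runner="claude"): server.py instead
        # constructs QualityGate(cli=ClaudeCLI(timeout=...), max_inline_files=5), and
        # finished_step calls evaluate_reviews(), which runs a Claude CLI subprocess.


asyncio.run(main())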