diff --git a/.github/workflows/docs-publish.yml b/.github/workflows/docs-publish.yml index 77ad0f4a5..f8694702f 100644 --- a/.github/workflows/docs-publish.yml +++ b/.github/workflows/docs-publish.yml @@ -105,12 +105,31 @@ jobs: id: audit_coverage run: | set -o pipefail - uv run python tooling/docs-autogen/audit_coverage.py --docs-dir docs/docs/api --threshold 80 --quality 2>&1 \ + uv run python tooling/docs-autogen/audit_coverage.py --docs-dir docs/docs/api --threshold 80 2>&1 \ | tee /tmp/audit_coverage.log continue-on-error: ${{ inputs.strict_validation != true }} + - name: Docstring quality gate + id: quality_gate + run: | + set -o pipefail + uv run python tooling/docs-autogen/audit_coverage.py \ + --docs-dir docs/docs/api \ + --quality --fail-on-quality --threshold 100 \ + --orphans \ + --output /tmp/quality_report.json 2>&1 \ + | tee /tmp/quality_gate.log + # -- Upload artifact for deploy job -------------------------------------- + - name: Upload quality report + if: always() + uses: actions/upload-artifact@v7 + with: + name: docstring-quality-report + path: /tmp/quality_report.json + retention-days: 30 + - name: Upload docs artifact if: success() || (inputs.strict_validation != true) uses: actions/upload-artifact@v7 @@ -141,12 +160,11 @@ jobs: markdownlint_outcome = "${{ steps.markdownlint.outcome }}" validate_outcome = "${{ steps.validate_mdx.outcome }}" coverage_outcome = "${{ steps.audit_coverage.outcome }}" - strict = "${{ inputs.strict_validation }}" == "true" - mode = "" if strict else " *(soft-fail)*" - + quality_gate_outcome = "${{ steps.quality_gate.outcome }}" lint_log = read_log("/tmp/markdownlint.log") validate_log = read_log("/tmp/validate_mdx.log") coverage_log = read_log("/tmp/audit_coverage.log") + quality_gate_log = read_log("/tmp/quality_gate.log") # Count markdownlint issues (lines matching file:line:col format) lint_issues = len([l for l in lint_log.splitlines() if re.match(r'.+:\d+:\d+ ', l)]) @@ -186,45 +204,81 @@ jobs: mdx_detail = 
parse_validate_detail(validate_log) - # Docstring quality annotation emitted by audit_coverage.py into the log - # Format: ::notice title=Docstring quality::message - # or ::warning title=Docstring quality::message - quality_match = re.search(r"::(notice|warning|error) title=Docstring quality::(.+)", coverage_log) - if quality_match: - quality_level, quality_msg = quality_match.group(1), quality_match.group(2) - quality_icon = "✅" if quality_level == "notice" else "⚠️" - quality_status = "pass" if quality_level == "notice" else "warning" - quality_detail = re.sub(r"\s*—\s*see job summary.*$", "", quality_msg) - quality_row = f"| Docstring Quality | {quality_icon} {quality_status}{mode} | {quality_detail} |" + # Parse per-kind counts from the quality gate log. + # _print_quality_report emits section headers like: + # " Missing docstrings (12)" + # " Missing Args section (5)" + # Capture label -> count from those lines, then build a compact + # per-kind breakdown for the summary table cell. 
+ kind_short = { + "Missing docstrings": "missing", + "Short docstrings": "short", + "Missing Args section": "no_args", + "Missing Returns section": "no_returns", + "Missing Yields section (generator)": "no_yields", + "Missing Raises section": "no_raises", + "Missing class Args section": "no_class_args", + "Duplicate Args: in class + __init__ (Option C violation)": "dup_init_args", + "Param name mismatches (documented but not in signature)": "param_mismatch", + "TypedDict phantom fields (documented but not declared)": "td_phantom", + "TypedDict undocumented fields (declared but missing from Attributes:)": "td_undoc", + "Missing parameter type annotations (type absent from API docs)": "missing_param_type", + "Missing return type annotations (type absent from API docs)": "missing_return_type", + "Param type mismatch (docstring vs annotation)": "param_type_mismatch", + "Return type mismatch (docstring vs annotation)": "return_type_mismatch", + } + section_re = re.compile(r"^\s{2}(.+?)\s+\((\d+)\)\s*$", re.MULTILINE) + kind_counts = {} + for m in section_re.finditer(quality_gate_log): + label, count = m.group(1), int(m.group(2)) + short = kind_short.get(label) + if short: + kind_counts[short] = count + + if kind_counts: + parts = [f"{v} {k}" for k, v in kind_counts.items()] + quality_gate_detail = ", ".join(parts) else: - quality_row = None + # Fall back to the summary annotation message + qm = re.search(r"::(notice|warning|error) title=Docstring quality::(.+)", quality_gate_log) + quality_gate_detail = re.sub(r"\s*—\s*see job summary.*$", "", qm.group(2)) if qm else "" - # Split coverage log at quality section to avoid duplicate output in collapsibles - quality_start = coverage_log.find("🔬 Running docstring quality") - if quality_start != -1: - quality_log = coverage_log[quality_start:] - coverage_display_log = coverage_log[:quality_start].strip() - else: - quality_log = "" - coverage_display_log = coverage_log + CONTRIB_URL = ( + 
"https://github.com/generative-computing/mellea/blob/main" + "/docs/docs/guide/CONTRIBUTING.md" + ) + REPO = "${{ github.repository }}" + RUN_ID = "${{ github.run_id }}" + ARTIFACT_URL = f"https://github.com/{REPO}/actions/runs/{RUN_ID}#artifacts" lines = [ "## Docs Build — Validation Summary\n", "| Check | Result | Details |", "|-------|--------|---------|", - f"| Markdownlint | {icon(markdownlint_outcome)} {markdownlint_outcome}{mode} | {lint_detail} |", - f"| MDX Validation | {icon(validate_outcome)} {validate_outcome}{mode} | {mdx_detail} |", - f"| API Coverage | {icon(coverage_outcome)} {coverage_outcome}{mode} | {cov_detail} |", + f"| Markdownlint | {icon(markdownlint_outcome)} {markdownlint_outcome} | {lint_detail} |", + f"| MDX Validation | {icon(validate_outcome)} {validate_outcome} | {mdx_detail} |", + f"| API Coverage | {icon(coverage_outcome)} {coverage_outcome} | {cov_detail} |", + f"| Docstring Quality | {icon(quality_gate_outcome)} {quality_gate_outcome} | {quality_gate_detail} |", ] - if quality_row: - lines.append(quality_row) lines.append("") + # When the quality gate fails, surface a direct link to the fix reference. + # Per-kind Ref: URLs in the log output are inside a ```text``` block and + # don't render as links there. + if quality_gate_outcome == "failure": + lines += [ + "> ❌ **Docstring quality gate failed.** " + f"See the [CI docstring checks reference]({CONTRIB_URL}#ci-docstring-checks-reference) " + "for per-kind fix instructions, or expand **Docstring quality details** below for the full list. 
\n" + f"> The full machine-readable report is available as the [`docstring-quality-report` artifact]({ARTIFACT_URL}).", + "", + ] + for title, log, limit in [ ("Markdownlint output", lint_log, 5_000), ("MDX validation output", validate_log, 5_000), - ("API coverage output", coverage_display_log, 5_000), - ("Docstring quality details", quality_log, 1_000_000), + ("API coverage output", coverage_log, 5_000), + ("Docstring quality details", quality_gate_log, 1_000_000), ]: if log: lines += [ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 07c61b7d3..97b91a439 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -49,17 +49,16 @@ repos: hooks: - id: docs-mdx-validate name: Validate generated MDX docs - entry: bash -c 'test -d docs/docs/api && uv run --no-sync python tooling/docs-autogen/validate.py docs/docs/api --skip-coverage || true' + entry: bash -c 'test -d docs/docs/api && uv run --no-sync python tooling/docs-autogen/validate.py docs/docs/api --skip-coverage --warn-only || true' language: system pass_filenames: false files: (docs/docs/.*\.mdx$|tooling/docs-autogen/) - # TODO(#616): Move to normal commit flow once docstring quality issues reach 0. - # Griffe loads the full package (~10s), so this is manual-only for now to avoid - # slowing down every Python commit. Re-enable (remove stages: [manual]) and add - # --fail-on-quality once quality issues are resolved. + # Docstring quality gate — manual only (CI is the hard gate via docs-publish.yml). + # Run locally with: pre-commit run docs-docstring-quality --hook-stage manual + # Requires generated API docs (run `uv run python tooling/docs-autogen/build.py` first). 
- id: docs-docstring-quality - name: Audit docstring quality (informational) - entry: bash -c 'test -d docs/docs/api && uv run --no-sync python tooling/docs-autogen/audit_coverage.py --quality --docs-dir docs/docs/api || true' + name: Audit docstring quality + entry: uv run --no-sync python tooling/docs-autogen/audit_coverage.py --quality --fail-on-quality --threshold 0 --docs-dir docs/docs/api language: system pass_filenames: false files: (mellea/.*\.py$|cli/.*\.py$) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 44032b476..6d7a204d1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -174,6 +174,25 @@ differs in type or behaviour from the constructor input — for example, when a argument is wrapped into a `CBlock`, or when a class-level constant is relevant to callers. Pure-echo entries that repeat `Args:` verbatim should be omitted. +**`TypedDict` classes are a special case.** Their fields *are* the entire public +contract, so when an `Attributes:` section is present it must exactly match the +declared fields. The audit will flag: + +- `typeddict_phantom` — `Attributes:` documents a field that is not declared in the `TypedDict` +- `typeddict_undocumented` — a declared field is absent from the `Attributes:` section + +```python +class ConstraintResult(TypedDict): + """Result of a constraint check. + + Attributes: + passed: Whether the constraint was satisfied. + reason: Human-readable explanation. 
+ """ + passed: bool + reason: str +``` + #### Validating docstrings Run the coverage and quality audit to check your changes before committing: @@ -194,6 +213,8 @@ Key checks the audit enforces: | `no_args` | Standalone function has params but no `Args:` section | | `no_returns` | Function has a non-trivial return annotation but no `Returns:` section | | `param_mismatch` | `Args:` documents names not present in the actual signature | +| `typeddict_phantom` | `TypedDict` `Attributes:` documents a field not declared in the class | +| `typeddict_undocumented` | `TypedDict` has a declared field absent from its `Attributes:` section | **IDE hover verification** — open any of these existing classes in VS Code and hover over the class name or a constructor call to confirm the hover card shows `Args:` once diff --git a/cli/alora/intrinsic_uploader.py b/cli/alora/intrinsic_uploader.py index fe7c52a88..ec159b963 100644 --- a/cli/alora/intrinsic_uploader.py +++ b/cli/alora/intrinsic_uploader.py @@ -40,7 +40,7 @@ def upload_intrinsic( base_model (str): Base model ID or path (e.g. ``"ibm-granite/granite-3.3-2b-instruct"``). Must contain at most one ``"/"`` separator. - type (Literal["lora", "alora"]): Adapter type, used as the leaf + type (Literal['lora', 'alora']): Adapter type, used as the leaf directory name in the repository layout. io_yaml (str): Path to the ``io.yaml`` configuration file for intrinsic input/output processing. 
diff --git a/cli/alora/train.py b/cli/alora/train.py index bda54ae39..212e0c6fe 100644 --- a/cli/alora/train.py +++ b/cli/alora/train.py @@ -16,7 +16,14 @@ import typer from datasets import Dataset from peft import LoraConfig, get_peft_model -from transformers import AutoModelForCausalLM, AutoTokenizer, TrainerCallback +from transformers import ( + AutoModelForCausalLM, + AutoTokenizer, + TrainerCallback, + TrainerControl, + TrainerState, + TrainingArguments, +) from trl import DataCollatorForCompletionOnlyLM, SFTConfig, SFTTrainer # Handle MPS with old PyTorch versions on macOS only @@ -39,7 +46,9 @@ ) -def load_dataset_from_json(json_path, tokenizer, invocation_prompt): +def load_dataset_from_json( + json_path: str, tokenizer: AutoTokenizer, invocation_prompt: str +) -> Dataset: """Load a JSONL dataset and format it for SFT training. Reads ``item``/``label`` pairs from a JSONL file and builds a HuggingFace @@ -73,7 +82,7 @@ def load_dataset_from_json(json_path, tokenizer, invocation_prompt): return Dataset.from_dict({"input": inputs, "target": targets}) -def formatting_prompts_func(example): +def formatting_prompts_func(example: dict) -> list[str]: """Concatenate input and target columns for SFT prompt formatting. Args: @@ -101,7 +110,13 @@ class SaveBestModelCallback(TrainerCallback): def __init__(self): self.best_eval_loss = float("inf") - def on_evaluate(self, args, state, control, **kwargs): + def on_evaluate( + self, + args: TrainingArguments, + state: TrainerState, + control: TrainerControl, + **kwargs, + ): """Save the adapter weights if the current evaluation loss is a new best. 
Called automatically by the HuggingFace Trainer after each evaluation diff --git a/cli/decompose/decompose.py b/cli/decompose/decompose.py index a7d35e834..63b043f61 100644 --- a/cli/decompose/decompose.py +++ b/cli/decompose/decompose.py @@ -49,6 +49,19 @@ class DecompVersion(StrEnum): def reorder_subtasks( subtasks: list[DecompSubtasksResult], ) -> list[DecompSubtasksResult]: + """Topologically sort subtasks by their ``depends_on`` relationships. + + Args: + subtasks: List of subtask dicts, each with a ``"tag"`` and optional + ``"depends_on"`` field. + + Returns: + list[DecompSubtasksResult]: The subtasks reordered so that dependencies + come before dependents, with numbering prefixes updated. + + Raises: + ValueError: If a circular dependency is detected. + """ subtask_map = {subtask["tag"].lower(): subtask for subtask in subtasks} graph = {} @@ -78,6 +91,19 @@ def reorder_subtasks( def verify_user_variables( decomp_data: DecompPipelineResult, input_var: list[str] | None ) -> DecompPipelineResult: + """Validate that all required input variables and dependencies exist. + + Args: + decomp_data: The decomposition pipeline result containing subtasks. + input_var: User-provided input variable names, or ``None`` for none. + + Returns: + DecompPipelineResult: The (possibly reordered) decomposition data. + + Raises: + ValueError: If a subtask requires an input variable that was not + provided, or depends on a subtask tag that does not exist. + """ if input_var is None: input_var = [] diff --git a/cli/eval/runner.py b/cli/eval/runner.py index 9fdf55ff3..a456df0d6 100644 --- a/cli/eval/runner.py +++ b/cli/eval/runner.py @@ -49,7 +49,7 @@ def __init__( self.score = score self.validation_reason = validation_reason - def to_dict(self): + def to_dict(self) -> dict: """Serialise the input evaluation result to a plain dictionary. 
Returns: @@ -84,7 +84,7 @@ def __init__(self, test_eval: TestBasedEval, input_results: list[InputEvalResult self.test_eval = test_eval self.input_results = input_results - def to_dict(self): + def to_dict(self) -> dict: """Serialise the test evaluation result to a plain dictionary. Returns: @@ -366,7 +366,7 @@ def execute_test_eval( return test_result -def parse_judge_output(judge_output: str): +def parse_judge_output(judge_output: str) -> tuple[int | None, str]: """Parse score and justification from a judge model's output string. Args: diff --git a/docs/docs/docs.json b/docs/docs/docs.json index 8fb65bf95..c9516bc1b 100644 --- a/docs/docs/docs.json +++ b/docs/docs/docs.json @@ -161,25 +161,14 @@ "pages": [ "api/mellea/backends/backend", "api/mellea/backends/backends", - "api/mellea/backends/bedrock", "api/mellea/backends/cache", - "api/mellea/backends/dummy", - "api/mellea/backends/huggingface", - "api/mellea/backends/kv_block_helpers", - "api/mellea/backends/litellm", "api/mellea/backends/model_ids", "api/mellea/backends/model_options", - "api/mellea/backends/ollama", - "api/mellea/backends/openai", "api/mellea/backends/tools", - "api/mellea/backends/utils", - "api/mellea/backends/vllm", - "api/mellea/backends/watsonx", { "group": "adapters", "pages": [ - "api/mellea/backends/adapters/adapter", - "api/mellea/backends/adapters/catalog" + "api/mellea/backends/adapters/adapter" ] } ] @@ -218,9 +207,6 @@ { "group": "granite3", "pages": [ - "api/mellea/formatters/granite/granite3/constants", - "api/mellea/formatters/granite/granite3/input", - "api/mellea/formatters/granite/granite3/output", "api/mellea/formatters/granite/granite3/types", { "group": "granite32", @@ -244,18 +230,9 @@ "group": "intrinsics", "pages": [ "api/mellea/formatters/granite/intrinsics/input", - "api/mellea/formatters/granite/intrinsics/json_util", "api/mellea/formatters/granite/intrinsics/output", "api/mellea/formatters/granite/intrinsics/util" ] - }, - { - "group": "retrievers", - "pages": [ - 
"api/mellea/formatters/granite/retrievers/elasticsearch", - "api/mellea/formatters/granite/retrievers/embeddings", - "api/mellea/formatters/granite/retrievers/util" - ] } ] } @@ -274,9 +251,7 @@ "group": "plugins", "pages": [ "api/mellea/plugins/base", - "api/mellea/plugins/context", "api/mellea/plugins/decorators", - "api/mellea/plugins/manager", "api/mellea/plugins/plugins", "api/mellea/plugins/pluginset", "api/mellea/plugins/registry", @@ -305,25 +280,20 @@ "group": "components", "pages": [ "api/mellea/stdlib/components/chat", - "api/mellea/stdlib/components/genslot", "api/mellea/stdlib/components/instruction", "api/mellea/stdlib/components/mify", "api/mellea/stdlib/components/mobject", - "api/mellea/stdlib/components/react", "api/mellea/stdlib/components/simple", - "api/mellea/stdlib/components/unit_test_eval", { "group": "docs", "pages": [ - "api/mellea/stdlib/components/docs/document", - "api/mellea/stdlib/components/docs/richdocument" + "api/mellea/stdlib/components/docs/document" ] }, { "group": "intrinsic", "pages": [ - "api/mellea/stdlib/components/intrinsic/intrinsic", - "api/mellea/stdlib/components/intrinsic/rag" + "api/mellea/stdlib/components/intrinsic/intrinsic" ] } ] @@ -353,8 +323,6 @@ "group": "sampling", "pages": [ "api/mellea/stdlib/sampling/base", - "api/mellea/stdlib/sampling/budget_forcing", - "api/mellea/stdlib/sampling/majority_voting", "api/mellea/stdlib/sampling/sofai", { "group": "sampling_algos", @@ -375,7 +343,6 @@ { "group": "telemetry", "pages": [ - "api/mellea/telemetry/backend_instrumentation", "api/mellea/telemetry/logging", "api/mellea/telemetry/metrics", "api/mellea/telemetry/telemetry", @@ -401,10 +368,7 @@ { "group": "decompose", "pages": [ - "api/cli/decompose/__init__", - "api/cli/decompose/decompose", - "api/cli/decompose/pipeline", - "api/cli/decompose/utils" + "api/cli/decompose/decompose" ] }, { diff --git a/docs/docs/guide/CONTRIBUTING.md b/docs/docs/guide/CONTRIBUTING.md index fd0a58434..de2110b21 100644 --- 
a/docs/docs/guide/CONTRIBUTING.md +++ b/docs/docs/guide/CONTRIBUTING.md @@ -353,8 +353,62 @@ Add `Attributes:` only when a stored value differs in type or behaviour from the input (e.g. a `str` wrapped into a `CBlock`, or a class-level constant). Pure-echo entries that repeat `Args:` verbatim should be omitted. +**`TypedDict` classes** are a special case — their fields are the entire public contract, +so when an `Attributes:` section is present it must exactly match the declared fields. +The CI audit will fail on phantom fields (documented but not declared) and undocumented +fields (declared but missing from `Attributes:`). + See [CONTRIBUTING.md](../../CONTRIBUTING.md) for the full validation workflow. +### CI docstring checks reference + +The `audit_coverage.py --quality` gate (run in CI on every PR) reports the +following check kinds. If your PR is blocked by this gate, find the check kind +in the table below, follow the fix instructions, and re-push. + +> **Note on PR diff annotations:** GitHub Actions shows inline annotations +> directly on the changed lines in your PR diff. These are capped at **10 per +> check category** to ensure every category gets at least one visible marker. +> If there are more issues than the cap, a `"... and N more"` notice appears in +> the job log. The **complete list** of issues is always in the full job log +> (expand the "Docstring quality gate" step) and in the +> `docstring-quality-report` artifact (JSON) attached to the workflow run. 
+ +#### Missing or short docstrings + +| Check kind | What it means | How to fix | +| ---------- | ------------- | ---------- | +| `missing` | No docstring present on the symbol | Add a Google-style one-line summary sentence | +| `short` | Docstring has fewer than 5 words | Expand the summary — describe what the function/class does and why | + +#### Args, Returns, Yields, and Raises + +| Check kind | What it means | How to fix | +| ---------- | ------------- | ---------- | +| `no_args` | Function has named parameters but the docstring has no `Args:` section | Add an `Args:` section listing each parameter name and a short description | +| `no_returns` | Function has a non-`None` return annotation but the docstring has no `Returns:` section | Add a `Returns:` section describing what is returned and when | +| `no_yields` | Function returns `Generator` / `Iterator` but the docstring has no `Yields:` section | Add a `Yields:` section — generator functions use `Yields:`, not `Returns:` | +| `no_raises` | Function source contains `raise` but the docstring has no `Raises:` section | Add a `Raises:` section listing each exception type and the condition that triggers it | +| `missing_param_type` | `Args:` section exists but one or more parameters have no Python type annotation — the type column is absent from the generated API docs | Add a type annotation to each listed parameter in the function signature (e.g. `def f(x: int)`). Only fires when `no_args` is already satisfied; `*args`/`**kwargs` are excluded. | +| `missing_return_type` | `Returns:` section is documented but the function has no return type annotation — the return type is absent from the generated API docs | Add a return annotation to the function signature (e.g. `-> str`). Only fires when `no_returns` is already satisfied. | +| `param_type_mismatch` | A parameter's `Args:` entry states an explicit type (e.g. 
`x (int): …`) that does not match the Python annotation in the function signature | Align the docstring type with the annotation, or vice versa. The check normalises common equivalents (`Optional[X]` ↔ `X \| None`, `List` ↔ `list`, union ordering) before comparing, so only genuine disagreements are flagged. Only fires when both the docstring and the signature have an explicit type. **Note:** Python's AST normalises string literals to single quotes, so `Literal["a", "b"]` in source is read as `Literal['a', 'b']` — use single quotes in docstrings to match. | +| `return_type_mismatch` | The `Returns:` section has a type prefix (e.g. `Returns: \n str: …`) that does not match the Python return annotation | Align the docstring return type with the annotation, or vice versa. Same normalisation rules as `param_type_mismatch`. Only fires when both sides have an explicit type. | + +#### Class docstrings (Option C) + +| Check kind | What it means | How to fix | +| ---------- | ------------- | ---------- | +| `no_class_args` | Class `__init__` has typed parameters but the **class** docstring has no `Args:` section | Add `Args:` to the class docstring (not `__init__`) — see Option C convention above | +| `duplicate_init_args` | `Args:` appears in **both** the class docstring and the `__init__` docstring | Remove `Args:` from the `__init__` docstring; keep it on the class docstring only | +| `param_mismatch` | `Args:` section documents parameter names that do not exist in the actual signature | Remove or rename the phantom entries so they exactly match the real parameter names | + +#### TypedDict classes + +| Check kind | What it means | How to fix | +| ---------- | ------------- | ---------- | +| `typeddict_phantom` | `Attributes:` section documents field names not declared in the `TypedDict` | Remove the extra entries — every `Attributes:` entry must match a declared field | +| `typeddict_undocumented` | `TypedDict` has declared fields that are absent from the `Attributes:` 
section | Add the missing fields — every declared field must appear in `Attributes:` | + --- ## Local preview diff --git a/mellea/backends/tools.py b/mellea/backends/tools.py index 63f818111..0b83a2eac 100644 --- a/mellea/backends/tools.py +++ b/mellea/backends/tools.py @@ -70,7 +70,7 @@ def as_json_tool(self) -> dict[str, Any]: return self._as_json_tool.copy() @classmethod - def from_langchain(cls, tool: Any): + def from_langchain(cls, tool: Any) -> "MelleaTool": """Create a MelleaTool from a LangChain tool object. Args: @@ -117,7 +117,7 @@ def parameter_remapper(*args, **kwargs): ) from e @classmethod - def from_smolagents(cls, tool: Any): + def from_smolagents(cls, tool: Any) -> "MelleaTool": """Create a Tool from a HuggingFace smolagents tool object. Args: @@ -172,7 +172,7 @@ def tool_call(*args, **kwargs): ) from e @classmethod - def from_callable(cls, func: Callable, name: str | None = None): + def from_callable(cls, func: Callable, name: str | None = None) -> "MelleaTool": """Create a MelleaTool from a plain Python callable. Introspects the callable's signature and docstring to build an @@ -379,7 +379,7 @@ def json_extraction(text: str) -> Generator[dict, None, None]: index = text.find("{", index) -def find_func(d) -> tuple[str | None, Mapping | None]: +def find_func(d: object) -> tuple[str | None, Mapping | None]: """Find the first function in a json-like dictionary. Most llms output tool requests in the form ``...{"name": string, "arguments": {}}...`` diff --git a/mellea/core/sampling.py b/mellea/core/sampling.py index 0bf75badf..8dc253e1b 100644 --- a/mellea/core/sampling.py +++ b/mellea/core/sampling.py @@ -133,5 +133,5 @@ async def sample( tool_calls: True if tool calls should be used during this sampling strategy. Returns: - SamplingResult: A result object indicating the success or failure of the sampling process. + SamplingResult[S]: A result object indicating the success or failure of the sampling process. 
""" diff --git a/mellea/core/utils.py b/mellea/core/utils.py index dfeaafa5d..3823a83a6 100644 --- a/mellea/core/utils.py +++ b/mellea/core/utils.py @@ -67,7 +67,7 @@ class JsonFormatter(logging.Formatter): process ID, thread ID, and (if present) exception information. """ - def format(self, record): # type: ignore + def format(self, record: logging.LogRecord) -> dict: # type: ignore[override] """Formats a log record as a JSON-serialisable dictionary. Includes timestamp, level, message, module, function name, line number, @@ -75,6 +75,10 @@ def format(self, record): # type: ignore Args: record (logging.LogRecord): The log record to format. + + Returns: + dict: A dictionary containing timestamp, level, message, module, function, + line number, process/thread IDs, and optional exception info. """ log_record = { "timestamp": self.formatTime(record, self.datefmt), diff --git a/mellea/formatters/granite/base/util.py b/mellea/formatters/granite/base/util.py index 4d02ef9c6..4da107f0a 100644 --- a/mellea/formatters/granite/base/util.py +++ b/mellea/formatters/granite/base/util.py @@ -3,6 +3,8 @@ """Common utility functions for the library and tests.""" # Standard +from __future__ import annotations + import contextlib import itertools import json @@ -10,10 +12,14 @@ import os import re import uuid +from typing import TYPE_CHECKING # Third Party import pydantic +if TYPE_CHECKING: + from transformers import PreTrainedModel, PreTrainedTokenizerBase + # First Party from .types import ChatCompletionResponse, ChatCompletionResponseChoice @@ -98,7 +104,7 @@ def random_uuid() -> str: return str(uuid.uuid4()) -def load_transformers_lora(local_or_remote_path): +def load_transformers_lora(local_or_remote_path: str) -> tuple: """Load transformers LoRA model. 
AutoModelForCausalLM.from_pretrained() is supposed to auto-load base models if you @@ -136,7 +142,10 @@ def load_transformers_lora(local_or_remote_path): def chat_completion_request_to_transformers_inputs( - request, tokenizer=None, model=None, constrained_decoding_prefix=None + request: dict, + tokenizer: PreTrainedTokenizerBase | None = None, + model: PreTrainedModel | None = None, + constrained_decoding_prefix: str | None = None, ) -> tuple[dict, dict]: """Translate an OpenAI-style chat completion request. @@ -191,7 +200,7 @@ def chat_completion_request_to_transformers_inputs( ): tokenizer_input["documents"] = request["extra_body"]["documents"] - input_tokens = tokenizer.apply_chat_template(**tokenizer_input, return_tensors="pt") + input_tokens = tokenizer.apply_chat_template(**tokenizer_input, return_tensors="pt") # type: ignore[union-attr] # Transformers 5 switched the return type of apply_chat_template() from Tensor to # BatchEncoding. Adjust our behavior depending on which direction the currently @@ -208,7 +217,7 @@ def chat_completion_request_to_transformers_inputs( # generate() will fail with many different creative error messages if tokens aren't # on the right device. - input_tokens = input_tokens.to(model.device) + input_tokens = input_tokens.to(model.device) # type: ignore[union-attr] generate_input["input_tokens"] = input_tokens # The generate() method sometimes needs to know what is the integer ID @@ -216,9 +225,9 @@ def chat_completion_request_to_transformers_inputs( # isn't included in the serialized model. We get it from the tokenizer. # And of course some tokenizers don't set this parameter, in which case # we use the end of string token and hope for the best. 
- pad_token_id = tokenizer.pad_token_id + pad_token_id = tokenizer.pad_token_id # type: ignore[union-attr] if pad_token_id is None: - pad_token_id = tokenizer.eos_token_id + pad_token_id = tokenizer.eos_token_id # type: ignore[union-attr] if pad_token_id is None: # Raise an error here because the some branches of the generate # method won't complain about an invalid value of this parameter, @@ -229,7 +238,7 @@ def chat_completion_request_to_transformers_inputs( # Make sure you specify this parameter explicitly, or you will have # a bad time. - generate_input["eos_token_id"] = tokenizer.eos_token_id + generate_input["eos_token_id"] = tokenizer.eos_token_id # type: ignore[union-attr] other_input = {} @@ -316,7 +325,10 @@ def chat_completion_request_to_transformers_inputs( def generate_with_transformers( - tokenizer, model, generate_input: dict, other_input: dict + tokenizer: PreTrainedTokenizerBase, + model: PreTrainedModel, + generate_input: dict, + other_input: dict, ) -> ChatCompletionResponse: """Call Transformers generate and get usable results. 
@@ -348,7 +360,7 @@ def generate_with_transformers( generate_input = generate_input.copy() del generate_input["input_tokens"] - generate_result = model.generate(input_tokens, **generate_input) + generate_result = model.generate(input_tokens, **generate_input) # type: ignore[operator] # Result is a a 2D tensor of shape (num responses, prompt + max generated tokens) # containing tokens, plus a tuple of tensors of shape diff --git a/mellea/formatters/granite/granite3/granite32/input.py b/mellea/formatters/granite/granite3/granite32/input.py index a5333cf9f..939113973 100644 --- a/mellea/formatters/granite/granite3/granite32/input.py +++ b/mellea/formatters/granite/granite3/granite32/input.py @@ -13,6 +13,7 @@ NO_TOOLS_NO_DOCS_NO_THINKING_SYSTEM_MESSAGE_PART, ) from ...granite3.input import Granite3InputProcessor +from ...granite3.types import Granite3ChatCompletion # Local from .constants import ( @@ -221,12 +222,14 @@ def _remove_special_tokens(cls, text: str) -> str: return new_text @classmethod - def sanitize(cls, chat_completion, parts="all"): + def sanitize( + cls, chat_completion: Granite3ChatCompletion, parts: list[str] | str = "all" + ) -> Granite3ChatCompletion: """Sanitize the chat completion by removing Granite 3.2 special tokens. Args: chat_completion: The chat completion request to sanitize. - parts (str): Which parts of the chat completion to sanitize; + parts (list[str] | str): Which parts of the chat completion to sanitize; defaults to ``"all"``. 
Returns: diff --git a/mellea/formatters/granite/granite3/granite33/input.py b/mellea/formatters/granite/granite3/granite33/input.py index 2bb8f899d..f671cfd83 100644 --- a/mellea/formatters/granite/granite3/granite33/input.py +++ b/mellea/formatters/granite/granite3/granite33/input.py @@ -13,6 +13,7 @@ NO_TOOLS_NO_DOCS_NO_THINKING_SYSTEM_MESSAGE_PART, ) from ...granite3.input import Granite3InputProcessor +from ...granite3.types import Granite3ChatCompletion # Local from .constants import ( @@ -141,12 +142,14 @@ def _remove_special_tokens(cls, text: str) -> str: return new_text @classmethod - def sanitize(cls, chat_completion, parts="all"): + def sanitize( + cls, chat_completion: Granite3ChatCompletion, parts: list[str] | str = "all" + ) -> Granite3ChatCompletion: """Sanitize the chat completion by removing Granite 3.3 special tokens. Args: chat_completion: The chat completion request to sanitize. - parts (str): Which parts of the chat completion to sanitize; + parts (list[str] | str): Which parts of the chat completion to sanitize; defaults to ``"all"``. Returns: diff --git a/mellea/formatters/granite/intrinsics/input.py b/mellea/formatters/granite/intrinsics/input.py index bfa064ae2..cd0a3a5e0 100644 --- a/mellea/formatters/granite/intrinsics/input.py +++ b/mellea/formatters/granite/intrinsics/input.py @@ -33,7 +33,7 @@ def _needs_logprobs(transformations: list | None) -> bool: return any(t["type"] == "likelihood" for t in transformations) -def sentence_delimiter(tag, sentence_num) -> str: +def sentence_delimiter(tag: str, sentence_num: int) -> str: """Return a tag string that identifies the beginning of the indicated sentence. Args: @@ -56,14 +56,16 @@ def mark_sentence_boundaries( ``<[prefix][number]>`` at the location of each sentence boundary. - :param split_strings: Input string(s), pre-split into sentences - :param tag_prefix: String to place before the number part of each tagged - sentence boundary. - :param index: Starting index for sentence numbering. 
Defaults to 0. Pass a - non-zero value to continue numbering from a prior call. + Args: + split_strings: Input string(s), pre-split into sentences. + tag_prefix: String to place before the number part of each tagged + sentence boundary. + index: Starting index for sentence numbering. Defaults to 0. Pass a + non-zero value to continue numbering from a prior call. - :returns: Tuple of (list of input strings with all sentence boundaries marked, - next available index after the last sentence). + Returns: + tuple[list[str], int]: Tuple of (list of input strings with all sentence + boundaries marked, next available index after the last sentence). """ result: list[str] = [] for sentences in split_strings: diff --git a/mellea/helpers/openai_compatible_helpers.py b/mellea/helpers/openai_compatible_helpers.py index 739677368..4858ea366 100644 --- a/mellea/helpers/openai_compatible_helpers.py +++ b/mellea/helpers/openai_compatible_helpers.py @@ -140,7 +140,7 @@ def chat_completion_delta_merge( return merged -def message_to_openai_message(msg: Message): +def message_to_openai_message(msg: Message) -> dict: """Serialise a Mellea ``Message`` to the format required by OpenAI-compatible API providers. Args: diff --git a/mellea/stdlib/components/mify.py b/mellea/stdlib/components/mify.py index 26aa6da3a..ec8df5c6d 100644 --- a/mellea/stdlib/components/mify.py +++ b/mellea/stdlib/components/mify.py @@ -298,7 +298,7 @@ def mify( ) -> T: ... # Overloads for @mify and mify(obj|cls) -def mify(*args, **kwargs): # noqa: D417 +def mify(*args, **kwargs) -> object: # noqa: D417 """M-ify an object or class. Allows the object (or instances of the class) to be used in m sessions and with m functions. 
diff --git a/mellea/stdlib/components/mobject.py b/mellea/stdlib/components/mobject.py index 38719b213..e706b80d5 100644 --- a/mellea/stdlib/components/mobject.py +++ b/mellea/stdlib/components/mobject.py @@ -306,7 +306,7 @@ def format_for_llm(self) -> TemplateRepresentation | str: return TemplateRepresentation( args={"content": self.content_as_string()}, obj=self, - tools=tools, + tools=tools, # type: ignore[arg-type] fields=[], template_order=["*", "MObject"], ) diff --git a/mellea/stdlib/functional.py b/mellea/stdlib/functional.py index 16290c229..906d1c6ae 100644 --- a/mellea/stdlib/functional.py +++ b/mellea/stdlib/functional.py @@ -349,7 +349,7 @@ def query( tool_calls: If true, the model may make tool calls. Defaults to False. Returns: - ModelOutputThunk: The result of the query as processed by the backend. + tuple[ModelOutputThunk, Context]: The result of the query and updated context. """ if not isinstance(obj, MObjectProtocol): obj = mify(obj) @@ -983,7 +983,7 @@ async def aquery( tool_calls: If true, the model may make tool calls. Defaults to False. Returns: - ModelOutputThunk: The result of the query as processed by the backend. + tuple[ModelOutputThunk, Context]: The result of the query and updated context. """ if not isinstance(obj, MObjectProtocol): obj = mify(obj) @@ -1024,9 +1024,9 @@ async def atransform( model_options: Model options to pass to the backend. Returns: - ModelOutputThunk|Any: The result of the transformation as processed by the backend. If no tools were called, - the return type will be always be ModelOutputThunk. If a tool was called, the return type will be the return type - of the function called, usually the type of the object passed in. + tuple[ModelOutputThunk | Any, Context]: The result of the transformation and updated context. + If no tools were called, the first element will always be ModelOutputThunk. 
If a tool was called, + the first element will be the return type of the function called, usually the type of the object passed in. """ if not isinstance(obj, MObjectProtocol): obj = mify(obj) diff --git a/mellea/stdlib/sampling/base.py b/mellea/stdlib/sampling/base.py index 59c044dd0..a5295d084 100644 --- a/mellea/stdlib/sampling/base.py +++ b/mellea/stdlib/sampling/base.py @@ -136,7 +136,7 @@ async def sample( show_progress: if true, a tqdm progress bar is used. Otherwise, messages will still be sent to flog. Returns: - SamplingResult: A result object indicating the success or failure of the sampling process. + SamplingResult[S]: A result object indicating the success or failure of the sampling process. Raises: AssertionError: Asserts that all required components (repair, select_from_failure, validate, and generate) are provided before proceeding with the sampling. @@ -491,7 +491,7 @@ def select_from_failure( sampled_actions: list[Component], sampled_results: list[ModelOutputThunk], sampled_val: list[list[tuple[Requirement, ValidationResult]]], - ): + ) -> int: """Always returns the last index. The last message from the model will always be returned if all results are failures. Args: diff --git a/mellea/stdlib/session.py b/mellea/stdlib/session.py index acdd42c67..4fcfa783e 100644 --- a/mellea/stdlib/session.py +++ b/mellea/stdlib/session.py @@ -339,7 +339,7 @@ def __copy__(self): return new - def clone(self): + def clone(self) -> MelleaSession: """Useful for running multiple generation requests while keeping the context at a given point in time. 
Returns: diff --git a/tooling/docs-autogen/audit_coverage.py b/tooling/docs-autogen/audit_coverage.py index eb9506c9f..d53adb034 100755 --- a/tooling/docs-autogen/audit_coverage.py +++ b/tooling/docs-autogen/audit_coverage.py @@ -11,6 +11,7 @@ """ import argparse +import ast import json import os import re @@ -24,6 +25,54 @@ sys.exit(1) +# Modules that are confirmed internal but whose parent __init__.py imports nothing +# (making the import-based check indeterminate). Must stay in sync with generate-ast.py. +_CONFIRMED_INTERNAL_MODULES: frozenset[str] = frozenset( + {"json_util", "backend_instrumentation"} +) + + +def _imported_submodule_names(init_path: Path) -> set[str] | None: + """Return submodule names imported via relative imports in init_path. + + Returns None if the file cannot be parsed (treat as indeterminate). + Returns an empty set if the file is readable but has no relative imports. + """ + try: + tree = ast.parse(init_path.read_text()) + except Exception: + return None + result: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom) and node.level > 0 and node.module: + result.add(node.module.split(".")[0]) + return result + + +def _is_public_submodule(submodule_name: str, submodule_filepath: Path | None) -> bool: + """Return True if a submodule should be treated as part of the public API. + + Mirrors the filter in generate-ast.py's remove_internal_modules(). 
+ """ + if submodule_name in _CONFIRMED_INTERNAL_MODULES: + return False + if submodule_filepath is None: + return True # conservative: keep if we can't determine + # Griffe gives filepath as: + # - module file: .../pkg/submodule.py → parent init is .../pkg/__init__.py + # - package: .../pkg/subpkg/__init__.py → parent init is .../pkg/__init__.py + if submodule_filepath.name == "__init__.py": + parent_init = submodule_filepath.parent.parent / "__init__.py" + else: + parent_init = submodule_filepath.parent / "__init__.py" + if not parent_init.exists(): + return True # conservative + subs = _imported_submodule_names(parent_init) + if subs is None or not subs: + return True # conservative: can't determine, keep + return submodule_name in subs + + def _load_package(source_dir: Path, package_name: str): """Load a package with Griffe. Returns the package object or None on failure.""" try: @@ -84,9 +133,13 @@ def walk_module(module, module_path: str): # Recursively walk submodules (but skip internal ones) if hasattr(module, "modules"): for submodule_name, submodule in module.modules.items(): - if not submodule_name.startswith("_"): - submodule_path = f"{module_path}.{submodule_name}" - walk_module(submodule, submodule_path) + if submodule_name.startswith("_"): + continue + fp = getattr(submodule, "filepath", None) + if not _is_public_submodule(submodule_name, fp): + continue + submodule_path = f"{module_path}.{submodule_name}" + walk_module(submodule, submodule_path) # Walk through all top-level modules for module_name, module in package.modules.items(): @@ -102,6 +155,7 @@ def walk_module(module, module_path: str): # --------------------------------------------------------------------------- _ARGS_RE = re.compile(r"^\s*(Args|Arguments|Parameters)\s*:", re.MULTILINE) +_TYPEDDICT_BASES = re.compile(r"\bTypedDict\b") _RETURNS_RE = re.compile(r"^\s*Returns\s*:", re.MULTILINE) _YIELDS_RE = re.compile(r"^\s*Yields\s*:", re.MULTILINE) _RAISES_RE = re.compile(r"^\s*Raises\s*:", 
re.MULTILINE) @@ -110,9 +164,106 @@ def walk_module(module, module_path: str): # The colon must be followed by whitespace to avoid matching Sphinx cross-reference # continuation lines like " through :func:`...`". _ARGS_ENTRY_RE = re.compile(r"^\s{4,}(\w+)\s*(?:\([^)]*\))?\s*:\s", re.MULTILINE) +# Matches an indented param entry with an EXPLICIT type: " param_name (SomeType):" +_ARGS_ENTRY_WITH_TYPE_RE = re.compile(r"^\s{4,}(\w+)\s*\(([^)]+)\)\s*:\s", re.MULTILINE) +# Matches the type prefix on the first content line of a Returns: or Yields: section. +# Format: " SomeType: description" — only matches unambiguous type-like prefixes +# (word chars, brackets, pipes, commas, dots; no sentence-starting punctuation). +_SECTION_TYPE_LINE_RE = re.compile( + r"^\s{4,}([\w][\w\[\] |,.*]*(?:\[[\w\[\] |,.*]+\])?):\s", re.MULTILINE +) # Return annotations that need no Returns section _TRIVIAL_RETURNS = {"None", "NoReturn", "Never", "never", ""} +# Typing-module aliases → modern builtin equivalents for normalization. +_TYPING_ALIAS_RE = re.compile( + r"\b(List|Dict|Tuple|Set|FrozenSet|Type|Sequence|Mapping|Iterable|Iterator" + r"|Generator|AsyncGenerator|AsyncIterator|Awaitable|Callable)\b" +) +_TYPING_ALIAS_MAP = { + "List": "list", + "Dict": "dict", + "Tuple": "tuple", + "Set": "set", + "FrozenSet": "frozenset", + "Type": "type", +} + + +def _normalize_type(t: str) -> str: + """Normalize a type string for loose equality comparison. + + Handles the most common differences between docstring-stated types and + Python annotations: + - typing-module aliases: List → list, Dict → dict, etc. 
+ - Optional[X] → X | None + - Union[A, B] → A | B + - Union/pipe ordering: components are sorted so str|None == None|str + - incidental whitespace + + Known simplifications (will NOT flag as mismatch): + - Deeply nested Union/Optional combinations may not fully flatten + - typing.X (qualified) vs X (unqualified) are NOT normalised + - Callable[[A], B] argument ordering is not normalised + """ + t = t.strip() + # Strip typing. prefix + t = re.sub(r"\btyping\.", "", t) + # typing → builtin aliases + t = _TYPING_ALIAS_RE.sub(lambda m: _TYPING_ALIAS_MAP.get(m.group(0), m.group(0)), t) + # Optional[X] → X | None (non-nested only — avoids false positives on complex nesting) + t = re.sub(r"\bOptional\[([^\[\]]*)\]", r"\1 | None", t) + # Union[A, B, ...] → A | B | ... (non-nested) + t = re.sub( + r"\bUnion\[([^\[\]]+)\]", + lambda m: " | ".join(x.strip() for x in m.group(1).split(",")), + t, + ) + # normalise whitespace around type operators + t = re.sub(r"\s*\|\s*", " | ", t) + t = re.sub(r"\s*,\s*", ", ", t) + t = re.sub(r"\s*\[\s*", "[", t) + t = re.sub(r"\s*\]\s*", "]", t) + t = t.strip() + # Sort pipe-union components so str|None == None|str. + # Only apply at the top level (not inside brackets) to avoid reordering + # tuple/generic args, which would cause false positives. + if " | " in t and "[" not in t: + t = " | ".join(sorted(t.split(" | "))) + return t + + +def _types_match(a: str, b: str) -> bool: + """Return True if two type strings are equivalent after normalisation. + + Returns True (no mismatch reported) when normalisation is uncertain: + if either side still contains unexpanded Optional[...] or Union[...] after + normalisation (indicating a nested generic we couldn't fully expand), we + suppress the comparison to avoid false positives. + """ + na, nb = _normalize_type(a), _normalize_type(b) + # If either side has unexpanded Optional/Union (e.g. Optional[list[str]]), + # skip — we can't reliably compare and must not emit a false positive. 
+ if re.search(r"\b(Optional|Union)\[", na) or re.search(r"\b(Optional|Union)\[", nb): + return True + return na == nb + + +def _extract_section_type(doc_text: str, section_re: re.Pattern[str]) -> str | None: + """Extract the type prefix from the first content line of a docstring section. + + Returns the raw type string if found, or None if the section is absent or + the first content line has no recognisable type prefix. + """ + m = section_re.search(doc_text) + if not m: + return None + # Grab text after the section heading up to the next blank line. + after = doc_text[m.end() :] + type_match = _SECTION_TYPE_LINE_RE.search(after.split("\n\n")[0]) + return type_match.group(1).strip() if type_match else None + + # Return annotations that indicate a generator (should use Yields, not Returns) _GENERATOR_RETURN_PATTERNS = re.compile( r"Generator|Iterator|AsyncGenerator|AsyncIterator" @@ -120,8 +271,21 @@ def walk_module(module, module_path: str): def _check_member(member, full_path: str, short_threshold: int) -> list[dict]: - """Return quality issues for a single class or function member.""" + """Return quality issues for a single class or function member. + + Each returned dict has keys: ``path``, ``kind``, ``detail``, ``file``, ``line``. + ``file`` is the absolute source path; ``line`` is the 1-based line number of the + symbol definition (the ``def`` / ``class`` keyword line). + """ issues: list[dict] = [] + _raw_file = getattr(member, "filepath", None) + _abs_file = str(_raw_file) if _raw_file is not None else "" + # Convert to a repo-relative path for readability and GHA annotations. 
+ try: + _rel_file = str(Path(_abs_file).relative_to(Path.cwd())) if _abs_file else "" + except ValueError: + _rel_file = _abs_file + _line = getattr(member, "lineno", None) or 0 doc = getattr(member, "docstring", None) doc_text = doc.value.strip() if (doc and doc.value) else "" @@ -193,6 +357,65 @@ def _check_member(member, full_path: str, short_threshold: int) -> list[dict]: } ) + # Missing type annotation: Args: section exists but parameters lack Python + # annotations — the type column will be absent from the generated API docs. + # Only checked here (when Args: exists) to avoid duplicating no_args reports. + # Variadic *args/**kwargs are excluded via ParameterKind (same filter as concrete). + unannotated = [ + p.name + for p in (params or []) + if p.name not in ("self", "cls") + and getattr(p, "kind", None) not in _variadic_kinds + and p.annotation is None + ] + if unannotated: + sample = ", ".join(unannotated[:3]) + ( + "..." if len(unannotated) > 3 else "" + ) + issues.append( + { + "path": full_path, + "kind": "missing_param_type", + "detail": f"params [{sample}] have no Python type annotation — type absent from API docs", + } + ) + + # Param type consistency: docstring states a type but it differs from + # the signature annotation. Only fires when BOTH sides have an explicit + # type — absence is already handled by missing_param_type / no_args. + # Uses _types_match() which normalises aliases, Optional/Union, and pipe + # ordering. Comparisons involving nested generics (e.g. Optional[list[X]]) + # that cannot be fully expanded are silently skipped to avoid false positives. 
+ if args_block: + for p in params or []: + if p.name in ("self", "cls"): + continue + if getattr(p, "kind", None) in _variadic_kinds: + continue + if p.annotation is None: + continue # missing_param_type already handles this + # Find the docstring type for this param + param_type_match = re.search( + rf"^\s{{4,}}{re.escape(p.name)}\s*\(([^)]+)\)\s*:\s", + args_block.group(1), + re.MULTILINE, + ) + if param_type_match: + if not _types_match( + param_type_match.group(1), str(p.annotation) + ): + issues.append( + { + "path": full_path, + "kind": "param_type_mismatch", + "detail": ( + f"param '{p.name}': docstring says" + f" '{param_type_match.group(1).strip()}'" + f" but annotation is '{p.annotation}'" + ), + } + ) + # Returns/Yields section check: only flag when there is an explicit non-trivial annotation. # Generator return types (Generator, Iterator, etc.) require Yields:, not Returns:. returns = getattr(member, "returns", None) @@ -215,6 +438,38 @@ def _check_member(member, full_path: str, short_threshold: int) -> list[dict]: "detail": f"return type {ret_str!r} has no Returns section", } ) + elif not is_generator: + # Return type consistency: docstring Returns: states a type prefix but + # it differs from the signature annotation. + # Only fires when BOTH sides have an explicit type — missing_return_type + # and no_returns already handle one-sided absence. + # See _normalize_type() docstring for known simplifications that may + # suppress edge cases (e.g. deeply nested Union, qualified typing.X). + doc_ret_type = _extract_section_type(doc_text, _RETURNS_RE) + if doc_ret_type: + if not _types_match(doc_ret_type, ret_str): + issues.append( + { + "path": full_path, + "kind": "return_type_mismatch", + "detail": ( + f"Returns: says '{doc_ret_type}'" + f" but annotation is '{ret_str}'" + ), + } + ) + + # Missing return annotation: Returns: section documented but no Python annotation. 
+ # Naturally non-overlapping with no_returns (which fires when annotation exists + # but Returns: section is absent). When both are missing, no_returns fires first. + if not ret_str and _RETURNS_RE.search(doc_text): + issues.append( + { + "path": full_path, + "kind": "missing_return_type", + "detail": "Returns: section documented but no return type annotation — type absent from API docs", + } + ) # Raises section check: only flag when the source contains explicit raise statements source = getattr(member, "source", None) or "" @@ -274,6 +529,49 @@ def _check_member(member, full_path: str, short_threshold: int) -> list[dict]: } ) + # TypedDict field mismatch check. + # Unlike regular classes (where Attributes: is optional under Option C), + # TypedDict fields *are* the entire public contract. When an Attributes: + # section exists, every entry must match an actual declared field and every + # declared field must appear — stale or missing entries are always a bug. + is_typeddict = any( + _TYPEDDICT_BASES.search(str(base)) for base in getattr(member, "bases", []) + ) + if is_typeddict and _ATTRIBUTES_RE.search(doc_text): + attrs_block = re.search( + r"Attributes\s*:(.*?)(?:\n\s*\n|\Z)", doc_text, re.DOTALL + ) + if attrs_block: + doc_field_names = set(_ARGS_ENTRY_RE.findall(attrs_block.group(1))) + actual_fields = { + name + for name, m in member.members.items() + if not name.startswith("_") and getattr(m, "is_attribute", False) + } + phantom = doc_field_names - actual_fields + if phantom: + issues.append( + { + "path": full_path, + "kind": "typeddict_phantom", + "detail": f"Attributes: documents {sorted(phantom)} not declared in TypedDict", + } + ) + undocumented = actual_fields - doc_field_names + if undocumented: + issues.append( + { + "path": full_path, + "kind": "typeddict_undocumented", + "detail": f"TypedDict fields {sorted(undocumented)} missing from Attributes: section", + } + ) + + # Inject file location into every issue so callers can display and annotate. 
+ for issue in issues: + issue["file"] = _rel_file + issue["line"] = _line + return issues @@ -296,11 +594,21 @@ def audit_docstring_quality( - no_class_args: class whose __init__ has typed params but no Args section on the class - duplicate_init_args: Args: present in both class docstring and __init__ (Option C violation) - param_mismatch: Args section documents names absent from the real signature - - Note: Attributes: sections are intentionally not enforced. Under the Option C - convention, Attributes: is only used when stored values differ in type or - behaviour from the constructor inputs (e.g. type transforms, computed values, - class constants). Pure-echo entries that repeat Args: verbatim are omitted. + - typeddict_phantom: TypedDict Attributes: section documents fields not declared in the class + - typeddict_undocumented: TypedDict has declared fields absent from its Attributes: section + - missing_param_type: Args: section exists but one or more parameters lack Python type + annotations — the type column will be absent from the generated API docs. Only checked + when no no_args issue exists (avoids duplicate reports). + - missing_return_type: Returns: section exists but the function has no return type + annotation — the return type will be absent from the generated API docs. Only checked + when no no_returns issue exists (avoids duplicate reports). + + Note: Attributes: sections are intentionally not enforced for regular classes. Under + the Option C convention, Attributes: is only used when stored values differ in type or + behaviour from the constructor inputs (e.g. type transforms, computed values, class + constants). Pure-echo entries that repeat Args: verbatim are omitted. TypedDicts are + a carve-out: their fields are the entire public contract, so when an Attributes: + section is present it must exactly match the declared fields. 
Only symbols (and methods whose parent class) present in `documented` are checked when that set is provided — ensuring the audit is scoped to what is @@ -376,6 +684,82 @@ def walk_module(module, module_path: str) -> None: _IN_GHA = os.environ.get("GITHUB_ACTIONS") == "true" +# Base URLs for documentation references emitted in fix hints. +# These point to upstream main; anchors for the CI checks reference section +# will resolve once the PR introducing them is merged. +_CONTRIB_DOCS_URL = ( + "https://github.com/generative-computing/mellea/blob/main" + "/docs/docs/guide/CONTRIBUTING.md" +) +_COVERAGE_DOCS_URL = ( + "https://github.com/generative-computing/mellea/blob/main" + "/CONTRIBUTING.md#validating-docstrings" +) + +# Per-kind fix hints: (one-line fix text, CONTRIBUTING.md anchor) +_KIND_FIX_HINTS: dict[str, tuple[str, str]] = { + "missing": ( + "Add a Google-style summary sentence.", + f"{_CONTRIB_DOCS_URL}#missing-or-short-docstrings", + ), + "short": ( + "Expand the summary — aim for at least 5 meaningful words.", + f"{_CONTRIB_DOCS_URL}#missing-or-short-docstrings", + ), + "no_args": ( + "Add an Args: section listing each parameter with a description.", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "no_returns": ( + "Add a Returns: section describing the return value.", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "no_yields": ( + "Add a Yields: section (generator functions use Yields:, not Returns:).", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "no_raises": ( + "Add a Raises: section listing each exception and the condition that triggers it.", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "no_class_args": ( + "Add Args: to the class docstring (not __init__) — Option C convention.", + f"{_CONTRIB_DOCS_URL}#class-docstrings-option-c", + ), + "duplicate_init_args": ( + "Remove Args: from __init__ docstring; place it on the class docstring only (Option C).", + 
f"{_CONTRIB_DOCS_URL}#class-docstrings-option-c", + ), + "param_mismatch": ( + "Remove or rename phantom entries to match the actual parameter names.", + f"{_CONTRIB_DOCS_URL}#class-docstrings-option-c", + ), + "typeddict_phantom": ( + "Remove documented fields that are not declared in the TypedDict.", + f"{_CONTRIB_DOCS_URL}#typeddict-classes", + ), + "typeddict_undocumented": ( + "Add the missing fields to the Attributes: section.", + f"{_CONTRIB_DOCS_URL}#typeddict-classes", + ), + "missing_param_type": ( + "Add a Python type annotation to each listed parameter in the function signature.", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "missing_return_type": ( + "Add a return type annotation (-> SomeType) to the function signature.", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "param_type_mismatch": ( + "Align the type in the Args: entry with the signature annotation (or vice versa).", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "return_type_mismatch": ( + "Align the type prefix in Returns: with the signature annotation (or vice versa).", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), +} + def _gha_cmd(level: str, title: str, message: str) -> None: """Emit a GitHub Actions workflow command annotation.""" @@ -385,8 +769,57 @@ def _gha_cmd(level: str, title: str, message: str) -> None: print(f"::{level} title={title}::{message}") -def _print_quality_report(issues: list[dict]) -> None: - """Print a grouped quality report to stdout.""" +def _gha_file_annotation( + level: str, title: str, message: str, file: str, line: int +) -> None: + """Emit a GitHub Actions annotation anchored to a specific file and line. + + When ``file`` and ``line`` are provided the annotation appears inline in the + PR diff view, making it immediately obvious which symbol needs fixing. + + Args: + level: Annotation level — ``"error"``, ``"warning"``, or ``"notice"``. + title: Short label shown in bold in the annotation. 
+ message: Body text for the annotation. + file: Repo-relative file path (e.g. ``mellea/core/base.py``). + line: 1-based line number of the symbol definition. + """ + for s in (message, title, file): + s = s.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A") + if file and line: + print(f"::{level} file={file},line={line},title={title}::{message}") + else: + print(f"::{level} title={title}::{message}") + + +# GitHub Actions silently drops inline diff annotations beyond ~10 per step. +# This per-kind cap ensures every category gets at least one visible annotation +# rather than early kinds consuming the entire budget. +_GHA_ANNOTATIONS_PER_KIND = 10 + + +def _print_quality_report(issues: list[dict], *, fail_on_quality: bool = False) -> None: + """Print a grouped quality report to stdout and emit GHA annotations. + + When running in GitHub Actions (``GITHUB_ACTIONS=true``), each issue is + also emitted as a file-level annotation (``::error`` or ``::warning``) + anchored to the source line, so they appear inline in the PR diff. + + GitHub Actions caps inline diff annotations at roughly 10 per step; issues + beyond that cap are silently dropped from the diff view (they still appear + in the full job log and in the JSON artifact). To ensure every check + category gets at least one visible annotation, this function emits at most + :data:`_GHA_ANNOTATIONS_PER_KIND` annotations per kind and prints a + ``"... and N more"`` notice for the remainder. The complete list is always + written to the job log regardless of the GHA cap. + + Args: + issues: List of issue dicts from :func:`audit_docstring_quality`. + Each dict must have keys: ``path``, ``kind``, ``detail``, + ``file``, ``line``. + fail_on_quality: When ``True`` annotations are emitted as ``error`` + (red); otherwise as ``warning`` (yellow). 
+ """ by_kind: dict[str, list[dict]] = {} for issue in issues: by_kind.setdefault(issue["kind"], []).append(issue) @@ -401,8 +834,16 @@ def _print_quality_report(issues: list[dict]) -> None: "no_class_args": "Missing class Args section", "duplicate_init_args": "Duplicate Args: in class + __init__ (Option C violation)", "param_mismatch": "Param name mismatches (documented but not in signature)", + "typeddict_phantom": "TypedDict phantom fields (documented but not declared)", + "typeddict_undocumented": "TypedDict undocumented fields (declared but missing from Attributes:)", + "missing_param_type": "Missing parameter type annotations (type absent from API docs)", + "missing_return_type": "Missing return type annotations (type absent from API docs)", + "param_type_mismatch": "Param type mismatch (docstring vs annotation)", + "return_type_mismatch": "Return type mismatch (docstring vs annotation)", } + annotation_level = "error" if fail_on_quality else "warning" + total = len(issues) print(f"\n{'=' * 60}") print("Docstring Quality Report") @@ -419,17 +860,48 @@ def _print_quality_report(issues: list[dict]) -> None: "no_class_args", "duplicate_init_args", "param_mismatch", + "typeddict_phantom", + "typeddict_undocumented", + "missing_param_type", + "missing_return_type", + "param_type_mismatch", + "return_type_mismatch", ): items = by_kind.get(kind, []) if not items: continue label = kind_labels.get(kind, kind) + fix_hint, ref_url = _KIND_FIX_HINTS.get(kind, ("", "")) print(f"\n{'─' * 50}") print(f" {label} ({len(items)})") + if fix_hint: + print(f" Fix: {fix_hint}") + if ref_url: + print(f" Ref: {ref_url}") print(f"{'─' * 50}") - for item in sorted(items, key=lambda x: x["path"]): - print(f" {item['path']}") + sorted_items = sorted(items, key=lambda x: x["path"]) + gha_emitted = 0 + for item in sorted_items: + file_ref = item.get("file", "") + line_num = item.get("line", 0) + loc = f" [{file_ref}:{line_num}]" if file_ref and line_num else "" + print(f" 
{item['path']}{loc}") print(f" {item['detail']}") + if _IN_GHA and file_ref and gha_emitted < _GHA_ANNOTATIONS_PER_KIND: + _gha_file_annotation( + annotation_level, + f"[{kind}] {item['path']}", + item["detail"], + file_ref, + line_num, + ) + gha_emitted += 1 + if _IN_GHA and len(sorted_items) > _GHA_ANNOTATIONS_PER_KIND: + remainder = len(sorted_items) - _GHA_ANNOTATIONS_PER_KIND + print( + f" (GHA annotations capped at {_GHA_ANNOTATIONS_PER_KIND} per kind — " + f"{remainder} more {kind!r} issue(s) in job log and JSON artifact)" + ) def audit_nav_orphans(docs_dir: Path, source_dir: Path) -> list[str]: @@ -445,18 +917,33 @@ def audit_nav_orphans(docs_dir: Path, source_dir: Path) -> list[str]: Returns: Sorted list of orphaned module paths relative to docs_dir (no extension) """ - mint_json = source_dir / "docs" / "mint.json" + # Support both Mintlify v1 (mint.json at docs/mint.json) and + # v2 (docs.json at docs/docs/docs.json). docs.json uses plain string + # entries in "pages" arrays; mint.json uses {"page": "..."} dicts. 
+ nav_config: Path | None = None + for candidate in ( + source_dir / "docs" / "docs" / "docs.json", + source_dir / "docs" / "mint.json", + ): + if candidate.exists(): + nav_config = candidate + break mdx_files: set[str] = set() for mdx_file in docs_dir.rglob("*.mdx"): mdx_files.add(str(mdx_file.relative_to(docs_dir).with_suffix(""))) nav_refs: set[str] = set() - if mint_json.exists(): - config = json.loads(mint_json.read_text()) + if nav_config is not None: + config = json.loads(nav_config.read_text()) def _extract(obj: object) -> None: - if isinstance(obj, dict): + if isinstance(obj, str): + # docs.json / mint.json plain string page entry + if obj.startswith("api/"): + nav_refs.add(obj[len("api/") :]) + elif isinstance(obj, dict): + # mint.json {"page": "api/..."} dict entry if "page" in obj: page = obj["page"] if isinstance(page, str) and page.startswith("api/"): @@ -654,11 +1141,34 @@ def main(): print(f"CLI commands: {len(report['cli_commands'])}") if report["missing_symbols"]: + total_missing = sum(len(s) for s in report["missing_symbols"].values()) + print(f"\n{'─' * 60}") print( - f"\n⚠️ Missing documentation for {len(report['missing_symbols'])} modules:" + f" Missing API docs — {total_missing} symbol(s) across " + f"{len(report['missing_symbols'])} module(s)" ) + print( + " Fix: Run the doc generation pipeline to produce MDX for new symbols,\n" + " then add entries to docs/docs/docs.json navigation.\n" + " uv run python tooling/docs-autogen/generate-ast.py" + ) + print(f" Ref: {_COVERAGE_DOCS_URL}") + print(f"{'─' * 60}") for module, symbols in sorted(report["missing_symbols"].items()): - print(f" {module}: {', '.join(symbols)}") + print(f" {module}") + for sym in sorted(symbols): + print(f" {sym}") + if _IN_GHA: + _gha_cmd( + "error" + if report["coverage_percentage"] < args.threshold + else "warning", + "API Coverage", + f"{total_missing} symbol(s) undocumented in " + f"{len(report['missing_symbols'])} module(s) — " + f"coverage 
{report['coverage_percentage']}% " + f"(threshold {args.threshold}%)", + ) # Quality audit — scoped to documented (API reference) symbols only quality_issues: list[dict] = [] @@ -677,7 +1187,7 @@ def main(): documented=documented, ) ) - _print_quality_report(quality_issues) + _print_quality_report(quality_issues, fail_on_quality=args.fail_on_quality) # Nav orphan check — MDX files not referenced in mint.json navigation orphans: list[str] = [] @@ -724,9 +1234,7 @@ def main(): if _IN_GHA: if not args.quality: - _gha_cmd( - "notice", "Docstring quality", "skipped (pass --quality to enable)" - ) + pass # quality step is a separate CI job; no annotation needed here elif quality_issues: _gha_cmd( "error" if args.fail_on_quality else "warning", diff --git a/tooling/docs-autogen/generate-ast.py b/tooling/docs-autogen/generate-ast.py index 5687eee04..7384936b2 100644 --- a/tooling/docs-autogen/generate-ast.py +++ b/tooling/docs-autogen/generate-ast.py @@ -20,6 +20,7 @@ """ import argparse +import ast import glob import json import os @@ -390,6 +391,141 @@ def remove_empty_mdx_files() -> None: print(f"✅ Removed {removed} empty files.", flush=True) +# ----------------------------- +# Public API filter +# ----------------------------- + + +def _imported_submodules(init_path: Path) -> set[str] | None: + """Return submodule names explicitly imported via relative imports in ``__init__.py``. + + Uses AST parsing (no import) so it is safe to call during doc generation. + + A ``__init__.py`` that does ``from .ollama import OllamaModelBackend`` imports + from the ``ollama`` submodule, making it part of the package's public surface + even if the module name does not appear in ``__all__``. + + Args: + init_path: Path to the ``__init__.py`` file to parse. + + Returns: + Set of submodule names imported via ``from .name import ...``, or ``None`` + when the file cannot be parsed. 
An empty set means the ``__init__.py`` + exists but contains no relative imports — caller should treat the module + as indeterminate and keep it. + """ + try: + tree = ast.parse(init_path.read_text(encoding="utf-8")) + except (SyntaxError, OSError): + return None + imported: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom) and node.level > 0 and node.module: + # from .submodule import X → record "submodule" + imported.add(node.module.split(".")[0]) + return imported + + +# Modules that are confirmed internal implementation details and must never +# appear in the public API reference. These are a fallback for cases where +# the parent __init__.py does not import from any submodule (so the +# import-based filter cannot determine intent automatically). +# +# Long-term fix: rename these files with a leading ``_`` (e.g. ``_json_util.py``) +# so the doc generator skips them automatically per Python convention. +# Tracked in: TODO open issue +_CONFIRMED_INTERNAL_MODULES: frozenset[str] = frozenset( + { + # Granite intrinsics internal JSON parser — not part of the public API + "mellea/formatters/granite/intrinsics/json_util", + # Backend telemetry wiring — internal instrumentation helpers + "mellea/telemetry/backend_instrumentation", + } +) + + +def remove_internal_modules(source_root: Path) -> None: + """Remove MDX files for submodules that are not part of the package's public API. + + ``mdxify --all`` generates documentation for every reachable submodule, + regardless of whether the package author intends it to be public. This step + enforces the Python convention: a submodule is public when its parent + ``__init__.py`` explicitly imports from it (``from .name import ...``). + Submodules that are never imported in the parent ``__init__.py`` are internal + implementation details and are removed from the staging output. + + **Conservative rules:** + + * If the parent ``__init__.py`` cannot be parsed → keep (safe default). 
+ * If the parent ``__init__.py`` contains *no* relative imports → keep + (cannot determine intent; typical for packages where submodules are + accessed via their full dotted path rather than re-exported). + * Package index files (``__init__.mdx`` renamed to ``.mdx``) are + identified by ``stem == parent_dir_name`` and are never filtered. + + A hardcoded ``_CONFIRMED_INTERNAL_MODULES`` set catches known internals in + packages where the parent ``__init__.py`` imports nothing (indeterminate + case). These should eventually be renamed with a ``_`` prefix in source. + + Args: + source_root: Root of the source repo (contains ``mellea/`` and ``cli/``). + """ + print("-" * 30, flush=True) + print( + "🔒 Filtering internal modules (not imported by parent __init__)...", flush=True + ) + + mdx_files = list(STAGING_API_DIR.rglob("*.mdx")) + removed = 0 + + for mdx_path in sorted(mdx_files): + rel = mdx_path.relative_to(STAGING_API_DIR) + parts = list(rel.with_suffix("").parts) + + # Need at least pkg/parent/module to have a meaningful parent check. + if len(parts) < 3: + continue + + module_name = parts[-1] + + # Package index files: __init__.mdx was renamed to .mdx. + # Identified by stem == containing directory name. Never filter these. + if module_name == parts[-2]: + continue + + # Hardcoded confirmed-internal override (indeterminate parent cases). + rel_no_ext = str(rel.with_suffix("")) + if rel_no_ext in _CONFIRMED_INTERNAL_MODULES: + print( + f" 🔒 {rel} (confirmed internal — hardcoded exclusion)", flush=True + ) + mdx_path.unlink(missing_ok=True) + removed += 1 + continue + + # Import-based check: parent __init__.py must import from this submodule. 
+ parent_src = source_root / Path(*parts[:-1]) / "__init__.py" + if not parent_src.exists(): + continue # no __init__.py — cannot determine, keep + + parent_imports = _imported_submodules(parent_src) + if parent_imports is None: + continue # parse failure — keep (safe default) + if not parent_imports: + continue # parent imports nothing from submodules — indeterminate, keep + + if module_name not in parent_imports: + print( + f" 🔒 {rel} ('{module_name}' not imported by " + f"{Path(*parts[:-1])}/__init__.py)", + flush=True, + ) + mdx_path.unlink(missing_ok=True) + removed += 1 + + print(f"✅ Removed {removed} internal module file(s).", flush=True) + + # ----------------------------- # Move + nav generation # ----------------------------- @@ -770,6 +906,7 @@ def main() -> None: rename_init_files_to_parent() update_frontmatter_metadata() remove_empty_mdx_files() + remove_internal_modules(source_root) # Move staging api -> final docs root/api final_api_dir = move_api_to_docs_root(docs_root) diff --git a/tooling/docs-autogen/validate.py b/tooling/docs-autogen/validate.py index d1e2bc980..94097d506 100755 --- a/tooling/docs-autogen/validate.py +++ b/tooling/docs-autogen/validate.py @@ -11,38 +11,149 @@ import argparse import json +import os import re import sys from pathlib import Path +_IN_GHA = os.environ.get("GITHUB_ACTIONS") == "true" + +_ROOT_CONTRIB = ( + "https://github.com/generative-computing/mellea/blob/main/CONTRIBUTING.md" +) +_GUIDE_CONTRIB = ( + "https://github.com/generative-computing/mellea/blob/main" + "/docs/docs/guide/CONTRIBUTING.md" +) + +# Per-check fix hints: label (as passed to _print_check_errors) -> (fix text, ref URL) +_CHECK_FIX_HINTS: dict[str, tuple[str, str]] = { + "Source links": ( + "Regenerate API docs to update version tags in source links: " + "uv run python tooling/docs-autogen/generate-ast.py", + f"{_ROOT_CONTRIB}#validating-docstrings", + ), + "MDX syntax": ( + "Check for unclosed ``` fences, missing frontmatter, or unescaped {{ }} " 
+ "in code blocks. Each error includes the file and line number.", + f"{_GUIDE_CONTRIB}#frontmatter-required-on-every-page", + ), + "Internal links": ( + "Verify the relative link path resolves to an existing .mdx file. " + "Each error shows the file, line, and broken link target.", + f"{_GUIDE_CONTRIB}#links", + ), + "Anchor collisions": ( + "Rename one of the conflicting headings so they produce unique anchors. " + "Each error shows both heading texts and their line numbers.", + f"{_GUIDE_CONTRIB}#headings", + ), + "RST docstrings": ( + "Replace RST double-backtick notation (``Symbol``) with single backticks " + "(`Symbol`) in the Python source docstring.", + f"{_ROOT_CONTRIB}#docstrings", + ), + "Stale files": ( + "Delete the listed files — they are review artifacts or superseded content " + "that should not ship.", + f"{_GUIDE_CONTRIB}#missing-content", + ), + "Doc imports": ( + "Update the import path in the documentation code block to match the " + "current module/symbol location.", + f"{_GUIDE_CONTRIB}#code-and-fragment-consistency", + ), + "Examples catalogue": ( + "Add a row for the new example directory to docs/docs/examples/index.md.", + f"{_GUIDE_CONTRIB}#keeping-the-examples-catalogue-up-to-date", + ), +} + +# GitHub Actions silently drops inline diff annotations beyond ~10 per step. +# We cap at 20 total across all validate.py checks so the most important issues +# from each category remain visible in the PR diff. The complete list is always +# in the job log and (when --output is used) the JSON artifact. 
+_GHA_ANNOTATION_CAP = 20 + + +# --------------------------------------------------------------------------- +# Error dict helpers +# --------------------------------------------------------------------------- + +# Each check function returns list[dict] where every dict has: +# file — repo-relative file path (str); empty string for project-level issues +# line — 1-based line number (int); 0 for file- or project-level issues +# message — human-readable description (str) + + +def _err(file: str, line: int, message: str) -> dict: + return {"file": file, "line": line, "message": message} + + +def _line_of(content: str, match_start: int) -> int: + """Return the 1-based line number for a byte offset in *content*.""" + return content[:match_start].count("\n") + 1 + + +# --------------------------------------------------------------------------- +# GHA annotation helpers +# --------------------------------------------------------------------------- + + +def _gha_annotation( + level: str, title: str, message: str, file: str = "", line: int = 0 +) -> None: + """Emit a GitHub Actions workflow command annotation. + + When *file* and *line* are provided the annotation is anchored to that + source location so it appears inline in the PR diff view. 
+ """ + for attr in ("message", "title", "file"): + locals()[attr] # reference to avoid lint unused-var + message = message.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A") + title = title.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A") + file = file.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A") + if file and line: + print(f"::{level} file={file},line={line},title={title}::{message}") + else: + print(f"::{level} title={title}::{message}") + + +# --------------------------------------------------------------------------- +# Validation checks +# --------------------------------------------------------------------------- -def validate_source_links(docs_dir: Path, version: str) -> tuple[int, list[str]]: - """Validate GitHub source links. + +def validate_source_links(docs_dir: Path, version: str) -> tuple[int, list[dict]]: + """Validate GitHub source links in generated MDX files. Args: - docs_dir: Directory containing MDX files - version: Expected version in links (e.g., "0.5.0") + docs_dir: Directory containing MDX files. + version: Expected version string in links (e.g., ``"0.5.0"``). Returns: - Tuple of (error_count, error_messages) + Tuple of (error_count, errors) where each error dict has keys + ``file``, ``line``, and ``message``. 
""" - errors = [] + errors: list[dict] = [] expected_repo = "ibm-granite/mellea" expected_pattern = f"https://github.com/{expected_repo}/blob/v{version}/" + link_re = re.compile(r"\[View source\]\((https://github\.com/[^)]+)\)") - for mdx_file in docs_dir.rglob("*.mdx"): + for mdx_file in sorted(docs_dir.rglob("*.mdx")): content = mdx_file.read_text() - - # Find all GitHub links - link_pattern = r"\[View source\]\((https://github\.com/[^)]+)\)" - for match in re.finditer(link_pattern, content): - link = match.group(1) - if not link.startswith(expected_pattern): - rel_path = mdx_file.relative_to(docs_dir) - errors.append( - f"{rel_path}: Invalid source link: {link}\n" - f" Expected to start with: {expected_pattern}" - ) + rel = str(mdx_file.relative_to(docs_dir)) + for line_num, line in enumerate(content.splitlines(), 1): + for match in link_re.finditer(line): + link = match.group(1) + if not link.startswith(expected_pattern): + errors.append( + _err( + rel, + line_num, + f"Invalid source link: {link} (expected prefix: {expected_pattern})", + ) + ) return len(errors), errors @@ -51,15 +162,12 @@ def validate_coverage(docs_dir: Path, threshold: float) -> tuple[bool, dict]: """Validate API coverage meets threshold. Args: - docs_dir: Directory containing MDX files - threshold: Minimum coverage percentage (0-100) + docs_dir: Directory containing MDX files. + threshold: Minimum coverage percentage (0-100). Returns: - Tuple of (passed, coverage_report) + Tuple of (passed, coverage_report). 
""" - # Import audit_coverage functionality - import sys - sys.path.insert(0, str(Path(__file__).parent)) try: @@ -72,8 +180,7 @@ def validate_coverage(docs_dir: Path, threshold: float) -> tuple[bool, dict]: except ImportError: return False, {"error": "audit_coverage.py not found"} - # Run coverage audit - source_dir = docs_dir.parent.parent.parent # Go up to project root + source_dir = docs_dir.parent.parent.parent mellea_symbols = discover_public_symbols(source_dir / "mellea", "mellea") cli_symbols = discover_public_symbols(source_dir / "cli", "cli") cli_commands = discover_cli_commands(source_dir / "cli") @@ -81,179 +188,162 @@ def validate_coverage(docs_dir: Path, threshold: float) -> tuple[bool, dict]: all_symbols = {**mellea_symbols, **cli_symbols} report = generate_coverage_report(all_symbols, documented, cli_commands) + return report["coverage_percentage"] >= threshold, report - passed = report["coverage_percentage"] >= threshold - return passed, report +def validate_mdx_syntax(docs_dir: Path) -> tuple[int, list[dict]]: + """Validate MDX syntax in generated documentation files. -def validate_mdx_syntax(docs_dir: Path) -> tuple[int, list[str]]: - """Validate MDX syntax. + Checks for unclosed code fences, unescaped curly braces inside code + blocks, missing frontmatter, and a missing ``title`` field. Args: - docs_dir: Directory containing MDX files + docs_dir: Directory containing MDX files. Returns: - Tuple of (error_count, error_messages) + Tuple of (error_count, errors). 
""" - errors = [] + errors: list[dict] = [] + fence_re = re.compile(r"^```") - for mdx_file in docs_dir.rglob("*.mdx"): + for mdx_file in sorted(docs_dir.rglob("*.mdx")): content = mdx_file.read_text() - rel_path = mdx_file.relative_to(docs_dir) + rel = str(mdx_file.relative_to(docs_dir)) + lines = content.splitlines() - # Check for unclosed code blocks - code_block_count = content.count("```") - if code_block_count % 2 != 0: - errors.append(f"{rel_path}: Unclosed code block (odd number of ```)") + # Unclosed code fence (odd number of ``` in the whole file) + if content.count("```") % 2 != 0: + errors.append(_err(rel, 0, "Unclosed code block (odd number of ```)")) - # Check for unescaped curly braces in code blocks - lines = content.splitlines() + # Unescaped curly braces inside code blocks in_code_block = False - code_fence_pattern = re.compile(r"^```") - for line_num, line in enumerate(lines, 1): - if code_fence_pattern.match(line): + if fence_re.match(line): in_code_block = not in_code_block continue - - if in_code_block: - # Check for sequences of { or } that have odd length (indicating unescaped) - # Properly escaped: {{ or }} (even length) - # Unescaped: { or } or {{{ or }}} (odd length) - - # Find all sequences of consecutive open braces - for match in re.finditer(r"\{+", line): - brace_seq = match.group() - if len(brace_seq) % 2 != 0: # Odd number = unescaped - errors.append( - f"{rel_path}:{line_num}: Unescaped curly brace in code block (found {len(brace_seq)} consecutive '{{' - should be even)\n" - f" Line: {line.strip()[:80]}" - ) - break # Only report once per line - - # Find all sequences of consecutive close braces - for match in re.finditer(r"\}+", line): - brace_seq = match.group() - if len(brace_seq) % 2 != 0: # Odd number = unescaped + if not in_code_block: + continue + for pattern, brace in ((r"\{+", "{"), (r"\}+", "}")): + for match in re.finditer(pattern, line): + seq = match.group() + if len(seq) % 2 != 0: errors.append( - 
f"{rel_path}:{line_num}: Unescaped curly brace in code block (found {len(brace_seq)} consecutive '}}' - should be even)\n" - f" Line: {line.strip()[:80]}" + _err( + rel, + line_num, + f"Unescaped '{brace}' in code block: " + f"{len(seq)} consecutive '{brace}' (must be even) — " + f"line: {line.strip()[:80]}", + ) ) - break # Only report once per line + break - # Check for frontmatter + # Frontmatter presence and structure if not content.startswith("---\n"): - errors.append(f"{rel_path}: Missing frontmatter") - - # Check for required frontmatter fields - if content.startswith("---\n"): - frontmatter_end = content.find("\n---\n", 4) - if frontmatter_end == -1: - errors.append(f"{rel_path}: Malformed frontmatter (no closing ---)") - else: - frontmatter = content[4:frontmatter_end] - if "title:" not in frontmatter: - errors.append(f"{rel_path}: Missing 'title' in frontmatter") + errors.append( + _err(rel, 1, "Missing frontmatter (file must start with ---)") + ) + else: + end = content.find("\n---\n", 4) + if end == -1: + errors.append(_err(rel, 1, "Malformed frontmatter (no closing ---)")) + elif "title:" not in content[4:end]: + errors.append(_err(rel, 1, "Missing 'title' field in frontmatter")) return len(errors), errors -def validate_internal_links(docs_dir: Path) -> tuple[int, list[str]]: - """Validate internal links point to existing files. +def validate_internal_links(docs_dir: Path) -> tuple[int, list[dict]]: + """Validate that relative internal links resolve to existing files. Args: - docs_dir: Directory containing MDX files + docs_dir: Directory containing MDX files. Returns: - Tuple of (error_count, error_messages) + Tuple of (error_count, errors). 
""" - errors = [] + errors: list[dict] = [] + link_re = re.compile(r"\[([^\]]+)\]\(([^)]+)\)", re.DOTALL) - for mdx_file in docs_dir.rglob("*.mdx"): + for mdx_file in sorted(docs_dir.rglob("*.mdx")): content = mdx_file.read_text() - rel_path = mdx_file.relative_to(docs_dir) + rel = str(mdx_file.relative_to(docs_dir)) - # Find relative links (not starting with http) - # Use DOTALL to handle multiline links - link_pattern = r"\[([^\]]+)\]\(([^)]+)\)" - for match in re.finditer(link_pattern, content, re.DOTALL): + for match in link_re.finditer(content): link_text, link_url = match.groups() - - # Strip whitespace from URL (handles multiline links) link_url = link_url.strip() - # Skip external links if link_url.startswith(("http://", "https://", "#")): continue - # Split anchor from path (e.g., "base#class-component" -> "base", "class-component") - if "#" in link_url: - file_path, _ = link_url.split("#", 1) # anchor not used - else: - file_path = link_url - - # Resolve relative link - add .mdx extension if not present - if file_path and not file_path.endswith(".mdx"): - file_path = f"{file_path}.mdx" - - target = (mdx_file.parent / file_path).resolve() + file_part = link_url.split("#", 1)[0] if "#" in link_url else link_url + if file_part and not file_part.endswith(".mdx"): + file_part = f"{file_part}.mdx" + target = (mdx_file.parent / file_part).resolve() if not target.exists(): errors.append( - f"{rel_path}: Broken link to '{link_url}' (text: '{link_text}')" + _err( + rel, + _line_of(content, match.start()), + f"Broken link to '{link_url}' (text: '{link_text}')", + ) ) return len(errors), errors -def validate_anchor_collisions(docs_dir: Path) -> tuple[int, list[str]]: - """Check for anchor collisions within files. +def validate_anchor_collisions(docs_dir: Path) -> tuple[int, list[dict]]: + """Check for heading anchor collisions within MDX files. Args: - docs_dir: Directory containing MDX files + docs_dir: Directory containing MDX files. 
Returns: - Tuple of (error_count, error_messages) + Tuple of (error_count, errors). """ - errors = [] + errors: list[dict] = [] - # Import mintlify_anchor function sys.path.insert(0, str(Path(__file__).parent)) try: from test_mintlify_anchors import mintlify_anchor except ImportError: - # Fallback implementation - def mintlify_anchor(heading: str) -> str: + + def mintlify_anchor(heading: str) -> str: # type: ignore[misc] anchor = heading.lower().replace(" ", "-") anchor = re.sub(r"[^a-z0-9-]", "", anchor) - anchor = re.sub(r"-+", "-", anchor) - return anchor.strip("-") + return re.sub(r"-+", "-", anchor).strip("-") - for mdx_file in docs_dir.rglob("*.mdx"): - content = mdx_file.read_text() - rel_path = mdx_file.relative_to(docs_dir) + heading_re = re.compile(r"^#+\s+(.+)$", re.MULTILINE) - # Extract all headings - heading_pattern = r"^#+\s+(.+)$" - headings = re.findall(heading_pattern, content, re.MULTILINE) + for mdx_file in sorted(docs_dir.rglob("*.mdx")): + content = mdx_file.read_text() + rel = str(mdx_file.relative_to(docs_dir)) - # Generate anchors - anchors: dict[str, list[str]] = {} - for heading in headings: + # anchors maps anchor -> (heading_text, line_num) for the first occurrence + seen: dict[str, tuple[str, int]] = {} + for match in heading_re.finditer(content): + heading = match.group(1) + line_num = _line_of(content, match.start()) anchor = mintlify_anchor(heading) - if anchor in anchors: + if anchor in seen: + prev_heading, prev_line = seen[anchor] errors.append( - f"{rel_path}: Anchor collision '{anchor}' from headings:\n" - f" 1. {anchors[anchor]}\n" - f" 2. 
{heading}" + _err( + rel, + line_num, + f"Anchor collision '{anchor}': " + f"'{prev_heading}' (line {prev_line}) and " + f"'{heading}' (line {line_num}) produce the same anchor", + ) ) else: - anchors[anchor] = heading + seen[anchor] = (heading, line_num) return len(errors), errors -def validate_rst_docstrings(source_dir: Path) -> tuple[int, list[str]]: +def validate_rst_docstrings(source_dir: Path) -> tuple[int, list[dict]]: """Scan Python source files for RST double-backtick notation in docstrings. RST-style ``Symbol`` double-backtick markup interacts badly with the @@ -263,34 +353,35 @@ def validate_rst_docstrings(source_dir: Path) -> tuple[int, list[str]]: rather than a clickable link. Args: - source_dir: Root of the Python source tree to scan (e.g. repo/mellea) + source_dir: Root of the Python source tree to scan (e.g. repo/mellea). Returns: - Tuple of (error_count, error_messages) + Tuple of (error_count, errors). """ - errors = [] - # Match ``Word`` where Word starts with a letter (RST inline literal) + errors: list[dict] = [] pattern = re.compile(r"``([A-Za-z][^`]*)``") - for py_file in source_dir.rglob("*.py"): + for py_file in sorted(source_dir.rglob("*.py")): try: content = py_file.read_text(encoding="utf-8") except Exception: continue - + rel = str(py_file.relative_to(source_dir.parent)) for line_num, line in enumerate(content.splitlines(), 1): if pattern.search(line): - rel = py_file.relative_to(source_dir.parent) errors.append( - f"{rel}:{line_num}: RST double-backtick notation — " - f"use single backticks for Markdown/MDX compatibility\n" - f" {line.strip()[:100]}" + _err( + rel, + line_num, + f"RST double-backtick notation — use single backticks for " + f"Markdown/MDX compatibility: {line.strip()[:100]}", + ) ) return len(errors), errors -def validate_stale_files(docs_root: Path) -> tuple[int, list[str]]: +def validate_stale_files(docs_root: Path) -> tuple[int, list[dict]]: """Check for stale files that should have been cleaned up. 
Detects review artifacts, superseded files, and other content that @@ -300,30 +391,29 @@ def validate_stale_files(docs_root: Path) -> tuple[int, list[str]]: docs_root: The ``docs/`` directory (parent of ``docs/docs/``). Returns: - Tuple of (error_count, error_messages). + Tuple of (error_count, errors). """ - errors = [] + errors: list[dict] = [] - # Review tracker artifacts (e.g. PR601-REVIEW.md) for f in docs_root.glob("*REVIEW*"): - errors.append(f"Stale review artifact: {f.relative_to(docs_root)}") + errors.append(_err(str(f.relative_to(docs_root)), 0, "Stale review artifact")) - # Superseded landing page: docs/index.md replaced by docs/docs/index.mdx old_index = docs_root / "index.md" new_index = docs_root / "docs" / "index.mdx" if old_index.exists() and new_index.exists(): - errors.append("Stale file: index.md — superseded by docs/index.mdx") + errors.append(_err("index.md", 0, "Stale file — superseded by docs/index.mdx")) - # Legacy tutorial: docs/tutorial.md replaced by docs/docs/tutorials/ old_tutorial = docs_root / "tutorial.md" tutorials_dir = docs_root / "docs" / "tutorials" if old_tutorial.exists() and tutorials_dir.is_dir(): - errors.append("Stale file: tutorial.md — superseded by docs/tutorials/") + errors.append( + _err("tutorial.md", 0, "Stale file — superseded by docs/tutorials/") + ) return len(errors), errors -def validate_examples_catalogue(docs_root: Path) -> tuple[int, list[str]]: +def validate_examples_catalogue(docs_root: Path) -> tuple[int, list[dict]]: """Check that every example directory is listed in the examples index page. Scans ``docs/examples/`` for subdirectories that contain at least one @@ -334,45 +424,45 @@ def validate_examples_catalogue(docs_root: Path) -> tuple[int, list[str]]: docs_root: The ``docs/`` directory (parent of ``docs/docs/``). Returns: - Tuple of (error_count, error_messages). + Tuple of (error_count, errors). 
""" - errors: list[str] = [] + errors: list[dict] = [] examples_dir = docs_root / "examples" index_file = docs_root / "docs" / "examples" / "index.md" if not examples_dir.is_dir(): return 0, [] if not index_file.exists(): - errors.append("Examples index page not found: docs/docs/examples/index.md") + errors.append( + _err("docs/docs/examples/index.md", 0, "Examples index page not found") + ) return len(errors), errors index_content = index_file.read_text() - - # Directories to skip: not standalone example categories skip = {"__pycache__", "helper"} for child in sorted(examples_dir.iterdir()): - if not child.is_dir(): + if not child.is_dir() or child.name in skip or child.name.startswith("."): continue - if child.name in skip or child.name.startswith("."): - continue - # Only check directories that contain at least one .py file if not any(child.rglob("*.py")): continue - # Check the directory name appears in the index (as a table entry or heading) if ( f"`{child.name}/" not in index_content and f"`{child.name}`" not in index_content ): errors.append( - f"Example directory '{child.name}/' is not listed in " - f"docs/docs/examples/index.md" + _err( + f"docs/examples/{child.name}/", + 0, + f"Example directory '{child.name}/' is not listed in " + f"docs/docs/examples/index.md", + ) ) return len(errors), errors -def validate_doc_imports(docs_dir: Path) -> tuple[int, list[str]]: +def validate_doc_imports(docs_dir: Path) -> tuple[int, list[dict]]: """Verify that mellea imports in documentation code blocks still resolve. Parses fenced Python code blocks in static docs for ``from mellea.X import Y`` @@ -384,21 +474,21 @@ def validate_doc_imports(docs_dir: Path) -> tuple[int, list[str]]: docs_dir: The ``docs/docs/`` directory containing static documentation. Returns: - Tuple of (error_count, error_messages). + Tuple of (error_count, errors). 
""" import importlib - errors: list[str] = [] + errors: list[dict] = [] seen: set[tuple[str, str]] = set() for doc_file in sorted( list(docs_dir.rglob("*.md")) + list(docs_dir.rglob("*.mdx")) ): content = doc_file.read_text() - rel = doc_file.relative_to(docs_dir) + rel = str(doc_file.relative_to(docs_dir)) in_python = False - for line in content.splitlines(): + for line_num, line in enumerate(content.splitlines(), 1): stripped = line.strip() if stripped.startswith("```python"): in_python = True @@ -427,16 +517,17 @@ def validate_doc_imports(docs_dir: Path) -> tuple[int, list[str]]: try: mod = importlib.import_module(module) except ImportError: - # Optional dependency not installed — skip continue if not hasattr(mod, name): - # Could be a submodule (e.g. from pkg import submod) try: importlib.import_module(f"{module}.{name}") except ImportError: errors.append( - f"{rel}: from {module} import {name}" - f" — symbol not found in module" + _err( + rel, + line_num, + f"from {module} import {name} — symbol not found in module", + ) ) continue @@ -456,29 +547,44 @@ def validate_doc_imports(docs_dir: Path) -> tuple[int, list[str]]: return len(errors), errors +# --------------------------------------------------------------------------- +# Report assembly +# --------------------------------------------------------------------------- + + def generate_report( - source_link_errors: list[str], + source_link_errors: list[dict], coverage_passed: bool, coverage_report: dict, - mdx_errors: list[str], - link_errors: list[str], - anchor_errors: list[str], - rst_docstring_errors: list[str] | None = None, - stale_errors: list[str] | None = None, - import_errors: list[str] | None = None, - examples_catalogue_errors: list[str] | None = None, + mdx_errors: list[dict], + link_errors: list[dict], + anchor_errors: list[dict], + rst_docstring_errors: list[dict] | None = None, + stale_errors: list[dict] | None = None, + import_errors: list[dict] | None = None, + examples_catalogue_errors: 
list[dict] | None = None, ) -> dict: - """Generate validation report. + """Assemble the full validation report. + + Args: + source_link_errors: Errors from :func:`validate_source_links`. + coverage_passed: Whether coverage met the threshold. + coverage_report: Raw coverage report dict. + mdx_errors: Errors from :func:`validate_mdx_syntax`. + link_errors: Errors from :func:`validate_internal_links`. + anchor_errors: Errors from :func:`validate_anchor_collisions`. + rst_docstring_errors: Errors from :func:`validate_rst_docstrings`. + stale_errors: Errors from :func:`validate_stale_files`. + import_errors: Errors from :func:`validate_doc_imports`. + examples_catalogue_errors: Errors from :func:`validate_examples_catalogue`. Returns: Report dictionary with all validation results. """ - if stale_errors is None: - stale_errors = [] - if import_errors is None: - import_errors = [] - if examples_catalogue_errors is None: - examples_catalogue_errors = [] + rst_docstring_errors = rst_docstring_errors or [] + stale_errors = stale_errors or [] + import_errors = import_errors or [] + examples_catalogue_errors = examples_catalogue_errors or [] return { "source_links": { @@ -509,9 +615,9 @@ def generate_report( "errors": anchor_errors, }, "rst_docstrings": { - "passed": len(rst_docstring_errors or []) == 0, - "error_count": len(rst_docstring_errors or []), - "errors": rst_docstring_errors or [], + "passed": len(rst_docstring_errors) == 0, + "error_count": len(rst_docstring_errors), + "errors": rst_docstring_errors, }, "stale_files": { "passed": len(stale_errors) == 0, @@ -537,11 +643,72 @@ def generate_report( and len(stale_errors) == 0 and len(import_errors) == 0 and len(examples_catalogue_errors) == 0 - # rst_docstrings is a warning only — does not fail the build + # rst_docstrings is warning-only — does not fail the build ), } +# --------------------------------------------------------------------------- +# Output helpers +# 
---------------------------------------------------------------------------
+
+
+def _icon(passed: bool) -> str:
+    return "✅" if passed else "❌"
+
+
+def _print_check_errors(label: str, errors: list[dict], gha_budget: list[int]) -> None:
+    """Print a grouped error block for one check to stdout and emit GHA annotations.
+
+    GHA annotations are emitted until the shared *gha_budget* counter (a
+    single-element list used as a mutable int, initialised to
+    :data:`_GHA_ANNOTATION_CAP`) is exhausted.
+    This prevents later check categories from being invisible because an earlier
+    category consumed the entire annotation budget. The full error list is
+    always printed to stdout regardless of the cap.
+
+    Args:
+        label: Human-readable check name shown in the section header.
+        errors: List of error dicts with keys ``file``, ``line``, ``message``.
+        gha_budget: Single-element list ``[remaining_annotations]``; decremented
+            in-place as annotations are emitted.
+    """
+    if not errors:
+        return
+    fix_hint, ref_url = _CHECK_FIX_HINTS.get(label, ("", ""))
+    print(f"\n{'─' * 50}")
+    print(f" {label} ({len(errors)} error(s))")
+    if fix_hint:
+        print(f" Fix: {fix_hint}")
+    if ref_url:
+        print(f" Ref: {ref_url}")
+    print(f"{'─' * 50}")
+    capped = 0
+    for e in errors:
+        print(f" {e['file'] or '—'}{':' + str(e['line']) if e.get('line') else ''}")
+        print(f" {e['message']}")
+        if _IN_GHA and gha_budget[0] > 0:
+            _gha_annotation(
+                "error",
+                label,
+                e["message"],
+                file=e.get("file", ""),
+                line=e.get("line", 0),
+            )
+            gha_budget[0] -= 1
+        elif _IN_GHA and gha_budget[0] == 0:
+            capped += 1
+    if capped:
+        print(
+            f" (GHA annotations exhausted — {capped} more error(s) in job log "
+            f"and JSON artifact; total cap is {_GHA_ANNOTATION_CAP})"
+        )
+
+
+# ---------------------------------------------------------------------------
+# CLI entry point
+# ---------------------------------------------------------------------------
+
+
 def main():
     parser = argparse.ArgumentParser(description="Validate API documentation")
parser.add_argument("docs_dir", help="Directory containing generated MDX files") @@ -568,6 +735,11 @@ def main(): "--docs-root", help="Root docs/ directory for stale-file checks (default: parent of docs_dir)", ) + parser.add_argument( + "--warn-only", + action="store_true", + help="Print issues but always exit 0 (pre-commit informational mode)", + ) args = parser.parse_args() docs_dir = Path(args.docs_dir) @@ -582,7 +754,7 @@ def main(): print(f" Coverage threshold: {args.coverage_threshold}%") print() - # Run validations + # Run all checks print("Checking GitHub source links...") _, source_link_errors = ( validate_source_links(docs_dir, args.version) if args.version else (0, []) @@ -604,7 +776,7 @@ def main(): print("Checking anchor collisions...") _, anchor_errors = validate_anchor_collisions(docs_dir) - rst_docstring_errors: list[str] = [] + rst_docstring_errors: list[dict] = [] if args.source_dir: print("Checking source docstrings for RST double-backtick notation...") _, rst_docstring_errors = validate_rst_docstrings(args.source_dir) @@ -620,7 +792,7 @@ def main(): print("Checking examples catalogue...") _, examples_catalogue_errors = validate_examples_catalogue(docs_root) - # Generate report + # Assemble report report = generate_report( source_link_errors, coverage_passed, @@ -634,88 +806,77 @@ def main(): examples_catalogue_errors, ) - # Print results + # Summary table print("\n" + "=" * 60) print("Validation Results") print("=" * 60) + checks = [ + ("Source links", report["source_links"]["passed"]), + ("Coverage", report["coverage"]["passed"]), + ("MDX syntax", report["mdx_syntax"]["passed"]), + ("Internal links", report["internal_links"]["passed"]), + ("Anchor collisions", report["anchor_collisions"]["passed"]), + ("Stale files", report["stale_files"]["passed"]), + ("Doc imports", report["doc_imports"]["passed"]), + ("Examples catalogue", report["examples_catalogue"]["passed"]), + ] + if args.source_dir: + checks.append(("RST docstrings (warning)", 
report["rst_docstrings"]["passed"])) - print(f"✅ Source links: {'PASS' if report['source_links']['passed'] else 'FAIL'}") - if not report["source_links"]["passed"]: - print(f" {report['source_links']['error_count']} errors found") + for label, passed in checks: + print(f"{_icon(passed)} {label}: {'PASS' if passed else 'FAIL'}") - print(f"✅ Coverage: {'PASS' if report['coverage']['passed'] else 'FAIL'}") if not args.skip_coverage: + cov = report["coverage"] print( - f" {report['coverage']['percentage']}% " - f"({report['coverage']['documented_symbols']}/{report['coverage']['total_symbols']} symbols)" + f" Coverage: {cov['percentage']}%" + f" ({cov['documented_symbols']}/{cov['total_symbols']} symbols)" ) - print(f"✅ MDX syntax: {'PASS' if report['mdx_syntax']['passed'] else 'FAIL'}") - if not report["mdx_syntax"]["passed"]: - print(f" {report['mdx_syntax']['error_count']} errors found") - - print( - f"✅ Internal links: {'PASS' if report['internal_links']['passed'] else 'FAIL'}" - ) - if not report["internal_links"]["passed"]: - print(f" {report['internal_links']['error_count']} errors found") - - print( - f"✅ Anchor collisions: {'PASS' if report['anchor_collisions']['passed'] else 'FAIL'}" - ) - if not report["anchor_collisions"]["passed"]: - print(f" {report['anchor_collisions']['error_count']} errors found") - - if args.source_dir: - print( - f"✅ RST docstrings: {'PASS' if report['rst_docstrings']['passed'] else 'FAIL'}" - ) - if not report["rst_docstrings"]["passed"]: - print(f" {report['rst_docstrings']['error_count']} occurrences found") - - print(f"✅ Stale files: {'PASS' if report['stale_files']['passed'] else 'FAIL'}") - if not report["stale_files"]["passed"]: - print(f" {report['stale_files']['error_count']} errors found") - - print(f"✅ Doc imports: {'PASS' if report['doc_imports']['passed'] else 'FAIL'}") - if not report["doc_imports"]["passed"]: - print(f" {report['doc_imports']['error_count']} errors found") - + print("\n" + "=" * 60) print( - f"✅ 
Examples catalogue: {'PASS' if report['examples_catalogue']['passed'] else 'FAIL'}" + f"Overall: {_icon(report['overall_passed'])} {'PASS' if report['overall_passed'] else 'FAIL'}" ) - if not report["examples_catalogue"]["passed"]: - print(f" {report['examples_catalogue']['error_count']} errors found") - - print("\n" + "=" * 60) - print(f"Overall: {'✅ PASS' if report['overall_passed'] else '❌ FAIL'}") print("=" * 60) - # Print detailed errors - if not report["overall_passed"]: - print("\nDetailed Errors:") - all_errors = ( - source_link_errors - + mdx_errors - + link_errors - + anchor_errors - + rst_docstring_errors - + stale_errors - + import_errors - + examples_catalogue_errors - ) - for error in all_errors: - print(f" • {error}") + # Detailed errors — grouped by check + error_sections = [ + ("Source links", source_link_errors), + ("MDX syntax", mdx_errors), + ("Internal links", link_errors), + ("Anchor collisions", anchor_errors), + ("RST docstrings", rst_docstring_errors), + ("Stale files", stale_errors), + ("Doc imports", import_errors), + ("Examples catalogue", examples_catalogue_errors), + ] + any_errors = any(errs for _, errs in error_sections) + if any_errors: + print("\nDetailed errors:") + gha_budget = [_GHA_ANNOTATION_CAP] + for label, errs in error_sections: + _print_check_errors(label, errs, gha_budget) + + # GHA summary annotation + if _IN_GHA: + if report["overall_passed"]: + _gha_annotation("notice", "MDX Validation", "All checks passed") + else: + total_errors = sum(len(e) for _, e in error_sections) + _gha_annotation( + "error", + "MDX Validation", + f"{total_errors} error(s) found across {sum(1 for _, e in error_sections if e)} check(s) — see job summary for details", + ) - # Save report + # Save JSON report if args.output: output_path = Path(args.output) output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(json.dumps(report, indent=2)) print(f"\n📄 Report saved to {output_path}") - # Exit with appropriate code - 
sys.exit(0 if report["overall_passed"] else 1) + sys.exit(0 if (report["overall_passed"] or args.warn_only) else 1) if __name__ == "__main__":