diff --git a/.github/workflows/docs-publish.yml b/.github/workflows/docs-publish.yml index 77ad0f4a5..f8694702f 100644 --- a/.github/workflows/docs-publish.yml +++ b/.github/workflows/docs-publish.yml @@ -105,12 +105,31 @@ jobs: id: audit_coverage run: | set -o pipefail - uv run python tooling/docs-autogen/audit_coverage.py --docs-dir docs/docs/api --threshold 80 --quality 2>&1 \ + uv run python tooling/docs-autogen/audit_coverage.py --docs-dir docs/docs/api --threshold 80 2>&1 \ | tee /tmp/audit_coverage.log continue-on-error: ${{ inputs.strict_validation != true }} + - name: Docstring quality gate + id: quality_gate + run: | + set -o pipefail + uv run python tooling/docs-autogen/audit_coverage.py \ + --docs-dir docs/docs/api \ + --quality --fail-on-quality --threshold 100 \ + --orphans \ + --output /tmp/quality_report.json 2>&1 \ + | tee /tmp/quality_gate.log + # -- Upload artifact for deploy job -------------------------------------- + - name: Upload quality report + if: always() + uses: actions/upload-artifact@v7 + with: + name: docstring-quality-report + path: /tmp/quality_report.json + retention-days: 30 + - name: Upload docs artifact if: success() || (inputs.strict_validation != true) uses: actions/upload-artifact@v7 @@ -141,12 +160,11 @@ jobs: markdownlint_outcome = "${{ steps.markdownlint.outcome }}" validate_outcome = "${{ steps.validate_mdx.outcome }}" coverage_outcome = "${{ steps.audit_coverage.outcome }}" - strict = "${{ inputs.strict_validation }}" == "true" - mode = "" if strict else " *(soft-fail)*" - + quality_gate_outcome = "${{ steps.quality_gate.outcome }}" lint_log = read_log("/tmp/markdownlint.log") validate_log = read_log("/tmp/validate_mdx.log") coverage_log = read_log("/tmp/audit_coverage.log") + quality_gate_log = read_log("/tmp/quality_gate.log") # Count markdownlint issues (lines matching file:line:col format) lint_issues = len([l for l in lint_log.splitlines() if re.match(r'.+:\d+:\d+ ', l)]) @@ -186,45 +204,81 @@ jobs: mdx_detail = 
parse_validate_detail(validate_log) - # Docstring quality annotation emitted by audit_coverage.py into the log - # Format: ::notice title=Docstring quality::message - # or ::warning title=Docstring quality::message - quality_match = re.search(r"::(notice|warning|error) title=Docstring quality::(.+)", coverage_log) - if quality_match: - quality_level, quality_msg = quality_match.group(1), quality_match.group(2) - quality_icon = "✅" if quality_level == "notice" else "⚠️" - quality_status = "pass" if quality_level == "notice" else "warning" - quality_detail = re.sub(r"\s*—\s*see job summary.*$", "", quality_msg) - quality_row = f"| Docstring Quality | {quality_icon} {quality_status}{mode} | {quality_detail} |" + # Parse per-kind counts from the quality gate log. + # _print_quality_report emits section headers like: + # " Missing docstrings (12)" + # " Missing Args section (5)" + # Capture label -> count from those lines, then build a compact + # per-kind breakdown for the summary table cell. 
+ kind_short = { + "Missing docstrings": "missing", + "Short docstrings": "short", + "Missing Args section": "no_args", + "Missing Returns section": "no_returns", + "Missing Yields section (generator)": "no_yields", + "Missing Raises section": "no_raises", + "Missing class Args section": "no_class_args", + "Duplicate Args: in class + __init__ (Option C violation)": "dup_init_args", + "Param name mismatches (documented but not in signature)": "param_mismatch", + "TypedDict phantom fields (documented but not declared)": "td_phantom", + "TypedDict undocumented fields (declared but missing from Attributes:)": "td_undoc", + "Missing parameter type annotations (type absent from API docs)": "missing_param_type", + "Missing return type annotations (type absent from API docs)": "missing_return_type", + "Param type mismatch (docstring vs annotation)": "param_type_mismatch", + "Return type mismatch (docstring vs annotation)": "return_type_mismatch", + } + section_re = re.compile(r"^\s{2}(.+?)\s+\((\d+)\)\s*$", re.MULTILINE) + kind_counts = {} + for m in section_re.finditer(quality_gate_log): + label, count = m.group(1), int(m.group(2)) + short = kind_short.get(label) + if short: + kind_counts[short] = count + + if kind_counts: + parts = [f"{v} {k}" for k, v in kind_counts.items()] + quality_gate_detail = ", ".join(parts) else: - quality_row = None + # Fall back to the summary annotation message + qm = re.search(r"::(notice|warning|error) title=Docstring quality::(.+)", quality_gate_log) + quality_gate_detail = re.sub(r"\s*—\s*see job summary.*$", "", qm.group(2)) if qm else "" - # Split coverage log at quality section to avoid duplicate output in collapsibles - quality_start = coverage_log.find("🔬 Running docstring quality") - if quality_start != -1: - quality_log = coverage_log[quality_start:] - coverage_display_log = coverage_log[:quality_start].strip() - else: - quality_log = "" - coverage_display_log = coverage_log + CONTRIB_URL = ( + 
"https://github.com/generative-computing/mellea/blob/main" + "/docs/docs/guide/CONTRIBUTING.md" + ) + REPO = "${{ github.repository }}" + RUN_ID = "${{ github.run_id }}" + ARTIFACT_URL = f"https://github.com/{REPO}/actions/runs/{RUN_ID}#artifacts" lines = [ "## Docs Build — Validation Summary\n", "| Check | Result | Details |", "|-------|--------|---------|", - f"| Markdownlint | {icon(markdownlint_outcome)} {markdownlint_outcome}{mode} | {lint_detail} |", - f"| MDX Validation | {icon(validate_outcome)} {validate_outcome}{mode} | {mdx_detail} |", - f"| API Coverage | {icon(coverage_outcome)} {coverage_outcome}{mode} | {cov_detail} |", + f"| Markdownlint | {icon(markdownlint_outcome)} {markdownlint_outcome} | {lint_detail} |", + f"| MDX Validation | {icon(validate_outcome)} {validate_outcome} | {mdx_detail} |", + f"| API Coverage | {icon(coverage_outcome)} {coverage_outcome} | {cov_detail} |", + f"| Docstring Quality | {icon(quality_gate_outcome)} {quality_gate_outcome} | {quality_gate_detail} |", ] - if quality_row: - lines.append(quality_row) lines.append("") + # When the quality gate fails, surface a direct link to the fix reference. + # Per-kind Ref: URLs in the log output are inside a ```text``` block and + # don't render as links there. + if quality_gate_outcome == "failure": + lines += [ + "> ❌ **Docstring quality gate failed.** " + f"See the [CI docstring checks reference]({CONTRIB_URL}#ci-docstring-checks-reference) " + "for per-kind fix instructions, or expand **Docstring quality details** below for the full list. 
\n" + f"> The full machine-readable report is available as the [`docstring-quality-report` artifact]({ARTIFACT_URL}).", + "", + ] + for title, log, limit in [ ("Markdownlint output", lint_log, 5_000), ("MDX validation output", validate_log, 5_000), - ("API coverage output", coverage_display_log, 5_000), - ("Docstring quality details", quality_log, 1_000_000), + ("API coverage output", coverage_log, 5_000), + ("Docstring quality details", quality_gate_log, 1_000_000), ]: if log: lines += [ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 07c61b7d3..97b91a439 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -49,17 +49,16 @@ repos: hooks: - id: docs-mdx-validate name: Validate generated MDX docs - entry: bash -c 'test -d docs/docs/api && uv run --no-sync python tooling/docs-autogen/validate.py docs/docs/api --skip-coverage || true' + entry: bash -c 'test -d docs/docs/api && uv run --no-sync python tooling/docs-autogen/validate.py docs/docs/api --skip-coverage --warn-only || true' language: system pass_filenames: false files: (docs/docs/.*\.mdx$|tooling/docs-autogen/) - # TODO(#616): Move to normal commit flow once docstring quality issues reach 0. - # Griffe loads the full package (~10s), so this is manual-only for now to avoid - # slowing down every Python commit. Re-enable (remove stages: [manual]) and add - # --fail-on-quality once quality issues are resolved. + # Docstring quality gate — manual only (CI is the hard gate via docs-publish.yml). + # Run locally with: pre-commit run docs-docstring-quality --hook-stage manual + # Requires generated API docs (run `uv run python tooling/docs-autogen/build.py` first). 
- id: docs-docstring-quality - name: Audit docstring quality (informational) - entry: bash -c 'test -d docs/docs/api && uv run --no-sync python tooling/docs-autogen/audit_coverage.py --quality --docs-dir docs/docs/api || true' + name: Audit docstring quality + entry: uv run --no-sync python tooling/docs-autogen/audit_coverage.py --quality --fail-on-quality --threshold 0 --docs-dir docs/docs/api language: system pass_filenames: false files: (mellea/.*\.py$|cli/.*\.py$) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 44032b476..6d7a204d1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -174,6 +174,25 @@ differs in type or behaviour from the constructor input — for example, when a argument is wrapped into a `CBlock`, or when a class-level constant is relevant to callers. Pure-echo entries that repeat `Args:` verbatim should be omitted. +**`TypedDict` classes are a special case.** Their fields *are* the entire public +contract, so when an `Attributes:` section is present it must exactly match the +declared fields. The audit will flag: + +- `typeddict_phantom` — `Attributes:` documents a field that is not declared in the `TypedDict` +- `typeddict_undocumented` — a declared field is absent from the `Attributes:` section + +```python +class ConstraintResult(TypedDict): + """Result of a constraint check. + + Attributes: + passed: Whether the constraint was satisfied. + reason: Human-readable explanation. 
+ """ + passed: bool + reason: str +``` + #### Validating docstrings Run the coverage and quality audit to check your changes before committing: @@ -194,6 +213,8 @@ Key checks the audit enforces: | `no_args` | Standalone function has params but no `Args:` section | | `no_returns` | Function has a non-trivial return annotation but no `Returns:` section | | `param_mismatch` | `Args:` documents names not present in the actual signature | +| `typeddict_phantom` | `TypedDict` `Attributes:` documents a field not declared in the class | +| `typeddict_undocumented` | `TypedDict` has a declared field absent from its `Attributes:` section | **IDE hover verification** — open any of these existing classes in VS Code and hover over the class name or a constructor call to confirm the hover card shows `Args:` once diff --git a/cli/alora/intrinsic_uploader.py b/cli/alora/intrinsic_uploader.py index fe7c52a88..ec159b963 100644 --- a/cli/alora/intrinsic_uploader.py +++ b/cli/alora/intrinsic_uploader.py @@ -40,7 +40,7 @@ def upload_intrinsic( base_model (str): Base model ID or path (e.g. ``"ibm-granite/granite-3.3-2b-instruct"``). Must contain at most one ``"/"`` separator. - type (Literal["lora", "alora"]): Adapter type, used as the leaf + type (Literal['lora', 'alora']): Adapter type, used as the leaf directory name in the repository layout. io_yaml (str): Path to the ``io.yaml`` configuration file for intrinsic input/output processing. 
diff --git a/cli/alora/train.py b/cli/alora/train.py index bda54ae39..212e0c6fe 100644 --- a/cli/alora/train.py +++ b/cli/alora/train.py @@ -16,7 +16,14 @@ import typer from datasets import Dataset from peft import LoraConfig, get_peft_model -from transformers import AutoModelForCausalLM, AutoTokenizer, TrainerCallback +from transformers import ( + AutoModelForCausalLM, + AutoTokenizer, + TrainerCallback, + TrainerControl, + TrainerState, + TrainingArguments, +) from trl import DataCollatorForCompletionOnlyLM, SFTConfig, SFTTrainer # Handle MPS with old PyTorch versions on macOS only @@ -39,7 +46,9 @@ ) -def load_dataset_from_json(json_path, tokenizer, invocation_prompt): +def load_dataset_from_json( + json_path: str, tokenizer: AutoTokenizer, invocation_prompt: str +) -> Dataset: """Load a JSONL dataset and format it for SFT training. Reads ``item``/``label`` pairs from a JSONL file and builds a HuggingFace @@ -73,7 +82,7 @@ def load_dataset_from_json(json_path, tokenizer, invocation_prompt): return Dataset.from_dict({"input": inputs, "target": targets}) -def formatting_prompts_func(example): +def formatting_prompts_func(example: dict) -> list[str]: """Concatenate input and target columns for SFT prompt formatting. Args: @@ -101,7 +110,13 @@ class SaveBestModelCallback(TrainerCallback): def __init__(self): self.best_eval_loss = float("inf") - def on_evaluate(self, args, state, control, **kwargs): + def on_evaluate( + self, + args: TrainingArguments, + state: TrainerState, + control: TrainerControl, + **kwargs, + ): """Save the adapter weights if the current evaluation loss is a new best. 
Called automatically by the HuggingFace Trainer after each evaluation diff --git a/cli/decompose/decompose.py b/cli/decompose/decompose.py index a7d35e834..63b043f61 100644 --- a/cli/decompose/decompose.py +++ b/cli/decompose/decompose.py @@ -49,6 +49,19 @@ class DecompVersion(StrEnum): def reorder_subtasks( subtasks: list[DecompSubtasksResult], ) -> list[DecompSubtasksResult]: + """Topologically sort subtasks by their ``depends_on`` relationships. + + Args: + subtasks: List of subtask dicts, each with a ``"tag"`` and optional + ``"depends_on"`` field. + + Returns: + list[DecompSubtasksResult]: The subtasks reordered so that dependencies + come before dependents, with numbering prefixes updated. + + Raises: + ValueError: If a circular dependency is detected. + """ subtask_map = {subtask["tag"].lower(): subtask for subtask in subtasks} graph = {} @@ -78,6 +91,19 @@ def reorder_subtasks( def verify_user_variables( decomp_data: DecompPipelineResult, input_var: list[str] | None ) -> DecompPipelineResult: + """Validate that all required input variables and dependencies exist. + + Args: + decomp_data: The decomposition pipeline result containing subtasks. + input_var: User-provided input variable names, or ``None`` for none. + + Returns: + DecompPipelineResult: The (possibly reordered) decomposition data. + + Raises: + ValueError: If a subtask requires an input variable that was not + provided, or depends on a subtask tag that does not exist. + """ if input_var is None: input_var = [] diff --git a/cli/eval/runner.py b/cli/eval/runner.py index 9fdf55ff3..a456df0d6 100644 --- a/cli/eval/runner.py +++ b/cli/eval/runner.py @@ -49,7 +49,7 @@ def __init__( self.score = score self.validation_reason = validation_reason - def to_dict(self): + def to_dict(self) -> dict: """Serialise the input evaluation result to a plain dictionary. 
Returns: @@ -84,7 +84,7 @@ def __init__(self, test_eval: TestBasedEval, input_results: list[InputEvalResult self.test_eval = test_eval self.input_results = input_results - def to_dict(self): + def to_dict(self) -> dict: """Serialise the test evaluation result to a plain dictionary. Returns: @@ -366,7 +366,7 @@ def execute_test_eval( return test_result -def parse_judge_output(judge_output: str): +def parse_judge_output(judge_output: str) -> tuple[int | None, str]: """Parse score and justification from a judge model's output string. Args: diff --git a/docs/docs/docs.json b/docs/docs/docs.json index 8fb65bf95..c9516bc1b 100644 --- a/docs/docs/docs.json +++ b/docs/docs/docs.json @@ -161,25 +161,14 @@ "pages": [ "api/mellea/backends/backend", "api/mellea/backends/backends", - "api/mellea/backends/bedrock", "api/mellea/backends/cache", - "api/mellea/backends/dummy", - "api/mellea/backends/huggingface", - "api/mellea/backends/kv_block_helpers", - "api/mellea/backends/litellm", "api/mellea/backends/model_ids", "api/mellea/backends/model_options", - "api/mellea/backends/ollama", - "api/mellea/backends/openai", "api/mellea/backends/tools", - "api/mellea/backends/utils", - "api/mellea/backends/vllm", - "api/mellea/backends/watsonx", { "group": "adapters", "pages": [ - "api/mellea/backends/adapters/adapter", - "api/mellea/backends/adapters/catalog" + "api/mellea/backends/adapters/adapter" ] } ] @@ -218,9 +207,6 @@ { "group": "granite3", "pages": [ - "api/mellea/formatters/granite/granite3/constants", - "api/mellea/formatters/granite/granite3/input", - "api/mellea/formatters/granite/granite3/output", "api/mellea/formatters/granite/granite3/types", { "group": "granite32", @@ -244,18 +230,9 @@ "group": "intrinsics", "pages": [ "api/mellea/formatters/granite/intrinsics/input", - "api/mellea/formatters/granite/intrinsics/json_util", "api/mellea/formatters/granite/intrinsics/output", "api/mellea/formatters/granite/intrinsics/util" ] - }, - { - "group": "retrievers", - "pages": [ - 
"api/mellea/formatters/granite/retrievers/elasticsearch", - "api/mellea/formatters/granite/retrievers/embeddings", - "api/mellea/formatters/granite/retrievers/util" - ] } ] } @@ -274,9 +251,7 @@ "group": "plugins", "pages": [ "api/mellea/plugins/base", - "api/mellea/plugins/context", "api/mellea/plugins/decorators", - "api/mellea/plugins/manager", "api/mellea/plugins/plugins", "api/mellea/plugins/pluginset", "api/mellea/plugins/registry", @@ -305,25 +280,20 @@ "group": "components", "pages": [ "api/mellea/stdlib/components/chat", - "api/mellea/stdlib/components/genslot", "api/mellea/stdlib/components/instruction", "api/mellea/stdlib/components/mify", "api/mellea/stdlib/components/mobject", - "api/mellea/stdlib/components/react", "api/mellea/stdlib/components/simple", - "api/mellea/stdlib/components/unit_test_eval", { "group": "docs", "pages": [ - "api/mellea/stdlib/components/docs/document", - "api/mellea/stdlib/components/docs/richdocument" + "api/mellea/stdlib/components/docs/document" ] }, { "group": "intrinsic", "pages": [ - "api/mellea/stdlib/components/intrinsic/intrinsic", - "api/mellea/stdlib/components/intrinsic/rag" + "api/mellea/stdlib/components/intrinsic/intrinsic" ] } ] @@ -353,8 +323,6 @@ "group": "sampling", "pages": [ "api/mellea/stdlib/sampling/base", - "api/mellea/stdlib/sampling/budget_forcing", - "api/mellea/stdlib/sampling/majority_voting", "api/mellea/stdlib/sampling/sofai", { "group": "sampling_algos", @@ -375,7 +343,6 @@ { "group": "telemetry", "pages": [ - "api/mellea/telemetry/backend_instrumentation", "api/mellea/telemetry/logging", "api/mellea/telemetry/metrics", "api/mellea/telemetry/telemetry", @@ -401,10 +368,7 @@ { "group": "decompose", "pages": [ - "api/cli/decompose/__init__", - "api/cli/decompose/decompose", - "api/cli/decompose/pipeline", - "api/cli/decompose/utils" + "api/cli/decompose/decompose" ] }, { diff --git a/docs/docs/guide/CONTRIBUTING.md b/docs/docs/guide/CONTRIBUTING.md index fd0a58434..de2110b21 100644 --- 
a/docs/docs/guide/CONTRIBUTING.md +++ b/docs/docs/guide/CONTRIBUTING.md @@ -353,8 +353,62 @@ Add `Attributes:` only when a stored value differs in type or behaviour from the input (e.g. a `str` wrapped into a `CBlock`, or a class-level constant). Pure-echo entries that repeat `Args:` verbatim should be omitted. +**`TypedDict` classes** are a special case — their fields are the entire public contract, +so when an `Attributes:` section is present it must exactly match the declared fields. +The CI audit will fail on phantom fields (documented but not declared) and undocumented +fields (declared but missing from `Attributes:`). + See [CONTRIBUTING.md](../../CONTRIBUTING.md) for the full validation workflow. +### CI docstring checks reference + +The `audit_coverage.py --quality` gate (run in CI on every PR) reports the +following check kinds. If your PR is blocked by this gate, find the check kind +in the table below, follow the fix instructions, and re-push. + +> **Note on PR diff annotations:** GitHub Actions shows inline annotations +> directly on the changed lines in your PR diff. These are capped at **10 per +> check category** to ensure every category gets at least one visible marker. +> If there are more issues than the cap, a `"... and N more"` notice appears in +> the job log. The **complete list** of issues is always in the full job log +> (expand the "Docstring quality gate" step) and in the +> `docstring-quality-report` artifact (JSON) attached to the workflow run. 
+ +#### Missing or short docstrings + +| Check kind | What it means | How to fix | +| ---------- | ------------- | ---------- | +| `missing` | No docstring present on the symbol | Add a Google-style one-line summary sentence | +| `short` | Docstring has fewer than 5 words | Expand the summary — describe what the function/class does and why | + +#### Args, Returns, Yields, and Raises + +| Check kind | What it means | How to fix | +| ---------- | ------------- | ---------- | +| `no_args` | Function has named parameters but the docstring has no `Args:` section | Add an `Args:` section listing each parameter name and a short description | +| `no_returns` | Function has a non-`None` return annotation but the docstring has no `Returns:` section | Add a `Returns:` section describing what is returned and when | +| `no_yields` | Function returns `Generator` / `Iterator` but the docstring has no `Yields:` section | Add a `Yields:` section — generator functions use `Yields:`, not `Returns:` | +| `no_raises` | Function source contains `raise` but the docstring has no `Raises:` section | Add a `Raises:` section listing each exception type and the condition that triggers it | +| `missing_param_type` | `Args:` section exists but one or more parameters have no Python type annotation — the type column is absent from the generated API docs | Add a type annotation to each listed parameter in the function signature (e.g. `def f(x: int)`). Only fires when `no_args` is already satisfied; `*args`/`**kwargs` are excluded. | +| `missing_return_type` | `Returns:` section is documented but the function has no return type annotation — the return type is absent from the generated API docs | Add a return annotation to the function signature (e.g. `-> str`). Only fires when `no_returns` is already satisfied. | +| `param_type_mismatch` | A parameter's `Args:` entry states an explicit type (e.g. 
`x (int): …`) that does not match the Python annotation in the function signature | Align the docstring type with the annotation, or vice versa. The check normalises common equivalents (`Optional[X]` ↔ `X \| None`, `List` ↔ `list`, union ordering) before comparing, so only genuine disagreements are flagged. Only fires when both the docstring and the signature have an explicit type. **Note:** Python's AST normalises string literals to single quotes, so `Literal["a", "b"]` in source is read as `Literal['a', 'b']` — use single quotes in docstrings to match. | +| `return_type_mismatch` | The `Returns:` section has a type prefix (e.g. `Returns: \n str: …`) that does not match the Python return annotation | Align the docstring return type with the annotation, or vice versa. Same normalisation rules as `param_type_mismatch`. Only fires when both sides have an explicit type. | + +#### Class docstrings (Option C) + +| Check kind | What it means | How to fix | +| ---------- | ------------- | ---------- | +| `no_class_args` | Class `__init__` has typed parameters but the **class** docstring has no `Args:` section | Add `Args:` to the class docstring (not `__init__`) — see Option C convention above | +| `duplicate_init_args` | `Args:` appears in **both** the class docstring and the `__init__` docstring | Remove `Args:` from the `__init__` docstring; keep it on the class docstring only | +| `param_mismatch` | `Args:` section documents parameter names that do not exist in the actual signature | Remove or rename the phantom entries so they exactly match the real parameter names | + +#### TypedDict classes + +| Check kind | What it means | How to fix | +| ---------- | ------------- | ---------- | +| `typeddict_phantom` | `Attributes:` section documents field names not declared in the `TypedDict` | Remove the extra entries — every `Attributes:` entry must match a declared field | +| `typeddict_undocumented` | `TypedDict` has declared fields that are absent from the `Attributes:` 
section | Add the missing fields — every declared field must appear in `Attributes:` | + --- ## Local preview diff --git a/mellea/backends/tools.py b/mellea/backends/tools.py index 63f818111..0b83a2eac 100644 --- a/mellea/backends/tools.py +++ b/mellea/backends/tools.py @@ -70,7 +70,7 @@ def as_json_tool(self) -> dict[str, Any]: return self._as_json_tool.copy() @classmethod - def from_langchain(cls, tool: Any): + def from_langchain(cls, tool: Any) -> "MelleaTool": """Create a MelleaTool from a LangChain tool object. Args: @@ -117,7 +117,7 @@ def parameter_remapper(*args, **kwargs): ) from e @classmethod - def from_smolagents(cls, tool: Any): + def from_smolagents(cls, tool: Any) -> "MelleaTool": """Create a Tool from a HuggingFace smolagents tool object. Args: @@ -172,7 +172,7 @@ def tool_call(*args, **kwargs): ) from e @classmethod - def from_callable(cls, func: Callable, name: str | None = None): + def from_callable(cls, func: Callable, name: str | None = None) -> "MelleaTool": """Create a MelleaTool from a plain Python callable. Introspects the callable's signature and docstring to build an @@ -379,7 +379,7 @@ def json_extraction(text: str) -> Generator[dict, None, None]: index = text.find("{", index) -def find_func(d) -> tuple[str | None, Mapping | None]: +def find_func(d: object) -> tuple[str | None, Mapping | None]: """Find the first function in a json-like dictionary. Most llms output tool requests in the form ``...{"name": string, "arguments": {}}...`` diff --git a/mellea/core/sampling.py b/mellea/core/sampling.py index 0bf75badf..8dc253e1b 100644 --- a/mellea/core/sampling.py +++ b/mellea/core/sampling.py @@ -133,5 +133,5 @@ async def sample( tool_calls: True if tool calls should be used during this sampling strategy. Returns: - SamplingResult: A result object indicating the success or failure of the sampling process. + SamplingResult[S]: A result object indicating the success or failure of the sampling process. 
""" diff --git a/mellea/core/utils.py b/mellea/core/utils.py index dfeaafa5d..3823a83a6 100644 --- a/mellea/core/utils.py +++ b/mellea/core/utils.py @@ -67,7 +67,7 @@ class JsonFormatter(logging.Formatter): process ID, thread ID, and (if present) exception information. """ - def format(self, record): # type: ignore + def format(self, record: logging.LogRecord) -> dict: # type: ignore[override] """Formats a log record as a JSON-serialisable dictionary. Includes timestamp, level, message, module, function name, line number, @@ -75,6 +75,10 @@ def format(self, record): # type: ignore Args: record (logging.LogRecord): The log record to format. + + Returns: + dict: A dictionary containing timestamp, level, message, module, function, + line number, process/thread IDs, and optional exception info. """ log_record = { "timestamp": self.formatTime(record, self.datefmt), diff --git a/mellea/formatters/granite/base/util.py b/mellea/formatters/granite/base/util.py index 4d02ef9c6..4da107f0a 100644 --- a/mellea/formatters/granite/base/util.py +++ b/mellea/formatters/granite/base/util.py @@ -3,6 +3,8 @@ """Common utility functions for the library and tests.""" # Standard +from __future__ import annotations + import contextlib import itertools import json @@ -10,10 +12,14 @@ import os import re import uuid +from typing import TYPE_CHECKING # Third Party import pydantic +if TYPE_CHECKING: + from transformers import PreTrainedModel, PreTrainedTokenizerBase + # First Party from .types import ChatCompletionResponse, ChatCompletionResponseChoice @@ -98,7 +104,7 @@ def random_uuid() -> str: return str(uuid.uuid4()) -def load_transformers_lora(local_or_remote_path): +def load_transformers_lora(local_or_remote_path: str) -> tuple: """Load transformers LoRA model. 
AutoModelForCausalLM.from_pretrained() is supposed to auto-load base models if you @@ -136,7 +142,10 @@ def load_transformers_lora(local_or_remote_path): def chat_completion_request_to_transformers_inputs( - request, tokenizer=None, model=None, constrained_decoding_prefix=None + request: dict, + tokenizer: PreTrainedTokenizerBase | None = None, + model: PreTrainedModel | None = None, + constrained_decoding_prefix: str | None = None, ) -> tuple[dict, dict]: """Translate an OpenAI-style chat completion request. @@ -191,7 +200,7 @@ def chat_completion_request_to_transformers_inputs( ): tokenizer_input["documents"] = request["extra_body"]["documents"] - input_tokens = tokenizer.apply_chat_template(**tokenizer_input, return_tensors="pt") + input_tokens = tokenizer.apply_chat_template(**tokenizer_input, return_tensors="pt") # type: ignore[union-attr] # Transformers 5 switched the return type of apply_chat_template() from Tensor to # BatchEncoding. Adjust our behavior depending on which direction the currently @@ -208,7 +217,7 @@ def chat_completion_request_to_transformers_inputs( # generate() will fail with many different creative error messages if tokens aren't # on the right device. - input_tokens = input_tokens.to(model.device) + input_tokens = input_tokens.to(model.device) # type: ignore[union-attr] generate_input["input_tokens"] = input_tokens # The generate() method sometimes needs to know what is the integer ID @@ -216,9 +225,9 @@ def chat_completion_request_to_transformers_inputs( # isn't included in the serialized model. We get it from the tokenizer. # And of course some tokenizers don't set this parameter, in which case # we use the end of string token and hope for the best. 
- pad_token_id = tokenizer.pad_token_id + pad_token_id = tokenizer.pad_token_id # type: ignore[union-attr] if pad_token_id is None: - pad_token_id = tokenizer.eos_token_id + pad_token_id = tokenizer.eos_token_id # type: ignore[union-attr] if pad_token_id is None: # Raise an error here because the some branches of the generate # method won't complain about an invalid value of this parameter, @@ -229,7 +238,7 @@ def chat_completion_request_to_transformers_inputs( # Make sure you specify this parameter explicitly, or you will have # a bad time. - generate_input["eos_token_id"] = tokenizer.eos_token_id + generate_input["eos_token_id"] = tokenizer.eos_token_id # type: ignore[union-attr] other_input = {} @@ -316,7 +325,10 @@ def chat_completion_request_to_transformers_inputs( def generate_with_transformers( - tokenizer, model, generate_input: dict, other_input: dict + tokenizer: PreTrainedTokenizerBase, + model: PreTrainedModel, + generate_input: dict, + other_input: dict, ) -> ChatCompletionResponse: """Call Transformers generate and get usable results. 
@@ -348,7 +360,7 @@ def generate_with_transformers( generate_input = generate_input.copy() del generate_input["input_tokens"] - generate_result = model.generate(input_tokens, **generate_input) + generate_result = model.generate(input_tokens, **generate_input) # type: ignore[operator] # Result is a a 2D tensor of shape (num responses, prompt + max generated tokens) # containing tokens, plus a tuple of tensors of shape diff --git a/mellea/formatters/granite/granite3/granite32/input.py b/mellea/formatters/granite/granite3/granite32/input.py index a5333cf9f..939113973 100644 --- a/mellea/formatters/granite/granite3/granite32/input.py +++ b/mellea/formatters/granite/granite3/granite32/input.py @@ -13,6 +13,7 @@ NO_TOOLS_NO_DOCS_NO_THINKING_SYSTEM_MESSAGE_PART, ) from ...granite3.input import Granite3InputProcessor +from ...granite3.types import Granite3ChatCompletion # Local from .constants import ( @@ -221,12 +222,14 @@ def _remove_special_tokens(cls, text: str) -> str: return new_text @classmethod - def sanitize(cls, chat_completion, parts="all"): + def sanitize( + cls, chat_completion: Granite3ChatCompletion, parts: list[str] | str = "all" + ) -> Granite3ChatCompletion: """Sanitize the chat completion by removing Granite 3.2 special tokens. Args: chat_completion: The chat completion request to sanitize. - parts (str): Which parts of the chat completion to sanitize; + parts (list[str] | str): Which parts of the chat completion to sanitize; defaults to ``"all"``. 
Returns: diff --git a/mellea/formatters/granite/granite3/granite33/input.py b/mellea/formatters/granite/granite3/granite33/input.py index 2bb8f899d..f671cfd83 100644 --- a/mellea/formatters/granite/granite3/granite33/input.py +++ b/mellea/formatters/granite/granite3/granite33/input.py @@ -13,6 +13,7 @@ NO_TOOLS_NO_DOCS_NO_THINKING_SYSTEM_MESSAGE_PART, ) from ...granite3.input import Granite3InputProcessor +from ...granite3.types import Granite3ChatCompletion # Local from .constants import ( @@ -141,12 +142,14 @@ def _remove_special_tokens(cls, text: str) -> str: return new_text @classmethod - def sanitize(cls, chat_completion, parts="all"): + def sanitize( + cls, chat_completion: Granite3ChatCompletion, parts: list[str] | str = "all" + ) -> Granite3ChatCompletion: """Sanitize the chat completion by removing Granite 3.3 special tokens. Args: chat_completion: The chat completion request to sanitize. - parts (str): Which parts of the chat completion to sanitize; + parts (list[str] | str): Which parts of the chat completion to sanitize; defaults to ``"all"``. Returns: diff --git a/mellea/formatters/granite/intrinsics/input.py b/mellea/formatters/granite/intrinsics/input.py index bfa064ae2..cd0a3a5e0 100644 --- a/mellea/formatters/granite/intrinsics/input.py +++ b/mellea/formatters/granite/intrinsics/input.py @@ -33,7 +33,7 @@ def _needs_logprobs(transformations: list | None) -> bool: return any(t["type"] == "likelihood" for t in transformations) -def sentence_delimiter(tag, sentence_num) -> str: +def sentence_delimiter(tag: str, sentence_num: int) -> str: """Return a tag string that identifies the beginning of the indicated sentence. Args: @@ -56,14 +56,16 @@ def mark_sentence_boundaries( ``<[prefix][number]>`` at the location of each sentence boundary. - :param split_strings: Input string(s), pre-split into sentences - :param tag_prefix: String to place before the number part of each tagged - sentence boundary. - :param index: Starting index for sentence numbering. 
Defaults to 0. Pass a - non-zero value to continue numbering from a prior call. + Args: + split_strings: Input string(s), pre-split into sentences. + tag_prefix: String to place before the number part of each tagged + sentence boundary. + index: Starting index for sentence numbering. Defaults to 0. Pass a + non-zero value to continue numbering from a prior call. - :returns: Tuple of (list of input strings with all sentence boundaries marked, - next available index after the last sentence). + Returns: + tuple[list[str], int]: Tuple of (list of input strings with all sentence + boundaries marked, next available index after the last sentence). """ result: list[str] = [] for sentences in split_strings: diff --git a/mellea/helpers/openai_compatible_helpers.py b/mellea/helpers/openai_compatible_helpers.py index 739677368..4858ea366 100644 --- a/mellea/helpers/openai_compatible_helpers.py +++ b/mellea/helpers/openai_compatible_helpers.py @@ -140,7 +140,7 @@ def chat_completion_delta_merge( return merged -def message_to_openai_message(msg: Message): +def message_to_openai_message(msg: Message) -> dict: """Serialise a Mellea ``Message`` to the format required by OpenAI-compatible API providers. Args: diff --git a/mellea/stdlib/components/mify.py b/mellea/stdlib/components/mify.py index 26aa6da3a..ec8df5c6d 100644 --- a/mellea/stdlib/components/mify.py +++ b/mellea/stdlib/components/mify.py @@ -298,7 +298,7 @@ def mify( ) -> T: ... # Overloads for @mify and mify(obj|cls) -def mify(*args, **kwargs): # noqa: D417 +def mify(*args, **kwargs) -> object: # noqa: D417 """M-ify an object or class. Allows the object (or instances of the class) to be used in m sessions and with m functions. 
diff --git a/mellea/stdlib/components/mobject.py b/mellea/stdlib/components/mobject.py index 38719b213..e706b80d5 100644 --- a/mellea/stdlib/components/mobject.py +++ b/mellea/stdlib/components/mobject.py @@ -306,7 +306,7 @@ def format_for_llm(self) -> TemplateRepresentation | str: return TemplateRepresentation( args={"content": self.content_as_string()}, obj=self, - tools=tools, + tools=tools, # type: ignore[arg-type] fields=[], template_order=["*", "MObject"], ) diff --git a/mellea/stdlib/functional.py b/mellea/stdlib/functional.py index 16290c229..906d1c6ae 100644 --- a/mellea/stdlib/functional.py +++ b/mellea/stdlib/functional.py @@ -349,7 +349,7 @@ def query( tool_calls: If true, the model may make tool calls. Defaults to False. Returns: - ModelOutputThunk: The result of the query as processed by the backend. + tuple[ModelOutputThunk, Context]: The result of the query and updated context. """ if not isinstance(obj, MObjectProtocol): obj = mify(obj) @@ -983,7 +983,7 @@ async def aquery( tool_calls: If true, the model may make tool calls. Defaults to False. Returns: - ModelOutputThunk: The result of the query as processed by the backend. + tuple[ModelOutputThunk, Context]: The result of the query and updated context. """ if not isinstance(obj, MObjectProtocol): obj = mify(obj) @@ -1024,9 +1024,9 @@ async def atransform( model_options: Model options to pass to the backend. Returns: - ModelOutputThunk|Any: The result of the transformation as processed by the backend. If no tools were called, - the return type will be always be ModelOutputThunk. If a tool was called, the return type will be the return type - of the function called, usually the type of the object passed in. + tuple[ModelOutputThunk | Any, Context]: The result of the transformation and updated context. + If no tools were called, the first element will always be ModelOutputThunk. 
If a tool was called, + the first element will be the return type of the function called, usually the type of the object passed in. """ if not isinstance(obj, MObjectProtocol): obj = mify(obj) diff --git a/mellea/stdlib/sampling/base.py b/mellea/stdlib/sampling/base.py index 59c044dd0..a5295d084 100644 --- a/mellea/stdlib/sampling/base.py +++ b/mellea/stdlib/sampling/base.py @@ -136,7 +136,7 @@ async def sample( show_progress: if true, a tqdm progress bar is used. Otherwise, messages will still be sent to flog. Returns: - SamplingResult: A result object indicating the success or failure of the sampling process. + SamplingResult[S]: A result object indicating the success or failure of the sampling process. Raises: AssertionError: Asserts that all required components (repair, select_from_failure, validate, and generate) are provided before proceeding with the sampling. @@ -491,7 +491,7 @@ def select_from_failure( sampled_actions: list[Component], sampled_results: list[ModelOutputThunk], sampled_val: list[list[tuple[Requirement, ValidationResult]]], - ): + ) -> int: """Always returns the last index. The last message from the model will always be returned if all results are failures. Args: diff --git a/mellea/stdlib/session.py b/mellea/stdlib/session.py index acdd42c67..4fcfa783e 100644 --- a/mellea/stdlib/session.py +++ b/mellea/stdlib/session.py @@ -339,7 +339,7 @@ def __copy__(self): return new - def clone(self): + def clone(self) -> MelleaSession: """Useful for running multiple generation requests while keeping the context at a given point in time. 
Returns: diff --git a/tooling/docs-autogen/audit_coverage.py b/tooling/docs-autogen/audit_coverage.py index eb9506c9f..d53adb034 100755 --- a/tooling/docs-autogen/audit_coverage.py +++ b/tooling/docs-autogen/audit_coverage.py @@ -11,6 +11,7 @@ """ import argparse +import ast import json import os import re @@ -24,6 +25,54 @@ sys.exit(1) +# Modules that are confirmed internal but whose parent __init__.py imports nothing +# (making the import-based check indeterminate). Must stay in sync with generate-ast.py. +_CONFIRMED_INTERNAL_MODULES: frozenset[str] = frozenset( + {"json_util", "backend_instrumentation"} +) + + +def _imported_submodule_names(init_path: Path) -> set[str] | None: + """Return submodule names imported via relative imports in init_path. + + Returns None if the file cannot be parsed (treat as indeterminate). + Returns an empty set if the file is readable but has no relative imports. + """ + try: + tree = ast.parse(init_path.read_text()) + except Exception: + return None + result: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom) and node.level > 0 and node.module: + result.add(node.module.split(".")[0]) + return result + + +def _is_public_submodule(submodule_name: str, submodule_filepath: Path | None) -> bool: + """Return True if a submodule should be treated as part of the public API. + + Mirrors the filter in generate-ast.py's remove_internal_modules(). 
+ """ + if submodule_name in _CONFIRMED_INTERNAL_MODULES: + return False + if submodule_filepath is None: + return True # conservative: keep if we can't determine + # Griffe gives filepath as: + # - module file: .../pkg/submodule.py → parent init is .../pkg/__init__.py + # - package: .../pkg/subpkg/__init__.py → parent init is .../pkg/__init__.py + if submodule_filepath.name == "__init__.py": + parent_init = submodule_filepath.parent.parent / "__init__.py" + else: + parent_init = submodule_filepath.parent / "__init__.py" + if not parent_init.exists(): + return True # conservative + subs = _imported_submodule_names(parent_init) + if subs is None or not subs: + return True # conservative: can't determine, keep + return submodule_name in subs + + def _load_package(source_dir: Path, package_name: str): """Load a package with Griffe. Returns the package object or None on failure.""" try: @@ -84,9 +133,13 @@ def walk_module(module, module_path: str): # Recursively walk submodules (but skip internal ones) if hasattr(module, "modules"): for submodule_name, submodule in module.modules.items(): - if not submodule_name.startswith("_"): - submodule_path = f"{module_path}.{submodule_name}" - walk_module(submodule, submodule_path) + if submodule_name.startswith("_"): + continue + fp = getattr(submodule, "filepath", None) + if not _is_public_submodule(submodule_name, fp): + continue + submodule_path = f"{module_path}.{submodule_name}" + walk_module(submodule, submodule_path) # Walk through all top-level modules for module_name, module in package.modules.items(): @@ -102,6 +155,7 @@ def walk_module(module, module_path: str): # --------------------------------------------------------------------------- _ARGS_RE = re.compile(r"^\s*(Args|Arguments|Parameters)\s*:", re.MULTILINE) +_TYPEDDICT_BASES = re.compile(r"\bTypedDict\b") _RETURNS_RE = re.compile(r"^\s*Returns\s*:", re.MULTILINE) _YIELDS_RE = re.compile(r"^\s*Yields\s*:", re.MULTILINE) _RAISES_RE = re.compile(r"^\s*Raises\s*:", 
re.MULTILINE) @@ -110,9 +164,106 @@ def walk_module(module, module_path: str): # The colon must be followed by whitespace to avoid matching Sphinx cross-reference # continuation lines like " through :func:`...`". _ARGS_ENTRY_RE = re.compile(r"^\s{4,}(\w+)\s*(?:\([^)]*\))?\s*:\s", re.MULTILINE) +# Matches an indented param entry with an EXPLICIT type: " param_name (SomeType):" +_ARGS_ENTRY_WITH_TYPE_RE = re.compile(r"^\s{4,}(\w+)\s*\(([^)]+)\)\s*:\s", re.MULTILINE) +# Matches the type prefix on the first content line of a Returns: or Yields: section. +# Format: " SomeType: description" — only matches unambiguous type-like prefixes +# (word chars, brackets, pipes, commas, dots; no sentence-starting punctuation). +_SECTION_TYPE_LINE_RE = re.compile( + r"^\s{4,}([\w][\w\[\] |,.*]*(?:\[[\w\[\] |,.*]+\])?):\s", re.MULTILINE +) # Return annotations that need no Returns section _TRIVIAL_RETURNS = {"None", "NoReturn", "Never", "never", ""} +# Typing-module aliases → modern builtin equivalents for normalization. +_TYPING_ALIAS_RE = re.compile( + r"\b(List|Dict|Tuple|Set|FrozenSet|Type|Sequence|Mapping|Iterable|Iterator" + r"|Generator|AsyncGenerator|AsyncIterator|Awaitable|Callable)\b" +) +_TYPING_ALIAS_MAP = { + "List": "list", + "Dict": "dict", + "Tuple": "tuple", + "Set": "set", + "FrozenSet": "frozenset", + "Type": "type", +} + + +def _normalize_type(t: str) -> str: + """Normalize a type string for loose equality comparison. + + Handles the most common differences between docstring-stated types and + Python annotations: + - typing-module aliases: List → list, Dict → dict, etc. 
+ - Optional[X] → X | None + - Union[A, B] → A | B + - Union/pipe ordering: components are sorted so str|None == None|str + - incidental whitespace + + Known simplifications (will NOT flag as mismatch): + - Deeply nested Union/Optional combinations may not fully flatten + - typing.X (qualified) vs X (unqualified) are NOT normalised + - Callable[[A], B] argument ordering is not normalised + """ + t = t.strip() + # Strip typing. prefix + t = re.sub(r"\btyping\.", "", t) + # typing → builtin aliases + t = _TYPING_ALIAS_RE.sub(lambda m: _TYPING_ALIAS_MAP.get(m.group(0), m.group(0)), t) + # Optional[X] → X | None (non-nested only — avoids false positives on complex nesting) + t = re.sub(r"\bOptional\[([^\[\]]*)\]", r"\1 | None", t) + # Union[A, B, ...] → A | B | ... (non-nested) + t = re.sub( + r"\bUnion\[([^\[\]]+)\]", + lambda m: " | ".join(x.strip() for x in m.group(1).split(",")), + t, + ) + # normalise whitespace around type operators + t = re.sub(r"\s*\|\s*", " | ", t) + t = re.sub(r"\s*,\s*", ", ", t) + t = re.sub(r"\s*\[\s*", "[", t) + t = re.sub(r"\s*\]\s*", "]", t) + t = t.strip() + # Sort pipe-union components so str|None == None|str. + # Only apply at the top level (not inside brackets) to avoid reordering + # tuple/generic args, which would cause false positives. + if " | " in t and "[" not in t: + t = " | ".join(sorted(t.split(" | "))) + return t + + +def _types_match(a: str, b: str) -> bool: + """Return True if two type strings are equivalent after normalisation. + + Returns True (no mismatch reported) when normalisation is uncertain: + if either side still contains unexpanded Optional[...] or Union[...] after + normalisation (indicating a nested generic we couldn't fully expand), we + suppress the comparison to avoid false positives. + """ + na, nb = _normalize_type(a), _normalize_type(b) + # If either side has unexpanded Optional/Union (e.g. Optional[list[str]]), + # skip — we can't reliably compare and must not emit a false positive. 
+ if re.search(r"\b(Optional|Union)\[", na) or re.search(r"\b(Optional|Union)\[", nb): + return True + return na == nb + + +def _extract_section_type(doc_text: str, section_re: re.Pattern[str]) -> str | None: + """Extract the type prefix from the first content line of a docstring section. + + Returns the raw type string if found, or None if the section is absent or + the first content line has no recognisable type prefix. + """ + m = section_re.search(doc_text) + if not m: + return None + # Grab text after the section heading up to the next blank line. + after = doc_text[m.end() :] + type_match = _SECTION_TYPE_LINE_RE.search(after.split("\n\n")[0]) + return type_match.group(1).strip() if type_match else None + + # Return annotations that indicate a generator (should use Yields, not Returns) _GENERATOR_RETURN_PATTERNS = re.compile( r"Generator|Iterator|AsyncGenerator|AsyncIterator" @@ -120,8 +271,21 @@ def walk_module(module, module_path: str): def _check_member(member, full_path: str, short_threshold: int) -> list[dict]: - """Return quality issues for a single class or function member.""" + """Return quality issues for a single class or function member. + + Each returned dict has keys: ``path``, ``kind``, ``detail``, ``file``, ``line``. + ``file`` is the absolute source path; ``line`` is the 1-based line number of the + symbol definition (the ``def`` / ``class`` keyword line). + """ issues: list[dict] = [] + _raw_file = getattr(member, "filepath", None) + _abs_file = str(_raw_file) if _raw_file is not None else "" + # Convert to a repo-relative path for readability and GHA annotations. 
+ try: + _rel_file = str(Path(_abs_file).relative_to(Path.cwd())) if _abs_file else "" + except ValueError: + _rel_file = _abs_file + _line = getattr(member, "lineno", None) or 0 doc = getattr(member, "docstring", None) doc_text = doc.value.strip() if (doc and doc.value) else "" @@ -193,6 +357,65 @@ def _check_member(member, full_path: str, short_threshold: int) -> list[dict]: } ) + # Missing type annotation: Args: section exists but parameters lack Python + # annotations — the type column will be absent from the generated API docs. + # Only checked here (when Args: exists) to avoid duplicating no_args reports. + # Variadic *args/**kwargs are excluded via ParameterKind (same filter as concrete). + unannotated = [ + p.name + for p in (params or []) + if p.name not in ("self", "cls") + and getattr(p, "kind", None) not in _variadic_kinds + and p.annotation is None + ] + if unannotated: + sample = ", ".join(unannotated[:3]) + ( + "..." if len(unannotated) > 3 else "" + ) + issues.append( + { + "path": full_path, + "kind": "missing_param_type", + "detail": f"params [{sample}] have no Python type annotation — type absent from API docs", + } + ) + + # Param type consistency: docstring states a type but it differs from + # the signature annotation. Only fires when BOTH sides have an explicit + # type — absence is already handled by missing_param_type / no_args. + # Uses _types_match() which normalises aliases, Optional/Union, and pipe + # ordering. Comparisons involving nested generics (e.g. Optional[list[X]]) + # that cannot be fully expanded are silently skipped to avoid false positives. 
+ if args_block: + for p in params or []: + if p.name in ("self", "cls"): + continue + if getattr(p, "kind", None) in _variadic_kinds: + continue + if p.annotation is None: + continue # missing_param_type already handles this + # Find the docstring type for this param + param_type_match = re.search( + rf"^\s{{4,}}{re.escape(p.name)}\s*\(([^)]+)\)\s*:\s", + args_block.group(1), + re.MULTILINE, + ) + if param_type_match: + if not _types_match( + param_type_match.group(1), str(p.annotation) + ): + issues.append( + { + "path": full_path, + "kind": "param_type_mismatch", + "detail": ( + f"param '{p.name}': docstring says" + f" '{param_type_match.group(1).strip()}'" + f" but annotation is '{p.annotation}'" + ), + } + ) + # Returns/Yields section check: only flag when there is an explicit non-trivial annotation. # Generator return types (Generator, Iterator, etc.) require Yields:, not Returns:. returns = getattr(member, "returns", None) @@ -215,6 +438,38 @@ def _check_member(member, full_path: str, short_threshold: int) -> list[dict]: "detail": f"return type {ret_str!r} has no Returns section", } ) + elif not is_generator: + # Return type consistency: docstring Returns: states a type prefix but + # it differs from the signature annotation. + # Only fires when BOTH sides have an explicit type — missing_return_type + # and no_returns already handle one-sided absence. + # See _normalize_type() docstring for known simplifications that may + # suppress edge cases (e.g. deeply nested Union, qualified typing.X). + doc_ret_type = _extract_section_type(doc_text, _RETURNS_RE) + if doc_ret_type: + if not _types_match(doc_ret_type, ret_str): + issues.append( + { + "path": full_path, + "kind": "return_type_mismatch", + "detail": ( + f"Returns: says '{doc_ret_type}'" + f" but annotation is '{ret_str}'" + ), + } + ) + + # Missing return annotation: Returns: section documented but no Python annotation. 
+ # Naturally non-overlapping with no_returns (which fires when annotation exists + # but Returns: section is absent). When both are missing, no_returns fires first. + if not ret_str and _RETURNS_RE.search(doc_text): + issues.append( + { + "path": full_path, + "kind": "missing_return_type", + "detail": "Returns: section documented but no return type annotation — type absent from API docs", + } + ) # Raises section check: only flag when the source contains explicit raise statements source = getattr(member, "source", None) or "" @@ -274,6 +529,49 @@ def _check_member(member, full_path: str, short_threshold: int) -> list[dict]: } ) + # TypedDict field mismatch check. + # Unlike regular classes (where Attributes: is optional under Option C), + # TypedDict fields *are* the entire public contract. When an Attributes: + # section exists, every entry must match an actual declared field and every + # declared field must appear — stale or missing entries are always a bug. + is_typeddict = any( + _TYPEDDICT_BASES.search(str(base)) for base in getattr(member, "bases", []) + ) + if is_typeddict and _ATTRIBUTES_RE.search(doc_text): + attrs_block = re.search( + r"Attributes\s*:(.*?)(?:\n\s*\n|\Z)", doc_text, re.DOTALL + ) + if attrs_block: + doc_field_names = set(_ARGS_ENTRY_RE.findall(attrs_block.group(1))) + actual_fields = { + name + for name, m in member.members.items() + if not name.startswith("_") and getattr(m, "is_attribute", False) + } + phantom = doc_field_names - actual_fields + if phantom: + issues.append( + { + "path": full_path, + "kind": "typeddict_phantom", + "detail": f"Attributes: documents {sorted(phantom)} not declared in TypedDict", + } + ) + undocumented = actual_fields - doc_field_names + if undocumented: + issues.append( + { + "path": full_path, + "kind": "typeddict_undocumented", + "detail": f"TypedDict fields {sorted(undocumented)} missing from Attributes: section", + } + ) + + # Inject file location into every issue so callers can display and annotate. 
+ for issue in issues: + issue["file"] = _rel_file + issue["line"] = _line + return issues @@ -296,11 +594,21 @@ def audit_docstring_quality( - no_class_args: class whose __init__ has typed params but no Args section on the class - duplicate_init_args: Args: present in both class docstring and __init__ (Option C violation) - param_mismatch: Args section documents names absent from the real signature - - Note: Attributes: sections are intentionally not enforced. Under the Option C - convention, Attributes: is only used when stored values differ in type or - behaviour from the constructor inputs (e.g. type transforms, computed values, - class constants). Pure-echo entries that repeat Args: verbatim are omitted. + - typeddict_phantom: TypedDict Attributes: section documents fields not declared in the class + - typeddict_undocumented: TypedDict has declared fields absent from its Attributes: section + - missing_param_type: Args: section exists but one or more parameters lack Python type + annotations — the type column will be absent from the generated API docs. Only checked + when no no_args issue exists (avoids duplicate reports). + - missing_return_type: Returns: section exists but the function has no return type + annotation — the return type will be absent from the generated API docs. Only checked + when no no_returns issue exists (avoids duplicate reports). + + Note: Attributes: sections are intentionally not enforced for regular classes. Under + the Option C convention, Attributes: is only used when stored values differ in type or + behaviour from the constructor inputs (e.g. type transforms, computed values, class + constants). Pure-echo entries that repeat Args: verbatim are omitted. TypedDicts are + a carve-out: their fields are the entire public contract, so when an Attributes: + section is present it must exactly match the declared fields. 
Only symbols (and methods whose parent class) present in `documented` are checked when that set is provided — ensuring the audit is scoped to what is @@ -376,6 +684,82 @@ def walk_module(module, module_path: str) -> None: _IN_GHA = os.environ.get("GITHUB_ACTIONS") == "true" +# Base URLs for documentation references emitted in fix hints. +# These point to upstream main; anchors for the CI checks reference section +# will resolve once the PR introducing them is merged. +_CONTRIB_DOCS_URL = ( + "https://github.com/generative-computing/mellea/blob/main" + "/docs/docs/guide/CONTRIBUTING.md" +) +_COVERAGE_DOCS_URL = ( + "https://github.com/generative-computing/mellea/blob/main" + "/CONTRIBUTING.md#validating-docstrings" +) + +# Per-kind fix hints: (one-line fix text, CONTRIBUTING.md anchor) +_KIND_FIX_HINTS: dict[str, tuple[str, str]] = { + "missing": ( + "Add a Google-style summary sentence.", + f"{_CONTRIB_DOCS_URL}#missing-or-short-docstrings", + ), + "short": ( + "Expand the summary — aim for at least 5 meaningful words.", + f"{_CONTRIB_DOCS_URL}#missing-or-short-docstrings", + ), + "no_args": ( + "Add an Args: section listing each parameter with a description.", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "no_returns": ( + "Add a Returns: section describing the return value.", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "no_yields": ( + "Add a Yields: section (generator functions use Yields:, not Returns:).", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "no_raises": ( + "Add a Raises: section listing each exception and the condition that triggers it.", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "no_class_args": ( + "Add Args: to the class docstring (not __init__) — Option C convention.", + f"{_CONTRIB_DOCS_URL}#class-docstrings-option-c", + ), + "duplicate_init_args": ( + "Remove Args: from __init__ docstring; place it on the class docstring only (Option C).", + 
f"{_CONTRIB_DOCS_URL}#class-docstrings-option-c", + ), + "param_mismatch": ( + "Remove or rename phantom entries to match the actual parameter names.", + f"{_CONTRIB_DOCS_URL}#class-docstrings-option-c", + ), + "typeddict_phantom": ( + "Remove documented fields that are not declared in the TypedDict.", + f"{_CONTRIB_DOCS_URL}#typeddict-classes", + ), + "typeddict_undocumented": ( + "Add the missing fields to the Attributes: section.", + f"{_CONTRIB_DOCS_URL}#typeddict-classes", + ), + "missing_param_type": ( + "Add a Python type annotation to each listed parameter in the function signature.", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "missing_return_type": ( + "Add a return type annotation (-> SomeType) to the function signature.", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "param_type_mismatch": ( + "Align the type in the Args: entry with the signature annotation (or vice versa).", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), + "return_type_mismatch": ( + "Align the type prefix in Returns: with the signature annotation (or vice versa).", + f"{_CONTRIB_DOCS_URL}#args-returns-yields-and-raises", + ), +} + def _gha_cmd(level: str, title: str, message: str) -> None: """Emit a GitHub Actions workflow command annotation.""" @@ -385,8 +769,57 @@ def _gha_cmd(level: str, title: str, message: str) -> None: print(f"::{level} title={title}::{message}") -def _print_quality_report(issues: list[dict]) -> None: - """Print a grouped quality report to stdout.""" +def _gha_file_annotation( + level: str, title: str, message: str, file: str, line: int +) -> None: + """Emit a GitHub Actions annotation anchored to a specific file and line. + + When ``file`` and ``line`` are provided the annotation appears inline in the + PR diff view, making it immediately obvious which symbol needs fixing. + + Args: + level: Annotation level — ``"error"``, ``"warning"``, or ``"notice"``. + title: Short label shown in bold in the annotation. 
+ message: Body text for the annotation. + file: Repo-relative file path (e.g. ``mellea/core/base.py``). + line: 1-based line number of the symbol definition. + """ + for s in (message, title, file): + s = s.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A") + if file and line: + print(f"::{level} file={file},line={line},title={title}::{message}") + else: + print(f"::{level} title={title}::{message}") + + +# GitHub Actions silently drops inline diff annotations beyond ~10 per step. +# This per-kind cap ensures every category gets at least one visible annotation +# rather than early kinds consuming the entire budget. +_GHA_ANNOTATIONS_PER_KIND = 10 + + +def _print_quality_report(issues: list[dict], *, fail_on_quality: bool = False) -> None: + """Print a grouped quality report to stdout and emit GHA annotations. + + When running in GitHub Actions (``GITHUB_ACTIONS=true``), each issue is + also emitted as a file-level annotation (``::error`` or ``::warning``) + anchored to the source line, so they appear inline in the PR diff. + + GitHub Actions caps inline diff annotations at roughly 10 per step; issues + beyond that cap are silently dropped from the diff view (they still appear + in the full job log and in the JSON artifact). To ensure every check + category gets at least one visible annotation, this function emits at most + :data:`_GHA_ANNOTATIONS_PER_KIND` annotations per kind and prints a + ``"... and N more"`` notice for the remainder. The complete list is always + written to the job log regardless of the GHA cap. + + Args: + issues: List of issue dicts from :func:`audit_docstring_quality`. + Each dict must have keys: ``path``, ``kind``, ``detail``, + ``file``, ``line``. + fail_on_quality: When ``True`` annotations are emitted as ``error`` + (red); otherwise as ``warning`` (yellow). 
+ """ by_kind: dict[str, list[dict]] = {} for issue in issues: by_kind.setdefault(issue["kind"], []).append(issue) @@ -401,8 +834,16 @@ def _print_quality_report(issues: list[dict]) -> None: "no_class_args": "Missing class Args section", "duplicate_init_args": "Duplicate Args: in class + __init__ (Option C violation)", "param_mismatch": "Param name mismatches (documented but not in signature)", + "typeddict_phantom": "TypedDict phantom fields (documented but not declared)", + "typeddict_undocumented": "TypedDict undocumented fields (declared but missing from Attributes:)", + "missing_param_type": "Missing parameter type annotations (type absent from API docs)", + "missing_return_type": "Missing return type annotations (type absent from API docs)", + "param_type_mismatch": "Param type mismatch (docstring vs annotation)", + "return_type_mismatch": "Return type mismatch (docstring vs annotation)", } + annotation_level = "error" if fail_on_quality else "warning" + total = len(issues) print(f"\n{'=' * 60}") print("Docstring Quality Report") @@ -419,17 +860,48 @@ def _print_quality_report(issues: list[dict]) -> None: "no_class_args", "duplicate_init_args", "param_mismatch", + "typeddict_phantom", + "typeddict_undocumented", + "missing_param_type", + "missing_return_type", + "param_type_mismatch", + "return_type_mismatch", ): items = by_kind.get(kind, []) if not items: continue label = kind_labels.get(kind, kind) + fix_hint, ref_url = _KIND_FIX_HINTS.get(kind, ("", "")) print(f"\n{'─' * 50}") print(f" {label} ({len(items)})") + if fix_hint: + print(f" Fix: {fix_hint}") + if ref_url: + print(f" Ref: {ref_url}") print(f"{'─' * 50}") - for item in sorted(items, key=lambda x: x["path"]): - print(f" {item['path']}") + sorted_items = sorted(items, key=lambda x: x["path"]) + gha_emitted = 0 + for item in sorted_items: + file_ref = item.get("file", "") + line_num = item.get("line", 0) + loc = f" [{file_ref}:{line_num}]" if file_ref and line_num else "" + print(f" 
{item['path']}{loc}") print(f" {item['detail']}") + if _IN_GHA and file_ref and gha_emitted < _GHA_ANNOTATIONS_PER_KIND: + _gha_file_annotation( + annotation_level, + f"[{kind}] {item['path']}", + item["detail"], + file_ref, + line_num, + ) + gha_emitted += 1 + if _IN_GHA and len(sorted_items) > _GHA_ANNOTATIONS_PER_KIND: + remainder = len(sorted_items) - _GHA_ANNOTATIONS_PER_KIND + print( + f" (GHA annotations capped at {_GHA_ANNOTATIONS_PER_KIND} per kind — " + f"{remainder} more {kind!r} issue(s) in job log and JSON artifact)" + ) def audit_nav_orphans(docs_dir: Path, source_dir: Path) -> list[str]: @@ -445,18 +917,33 @@ def audit_nav_orphans(docs_dir: Path, source_dir: Path) -> list[str]: Returns: Sorted list of orphaned module paths relative to docs_dir (no extension) """ - mint_json = source_dir / "docs" / "mint.json" + # Support both Mintlify v1 (mint.json at docs/mint.json) and + # v2 (docs.json at docs/docs/docs.json). docs.json uses plain string + # entries in "pages" arrays; mint.json uses {"page": "..."} dicts. 
+ nav_config: Path | None = None + for candidate in ( + source_dir / "docs" / "docs" / "docs.json", + source_dir / "docs" / "mint.json", + ): + if candidate.exists(): + nav_config = candidate + break mdx_files: set[str] = set() for mdx_file in docs_dir.rglob("*.mdx"): mdx_files.add(str(mdx_file.relative_to(docs_dir).with_suffix(""))) nav_refs: set[str] = set() - if mint_json.exists(): - config = json.loads(mint_json.read_text()) + if nav_config is not None: + config = json.loads(nav_config.read_text()) def _extract(obj: object) -> None: - if isinstance(obj, dict): + if isinstance(obj, str): + # docs.json / mint.json plain string page entry + if obj.startswith("api/"): + nav_refs.add(obj[len("api/") :]) + elif isinstance(obj, dict): + # mint.json {"page": "api/..."} dict entry if "page" in obj: page = obj["page"] if isinstance(page, str) and page.startswith("api/"): @@ -654,11 +1141,34 @@ def main(): print(f"CLI commands: {len(report['cli_commands'])}") if report["missing_symbols"]: + total_missing = sum(len(s) for s in report["missing_symbols"].values()) + print(f"\n{'─' * 60}") print( - f"\n⚠️ Missing documentation for {len(report['missing_symbols'])} modules:" + f" Missing API docs — {total_missing} symbol(s) across " + f"{len(report['missing_symbols'])} module(s)" ) + print( + " Fix: Run the doc generation pipeline to produce MDX for new symbols,\n" + " then add entries to docs/docs/docs.json navigation.\n" + " uv run python tooling/docs-autogen/generate-ast.py" + ) + print(f" Ref: {_COVERAGE_DOCS_URL}") + print(f"{'─' * 60}") for module, symbols in sorted(report["missing_symbols"].items()): - print(f" {module}: {', '.join(symbols)}") + print(f" {module}") + for sym in sorted(symbols): + print(f" {sym}") + if _IN_GHA: + _gha_cmd( + "error" + if report["coverage_percentage"] < args.threshold + else "warning", + "API Coverage", + f"{total_missing} symbol(s) undocumented in " + f"{len(report['missing_symbols'])} module(s) — " + f"coverage 
{report['coverage_percentage']}% " + f"(threshold {args.threshold}%)", + ) # Quality audit — scoped to documented (API reference) symbols only quality_issues: list[dict] = [] @@ -677,7 +1187,7 @@ def main(): documented=documented, ) ) - _print_quality_report(quality_issues) + _print_quality_report(quality_issues, fail_on_quality=args.fail_on_quality) # Nav orphan check — MDX files not referenced in mint.json navigation orphans: list[str] = [] @@ -724,9 +1234,7 @@ def main(): if _IN_GHA: if not args.quality: - _gha_cmd( - "notice", "Docstring quality", "skipped (pass --quality to enable)" - ) + pass # quality step is a separate CI job; no annotation needed here elif quality_issues: _gha_cmd( "error" if args.fail_on_quality else "warning", diff --git a/tooling/docs-autogen/generate-ast.py b/tooling/docs-autogen/generate-ast.py index 5687eee04..7384936b2 100644 --- a/tooling/docs-autogen/generate-ast.py +++ b/tooling/docs-autogen/generate-ast.py @@ -20,6 +20,7 @@ """ import argparse +import ast import glob import json import os @@ -390,6 +391,141 @@ def remove_empty_mdx_files() -> None: print(f"✅ Removed {removed} empty files.", flush=True) +# ----------------------------- +# Public API filter +# ----------------------------- + + +def _imported_submodules(init_path: Path) -> set[str] | None: + """Return submodule names explicitly imported via relative imports in ``__init__.py``. + + Uses AST parsing (no import) so it is safe to call during doc generation. + + A ``__init__.py`` that does ``from .ollama import OllamaModelBackend`` imports + from the ``ollama`` submodule, making it part of the package's public surface + even if the module name does not appear in ``__all__``. + + Args: + init_path: Path to the ``__init__.py`` file to parse. + + Returns: + Set of submodule names imported via ``from .name import ...``, or ``None`` + when the file cannot be parsed. 
An empty set means the ``__init__.py`` + exists but contains no relative imports — caller should treat the module + as indeterminate and keep it. + """ + try: + tree = ast.parse(init_path.read_text(encoding="utf-8")) + except (SyntaxError, OSError): + return None + imported: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom) and node.level > 0 and node.module: + # from .submodule import X → record "submodule" + imported.add(node.module.split(".")[0]) + return imported + + +# Modules that are confirmed internal implementation details and must never +# appear in the public API reference. These are a fallback for cases where +# the parent __init__.py does not import from any submodule (so the +# import-based filter cannot determine intent automatically). +# +# Long-term fix: rename these files with a leading ``_`` (e.g. ``_json_util.py``) +# so the doc generator skips them automatically per Python convention. +# Tracked in: TODO open issue +_CONFIRMED_INTERNAL_MODULES: frozenset[str] = frozenset( + { + # Granite intrinsics internal JSON parser — not part of the public API + "mellea/formatters/granite/intrinsics/json_util", + # Backend telemetry wiring — internal instrumentation helpers + "mellea/telemetry/backend_instrumentation", + } +) + + +def remove_internal_modules(source_root: Path) -> None: + """Remove MDX files for submodules that are not part of the package's public API. + + ``mdxify --all`` generates documentation for every reachable submodule, + regardless of whether the package author intends it to be public. This step + enforces the Python convention: a submodule is public when its parent + ``__init__.py`` explicitly imports from it (``from .name import ...``). + Submodules that are never imported in the parent ``__init__.py`` are internal + implementation details and are removed from the staging output. + + **Conservative rules:** + + * If the parent ``__init__.py`` cannot be parsed → keep (safe default). 
+ * If the parent ``__init__.py`` contains *no* relative imports → keep + (cannot determine intent; typical for packages where submodules are + accessed via their full dotted path rather than re-exported). + * Package index files (``__init__.mdx`` renamed to ``.mdx``) are + identified by ``stem == parent_dir_name`` and are never filtered. + + A hardcoded ``_CONFIRMED_INTERNAL_MODULES`` set catches known internals in + packages where the parent ``__init__.py`` imports nothing (indeterminate + case). These should eventually be renamed with a ``_`` prefix in source. + + Args: + source_root: Root of the source repo (contains ``mellea/`` and ``cli/``). + """ + print("-" * 30, flush=True) + print( + "🔒 Filtering internal modules (not imported by parent __init__)...", flush=True + ) + + mdx_files = list(STAGING_API_DIR.rglob("*.mdx")) + removed = 0 + + for mdx_path in sorted(mdx_files): + rel = mdx_path.relative_to(STAGING_API_DIR) + parts = list(rel.with_suffix("").parts) + + # Need at least pkg/parent/module to have a meaningful parent check. + if len(parts) < 3: + continue + + module_name = parts[-1] + + # Package index files: __init__.mdx was renamed to .mdx. + # Identified by stem == containing directory name. Never filter these. + if module_name == parts[-2]: + continue + + # Hardcoded confirmed-internal override (indeterminate parent cases). + rel_no_ext = str(rel.with_suffix("")) + if rel_no_ext in _CONFIRMED_INTERNAL_MODULES: + print( + f" 🔒 {rel} (confirmed internal — hardcoded exclusion)", flush=True + ) + mdx_path.unlink(missing_ok=True) + removed += 1 + continue + + # Import-based check: parent __init__.py must import from this submodule. 
+ parent_src = source_root / Path(*parts[:-1]) / "__init__.py" + if not parent_src.exists(): + continue # no __init__.py — cannot determine, keep + + parent_imports = _imported_submodules(parent_src) + if parent_imports is None: + continue # parse failure — keep (safe default) + if not parent_imports: + continue # parent imports nothing from submodules — indeterminate, keep + + if module_name not in parent_imports: + print( + f" 🔒 {rel} ('{module_name}' not imported by " + f"{Path(*parts[:-1])}/__init__.py)", + flush=True, + ) + mdx_path.unlink(missing_ok=True) + removed += 1 + + print(f"✅ Removed {removed} internal module file(s).", flush=True) + + # ----------------------------- # Move + nav generation # ----------------------------- @@ -770,6 +906,7 @@ def main() -> None: rename_init_files_to_parent() update_frontmatter_metadata() remove_empty_mdx_files() + remove_internal_modules(source_root) # Move staging api -> final docs root/api final_api_dir = move_api_to_docs_root(docs_root) diff --git a/tooling/docs-autogen/validate.py b/tooling/docs-autogen/validate.py index d1e2bc980..94097d506 100755 --- a/tooling/docs-autogen/validate.py +++ b/tooling/docs-autogen/validate.py @@ -11,38 +11,149 @@ import argparse import json +import os import re import sys from pathlib import Path +_IN_GHA = os.environ.get("GITHUB_ACTIONS") == "true" + +_ROOT_CONTRIB = ( + "https://github.com/generative-computing/mellea/blob/main/CONTRIBUTING.md" +) +_GUIDE_CONTRIB = ( + "https://github.com/generative-computing/mellea/blob/main" + "/docs/docs/guide/CONTRIBUTING.md" +) + +# Per-check fix hints: label (as passed to _print_check_errors) -> (fix text, ref URL) +_CHECK_FIX_HINTS: dict[str, tuple[str, str]] = { + "Source links": ( + "Regenerate API docs to update version tags in source links: " + "uv run python tooling/docs-autogen/generate-ast.py", + f"{_ROOT_CONTRIB}#validating-docstrings", + ), + "MDX syntax": ( + "Check for unclosed ``` fences, missing frontmatter, or unescaped {{ }} " 
+ "in code blocks. Each error includes the file and line number.", + f"{_GUIDE_CONTRIB}#frontmatter-required-on-every-page", + ), + "Internal links": ( + "Verify the relative link path resolves to an existing .mdx file. " + "Each error shows the file, line, and broken link target.", + f"{_GUIDE_CONTRIB}#links", + ), + "Anchor collisions": ( + "Rename one of the conflicting headings so they produce unique anchors. " + "Each error shows both heading texts and their line numbers.", + f"{_GUIDE_CONTRIB}#headings", + ), + "RST docstrings": ( + "Replace RST double-backtick notation (``Symbol``) with single backticks " + "(`Symbol`) in the Python source docstring.", + f"{_ROOT_CONTRIB}#docstrings", + ), + "Stale files": ( + "Delete the listed files — they are review artifacts or superseded content " + "that should not ship.", + f"{_GUIDE_CONTRIB}#missing-content", + ), + "Doc imports": ( + "Update the import path in the documentation code block to match the " + "current module/symbol location.", + f"{_GUIDE_CONTRIB}#code-and-fragment-consistency", + ), + "Examples catalogue": ( + "Add a row for the new example directory to docs/docs/examples/index.md.", + f"{_GUIDE_CONTRIB}#keeping-the-examples-catalogue-up-to-date", + ), +} + +# GitHub Actions silently drops inline diff annotations beyond ~10 per step. +# We cap at 20 total across all validate.py checks so the most important issues +# from each category remain visible in the PR diff. The complete list is always +# in the job log and (when --output is used) the JSON artifact. 
+_GHA_ANNOTATION_CAP = 20 + + +# --------------------------------------------------------------------------- +# Error dict helpers +# --------------------------------------------------------------------------- + +# Each check function returns list[dict] where every dict has: +# file — repo-relative file path (str); empty string for project-level issues +# line — 1-based line number (int); 0 for file- or project-level issues +# message — human-readable description (str) + + +def _err(file: str, line: int, message: str) -> dict: + return {"file": file, "line": line, "message": message} + + +def _line_of(content: str, match_start: int) -> int: + """Return the 1-based line number for a byte offset in *content*.""" + return content[:match_start].count("\n") + 1 + + +# --------------------------------------------------------------------------- +# GHA annotation helpers +# --------------------------------------------------------------------------- + + +def _gha_annotation( + level: str, title: str, message: str, file: str = "", line: int = 0 +) -> None: + """Emit a GitHub Actions workflow command annotation. + + When *file* and *line* are provided the annotation is anchored to that + source location so it appears inline in the PR diff view. 
+ """ + for attr in ("message", "title", "file"): + locals()[attr] # reference to avoid lint unused-var + message = message.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A") + title = title.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A") + file = file.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A") + if file and line: + print(f"::{level} file={file},line={line},title={title}::{message}") + else: + print(f"::{level} title={title}::{message}") + + +# --------------------------------------------------------------------------- +# Validation checks +# --------------------------------------------------------------------------- -def validate_source_links(docs_dir: Path, version: str) -> tuple[int, list[str]]: - """Validate GitHub source links. + +def validate_source_links(docs_dir: Path, version: str) -> tuple[int, list[dict]]: + """Validate GitHub source links in generated MDX files. Args: - docs_dir: Directory containing MDX files - version: Expected version in links (e.g., "0.5.0") + docs_dir: Directory containing MDX files. + version: Expected version string in links (e.g., ``"0.5.0"``). Returns: - Tuple of (error_count, error_messages) + Tuple of (error_count, errors) where each error dict has keys + ``file``, ``line``, and ``message``. 
""" - errors = [] + errors: list[dict] = [] expected_repo = "ibm-granite/mellea" expected_pattern = f"https://github.com/{expected_repo}/blob/v{version}/" + link_re = re.compile(r"\[View source\]\((https://github\.com/[^)]+)\)") - for mdx_file in docs_dir.rglob("*.mdx"): + for mdx_file in sorted(docs_dir.rglob("*.mdx")): content = mdx_file.read_text() - - # Find all GitHub links - link_pattern = r"\[View source\]\((https://github\.com/[^)]+)\)" - for match in re.finditer(link_pattern, content): - link = match.group(1) - if not link.startswith(expected_pattern): - rel_path = mdx_file.relative_to(docs_dir) - errors.append( - f"{rel_path}: Invalid source link: {link}\n" - f" Expected to start with: {expected_pattern}" - ) + rel = str(mdx_file.relative_to(docs_dir)) + for line_num, line in enumerate(content.splitlines(), 1): + for match in link_re.finditer(line): + link = match.group(1) + if not link.startswith(expected_pattern): + errors.append( + _err( + rel, + line_num, + f"Invalid source link: {link} (expected prefix: {expected_pattern})", + ) + ) return len(errors), errors @@ -51,15 +162,12 @@ def validate_coverage(docs_dir: Path, threshold: float) -> tuple[bool, dict]: """Validate API coverage meets threshold. Args: - docs_dir: Directory containing MDX files - threshold: Minimum coverage percentage (0-100) + docs_dir: Directory containing MDX files. + threshold: Minimum coverage percentage (0-100). Returns: - Tuple of (passed, coverage_report) + Tuple of (passed, coverage_report). 
""" - # Import audit_coverage functionality - import sys - sys.path.insert(0, str(Path(__file__).parent)) try: @@ -72,8 +180,7 @@ def validate_coverage(docs_dir: Path, threshold: float) -> tuple[bool, dict]: except ImportError: return False, {"error": "audit_coverage.py not found"} - # Run coverage audit - source_dir = docs_dir.parent.parent.parent # Go up to project root + source_dir = docs_dir.parent.parent.parent mellea_symbols = discover_public_symbols(source_dir / "mellea", "mellea") cli_symbols = discover_public_symbols(source_dir / "cli", "cli") cli_commands = discover_cli_commands(source_dir / "cli") @@ -81,179 +188,162 @@ def validate_coverage(docs_dir: Path, threshold: float) -> tuple[bool, dict]: all_symbols = {**mellea_symbols, **cli_symbols} report = generate_coverage_report(all_symbols, documented, cli_commands) + return report["coverage_percentage"] >= threshold, report - passed = report["coverage_percentage"] >= threshold - return passed, report +def validate_mdx_syntax(docs_dir: Path) -> tuple[int, list[dict]]: + """Validate MDX syntax in generated documentation files. -def validate_mdx_syntax(docs_dir: Path) -> tuple[int, list[str]]: - """Validate MDX syntax. + Checks for unclosed code fences, unescaped curly braces inside code + blocks, missing frontmatter, and a missing ``title`` field. Args: - docs_dir: Directory containing MDX files + docs_dir: Directory containing MDX files. Returns: - Tuple of (error_count, error_messages) + Tuple of (error_count, errors). 
""" - errors = [] + errors: list[dict] = [] + fence_re = re.compile(r"^```") - for mdx_file in docs_dir.rglob("*.mdx"): + for mdx_file in sorted(docs_dir.rglob("*.mdx")): content = mdx_file.read_text() - rel_path = mdx_file.relative_to(docs_dir) + rel = str(mdx_file.relative_to(docs_dir)) + lines = content.splitlines() - # Check for unclosed code blocks - code_block_count = content.count("```") - if code_block_count % 2 != 0: - errors.append(f"{rel_path}: Unclosed code block (odd number of ```)") + # Unclosed code fence (odd number of ``` in the whole file) + if content.count("```") % 2 != 0: + errors.append(_err(rel, 0, "Unclosed code block (odd number of ```)")) - # Check for unescaped curly braces in code blocks - lines = content.splitlines() + # Unescaped curly braces inside code blocks in_code_block = False - code_fence_pattern = re.compile(r"^```") - for line_num, line in enumerate(lines, 1): - if code_fence_pattern.match(line): + if fence_re.match(line): in_code_block = not in_code_block continue - - if in_code_block: - # Check for sequences of { or } that have odd length (indicating unescaped) - # Properly escaped: {{ or }} (even length) - # Unescaped: { or } or {{{ or }}} (odd length) - - # Find all sequences of consecutive open braces - for match in re.finditer(r"\{+", line): - brace_seq = match.group() - if len(brace_seq) % 2 != 0: # Odd number = unescaped - errors.append( - f"{rel_path}:{line_num}: Unescaped curly brace in code block (found {len(brace_seq)} consecutive '{{' - should be even)\n" - f" Line: {line.strip()[:80]}" - ) - break # Only report once per line - - # Find all sequences of consecutive close braces - for match in re.finditer(r"\}+", line): - brace_seq = match.group() - if len(brace_seq) % 2 != 0: # Odd number = unescaped + if not in_code_block: + continue + for pattern, brace in ((r"\{+", "{"), (r"\}+", "}")): + for match in re.finditer(pattern, line): + seq = match.group() + if len(seq) % 2 != 0: errors.append( - 
f"{rel_path}:{line_num}: Unescaped curly brace in code block (found {len(brace_seq)} consecutive '}}' - should be even)\n" - f" Line: {line.strip()[:80]}" + _err( + rel, + line_num, + f"Unescaped '{brace}' in code block: " + f"{len(seq)} consecutive '{brace}' (must be even) — " + f"line: {line.strip()[:80]}", + ) ) - break # Only report once per line + break - # Check for frontmatter + # Frontmatter presence and structure if not content.startswith("---\n"): - errors.append(f"{rel_path}: Missing frontmatter") - - # Check for required frontmatter fields - if content.startswith("---\n"): - frontmatter_end = content.find("\n---\n", 4) - if frontmatter_end == -1: - errors.append(f"{rel_path}: Malformed frontmatter (no closing ---)") - else: - frontmatter = content[4:frontmatter_end] - if "title:" not in frontmatter: - errors.append(f"{rel_path}: Missing 'title' in frontmatter") + errors.append( + _err(rel, 1, "Missing frontmatter (file must start with ---)") + ) + else: + end = content.find("\n---\n", 4) + if end == -1: + errors.append(_err(rel, 1, "Malformed frontmatter (no closing ---)")) + elif "title:" not in content[4:end]: + errors.append(_err(rel, 1, "Missing 'title' field in frontmatter")) return len(errors), errors -def validate_internal_links(docs_dir: Path) -> tuple[int, list[str]]: - """Validate internal links point to existing files. +def validate_internal_links(docs_dir: Path) -> tuple[int, list[dict]]: + """Validate that relative internal links resolve to existing files. Args: - docs_dir: Directory containing MDX files + docs_dir: Directory containing MDX files. Returns: - Tuple of (error_count, error_messages) + Tuple of (error_count, errors). 
""" - errors = [] + errors: list[dict] = [] + link_re = re.compile(r"\[([^\]]+)\]\(([^)]+)\)", re.DOTALL) - for mdx_file in docs_dir.rglob("*.mdx"): + for mdx_file in sorted(docs_dir.rglob("*.mdx")): content = mdx_file.read_text() - rel_path = mdx_file.relative_to(docs_dir) + rel = str(mdx_file.relative_to(docs_dir)) - # Find relative links (not starting with http) - # Use DOTALL to handle multiline links - link_pattern = r"\[([^\]]+)\]\(([^)]+)\)" - for match in re.finditer(link_pattern, content, re.DOTALL): + for match in link_re.finditer(content): link_text, link_url = match.groups() - - # Strip whitespace from URL (handles multiline links) link_url = link_url.strip() - # Skip external links if link_url.startswith(("http://", "https://", "#")): continue - # Split anchor from path (e.g., "base#class-component" -> "base", "class-component") - if "#" in link_url: - file_path, _ = link_url.split("#", 1) # anchor not used - else: - file_path = link_url - - # Resolve relative link - add .mdx extension if not present - if file_path and not file_path.endswith(".mdx"): - file_path = f"{file_path}.mdx" - - target = (mdx_file.parent / file_path).resolve() + file_part = link_url.split("#", 1)[0] if "#" in link_url else link_url + if file_part and not file_part.endswith(".mdx"): + file_part = f"{file_part}.mdx" + target = (mdx_file.parent / file_part).resolve() if not target.exists(): errors.append( - f"{rel_path}: Broken link to '{link_url}' (text: '{link_text}')" + _err( + rel, + _line_of(content, match.start()), + f"Broken link to '{link_url}' (text: '{link_text}')", + ) ) return len(errors), errors -def validate_anchor_collisions(docs_dir: Path) -> tuple[int, list[str]]: - """Check for anchor collisions within files. +def validate_anchor_collisions(docs_dir: Path) -> tuple[int, list[dict]]: + """Check for heading anchor collisions within MDX files. Args: - docs_dir: Directory containing MDX files + docs_dir: Directory containing MDX files. 
Returns: - Tuple of (error_count, error_messages) + Tuple of (error_count, errors). """ - errors = [] + errors: list[dict] = [] - # Import mintlify_anchor function sys.path.insert(0, str(Path(__file__).parent)) try: from test_mintlify_anchors import mintlify_anchor except ImportError: - # Fallback implementation - def mintlify_anchor(heading: str) -> str: + + def mintlify_anchor(heading: str) -> str: # type: ignore[misc] anchor = heading.lower().replace(" ", "-") anchor = re.sub(r"[^a-z0-9-]", "", anchor) - anchor = re.sub(r"-+", "-", anchor) - return anchor.strip("-") + return re.sub(r"-+", "-", anchor).strip("-") - for mdx_file in docs_dir.rglob("*.mdx"): - content = mdx_file.read_text() - rel_path = mdx_file.relative_to(docs_dir) + heading_re = re.compile(r"^#+\s+(.+)$", re.MULTILINE) - # Extract all headings - heading_pattern = r"^#+\s+(.+)$" - headings = re.findall(heading_pattern, content, re.MULTILINE) + for mdx_file in sorted(docs_dir.rglob("*.mdx")): + content = mdx_file.read_text() + rel = str(mdx_file.relative_to(docs_dir)) - # Generate anchors - anchors: dict[str, list[str]] = {} - for heading in headings: + # anchors maps anchor -> (heading_text, line_num) for the first occurrence + seen: dict[str, tuple[str, int]] = {} + for match in heading_re.finditer(content): + heading = match.group(1) + line_num = _line_of(content, match.start()) anchor = mintlify_anchor(heading) - if anchor in anchors: + if anchor in seen: + prev_heading, prev_line = seen[anchor] errors.append( - f"{rel_path}: Anchor collision '{anchor}' from headings:\n" - f" 1. {anchors[anchor]}\n" - f" 2. 
{heading}" + _err( + rel, + line_num, + f"Anchor collision '{anchor}': " + f"'{prev_heading}' (line {prev_line}) and " + f"'{heading}' (line {line_num}) produce the same anchor", + ) ) else: - anchors[anchor] = heading + seen[anchor] = (heading, line_num) return len(errors), errors -def validate_rst_docstrings(source_dir: Path) -> tuple[int, list[str]]: +def validate_rst_docstrings(source_dir: Path) -> tuple[int, list[dict]]: """Scan Python source files for RST double-backtick notation in docstrings. RST-style ``Symbol`` double-backtick markup interacts badly with the @@ -263,34 +353,35 @@ def validate_rst_docstrings(source_dir: Path) -> tuple[int, list[str]]: rather than a clickable link. Args: - source_dir: Root of the Python source tree to scan (e.g. repo/mellea) + source_dir: Root of the Python source tree to scan (e.g. repo/mellea). Returns: - Tuple of (error_count, error_messages) + Tuple of (error_count, errors). """ - errors = [] - # Match ``Word`` where Word starts with a letter (RST inline literal) + errors: list[dict] = [] pattern = re.compile(r"``([A-Za-z][^`]*)``") - for py_file in source_dir.rglob("*.py"): + for py_file in sorted(source_dir.rglob("*.py")): try: content = py_file.read_text(encoding="utf-8") except Exception: continue - + rel = str(py_file.relative_to(source_dir.parent)) for line_num, line in enumerate(content.splitlines(), 1): if pattern.search(line): - rel = py_file.relative_to(source_dir.parent) errors.append( - f"{rel}:{line_num}: RST double-backtick notation — " - f"use single backticks for Markdown/MDX compatibility\n" - f" {line.strip()[:100]}" + _err( + rel, + line_num, + f"RST double-backtick notation — use single backticks for " + f"Markdown/MDX compatibility: {line.strip()[:100]}", + ) ) return len(errors), errors -def validate_stale_files(docs_root: Path) -> tuple[int, list[str]]: +def validate_stale_files(docs_root: Path) -> tuple[int, list[dict]]: """Check for stale files that should have been cleaned up. 
Detects review artifacts, superseded files, and other content that @@ -300,30 +391,29 @@ def validate_stale_files(docs_root: Path) -> tuple[int, list[str]]: docs_root: The ``docs/`` directory (parent of ``docs/docs/``). Returns: - Tuple of (error_count, error_messages). + Tuple of (error_count, errors). """ - errors = [] + errors: list[dict] = [] - # Review tracker artifacts (e.g. PR601-REVIEW.md) for f in docs_root.glob("*REVIEW*"): - errors.append(f"Stale review artifact: {f.relative_to(docs_root)}") + errors.append(_err(str(f.relative_to(docs_root)), 0, "Stale review artifact")) - # Superseded landing page: docs/index.md replaced by docs/docs/index.mdx old_index = docs_root / "index.md" new_index = docs_root / "docs" / "index.mdx" if old_index.exists() and new_index.exists(): - errors.append("Stale file: index.md — superseded by docs/index.mdx") + errors.append(_err("index.md", 0, "Stale file — superseded by docs/index.mdx")) - # Legacy tutorial: docs/tutorial.md replaced by docs/docs/tutorials/ old_tutorial = docs_root / "tutorial.md" tutorials_dir = docs_root / "docs" / "tutorials" if old_tutorial.exists() and tutorials_dir.is_dir(): - errors.append("Stale file: tutorial.md — superseded by docs/tutorials/") + errors.append( + _err("tutorial.md", 0, "Stale file — superseded by docs/tutorials/") + ) return len(errors), errors -def validate_examples_catalogue(docs_root: Path) -> tuple[int, list[str]]: +def validate_examples_catalogue(docs_root: Path) -> tuple[int, list[dict]]: """Check that every example directory is listed in the examples index page. Scans ``docs/examples/`` for subdirectories that contain at least one @@ -334,45 +424,45 @@ def validate_examples_catalogue(docs_root: Path) -> tuple[int, list[str]]: docs_root: The ``docs/`` directory (parent of ``docs/docs/``). Returns: - Tuple of (error_count, error_messages). + Tuple of (error_count, errors). 
""" - errors: list[str] = [] + errors: list[dict] = [] examples_dir = docs_root / "examples" index_file = docs_root / "docs" / "examples" / "index.md" if not examples_dir.is_dir(): return 0, [] if not index_file.exists(): - errors.append("Examples index page not found: docs/docs/examples/index.md") + errors.append( + _err("docs/docs/examples/index.md", 0, "Examples index page not found") + ) return len(errors), errors index_content = index_file.read_text() - - # Directories to skip: not standalone example categories skip = {"__pycache__", "helper"} for child in sorted(examples_dir.iterdir()): - if not child.is_dir(): + if not child.is_dir() or child.name in skip or child.name.startswith("."): continue - if child.name in skip or child.name.startswith("."): - continue - # Only check directories that contain at least one .py file if not any(child.rglob("*.py")): continue - # Check the directory name appears in the index (as a table entry or heading) if ( f"`{child.name}/" not in index_content and f"`{child.name}`" not in index_content ): errors.append( - f"Example directory '{child.name}/' is not listed in " - f"docs/docs/examples/index.md" + _err( + f"docs/examples/{child.name}/", + 0, + f"Example directory '{child.name}/' is not listed in " + f"docs/docs/examples/index.md", + ) ) return len(errors), errors -def validate_doc_imports(docs_dir: Path) -> tuple[int, list[str]]: +def validate_doc_imports(docs_dir: Path) -> tuple[int, list[dict]]: """Verify that mellea imports in documentation code blocks still resolve. Parses fenced Python code blocks in static docs for ``from mellea.X import Y`` @@ -384,21 +474,21 @@ def validate_doc_imports(docs_dir: Path) -> tuple[int, list[str]]: docs_dir: The ``docs/docs/`` directory containing static documentation. Returns: - Tuple of (error_count, error_messages). + Tuple of (error_count, errors). 
""" import importlib - errors: list[str] = [] + errors: list[dict] = [] seen: set[tuple[str, str]] = set() for doc_file in sorted( list(docs_dir.rglob("*.md")) + list(docs_dir.rglob("*.mdx")) ): content = doc_file.read_text() - rel = doc_file.relative_to(docs_dir) + rel = str(doc_file.relative_to(docs_dir)) in_python = False - for line in content.splitlines(): + for line_num, line in enumerate(content.splitlines(), 1): stripped = line.strip() if stripped.startswith("```python"): in_python = True @@ -427,16 +517,17 @@ def validate_doc_imports(docs_dir: Path) -> tuple[int, list[str]]: try: mod = importlib.import_module(module) except ImportError: - # Optional dependency not installed — skip continue if not hasattr(mod, name): - # Could be a submodule (e.g. from pkg import submod) try: importlib.import_module(f"{module}.{name}") except ImportError: errors.append( - f"{rel}: from {module} import {name}" - f" — symbol not found in module" + _err( + rel, + line_num, + f"from {module} import {name} — symbol not found in module", + ) ) continue @@ -456,29 +547,44 @@ def validate_doc_imports(docs_dir: Path) -> tuple[int, list[str]]: return len(errors), errors +# --------------------------------------------------------------------------- +# Report assembly +# --------------------------------------------------------------------------- + + def generate_report( - source_link_errors: list[str], + source_link_errors: list[dict], coverage_passed: bool, coverage_report: dict, - mdx_errors: list[str], - link_errors: list[str], - anchor_errors: list[str], - rst_docstring_errors: list[str] | None = None, - stale_errors: list[str] | None = None, - import_errors: list[str] | None = None, - examples_catalogue_errors: list[str] | None = None, + mdx_errors: list[dict], + link_errors: list[dict], + anchor_errors: list[dict], + rst_docstring_errors: list[dict] | None = None, + stale_errors: list[dict] | None = None, + import_errors: list[dict] | None = None, + examples_catalogue_errors: 
list[dict] | None = None, ) -> dict: - """Generate validation report. + """Assemble the full validation report. + + Args: + source_link_errors: Errors from :func:`validate_source_links`. + coverage_passed: Whether coverage met the threshold. + coverage_report: Raw coverage report dict. + mdx_errors: Errors from :func:`validate_mdx_syntax`. + link_errors: Errors from :func:`validate_internal_links`. + anchor_errors: Errors from :func:`validate_anchor_collisions`. + rst_docstring_errors: Errors from :func:`validate_rst_docstrings`. + stale_errors: Errors from :func:`validate_stale_files`. + import_errors: Errors from :func:`validate_doc_imports`. + examples_catalogue_errors: Errors from :func:`validate_examples_catalogue`. Returns: Report dictionary with all validation results. """ - if stale_errors is None: - stale_errors = [] - if import_errors is None: - import_errors = [] - if examples_catalogue_errors is None: - examples_catalogue_errors = [] + rst_docstring_errors = rst_docstring_errors or [] + stale_errors = stale_errors or [] + import_errors = import_errors or [] + examples_catalogue_errors = examples_catalogue_errors or [] return { "source_links": { @@ -509,9 +615,9 @@ def generate_report( "errors": anchor_errors, }, "rst_docstrings": { - "passed": len(rst_docstring_errors or []) == 0, - "error_count": len(rst_docstring_errors or []), - "errors": rst_docstring_errors or [], + "passed": len(rst_docstring_errors) == 0, + "error_count": len(rst_docstring_errors), + "errors": rst_docstring_errors, }, "stale_files": { "passed": len(stale_errors) == 0, @@ -537,11 +643,72 @@ def generate_report( and len(stale_errors) == 0 and len(import_errors) == 0 and len(examples_catalogue_errors) == 0 - # rst_docstrings is a warning only — does not fail the build + # rst_docstrings is warning-only — does not fail the build ), } +# --------------------------------------------------------------------------- +# Output helpers +# 
---------------------------------------------------------------------------
+
+
+def _icon(passed: bool) -> str:
+    return "✅" if passed else "❌"
+
+
+def _print_check_errors(label: str, errors: list[dict], gha_budget: list[int]) -> None:
+    """Print a grouped error block for one check to stdout and emit GHA annotations.
+
+    GHA annotations are emitted until the shared *gha_budget* counter (a
+    single-element list used as a mutable int, initialised to
+    :data:`_GHA_ANNOTATION_CAP`) is exhausted.
+    This prevents later check categories from being invisible because an earlier
+    category consumed the entire annotation budget. The full error list is
+    always printed to stdout regardless of the cap.
+
+    Args:
+        label: Human-readable check name shown in the section header.
+        errors: List of error dicts with keys ``file``, ``line``, ``message``.
+        gha_budget: Single-element list ``[remaining_annotations]``; decremented
+            in-place as annotations are emitted.
+    """
+    if not errors:
+        return
+    fix_hint, ref_url = _CHECK_FIX_HINTS.get(label, ("", ""))
+    print(f"\n{'─' * 50}")
+    print(f" {label} ({len(errors)} error(s))")
+    if fix_hint:
+        print(f" Fix: {fix_hint}")
+    if ref_url:
+        print(f" Ref: {ref_url}")
+    print(f"{'─' * 50}")
+    capped = 0
+    for e in errors:
+        print(f" {e['file'] or '—'}{':' + str(e['line']) if e.get('line') else ''}")
+        print(f" {e['message']}")
+        if _IN_GHA and gha_budget[0] > 0:
+            _gha_annotation(
+                "error",
+                label,
+                e["message"],
+                file=e.get("file", ""),
+                line=e.get("line", 0),
+            )
+            gha_budget[0] -= 1
+        elif _IN_GHA and gha_budget[0] == 0:
+            capped += 1
+    if capped:
+        print(
+            f" (GHA annotations exhausted — {capped} more error(s) in job log "
+            f"and JSON artifact; total cap is {_GHA_ANNOTATION_CAP})"
+        )
+
+
+# ---------------------------------------------------------------------------
+# CLI entry point
+# ---------------------------------------------------------------------------
+
+
 def main():
     parser = argparse.ArgumentParser(description="Validate API documentation")
parser.add_argument("docs_dir", help="Directory containing generated MDX files") @@ -568,6 +735,11 @@ def main(): "--docs-root", help="Root docs/ directory for stale-file checks (default: parent of docs_dir)", ) + parser.add_argument( + "--warn-only", + action="store_true", + help="Print issues but always exit 0 (pre-commit informational mode)", + ) args = parser.parse_args() docs_dir = Path(args.docs_dir) @@ -582,7 +754,7 @@ def main(): print(f" Coverage threshold: {args.coverage_threshold}%") print() - # Run validations + # Run all checks print("Checking GitHub source links...") _, source_link_errors = ( validate_source_links(docs_dir, args.version) if args.version else (0, []) @@ -604,7 +776,7 @@ def main(): print("Checking anchor collisions...") _, anchor_errors = validate_anchor_collisions(docs_dir) - rst_docstring_errors: list[str] = [] + rst_docstring_errors: list[dict] = [] if args.source_dir: print("Checking source docstrings for RST double-backtick notation...") _, rst_docstring_errors = validate_rst_docstrings(args.source_dir) @@ -620,7 +792,7 @@ def main(): print("Checking examples catalogue...") _, examples_catalogue_errors = validate_examples_catalogue(docs_root) - # Generate report + # Assemble report report = generate_report( source_link_errors, coverage_passed, @@ -634,88 +806,77 @@ def main(): examples_catalogue_errors, ) - # Print results + # Summary table print("\n" + "=" * 60) print("Validation Results") print("=" * 60) + checks = [ + ("Source links", report["source_links"]["passed"]), + ("Coverage", report["coverage"]["passed"]), + ("MDX syntax", report["mdx_syntax"]["passed"]), + ("Internal links", report["internal_links"]["passed"]), + ("Anchor collisions", report["anchor_collisions"]["passed"]), + ("Stale files", report["stale_files"]["passed"]), + ("Doc imports", report["doc_imports"]["passed"]), + ("Examples catalogue", report["examples_catalogue"]["passed"]), + ] + if args.source_dir: + checks.append(("RST docstrings (warning)", 
report["rst_docstrings"]["passed"])) - print(f"✅ Source links: {'PASS' if report['source_links']['passed'] else 'FAIL'}") - if not report["source_links"]["passed"]: - print(f" {report['source_links']['error_count']} errors found") + for label, passed in checks: + print(f"{_icon(passed)} {label}: {'PASS' if passed else 'FAIL'}") - print(f"✅ Coverage: {'PASS' if report['coverage']['passed'] else 'FAIL'}") if not args.skip_coverage: + cov = report["coverage"] print( - f" {report['coverage']['percentage']}% " - f"({report['coverage']['documented_symbols']}/{report['coverage']['total_symbols']} symbols)" + f" Coverage: {cov['percentage']}%" + f" ({cov['documented_symbols']}/{cov['total_symbols']} symbols)" ) - print(f"✅ MDX syntax: {'PASS' if report['mdx_syntax']['passed'] else 'FAIL'}") - if not report["mdx_syntax"]["passed"]: - print(f" {report['mdx_syntax']['error_count']} errors found") - - print( - f"✅ Internal links: {'PASS' if report['internal_links']['passed'] else 'FAIL'}" - ) - if not report["internal_links"]["passed"]: - print(f" {report['internal_links']['error_count']} errors found") - - print( - f"✅ Anchor collisions: {'PASS' if report['anchor_collisions']['passed'] else 'FAIL'}" - ) - if not report["anchor_collisions"]["passed"]: - print(f" {report['anchor_collisions']['error_count']} errors found") - - if args.source_dir: - print( - f"✅ RST docstrings: {'PASS' if report['rst_docstrings']['passed'] else 'FAIL'}" - ) - if not report["rst_docstrings"]["passed"]: - print(f" {report['rst_docstrings']['error_count']} occurrences found") - - print(f"✅ Stale files: {'PASS' if report['stale_files']['passed'] else 'FAIL'}") - if not report["stale_files"]["passed"]: - print(f" {report['stale_files']['error_count']} errors found") - - print(f"✅ Doc imports: {'PASS' if report['doc_imports']['passed'] else 'FAIL'}") - if not report["doc_imports"]["passed"]: - print(f" {report['doc_imports']['error_count']} errors found") - + print("\n" + "=" * 60) print( - f"✅ 
Examples catalogue: {'PASS' if report['examples_catalogue']['passed'] else 'FAIL'}" + f"Overall: {_icon(report['overall_passed'])} {'PASS' if report['overall_passed'] else 'FAIL'}" ) - if not report["examples_catalogue"]["passed"]: - print(f" {report['examples_catalogue']['error_count']} errors found") - - print("\n" + "=" * 60) - print(f"Overall: {'✅ PASS' if report['overall_passed'] else '❌ FAIL'}") print("=" * 60) - # Print detailed errors - if not report["overall_passed"]: - print("\nDetailed Errors:") - all_errors = ( - source_link_errors - + mdx_errors - + link_errors - + anchor_errors - + rst_docstring_errors - + stale_errors - + import_errors - + examples_catalogue_errors - ) - for error in all_errors: - print(f" • {error}") + # Detailed errors — grouped by check + error_sections = [ + ("Source links", source_link_errors), + ("MDX syntax", mdx_errors), + ("Internal links", link_errors), + ("Anchor collisions", anchor_errors), + ("RST docstrings", rst_docstring_errors), + ("Stale files", stale_errors), + ("Doc imports", import_errors), + ("Examples catalogue", examples_catalogue_errors), + ] + any_errors = any(errs for _, errs in error_sections) + if any_errors: + print("\nDetailed errors:") + gha_budget = [_GHA_ANNOTATION_CAP] + for label, errs in error_sections: + _print_check_errors(label, errs, gha_budget) + + # GHA summary annotation + if _IN_GHA: + if report["overall_passed"]: + _gha_annotation("notice", "MDX Validation", "All checks passed") + else: + total_errors = sum(len(e) for _, e in error_sections) + _gha_annotation( + "error", + "MDX Validation", + f"{total_errors} error(s) found across {sum(1 for _, e in error_sections if e)} check(s) — see job summary for details", + ) - # Save report + # Save JSON report if args.output: output_path = Path(args.output) output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(json.dumps(report, indent=2)) print(f"\n📄 Report saved to {output_path}") - # Exit with appropriate code - 
sys.exit(0 if report["overall_passed"] else 1) + sys.exit(0 if (report["overall_passed"] or args.warn_only) else 1) if __name__ == "__main__":