Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 41 additions & 2 deletions tooling/docs-autogen/decorate_api_mdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,39 @@ def replace_md(match):
return content


# =========================
# RST double-backtick normalisation
# =========================

# Matches an RST inline literal: exactly two backticks, some non-backtick
# content, then exactly two backticks. The lookarounds stop the pattern from
# matching inside a longer backtick run (for example an inline ```code``` span),
# which would otherwise be rewritten to ``code``.
_RST_DOUBLE_BACKTICK_RE = re.compile(r"(?<!`)``([^`]+)``(?!`)")


def normalize_rst_backticks(content: str) -> str:
    """Convert RST-style double-backtick literals to single-backtick Markdown.

    Replaces ``Symbol`` with `Symbol` in MDX prose (outside fenced code blocks).
    This prevents add_cross_references from generating malformed link syntax such
    as `[`Backend`](url)` where the link is wrapped in an extra code span and
    renders as raw text rather than a clickable link.

    Fence tracking is a simple toggle: any line whose first non-blank characters
    are three backticks flips the "inside a fence" state. Lines inside a fence,
    and the line that opens it, are passed through unchanged. Backtick runs
    longer than two (e.g. inline triple-backtick spans) are left alone.

    Args:
        content: MDX file content

    Returns:
        Content with ``x`` replaced by `x` outside code fences
    """
    result = []
    in_fence = False
    # keepends=True preserves the original line terminators so the join below
    # reproduces untouched lines byte-for-byte.
    for line in content.splitlines(keepends=True):
        if line.lstrip().startswith("```"):
            in_fence = not in_fence
        if not in_fence:
            line = _RST_DOUBLE_BACKTICK_RE.sub(r"`\1`", line)
        result.append(line)
    return "".join(result)


# =========================
# MDX escaping
# =========================
Expand Down Expand Up @@ -801,11 +834,17 @@ def process_mdx_file(
else:
module_path = path.stem

# Step 0.5: Normalise RST double-backtick notation → single backtick
# Must run before add_cross_references so ``Symbol`` doesn't generate `[`Symbol`](url)`
text = normalize_rst_backticks(original)

# Step 1: Fix GitHub source links
text = fix_source_links(original, version)
text = fix_source_links(text, version)

# Step 2: Inject preamble
# Step 2: Inject preamble (docstring cache text may also contain RST notation;
# inject_preamble runs after normalize so the injected text needs a second pass)
text = inject_preamble(text, module_path, docstring_cache)
text = normalize_rst_backticks(text)

# Step 3: inject SidebarFix
text = inject_sidebar_fix(text)
Expand Down
71 changes: 70 additions & 1 deletion tooling/docs-autogen/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,13 +253,51 @@ def mintlify_anchor(heading: str) -> str:
return len(errors), errors


def validate_rst_docstrings(source_dir: Path) -> tuple[int, list[str]]:
    """Scan Python source files for RST double-backtick notation.

    RST-style ``Symbol`` double-backtick markup interacts badly with the
    add_cross_references step: the regex matches the inner single-backtick
    boundary and generates a broken link wrapped in an extra code span, e.g.
    ``Backend`` → `[`Backend`](url)` which Mintlify renders as raw text
    rather than a clickable link.

    The check is line-based: every line of every ``*.py`` file under
    ``source_dir`` is tested, so matches in comments or string literals are
    reported as well as matches in docstrings. Each offending line is reported
    once, even if it contains several literals. Files that cannot be read or
    are not valid UTF-8 are skipped.

    Args:
        source_dir: Root of the Python source tree to scan (e.g. repo/mellea)

    Returns:
        Tuple of (error_count, error_messages). Messages are ordered by file
        path, then line number, and paths are shown relative to the parent of
        ``source_dir``.
    """
    errors: list[str] = []
    # Match ``Word`` where Word starts with a letter (RST inline literal)
    pattern = re.compile(r"``([A-Za-z][^`]*)``")

    # Sorted so the report is reproducible regardless of filesystem order.
    for py_file in sorted(source_dir.rglob("*.py")):
        try:
            content = py_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best-effort scan: skip unreadable or non-UTF-8 files, but do not
            # hide unrelated programming errors behind a blanket except.
            continue

        for line_num, line in enumerate(content.splitlines(), 1):
            if pattern.search(line):
                rel = py_file.relative_to(source_dir.parent)
                errors.append(
                    f"{rel}:{line_num}: RST double-backtick notation — "
                    f"use single backticks for Markdown/MDX compatibility\n"
                    f" {line.strip()[:100]}"
                )

    return len(errors), errors


def generate_report(
source_link_errors: list[str],
coverage_passed: bool,
coverage_report: dict,
mdx_errors: list[str],
link_errors: list[str],
anchor_errors: list[str],
rst_docstring_errors: list[str] | None = None,
) -> dict:
"""Generate validation report.

Expand Down Expand Up @@ -294,12 +332,18 @@ def generate_report(
"error_count": len(anchor_errors),
"errors": anchor_errors,
},
"rst_docstrings": {
"passed": len(rst_docstring_errors or []) == 0,
"error_count": len(rst_docstring_errors or []),
"errors": rst_docstring_errors or [],
},
"overall_passed": (
len(source_link_errors) == 0
and coverage_passed
and len(mdx_errors) == 0
and len(link_errors) == 0
and len(anchor_errors) == 0
# rst_docstrings is a warning only — does not fail the build
),
}

Expand All @@ -320,6 +364,12 @@ def main():
parser.add_argument(
"--skip-coverage", action="store_true", help="Skip coverage validation"
)
parser.add_argument(
"--source-dir",
type=Path,
default=None,
help="Python source root to scan for RST double-backtick notation (e.g. mellea/)",
)
args = parser.parse_args()

docs_dir = Path(args.docs_dir)
Expand Down Expand Up @@ -356,6 +406,11 @@ def main():
print("Checking anchor collisions...")
_, anchor_errors = validate_anchor_collisions(docs_dir)

rst_docstring_errors: list[str] = []
if args.source_dir:
print("Checking source docstrings for RST double-backtick notation...")
_, rst_docstring_errors = validate_rst_docstrings(args.source_dir)

# Generate report
report = generate_report(
source_link_errors,
Expand All @@ -364,6 +419,7 @@ def main():
mdx_errors,
link_errors,
anchor_errors,
rst_docstring_errors,
)

# Print results
Expand Down Expand Up @@ -398,14 +454,27 @@ def main():
if not report["anchor_collisions"]["passed"]:
print(f" {report['anchor_collisions']['error_count']} errors found")

if args.source_dir:
print(
f"✅ RST docstrings: {'PASS' if report['rst_docstrings']['passed'] else 'FAIL'}"
)
if not report["rst_docstrings"]["passed"]:
print(f" {report['rst_docstrings']['error_count']} occurrences found")

print("\n" + "=" * 60)
print(f"Overall: {'✅ PASS' if report['overall_passed'] else '❌ FAIL'}")
print("=" * 60)

# Print detailed errors
if not report["overall_passed"]:
print("\nDetailed Errors:")
for error in source_link_errors + mdx_errors + link_errors + anchor_errors:
for error in (
source_link_errors
+ mdx_errors
+ link_errors
+ anchor_errors
+ rst_docstring_errors
):
print(f" • {error}")

# Save report
Expand Down
Loading