Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 29 additions & 18 deletions .github/workflows/mkdocs-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,6 @@ jobs:
with:
globs: '**/*.md'

- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('java/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-

- name: Set up JDK 8
uses: actions/setup-java@v5
with:
java-version: "8"
distribution: "adopt"

- name: Build javadoc documentation
working-directory: hopsworks-api/java
run: mvn clean install javadoc:javadoc javadoc:aggregate -DskipTests && cp -r target/site/apidocs ../../docs/javadoc

- uses: actions/setup-python@v5
with:
python-version: "3.10"
Expand All @@ -52,6 +34,14 @@ jobs:
activate-environment: true
working-directory: hopsworks-api/python

- name: Snakeoil (Python code blocks in markdown)
run: |
uv tool install md-snakeoil
snakeoil --line-length 88 --rules "E,F,B,C4,ISC,PIE,PYI,Q,RSE,RET,SIM,TC,I,W,D2,D3,D4,INP,UP,FA" docs
# Remove newlines added at the end of code blocks by snakeoil:
python3 -c 'import re,pathlib;sn=["python","py","Python","python3","py3"];inf="|".join(sn)+"| "+"| ".join(sn);p=rf"([ \t]*)(\`{{3}}(?:{inf})(?:[^\n]*)\n)(.*?)([ \t]*\`{{3}})";[f.write_text(re.sub(p,lambda m:m.group(1)+m.group(2)+(m.group(3)[:-1] if m.group(3).endswith("\n\n") else m.group(3))+m.group(4),f.read_text(),flags=re.DOTALL)) for f in pathlib.Path("docs").rglob("*.md")]'
git diff --exit-code

- name: Install Python API dependencies
run: uv sync --extra dev --project hopsworks-api/python

Expand All @@ -61,6 +51,27 @@ jobs:
- name: Install Ubuntu dependencies
run: sudo apt update && sudo apt-get install -y libxml2-dev libxslt-dev

- name: Check mkdocs warnings
run: touch docs/javadoc; mkdocs build -s; rm docs/javadoc

- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('java/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-

- name: Set up JDK 8
uses: actions/setup-java@v5
with:
java-version: "8"
distribution: "adopt"

- name: Build javadoc documentation
working-directory: hopsworks-api/java
run: mvn clean install javadoc:javadoc javadoc:aggregate -DskipTests && cp -r target/site/apidocs ../../docs/javadoc

- name: Check for broken links
run: |
# run the server
Expand Down
169 changes: 169 additions & 0 deletions build_great_expectations_inv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# The file is generated by Claude Code

"""Build docs/great_expectations.inv for GX 0.18.

GX 0.18 uses Docusaurus, not Sphinx, so no objects.inv is published.
Internally they still run sphinx-build as an intermediate step (to generate
MDX from HTML), but delete the output including objects.inv.

This script reconstructs it by:
1. Cloning the 0.18.x branch (if not already present at GX_CLONE_PATH)
2. Generating the MD autodoc stubs using GX's own build logic, capturing
the sidebar_entries mapping: Sphinx HTML stem -> Docusaurus URL path
3. Running sphinx-build to get a properly typed Sphinx objects.inv
4. Remapping each entry's URI from the Sphinx HTML path to the Docusaurus URL
5. Writing the result to docs/great_expectations.inv

Usage:
uv run --python 3.11 --with 'great_expectations==0.18.22' \
--with 'sphinx~=5.3.0' --with 'pydata-sphinx-theme==0.11.0' \
--with 'myst-parser' --with 'docstring-parser==0.15' \
--with 'sphobjinv' --with 'invoke' --with 'beautifulsoup4' \
build_great_expectations_inv.py

Or, from a pre-created virtual environment that already has the packages above installed:
python build_great_expectations_inv.py
"""

from __future__ import annotations

import pathlib
import subprocess
import sys
import tempfile

# --- Configuration ---

# Directory containing this script; the output path is resolved against it.
REPO_ROOT = pathlib.Path(__file__).parent
# Destination of the rebuilt inventory.
OUT_INV = REPO_ROOT.joinpath("docs", "great_expectations.inv")

# Location of the GX 0.18.x checkout inside the system temp directory.
GX_CLONE_PATH = pathlib.Path(tempfile.gettempdir(), "gx_0_18")

# Executable used to invoke Sphinx; a bare name is looked up on PATH.
SPHINX_BUILD = "sphinx-build"

# Base URL of the published Docusaurus API docs for 0.18.
DOCUSAURUS_BASE = "https://docs.greatexpectations.io/docs/0.18/reference/api/"


# --- Step 1: Ensure GX 0.18.x source is available ---

# Reuse a checkout left behind by an earlier run; otherwise fetch a shallow
# copy of the 0.18.x branch.
if GX_CLONE_PATH.exists():
    print(f"Using existing clone at {GX_CLONE_PATH}")
else:
    print(f"Cloning GX 0.18.x into {GX_CLONE_PATH} ...")
    clone_cmd = [
        "git",
        "clone",
        "--depth",
        "1",
        "--branch",
        "0.18.x",
        "https://github.com/great-expectations/great_expectations.git",
        str(GX_CLONE_PATH),
    ]
    # check=True aborts the script if git exits with a non-zero status.
    subprocess.run(clone_cmd, check=True)

# Put the checkout first on the import path so that the GX build helpers
# imported later in this script are loaded from the clone.
sys.path.insert(0, str(GX_CLONE_PATH))

# --- Step 2: Generate stubs and capture sidebar_entries ---

from docs.sphinx_api_docs_source.build_sphinx_api_docs import ( # noqa: E402
SphinxInvokeDocsBuilder,
SidebarEntryType,
)

import invoke # noqa: E402

api_source = GX_CLONE_PATH / "docs" / "sphinx_api_docs_source"
ctx = invoke.Context()
builder = SphinxInvokeDocsBuilder(
    ctx=ctx,
    api_docs_source_path=api_source,
    repo_root=GX_CLONE_PATH,
)

# Scratch directory for the sphinx-build output. It is named before the
# ``try`` below so the ``finally`` clause can always refer to it.
sphinx_out = GX_CLONE_PATH / "temp_inv_build"

# Imported up front so a missing dependency is reported before anything is
# written into the clone.
import shutil  # noqa: E402

import sphobjinv as soi  # noqa: E402

# The steps below write into the clone (the sphinx-build output, and
# presumably the generated stubs), and the clone is reused by later runs.
# The ``finally`` clause undoes those changes even when a step fails, for
# example when sphinx-build exits with a non-zero status.
try:
    print("Generating autodoc stubs ...")
    builder._build_class_md_stubs()
    builder._build_module_md_stubs()

    # Build the mapping: Sphinx HTML stem -> (py_domain_type, docusaurus_relative_url)
    stem_to_info: dict[str, tuple[str, str]] = {}

    for name, entry in builder.sidebar_entries.items():
        # as_posix() keeps forward slashes in the URL on every platform;
        # str() would use the OS path separator instead.
        doc_url = entry.mdx_relpath.with_suffix("").as_posix()
        if entry.type == SidebarEntryType.CLASS:
            stem_to_info[name] = ("py:class", doc_url)
        else:
            # Module: key is the flat path string, stem is the md_relpath stem
            stem = entry.md_relpath.stem
            stem_to_info[stem] = ("py:module", doc_url)

    print(f" {len(stem_to_info)} sidebar entries captured")

    # --- Step 3: Run sphinx-build ---

    sphinx_out.mkdir(exist_ok=True)

    print("Running sphinx-build ...")
    # check=True raises CalledProcessError when sphinx-build fails.
    subprocess.run(
        [SPHINX_BUILD, "-M", "html", str(api_source), str(sphinx_out), "-E", "-q"],
        check=True,
    )
    inv_path = sphinx_out / "html" / "objects.inv"
    print(f" Sphinx objects.inv: {inv_path.stat().st_size} bytes")

    # --- Step 4: Remap entries ---

    sphinx_inv = soi.Inventory(str(inv_path))
    print(f" Sphinx inventory: {len(sphinx_inv.objects)} objects")

    remapped: list[soi.DataObjStr] = []
    skipped = 0

    for obj in sphinx_inv.objects:
        uri_raw = obj.uri
        uri_path, _, fragment = uri_raw.partition("#")
        stem = pathlib.Path(uri_path).stem

        # Objects whose page has no sidebar entry are dropped and counted.
        if stem not in stem_to_info:
            skipped += 1
            continue

        _, doc_url = stem_to_info[stem]

        # "$" means "use the object name as the anchor"
        resolved_fragment = obj.name if fragment == "$" else fragment
        full_uri = f"{doc_url}#{resolved_fragment}" if resolved_fragment else doc_url

        remapped.append(
            soi.DataObjStr(
                name=obj.name,
                domain=obj.domain,
                role=obj.role,
                priority=str(obj.priority),
                uri=full_uri,
                dispname=obj.dispname or "-",
            )
        )

    print(f" Remapped: {len(remapped)}, skipped (index/search pages): {skipped}")

    # --- Step 5: Write inventory ---

    new_inv = soi.Inventory()
    new_inv.project = "great_expectations"
    new_inv.version = "0.18"
    new_inv.objects = remapped

    soi.writebytes(str(OUT_INV), soi.compress(new_inv.data_file()))
    print(f"Written {len(remapped)} entries to {OUT_INV}")
finally:
    # --- Cleanup ---
    # Nested so the scratch directory is removed even if stub removal raises.
    try:
        builder._remove_md_stubs()
    finally:
        shutil.rmtree(sphinx_out, ignore_errors=True)

print("Done.")
50 changes: 50 additions & 0 deletions build_polars_patch_inv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# The file is generated by Claude Code

"""Build docs/polars_patch.inv — a patch for polars' broken objects.inv.

The polars Sphinx inventory is missing py:class entries for all major classes
(DataFrame, LazyFrame, Series, Expr, Config, DataType). Their docs build
generates per-method pages and never emits the class-level entry.

This script constructs a small supplemental inventory with just those six
entries, each pointing to the relevant class overview page.

Usage:
uv run --with sphobjinv build_polars_patch_inv.py
"""

import pathlib
import sphobjinv as soi

# Destination of the supplemental inventory: docs/ next to this script.
OUT = pathlib.Path(__file__).parent.joinpath("docs", "polars_patch.inv")

# Fully-qualified class name -> page URL, relative to
# docs.pola.rs/api/python/stable/.
# A "$" anchor is the Sphinx inventory shorthand for "the anchor is the
# object name".
MISSING_CLASSES = {
    "polars.Config": "reference/config.html#$",
    "polars.DataFrame": "reference/dataframe/index.html#$",
    "polars.DataType": "reference/datatypes.html#$",
    "polars.Expr": "reference/expressions/index.html#$",
    "polars.LazyFrame": "reference/lazyframe/index.html#$",
    "polars.Series": "reference/series/index.html#$",
}

# Fields shared by every patched entry: each one is a py:class object with
# priority "1" and the default ("-") display name.
class_entry_fields = {
    "domain": "py",
    "role": "class",
    "priority": "1",
    "dispname": "-",
}

# One inventory object per missing class, in the order listed in MISSING_CLASSES.
objects = [
    soi.DataObjStr(name=qualname, uri=page_uri, **class_entry_fields)
    for qualname, page_uri in MISSING_CLASSES.items()
]

inv = soi.Inventory()
inv.project = "polars-patch"
inv.version = ""
inv.objects = objects

# Serialise the inventory, compress it, then write the bytes to OUT.
payload = soi.compress(inv.data_file())
soi.writebytes(str(OUT), payload)
print(f"Written {len(objects)} entries to {OUT}")
Loading