diff --git a/.github/workflows/live-test.yml b/.github/workflows/live-test.yml new file mode 100644 index 0000000..26de626 --- /dev/null +++ b/.github/workflows/live-test.yml @@ -0,0 +1,39 @@ +name: Live Test + +on: + push: + branches: [main, develop] + pull_request: + branches: [main, develop] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.9" + + - name: Install + run: pip install -r tests/requirements.txt + + - name: Lint + run: ruff check tests/ + + - name: Test + if: ${{ secrets.CRIBL_CLIENT_ID != '' }} + env: + CRIBL_CLIENT_ID: ${{ secrets.CRIBL_CLIENT_ID }} + CRIBL_CLIENT_SECRET: ${{ secrets.CRIBL_CLIENT_SECRET }} + CRIBL_TENANT: ${{ secrets.CRIBL_TENANT }} + CRIBL_WORKSPACE: ${{ secrets.CRIBL_WORKSPACE }} + run: pytest tests/ -m live -vv + + - uses: actions/upload-artifact@v4 + if: always() + with: + name: test-artifacts + path: tests/artifacts/ + if-no-files-found: ignore diff --git a/.gitignore b/.gitignore index ba1870c..3ad9bc8 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ bin/packages/* !bin/packages/README goatsearch.py.exp goatsearch.py.bak +tests/artifacts/ +.env diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..82a1240 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,146 @@ +# GoatSearch: AI Code Guidelines + +GoatSearch is a Splunk custom command for Cribl Search integration. This +document defines mandatory standards for all AI-generated code contributions. + +## Code Understanding Mandate + +**Non-negotiable**: Every line of code must be justifiable by human +reasoning. AI-generated code lacking clear human purpose will be rejected at +review. 
+ +- No "because the AI suggested it" rationales +- Each function must have explicit business purpose +- Complex logic requires explanation comments +- If Jacob cannot articulate the purpose, it fails review + +## Performance Requirements + +### Per-Job Impact + +- No net increase in search execution time between main/develop without + documented justification +- Memory consumption must remain bounded regardless of result set size +- CPU usage per job must scale linearly or better with event count + +### Per-Event Impact + +- Understand the cost per event processed +- Example: hash computation adds X ms per event - document and justify +- Cumulative effect: 1M events × 1ms overhead = 1000s added time + +### Validation + +- Benchmark before/after on representative data +- Use `_time` and processing metadata to measure actual impact +- Report findings in commit message or code comment + +## Testing Criteria for AI Code + +### Holistic 1:1 Matching + +- Results from Cribl must exactly match results via GoatSearch +- No unjustified differences in event ordering, filtering, or field + transformation +- Deviation must be explained (e.g., "removed duplicates per business logic") + +### Performance Validation + +- No unjustifiable performance degradation vs. direct Cribl API calls +- Acceptable overhead: credential retrieval + OAuth token acquisition +- Unacceptable overhead: inefficient pagination, redundant API calls + +### Test Execution + +- Live E2E tests use real Cribl API (credentials required) +- Tests detect pagination bugs via row_number() sequencing +- Artifacts capture analysis for review + +### Test Integrity + +**Tests must fail honestly.** When a test fails, the correct response is to +investigate and fix the root cause—never to bypass, suppress, or mark tests +as expected failures. 
+ +Prohibited practices: + +- Using `xfail`, `skip`, or similar to hide legitimate failures +- Adjusting assertions to match broken behavior +- Disabling tests that expose real bugs +- Adding logic to ignore known issues + +If a test fails, either: + +1. Fix the underlying bug in the code +2. Fix the test if it contains an error +3. Document the investigation if the cause is unclear + +Failing CI is the correct outcome when code has defects. + +## Environment Requirements + +### Python Version + +Splunk embeds Python 3.9. All code must be compatible: + +- No walrus operators (`:=` - use explicit assignments) +- No 3.10+ features (match statements, etc.) +- Test with Python 3.9 first + +## Gitflow Workflow + +VisiCore repositories use gitflow: + +- **main**: Release-ready code, protected branch +- **develop**: Integration branch for features/fixes +- Feature branches: `feature/*` from develop +- Release branches: `release/*` from develop +- Hotfix branches: `hotfix/*` from main + +All development work on feature/fix branches before PR to develop or main. + +## Test Infrastructure + +### Live E2E Tests + +Located in `tests/test_e2e.py`, executed on main/develop pushes: + +- Requires credentials: `CRIBL_CLIENT_ID`, `CRIBL_CLIENT_SECRET`, + `CRIBL_TENANT`, `CRIBL_WORKSPACE` +- Fetches ~10k events to validate pagination and uniqueness +- Saves analysis artifacts for review + +### Test Markers + +- `@pytest.mark.live` - Real Cribl API calls (credentials required) +- `@pytest.mark.timeout(300)` - 5-minute maximum per test + +### Mocking Strategy + +Splunk objects are stubbed via `tests/conftest.py`: + +- `MockService` - storage_passwords, kvstore, users +- `MockMetadata` - search time ranges +- `MockRecordWriter` - output messages + +No HTTP calls are mocked—Cribl API interaction is live. + +## Known Issues + +### Duplicate Events at Scale + +Live testing at 10k+ events reveals pagination bugs manifesting as ~30% +duplicates. 
Detection method: sequential row IDs via `row_number(1)` in +queries. Same row_id appearing twice indicates repeated API offset requests. + +## Review Criteria Checklist + +Before committing AI-generated code, verify: + +- [ ] Every function has documented business purpose +- [ ] No performance increase without justification +- [ ] Per-job and per-event costs understood and documented +- [ ] E2E test passes with 1:1 Cribl↔Splunk matching +- [ ] No unjustified performance degradation +- [ ] Python 3.9 compatible (no 3.10+ features) +- [ ] Test coverage for new functionality diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..5fbf317 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,166 @@ +"""Minimal Splunk stubs for testing GoatSearch outside Splunk.""" +import os +import sys +import time +from pathlib import Path + +import pytest + +# Add GoatSearch source to path +sys.path.insert(0, str(Path(__file__).parent.parent / "lib")) +sys.path.insert(0, str(Path(__file__).parent.parent / "bin")) + + +class MockPassword: + """Stub for password object returned by storage_passwords.list().""" + + def __init__(self, name, secret): + self.name = name + self._secret = secret + + def __getitem__(self, key): + if key == "clear_password": + return self._secret + raise KeyError(key) + + +class MockPasswordCollection: + """Stub for storage_passwords collection.""" + + def __init__(self, passwords_dict): + self._passwords = { + name: MockPassword(name, secret) for name, secret in passwords_dict.items() + } + + def list(self): + return list(self._passwords.values()) + + +class MockKVStoreData: + """Stub for kvstore query results.""" + + def __init__(self, data): + self._data = data + + def query(self, query=None): + return self._data + + +class MockKVStoreCollection: + """Stub for kvstore collection access.""" + + def __init__(self, data): + self.data = MockKVStoreData(data) + + +class MockKVStore: + """Stub for kvstore access by collection name.""" + 
+ def __init__(self, data_by_collection): + self._data = data_by_collection + + def __getitem__(self, collection_name): + if collection_name not in self._data: + self._data[collection_name] = [] + return MockKVStoreCollection(self._data[collection_name]) + + +class MockUser: + """Stub for user object.""" + + def __init__(self, capabilities): + self._capabilities = capabilities + + def __getitem__(self, key): + if key == "capabilities": + return self._capabilities + raise KeyError(key) + + +class MockUsersCollection: + """Stub for users collection.""" + + def __init__(self, user_capabilities): + self._user = MockUser(user_capabilities) + + def __getitem__(self, username): + return self._user + + +class MockService: + """Stub Splunk service with credentials from environment.""" + + def __init__(self, passwords_dict=None, kvstore_data=None, user_capabilities=None): + self.storage_passwords = MockPasswordCollection(passwords_dict or {}) + self.kvstore = MockKVStore(kvstore_data or {}) + self.users = MockUsersCollection(user_capabilities or []) + + +@pytest.fixture(scope="session") +def live_credentials(): + """Load credentials from environment. 
Skip if not set.""" + creds = { + "client_id": os.environ.get("CRIBL_CLIENT_ID", ""), + "client_secret": os.environ.get("CRIBL_CLIENT_SECRET", ""), + "tenant": os.environ.get("CRIBL_TENANT", ""), + "workspace": os.environ.get("CRIBL_WORKSPACE", "main"), + } + if not all([creds["client_id"], creds["client_secret"], creds["tenant"]]): + pytest.skip("Cribl credentials not configured") + return creds + + +@pytest.fixture +def goatsearch_cmd(live_credentials): + """Create goatsearch command configured for live API.""" + from goatsearch import goatsearch + + # Setup mock Splunk service with credentials + password_key = f"{live_credentials['tenant']}:{live_credentials['client_id']}:" + mock_service = MockService( + passwords_dict={password_key: live_credentials["client_secret"]}, + kvstore_data={ + "goatsearch_env_kv": [ + { + "clientId": live_credentials["client_id"], + "tenant": live_credentials["tenant"], + "workspace": live_credentials["workspace"], + "default": 1, + } + ] + }, + user_capabilities=["goatsearch_user"], + ) + + cmd = goatsearch() + cmd._service = mock_service + + # Mock metadata + cmd._metadata = type("M", (), { + "searchinfo": type("S", (), { + "earliest_time": "0", + "latest_time": str(int(time.time())), + "username": "admin", + })() + })() + + # Mock record writer + cmd._record_writer = type("W", (), { + "_inspector": {"messages": []}, + "messages": [], + "flush": lambda s, finished=False: None, + "write_message": lambda s, level, msg, *args: None, + })() + + # Set command defaults + cmd.tenant = None + cmd.workspace = None + cmd.sample = None + cmd.page = 1000 + cmd.debug = False + cmd.earliest = "-24h" + cmd.latest = "now" + cmd.sid = None + cmd.retry = 3 + + return cmd diff --git a/tests/pytest.ini b/tests/pytest.ini new file mode 100644 index 0000000..9ecce03 --- /dev/null +++ b/tests/pytest.ini @@ -0,0 +1,8 @@ +[pytest] +testpaths = . 
+markers = + live: Live Cribl API tests (requires credentials) +timeout = 300 +addopts = -v --tb=short +filterwarnings = + ignore::DeprecationWarning diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 0000000..43e390e --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,4 @@ +pytest>=7.0.0 +pytest-timeout>=2.2.0 +requests>=2.28.0 +ruff>=0.5.0 diff --git a/tests/test_e2e.py b/tests/test_e2e.py new file mode 100644 index 0000000..a3ee637 --- /dev/null +++ b/tests/test_e2e.py @@ -0,0 +1,51 @@ +"""Live E2E test for GoatSearch against Cribl API.""" +import json +from pathlib import Path + +import pytest + +QUERY = """dataset=default_logs channel="output:default_logs" +| limit 10000 +| extend row_id = row_number(1) +| project row_id, _time, source""" + + +@pytest.mark.live +@pytest.mark.timeout(300) +def test_e2e_10k_events(goatsearch_cmd): + """Full workflow: auth -> search -> paginate -> validate uniqueness.""" + goatsearch_cmd.query = QUERY + + # Auth and setup + goatsearch_cmd.prepare() + assert goatsearch_cmd.can_run, "Auth failed" + assert goatsearch_cmd.access_token, "No token" + + # Execute search + events = list(goatsearch_cmd.generate()) + data = [e for e in events if e.get("sourcetype") != "goatsearch:json"] + row_ids = [e.get("row_id") for e in data if e.get("row_id") is not None] + + assert len(data) == 10000, f"Expected 10000, got {len(data)}" + assert row_ids, "No row_id values returned" + + # Check for duplicates (same row_id = pagination bug) + unique = set(row_ids) + dupes = len(row_ids) - len(unique) + + # Save artifact + artifact_dir = Path(__file__).parent / "artifacts" + artifact_dir.mkdir(exist_ok=True) + (artifact_dir / "analysis.json").write_text( + json.dumps( + { + "total": len(data), + "unique": len(unique), + "duplicates": dupes, + "rate": f"{dupes / len(row_ids) * 100:.1f}%" if row_ids else "N/A", + }, + indent=2, + ) + ) + + assert dupes == 0, f"Found {dupes} duplicates ({dupes / len(row_ids) * 
100:.1f}%)"