diff --git a/cmd/obol/sell.go b/cmd/obol/sell.go index e2d6ef9d..ba3d34c9 100644 --- a/cmd/obol/sell.go +++ b/cmd/obol/sell.go @@ -384,24 +384,14 @@ Examples: if err != nil { return fmt.Errorf("load provenance: %w", err) } - provMap := map[string]interface{}{} - if prov.Framework != "" { - provMap["framework"] = prov.Framework - } - if prov.MetricName != "" { - provMap["metricName"] = prov.MetricName - } - if prov.MetricValue != "" { - provMap["metricValue"] = prov.MetricValue - } - if prov.ExperimentID != "" { - provMap["experimentId"] = prov.ExperimentID - } - if prov.TrainHash != "" { - provMap["trainHash"] = prov.TrainHash + // Round-trip through JSON to build the map, respecting omitempty tags. + provBytes, err := json.Marshal(prov) + if err != nil { + return fmt.Errorf("marshal provenance: %w", err) } - if prov.ParamCount != "" { - provMap["paramCount"] = prov.ParamCount + var provMap map[string]interface{} + if err := json.Unmarshal(provBytes, &provMap); err != nil { + return fmt.Errorf("unmarshal provenance: %w", err) } spec["provenance"] = provMap fmt.Printf("Loaded provenance: %s (metric %s=%s, params %s)\n", diff --git a/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml b/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml index af92693d..d01abf1e 100644 --- a/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml +++ b/internal/embed/infrastructure/base/templates/serviceoffer-crd.yaml @@ -167,7 +167,6 @@ spec: metricName: type: string description: "Name of the primary quality metric (e.g. val_bpb)." - default: "val_bpb" metricValue: type: string description: "Primary quality metric value (e.g. 0.9973)." diff --git a/internal/embed/skills/autoresearch-coordinator/references/coordination-protocol.md b/internal/embed/skills/autoresearch-coordinator/references/coordination-protocol.md index 063f8881..a766a06a 100644 --- a/internal/embed/skills/autoresearch-coordinator/references/coordination-protocol.md +++ b/internal/embed/skills/autoresearch-coordinator/references/coordination-protocol.md @@ -149,8 +149,8 @@ Workers publish their best results in the `.well-known/agent-registration.json` | Field | Type | Description | |---|---|---| -| `metadata.best_val_bpb` | float | Best validation bits-per-byte achieved | -| `metadata.total_experiments` | int | Total experiments processed by this worker | +| `metadata.best_val_bpb` | string | Best validation bits-per-byte achieved (stringified float) | +| `metadata.total_experiments` | string | Total experiments processed by this worker (stringified int) | | `metadata.updated` | string | ISO 8601 timestamp of last result update | ### Optional Fields diff --git a/internal/embed/skills/autoresearch-coordinator/scripts/coordinate.py b/internal/embed/skills/autoresearch-coordinator/scripts/coordinate.py index 5f4bc716..1ac11d15 100644 --- a/internal/embed/skills/autoresearch-coordinator/scripts/coordinate.py +++ b/internal/embed/skills/autoresearch-coordinator/scripts/coordinate.py @@ -571,10 +571,16 @@ def submit_experiment(self, endpoint, train_py_source, config=None): print(" Pricing missing payTo or amount", file=sys.stderr) return None - print(f" Price: {amount} USDC micro-units to {pay_to}") + try: + amount_int = int(amount) + except (ValueError, TypeError): + print(f" Non-integer amount '{amount}', cannot sign payment", file=sys.stderr) + return None + + print(f" Price: {amount_int} USDC micro-units to {pay_to}") # Step 3: Sign ERC-3009 authorization - signed_auth = sign_erc3009_auth(pay_to, int(amount), self.chain) + signed_auth = sign_erc3009_auth(pay_to, amount_int, self.chain) if not signed_auth: print(" Failed to sign payment authorization", file=sys.stderr) return None diff --git a/internal/embed/skills/autoresearch-worker/docker/Dockerfile b/internal/embed/skills/autoresearch-worker/docker/Dockerfile index 3df8ff2d..aaba86c2 100644 --- a/internal/embed/skills/autoresearch-worker/docker/Dockerfile +++ b/internal/embed/skills/autoresearch-worker/docker/Dockerfile @@ -26,6 +26,9 @@ ENV DATA_DIR=/data \ EXPERIMENT_TIMEOUT_SECONDS=300 \ TRAIN_COMMAND="uv run train.py" +RUN useradd -m -s /bin/bash worker && mkdir -p /data && chown worker:worker /data + +USER worker VOLUME ["/data"] EXPOSE 8080 diff --git a/internal/embed/skills/autoresearch-worker/scripts/worker_api.py b/internal/embed/skills/autoresearch-worker/scripts/worker_api.py index 619b877c..1b4d953c 100644 --- a/internal/embed/skills/autoresearch-worker/scripts/worker_api.py +++ b/internal/embed/skills/autoresearch-worker/scripts/worker_api.py @@ -367,6 +367,9 @@ def do_GET(self) -> None: return if path.startswith("/experiments/"): exp_id = path.rsplit("/", 1)[-1] + if not re.fullmatch(r'[a-zA-Z0-9_-]+', exp_id): + self._json(400, {"error": "invalid experiment id"}) + return result_path = self.state.results_dir / exp_id / "result.json" if not result_path.exists(): self._json(404, {"error": f"experiment {exp_id} not found"}) @@ -413,6 +416,9 @@ def do_POST(self) -> None: if experiment_id is not None and not isinstance(experiment_id, str): self._json(400, {"error": "experiment_id must be a string when provided"}) return + if experiment_id is not None and not re.fullmatch(r'[a-zA-Z0-9_-]+', experiment_id): + self._json(400, {"error": "experiment_id must contain only alphanumeric characters, hyphens, or underscores"}) + return try: result = self.state.run_experiment(train_py, config_overrides, experiment_id) diff --git a/internal/embed/skills/autoresearch/SKILL.md b/internal/embed/skills/autoresearch/SKILL.md index 33815f6c..d917aaba 100644 --- a/internal/embed/skills/autoresearch/SKILL.md +++ b/internal/embed/skills/autoresearch/SKILL.md @@ -118,6 +118,7 @@ publish.py - **Python stdlib + uv** — no pip install; uv for environment management - **5-minute time budget** — each experiment must complete within 5 minutes - **GPU required** — training runs on local GPU (Ollama must have GPU access) +- **GGUF checkpoint required** — Ollama only accepts GGUF format; convert other formats (`.pt`, `.safetensors`) with `llama.cpp/convert_hf_to_gguf.py` - **Git repo required** — autoresearch directory must be a git repository for commit tracking - **results.tsv format** — tab-separated: `commit_hash`, `val_bpb`, `status`, `description` diff --git a/internal/embed/skills/autoresearch/scripts/publish.py b/internal/embed/skills/autoresearch/scripts/publish.py index 47e45382..799f597b 100755 --- a/internal/embed/skills/autoresearch/scripts/publish.py +++ b/internal/embed/skills/autoresearch/scripts/publish.py @@ -269,6 +269,13 @@ def main() -> None: ) print(f"Using checkpoint: {checkpoint}") + if not str(checkpoint).endswith(".gguf"): + die( + f"checkpoint {checkpoint.name} is not in GGUF format. " + "Ollama requires GGUF files. Convert with:\n" + " python llama.cpp/convert_hf_to_gguf.py --outfile model.gguf" + ) + model_name = args.name or f"autoresearch-{commit[:8]}" modelfile_path = create_modelfile(checkpoint, workdir) ollama_create(model_name, modelfile_path) diff --git a/internal/embed/skills/sell/scripts/monetize.py b/internal/embed/skills/sell/scripts/monetize.py index f893e054..28e0a96c 100644 --- a/internal/embed/skills/sell/scripts/monetize.py +++ b/internal/embed/skills/sell/scripts/monetize.py @@ -825,7 +825,10 @@ def stage_route_published(spec, ns, name, token, ssl_ctx): # GPU workers may need 300s+ for experiments; Traefik's default is 30s. # Add 120s overhead for facilitator verification + network latency. payment = spec.get("payment", {}) - max_timeout = int(payment.get("maxTimeoutSeconds", 0) or 0) + try: + max_timeout = int(payment.get("maxTimeoutSeconds", 0) or 0) + except (ValueError, TypeError): + max_timeout = 0 route_timeout_seconds = max(max_timeout + 120, 60) if max_timeout > 30 else 0 # Build the HTTPRoute resource. diff --git a/internal/inference/store_test.go b/internal/inference/store_test.go index 293d95fb..9771c657 100644 --- a/internal/inference/store_test.go +++ b/internal/inference/store_test.go @@ -106,15 +106,24 @@ func TestStoreCreate_PersistsCanonicalProvenance(t *testing.T) { if got.Provenance == nil { t.Fatal("Provenance should be persisted") } + if got.Provenance.Framework != "autoresearch" { + t.Errorf("Framework = %q, want %q", got.Provenance.Framework, "autoresearch") + } if got.Provenance.MetricName != "val_bpb" { t.Errorf("MetricName = %q, want %q", got.Provenance.MetricName, "val_bpb") } if got.Provenance.MetricValue != "0.9973" { t.Errorf("MetricValue = %q, want %q", got.Provenance.MetricValue, "0.9973") } + if got.Provenance.ExperimentID != "abc123" { + t.Errorf("ExperimentID = %q, want %q", got.Provenance.ExperimentID, "abc123") + } if got.Provenance.TrainHash != "sha256:deadbeef" { t.Errorf("TrainHash = %q, want %q", got.Provenance.TrainHash, "sha256:deadbeef") } + if got.Provenance.ParamCount != "50000000" { + t.Errorf("ParamCount = %q, want %q", got.Provenance.ParamCount, "50000000") + } } func TestStoreCreateDuplicate(t *testing.T) { diff --git a/internal/schemas/payment_test.go b/internal/schemas/payment_test.go index 7fcee0c8..39e72dd1 100644 --- a/internal/schemas/payment_test.go +++ b/internal/schemas/payment_test.go @@ -30,14 +30,22 @@ func TestEffectiveRequestPrice_PerHour(t *testing.T) { } func TestApproximateRequestPriceFromPerHour(t *testing.T) { - // 0.50 USDC/hour * (5/60) = 0.04166... ≈ "0.0416666666666667" + // 0.50 USDC/hour * (5/60) = 0.04166... got, err := ApproximateRequestPriceFromPerHour("0.50") if err != nil { t.Fatalf("ApproximateRequestPriceFromPerHour() error = %v", err) } - // Verify it's approximately right (5/60 * 0.50 ≈ 0.0417) - if got == "" || got == "0" || got == "0.50" { - t.Errorf("ApproximateRequestPriceFromPerHour(0.50) = %q, expected approximated value", got) + // 6.00 USDC/hour gives exactly 0.5, so use that for an exact check too. + got6, err := ApproximateRequestPriceFromPerHour("6.00") + if err != nil { + t.Fatalf("ApproximateRequestPriceFromPerHour(6.00) error = %v", err) + } + if got6 != "0.5" { + t.Errorf("ApproximateRequestPriceFromPerHour(6.00) = %q, want %q", got6, "0.5") + } + // 0.50 * 5/60 should start with "0.0416" + if len(got) < 6 || got[:6] != "0.0416" { + t.Errorf("ApproximateRequestPriceFromPerHour(0.50) = %q, expected value starting with 0.0416", got) } } diff --git a/tests/test_sell_registration_metadata.py b/tests/test_sell_registration_metadata.py index 1e8d1d01..cbb316b8 100644 --- a/tests/test_sell_registration_metadata.py +++ b/tests/test_sell_registration_metadata.py @@ -43,6 +43,32 @@ def test_build_registration_doc_includes_custom_metadata(self): self.assertEqual(doc["metadata"]["best_val_bpb"], "1.234") self.assertTrue(any(s.get("name") == "OASF" for s in doc["services"])) + def test_build_registration_doc_includes_provenance(self): + mod = load_monetize_module() + spec = { + "type": "inference", + "path": "/services/my-model", + "payment": {"price": {"perRequest": "0.001"}}, + "provenance": { + "framework": "autoresearch", + "metricName": "val_bpb", + "metricValue": "0.9973", + "experimentId": "abc123", + "trainHash": "sha256:deadbeef", + "paramCount": "50000000", + }, + "registration": { + "name": "My Model", + }, + } + + doc = mod.build_registration_doc(spec, "my-model", "99", "http://obol.stack:8080") + self.assertIn("provenance", doc) + self.assertEqual(doc["provenance"]["framework"], "autoresearch") + self.assertEqual(doc["provenance"]["metricValue"], "0.9973") + self.assertEqual(doc["provenance"]["trainHash"], "sha256:deadbeef") + self.assertEqual(doc["provenance"]["paramCount"], "50000000") + def test_build_indexed_metadata_includes_registration_metadata(self): mod = load_monetize_module() spec = {