From 37a7274b62619f9e75979596e907a34df80eae65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= Date: Tue, 17 Mar 2026 00:36:10 +0000 Subject: [PATCH 01/11] Delete duplicate openclaw. stop tunnelling ui to public internet. make sale UI. needs frontend pr too. --- cmd/obol/main.go | 7 + cmd/obol/sell.go | 205 +++++++++++++- internal/agent/agent.go | 57 +--- .../templates/obol-agent-monetize-rbac.yaml | 12 +- internal/embed/infrastructure/helmfile.yaml | 6 + internal/inference/gateway.go | 11 +- internal/inference/store.go | 4 + .../openclaw/monetize_integration_test.go | 11 +- internal/openclaw/openclaw.go | 6 +- internal/stack/stack.go | 28 +- internal/tunnel/agent.go | 2 +- internal/tunnel/tunnel.go | 251 +++++++++++++++++- internal/x402/bdd_integration_test.go | 6 +- internal/x402/setup.go | 2 +- 14 files changed, 526 insertions(+), 82 deletions(-) diff --git a/cmd/obol/main.go b/cmd/obol/main.go index af92301a..e8d80385 100644 --- a/cmd/obol/main.go +++ b/cmd/obol/main.go @@ -278,6 +278,13 @@ GLOBAL OPTIONS:{{template "visibleFlagTemplate" .}}{{end}} return tunnel.Restart(cfg, getUI(cmd)) }, }, + { + Name: "stop", + Usage: "Stop the tunnel (scale cloudflared to 0 replicas)", + Action: func(ctx context.Context, cmd *cli.Command) error { + return tunnel.Stop(cfg, getUI(cmd)) + }, + }, { Name: "logs", Usage: "View cloudflared logs", diff --git a/cmd/obol/sell.go b/cmd/obol/sell.go index 1e99ebc6..54452d66 100644 --- a/cmd/obol/sell.go +++ b/cmd/obol/sell.go @@ -5,8 +5,10 @@ import ( "encoding/hex" "encoding/json" "fmt" + "net" "os" "os/signal" + "runtime" "strings" "syscall" @@ -210,6 +212,63 @@ Examples: return err } + // If a cluster is available, route through the cluster's x402 flow + // (tunnel → Traefik → x402-verifier → host gateway → Ollama). + // The gateway's built-in x402 is disabled to avoid double-gating. 
+ kubeconfigPath := fmt.Sprintf("%s/kubeconfig.yaml", cfg.ConfigDir) + clusterAvailable := false + if _, statErr := os.Stat(kubeconfigPath); statErr == nil { + clusterAvailable = true + } + + if clusterAvailable { + d.NoPaymentGate = true + + // Resolve the gateway port from the listen address. + listenAddr := d.ListenAddr + port := "8402" + if idx := strings.LastIndex(listenAddr, ":"); idx >= 0 { + port = listenAddr[idx+1:] + } + + // Create a K8s Service + Endpoints pointing to the host. + svcNs := "llm" // co-locate with LiteLLM for simplicity + if err := createHostService(cfg, name, svcNs, port); err != nil { + fmt.Printf("Warning: could not create cluster service: %v\n", err) + fmt.Println("Falling back to standalone mode with built-in x402 payment gate.") + d.NoPaymentGate = false + } else { + // Create a ServiceOffer CR pointing at the host service. + soSpec := buildInferenceServiceOfferSpec(d, priceTable, svcNs, port) + soManifest := map[string]interface{}{ + "apiVersion": "obol.org/v1alpha1", + "kind": "ServiceOffer", + "metadata": map[string]interface{}{ + "name": name, + "namespace": svcNs, + }, + "spec": soSpec, + } + if err := kubectlApply(cfg, soManifest); err != nil { + fmt.Printf("Warning: could not create ServiceOffer: %v\n", err) + d.NoPaymentGate = false + } else { + fmt.Printf("ServiceOffer %s/%s created (type: inference, routed via cluster)\n", svcNs, name) + + // Ensure tunnel is active. 
+ u := getUI(cmd) + u.Blank() + u.Info("Ensuring tunnel is active for public access...") + if tunnelURL, tErr := tunnel.EnsureTunnelForSell(cfg, u); tErr != nil { + u.Warnf("Tunnel not started: %v", tErr) + u.Dim(" Start manually with: obol tunnel restart") + } else { + u.Successf("Tunnel active: %s", tunnelURL) + } + } + } + } + return runInferenceGateway(d, chain) }, } @@ -397,6 +456,17 @@ Examples: } fmt.Printf("The agent will reconcile: health-check → payment gate → route\n") fmt.Printf("Check status: obol sell status %s -n %s\n", name, ns) + + // Ensure tunnel is active for public access. + u := getUI(cmd) + u.Blank() + u.Info("Ensuring tunnel is active for public access...") + if tunnelURL, err := tunnel.EnsureTunnelForSell(cfg, u); err != nil { + u.Warnf("Tunnel not started: %v", err) + u.Dim(" Start manually with: obol tunnel restart") + } else { + u.Successf("Tunnel active: %s", tunnelURL) + } return nil }, } @@ -621,7 +691,24 @@ func sellDeleteCommand(cfg *config.Config) *cli.Command { } } - return kubectlRun(cfg, "delete", "serviceoffers.obol.org", name, "-n", ns) + if err := kubectlRun(cfg, "delete", "serviceoffers.obol.org", name, "-n", ns); err != nil { + return err + } + + // Auto-stop quick tunnel when no ServiceOffers remain. + remaining, listErr := kubectlOutput(cfg, "get", "serviceoffers.obol.org", "-A", + "-o", "jsonpath={.items}") + if listErr == nil && (remaining == "[]" || strings.TrimSpace(remaining) == "") { + st, _ := tunnel.LoadTunnelState(cfg) + if st == nil || st.Mode != "dns" { + u := getUI(cmd) + u.Blank() + u.Info("No ServiceOffers remaining. 
Stopping quick tunnel.") + _ = tunnel.Stop(cfg, u) + _ = tunnel.DeleteStorefront(cfg) + } + } + return nil }, } } @@ -791,6 +878,7 @@ func runInferenceGateway(d *inference.Deployment, chain x402.ChainConfig) error VMHostPort: d.VMHostPort, TEEType: d.TEEType, ModelHash: d.ModelHash, + NoPaymentGate: d.NoPaymentGate, }) if err != nil { return fmt.Errorf("failed to create gateway: %w", err) @@ -1024,6 +1112,121 @@ func formatInferencePriceSummary(d *inference.Deployment) string { return fmt.Sprintf("%s USDC/request", d.PricePerRequest) } +// createHostService creates a headless Service + Endpoints in the cluster +// pointing to the Docker host IP on the given port, so that the cluster can +// route traffic to a host-side inference gateway. +// +// Kubernetes Endpoints require an IP address, not a hostname. We resolve the +// host IP using the same strategy as ollamaHostIPForBackend in internal/stack. +func createHostService(cfg *config.Config, name, ns, port string) error { + hostIP, err := resolveHostIP() + if err != nil { + return fmt.Errorf("cannot resolve host IP for cluster routing: %w", err) + } + + svc := map[string]interface{}{ + "apiVersion": "v1", + "kind": "Service", + "metadata": map[string]interface{}{ + "name": name, + "namespace": ns, + }, + "spec": map[string]interface{}{ + "ports": []map[string]interface{}{ + {"port": 8402, "targetPort": 8402, "protocol": "TCP"}, + }, + }, + } + ep := map[string]interface{}{ + "apiVersion": "v1", + "kind": "Endpoints", + "metadata": map[string]interface{}{ + "name": name, + "namespace": ns, + }, + "subsets": []map[string]interface{}{ + { + "addresses": []map[string]interface{}{ + {"ip": hostIP}, + }, + "ports": []map[string]interface{}{ + {"port": 8402, "protocol": "TCP"}, + }, + }, + }, + } + + if err := kubectlApply(cfg, svc); err != nil { + return fmt.Errorf("failed to create service: %w", err) + } + if err := kubectlApply(cfg, ep); err != nil { + return fmt.Errorf("failed to create endpoints: %w", err) + } + 
return nil +} + +// resolveHostIP returns the Docker host IP reachable from k3d containers. +// Same resolution strategy as stack.ollamaHostIPForBackend. +func resolveHostIP() (string, error) { + // Try DNS resolution of host.docker.internal (macOS) or host.k3d.internal (Linux). + for _, host := range []string{"host.docker.internal", "host.k3d.internal"} { + if addrs, err := net.LookupHost(host); err == nil && len(addrs) > 0 { + return addrs[0], nil + } + } + // macOS Docker Desktop fallback: well-known VM gateway. + if runtime.GOOS == "darwin" { + return "192.168.65.254", nil + } + // Linux fallback: docker0 bridge IP. + if iface, err := net.InterfaceByName("docker0"); err == nil { + if addrs, err := iface.Addrs(); err == nil { + for _, addr := range addrs { + if ipNet, ok := addr.(*net.IPNet); ok && ipNet.IP.To4() != nil { + return ipNet.IP.String(), nil + } + } + } + } + return "", fmt.Errorf("cannot determine Docker host IP; ensure Docker is running") +} + +// buildInferenceServiceOfferSpec builds a ServiceOffer spec for a host-side +// inference gateway routed through the cluster's x402 flow. 
+func buildInferenceServiceOfferSpec(d *inference.Deployment, pt schemas.PriceTable, ns, port string) map[string]interface{} { + spec := map[string]interface{}{ + "type": "inference", + "upstream": map[string]interface{}{ + "service": d.Name, + "namespace": ns, + "port": 8402, + "healthPath": "/health", + }, + "payment": map[string]interface{}{ + "scheme": "exact", + "network": d.Chain, + "payTo": d.WalletAddress, + "price": map[string]interface{}{}, + }, + "path": fmt.Sprintf("/services/%s", d.Name), + } + + price := spec["payment"].(map[string]interface{})["price"].(map[string]interface{}) + if pt.PerMTok != "" { + price["perMTok"] = pt.PerMTok + } else { + price["perRequest"] = d.PricePerRequest + } + + if d.UpstreamURL != "" { + spec["model"] = map[string]interface{}{ + "name": "ollama", + "runtime": "ollama", + } + } + return spec +} + // removePricingRoute removes the x402-verifier pricing route for the given offer. func removePricingRoute(cfg *config.Config, name string) { urlPath := fmt.Sprintf("/services/%s", name) diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 01d6ca0c..63f8ae94 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -8,49 +8,19 @@ import ( "github.com/ObolNetwork/obol-stack/internal/config" "github.com/ObolNetwork/obol-stack/internal/kubectl" - "github.com/ObolNetwork/obol-stack/internal/openclaw" "github.com/ObolNetwork/obol-stack/internal/ui" ) -const agentID = "obol-agent" +// DefaultInstanceID is the canonical OpenClaw instance that runs both +// user-facing inference and agent-mode monetize/heartbeat reconciliation. +const DefaultInstanceID = "default" -// Init sets up the singleton obol-agent OpenClaw instance. -// It enforces a single agent by using a fixed deployment ID. -// After onboarding, it patches the monetize RBAC bindings -// to grant the agent's ServiceAccount monetization permissions, -// and injects HEARTBEAT.md to drive periodic reconciliation. 
+// Init patches the default OpenClaw instance with agent capabilities: +// monetize RBAC bindings and HEARTBEAT.md for periodic reconciliation. +// The actual OpenClaw deployment is created by openclaw.SetupDefault() +// during `obol stack up`; Init() adds the agent superpowers on top. func Init(cfg *config.Config, u *ui.UI) error { - // Check if obol-agent already exists. - instances, err := openclaw.ListInstanceIDs(cfg) - if err != nil { - return fmt.Errorf("failed to list OpenClaw instances: %w", err) - } - - exists := false - for _, id := range instances { - if id == agentID { - exists = true - break - } - } - - opts := openclaw.OnboardOptions{ - ID: agentID, - Sync: true, - Interactive: true, - AgentMode: true, - } - - if exists { - u.Warn("obol-agent already exists, re-syncing...") - opts.Force = true - } - - if err := openclaw.Onboard(cfg, opts, u); err != nil { - return fmt.Errorf("failed to onboard obol-agent: %w", err) - } - - // Patch ClusterRoleBinding to add the agent's ServiceAccount. + // Patch ClusterRoleBinding to add the default instance's ServiceAccount. if err := patchMonetizeBinding(cfg, u); err != nil { return fmt.Errorf("failed to patch ClusterRoleBinding: %w", err) } @@ -60,12 +30,11 @@ func Init(cfg *config.Config, u *ui.UI) error { return fmt.Errorf("failed to inject HEARTBEAT.md: %w", err) } - u.Print("") - u.Success("Agent initialized. To reconfigure, you can safely re-run: obol agent init") + u.Success("Agent capabilities applied to default OpenClaw instance") return nil } -// patchMonetizeBinding adds the obol-agent's OpenClaw ServiceAccount +// patchMonetizeBinding adds the default OpenClaw instance's ServiceAccount // as a subject on the monetize ClusterRoleBindings and x402 RoleBinding. 
// // ClusterRoleBindings patched: @@ -74,7 +43,7 @@ func Init(cfg *config.Config, u *ui.UI) error { // RoleBindings patched: // openclaw-x402-pricing-binding (x402 namespace, pricing ConfigMap) func patchMonetizeBinding(cfg *config.Config, u *ui.UI) error { - namespace := fmt.Sprintf("openclaw-%s", agentID) + namespace := fmt.Sprintf("openclaw-%s", DefaultInstanceID) subject := []map[string]interface{}{ { @@ -128,13 +97,13 @@ func patchMonetizeBinding(cfg *config.Config, u *ui.UI) error { return nil } -// injectHeartbeatFile writes HEARTBEAT.md to the obol-agent's workspace path +// injectHeartbeatFile writes HEARTBEAT.md to the default instance's workspace // so OpenClaw runs monetize.py reconciliation on every heartbeat cycle. // OpenClaw reads HEARTBEAT.md from the agent workspace directory // (resolveAgentWorkspaceDir → /data/.openclaw/workspace/HEARTBEAT.md), // NOT the root .openclaw directory. func injectHeartbeatFile(cfg *config.Config, u *ui.UI) error { - namespace := fmt.Sprintf("openclaw-%s", agentID) + namespace := fmt.Sprintf("openclaw-%s", DefaultInstanceID) heartbeatDir := filepath.Join(cfg.DataDir, namespace, "openclaw-data", ".openclaw", "workspace") if err := os.MkdirAll(heartbeatDir, 0755); err != nil { diff --git a/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml b/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml index 95658d46..72d7fc38 100644 --- a/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml +++ b/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml @@ -6,8 +6,8 @@ # 2. openclaw-monetize-workload — cluster-wide mutate for agent-managed resources # 3. openclaw-x402-pricing — namespace-scoped x402 pricing ConfigMap access # -# Subjects pre-populated with obol-agent ServiceAccount. -# Patched dynamically by `obol agent init` for additional instances. +# Subjects pre-populated with default OpenClaw instance ServiceAccount. 
+# Patched dynamically by `obol agent init` if needed. #------------------------------------------------------------------------------ # ClusterRole - Read-only permissions (low privilege, cluster-wide) @@ -98,7 +98,7 @@ roleRef: subjects: - kind: ServiceAccount name: openclaw - namespace: openclaw-obol-agent + namespace: openclaw-default --- #------------------------------------------------------------------------------ @@ -115,7 +115,7 @@ roleRef: subjects: - kind: ServiceAccount name: openclaw - namespace: openclaw-obol-agent + namespace: openclaw-default --- @@ -151,7 +151,7 @@ roleRef: subjects: - kind: ServiceAccount name: openclaw - namespace: openclaw-obol-agent + namespace: openclaw-default --- #------------------------------------------------------------------------------ @@ -186,4 +186,4 @@ roleRef: subjects: - kind: ServiceAccount name: openclaw - namespace: openclaw-obol-agent + namespace: openclaw-default diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index 7363150d..7eab9ee7 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -174,6 +174,8 @@ releases: name: erpc namespace: erpc spec: + hostnames: + - "obol.stack" parentRefs: - name: traefik-gateway namespace: traefik @@ -286,6 +288,10 @@ releases: - apiGroups: [""] resources: ["pods", "configmaps", "secrets"] verbs: ["get", "list"] + # ServiceOffer CRD — frontend sell modal creates offers + - apiGroups: ["obol.org"] + resources: ["serviceoffers", "serviceoffers/status"] + verbs: ["get", "list", "create", "update", "patch", "delete"] - apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: diff --git a/internal/inference/gateway.go b/internal/inference/gateway.go index 9250ae36..a9bc65eb 100644 --- a/internal/inference/gateway.go +++ b/internal/inference/gateway.go @@ -93,6 +93,12 @@ type GatewayConfig struct { // Required when TEEType is set. 
Bound into the TEE attestation user_data // so verifiers can confirm the model identity. ModelHash string + + // NoPaymentGate disables the built-in x402 payment middleware. Use this + // when the gateway runs behind the cluster's x402 verifier (via Traefik + // ForwardAuth) to avoid double-gating requests. Enclave/TEE encryption + // middleware remains active when enabled. + NoPaymentGate bool } // Gateway is an x402-enabled reverse proxy for LLM inference with optional @@ -212,7 +218,10 @@ func (g *Gateway) buildHandler(upstreamURL string) (http.Handler, error) { if em != nil { h = em.wrap(h) } - return paymentMiddleware(h) + if !g.config.NoPaymentGate { + h = paymentMiddleware(h) + } + return h } // Build HTTP mux. diff --git a/internal/inference/store.go b/internal/inference/store.go index 2a48784e..46d38673 100644 --- a/internal/inference/store.go +++ b/internal/inference/store.go @@ -85,6 +85,10 @@ type Deployment struct { // Required when TEEType is set. Bound into the TEE attestation user_data. ModelHash string `json:"model_hash,omitempty"` + // NoPaymentGate disables the built-in x402 payment middleware when the + // gateway is routed through the cluster's x402 verifier via Traefik. + NoPaymentGate bool `json:"no_payment_gate,omitempty"` + // CreatedAt is the RFC3339 timestamp of when this deployment was created. CreatedAt string `json:"created_at"` diff --git a/internal/openclaw/monetize_integration_test.go b/internal/openclaw/monetize_integration_test.go index 2d818dc7..5d26e81b 100644 --- a/internal/openclaw/monetize_integration_test.go +++ b/internal/openclaw/monetize_integration_test.go @@ -300,12 +300,11 @@ func TestIntegration_CRD_Delete(t *testing.T) { // ───────────────────────────────────────────────────────────────────────────── // agentNamespace returns the namespace of the OpenClaw instance that has -// monetize RBAC. 
Prefers "openclaw-obol-agent" (set up by `obol agent init`) -// over other instances, because only that SA gets the ClusterRoleBinding. +// monetize RBAC. This is always the "default" instance ("openclaw-default"). func agentNamespace(cfg *config.Config) string { out, err := obolRunErr(cfg, "openclaw", "list") if err != nil { - return "openclaw-obol-agent" + return "openclaw-default" } // Collect all namespaces from output. var namespaces []string @@ -318,16 +317,16 @@ func agentNamespace(cfg *config.Config) string { } } } - // Prefer obol-agent (has RBAC from `obol agent init`). + // Prefer default (has RBAC from `obol agent init`). for _, ns := range namespaces { - if ns == "openclaw-obol-agent" { + if ns == "openclaw-default" { return ns } } if len(namespaces) > 0 { return namespaces[0] } - return "openclaw-obol-agent" + return "openclaw-default" } // requireAgent skips the test if no OpenClaw instance is deployed. diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index 3f514216..96db3382 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -66,7 +66,9 @@ type OnboardOptions struct { OllamaModels []string // Available Ollama models detected on host (nil = not queried) } -// SetupDefault deploys a default OpenClaw instance as part of stack setup. +// SetupDefault deploys the default OpenClaw instance as part of stack setup. +// This is the single canonical instance that handles both user-facing inference +// and agent-mode monetize/heartbeat reconciliation. // It is idempotent: if a "default" deployment already exists, it re-syncs. 
// When Ollama is not detected on the host and no existing ~/.openclaw config // is found, it skips provider setup gracefully so the user can configure @@ -81,6 +83,7 @@ func SetupDefault(cfg *config.Config, u *ui.UI) error { ID: "default", Sync: true, IsDefault: true, + AgentMode: true, }, u) } @@ -117,6 +120,7 @@ func SetupDefault(cfg *config.Config, u *ui.UI) error { ID: "default", Sync: true, IsDefault: true, + AgentMode: true, OllamaModels: ollamaModels, }, u) } diff --git a/internal/stack/stack.go b/internal/stack/stack.go index 0c39971e..e5046c90 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -451,24 +451,30 @@ func syncDefaults(cfg *config.Config, u *ui.UI, kubeconfigPath string, dataDir s u.Dim(" You can manually set up OpenClaw later with: obol openclaw onboard") } - // Deploy the obol-agent singleton (monetize reconciliation, heartbeat). + // Apply agent capabilities (RBAC + heartbeat) to the default instance. // Non-fatal: the user can always run `obol agent init` later. u.Blank() - u.Info("Deploying obol-agent") + u.Info("Applying agent capabilities") if err := agent.Init(cfg, u); err != nil { - u.Warnf("Failed to deploy obol-agent: %v", err) - u.Dim(" You can manually deploy later with: obol agent init") + u.Warnf("Failed to apply agent capabilities: %v", err) + u.Dim(" You can manually apply later with: obol agent init") } - // Start the Cloudflare tunnel so the stack is publicly accessible. - // Non-fatal: the user can start it later with `obol tunnel restart`. + // Start the Cloudflare tunnel only if a persistent DNS tunnel is provisioned. + // Quick tunnels are dormant by default and activate on first `obol sell`. 
u.Blank() - u.Info("Starting Cloudflare tunnel") - if tunnelURL, err := tunnel.EnsureRunning(cfg, u); err != nil { - u.Warnf("Tunnel not started: %v", err) - u.Dim(" Start manually with: obol tunnel restart") + if st, _ := tunnel.LoadTunnelState(cfg); st != nil && st.Mode == "dns" && st.Hostname != "" { + u.Info("Starting persistent Cloudflare tunnel") + if tunnelURL, err := tunnel.EnsureRunning(cfg, u); err != nil { + u.Warnf("Tunnel not started: %v", err) + u.Dim(" Start manually with: obol tunnel restart") + } else { + u.Successf("Tunnel active: %s", tunnelURL) + } } else { - u.Successf("Tunnel active: %s", tunnelURL) + u.Dim("Tunnel dormant (activates on first 'obol sell http')") + u.Dim(" Start manually with: obol tunnel restart") + u.Dim(" For a persistent URL: obol tunnel login --hostname stack.example.com") } return nil diff --git a/internal/tunnel/agent.go b/internal/tunnel/agent.go index 3656ea7d..03b90d1f 100644 --- a/internal/tunnel/agent.go +++ b/internal/tunnel/agent.go @@ -10,7 +10,7 @@ import ( "github.com/ObolNetwork/obol-stack/internal/config" ) -const agentDeploymentID = "obol-agent" +const agentDeploymentID = "default" // SyncAgentBaseURL patches AGENT_BASE_URL in the obol-agent's values-obol.yaml // and runs helmfile sync to apply the change. It is a no-op if the obol-agent diff --git a/internal/tunnel/tunnel.go b/internal/tunnel/tunnel.go index d82eac9d..02fefe94 100644 --- a/internal/tunnel/tunnel.go +++ b/internal/tunnel/tunnel.go @@ -1,7 +1,9 @@ package tunnel import ( + "encoding/json" "fmt" + "net/url" "os" "os/exec" "path/filepath" @@ -39,12 +41,12 @@ func Status(cfg *config.Config, u *ui.UI) error { if err != nil { mode, url := tunnelModeAndURL(st) if mode == "quick" { - // No tunnel credentials configured — tunnel is dormant by design. - printStatusBox(u, "disabled", "not running", "(no tunnel configured)", time.Now()) + // Quick tunnel is dormant — activates on first `obol sell`. 
+ printStatusBox(u, "quick", "dormant", "(activates on 'obol sell')", time.Now()) u.Blank() - u.Print("To expose your stack publicly, set up a tunnel:") - u.Print(" obol tunnel login --hostname stack.example.com") - u.Print(" obol tunnel provision --hostname stack.example.com --account-id ... --zone-id ... --api-token ...") + u.Print("The tunnel will start automatically when you sell a service.") + u.Print(" Start manually: obol tunnel restart") + u.Print(" Persistent URL: obol tunnel login --hostname stack.example.com") return nil } printStatusBox(u, mode, "not running", url, time.Now()) @@ -91,7 +93,7 @@ func Status(cfg *config.Config, u *ui.UI) error { return nil } -// InjectBaseURL sets AGENT_BASE_URL on the obol-agent deployment so that +// InjectBaseURL sets AGENT_BASE_URL on the default OpenClaw deployment so that // monetize.py uses the tunnel URL in registration JSON. func InjectBaseURL(cfg *config.Config, tunnelURL string) error { kubectlPath := filepath.Join(cfg.BinDir, "kubectl") @@ -100,7 +102,7 @@ func InjectBaseURL(cfg *config.Config, tunnelURL string) error { cmd := exec.Command(kubectlPath, "--kubeconfig", kubeconfigPath, "set", "env", "deployment/openclaw", - "-n", "openclaw-obol-agent", + "-n", "openclaw-default", fmt.Sprintf("AGENT_BASE_URL=%s", strings.TrimRight(tunnelURL, "/")), ) return cmd.Run() @@ -329,6 +331,241 @@ data: return nil } +// EnsureTunnelForSell ensures the tunnel is running and propagates the URL to +// all downstream consumers (obol-agent env, frontend ConfigMap, agent overlay). +// It also creates a storefront landing page at the tunnel hostname. +func EnsureTunnelForSell(cfg *config.Config, u *ui.UI) (string, error) { + tunnelURL, err := EnsureRunning(cfg, u) + if err != nil { + return "", err + } + // EnsureRunning already calls InjectBaseURL + SyncTunnelConfigMap. + // Also sync the agent overlay for helmfile consistency. 
+ if err := SyncAgentBaseURL(cfg, tunnelURL); err != nil { + u.Warnf("could not sync AGENT_BASE_URL to obol-agent overlay: %v", err) + } + // Create the storefront landing page for the tunnel hostname. + if err := CreateStorefront(cfg, tunnelURL); err != nil { + u.Warnf("could not create storefront: %v", err) + } + return tunnelURL, nil +} + +// Stop scales the cloudflared deployment to 0 replicas. +func Stop(cfg *config.Config, u *ui.UI) error { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return nil // stack not running, nothing to stop + } + cmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "scale", "deployment/cloudflared", + "-n", tunnelNamespace, + "--replicas=0", + ) + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to scale cloudflared to 0: %w: %s", err, strings.TrimSpace(string(out))) + } + u.Success("Tunnel stopped") + return nil +} + +// storefrontNamespace is where the storefront landing page resources live. +const storefrontNamespace = "traefik" + +// CreateStorefront creates (or updates) a simple HTML landing page served at +// the tunnel hostname's root path. This uses the same busybox-httpd + ConfigMap +// pattern as the .well-known registration in monetize.py. +func CreateStorefront(cfg *config.Config, tunnelURL string) error { + parsed, err := url.Parse(tunnelURL) + if err != nil { + return fmt.Errorf("invalid tunnel URL: %w", err) + } + hostname := parsed.Hostname() + + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + + html := fmt.Sprintf(` + + + + + Obol Stack + + + +

Obol Stack

+

This node sells services via x402 micropayments.

+
+

Available Services

+

See the machine-readable catalog: /skill.md

+

Agent registration: /.well-known/agent-registration.json

+
+ +`, tunnelURL, tunnelURL) + + // Build the resources as a multi-document YAML manifest. + resources := []map[string]interface{}{ + // ConfigMap with HTML content + httpd mime config. + { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": map[string]interface{}{ + "name": "tunnel-storefront", + "namespace": storefrontNamespace, + }, + "data": map[string]string{ + "index.html": html, + "httpd.conf": "", + "mime.types": "text/html\thtml htm\n", + }, + }, + // Deployment: busybox httpd serving the ConfigMap. + { + "apiVersion": "apps/v1", + "kind": "Deployment", + "metadata": map[string]interface{}{ + "name": "tunnel-storefront", + "namespace": storefrontNamespace, + }, + "spec": map[string]interface{}{ + "replicas": 1, + "selector": map[string]interface{}{ + "matchLabels": map[string]string{"app": "tunnel-storefront"}, + }, + "template": map[string]interface{}{ + "metadata": map[string]interface{}{ + "labels": map[string]string{"app": "tunnel-storefront"}, + }, + "spec": map[string]interface{}{ + "containers": []map[string]interface{}{ + { + "name": "httpd", + "image": "busybox:1.37", + "command": []string{"httpd", "-f", "-p", "8080", "-h", "/www"}, + "ports": []map[string]interface{}{ + {"containerPort": 8080}, + }, + "volumeMounts": []map[string]interface{}{ + {"name": "html", "mountPath": "/www"}, + }, + "resources": map[string]interface{}{ + "requests": map[string]string{"cpu": "5m", "memory": "8Mi"}, + "limits": map[string]string{"cpu": "20m", "memory": "16Mi"}, + }, + }, + }, + "volumes": []map[string]interface{}{ + { + "name": "html", + "configMap": map[string]interface{}{ + "name": "tunnel-storefront", + }, + }, + }, + }, + }, + }, + }, + // Service + { + "apiVersion": "v1", + "kind": "Service", + "metadata": map[string]interface{}{ + "name": "tunnel-storefront", + "namespace": storefrontNamespace, + }, + "spec": map[string]interface{}{ + "selector": map[string]string{"app": "tunnel-storefront"}, + "ports": []map[string]interface{}{ + {"port": 8080, 
"targetPort": 8080}, + }, + }, + }, + // HTTPRoute: tunnel hostname → storefront (more specific than frontend catch-all). + { + "apiVersion": "gateway.networking.k8s.io/v1", + "kind": "HTTPRoute", + "metadata": map[string]interface{}{ + "name": "tunnel-storefront", + "namespace": storefrontNamespace, + }, + "spec": map[string]interface{}{ + "hostnames": []string{hostname}, + "parentRefs": []map[string]interface{}{ + { + "name": "traefik-gateway", + "namespace": "traefik", + "sectionName": "web", + }, + }, + "rules": []map[string]interface{}{ + { + "matches": []map[string]interface{}{ + {"path": map[string]string{"type": "PathPrefix", "value": "/"}}, + }, + "backendRefs": []map[string]interface{}{ + { + "name": "tunnel-storefront", + "port": 8080, + }, + }, + }, + }, + }, + }, + } + + // Apply each resource via kubectl apply. + for _, res := range resources { + data, err := json.Marshal(res) + if err != nil { + return fmt.Errorf("failed to marshal resource: %w", err) + } + cmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "apply", "-f", "-", + ) + cmd.Stdin = strings.NewReader(string(data)) + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to apply storefront resource: %w: %s", err, strings.TrimSpace(string(out))) + } + } + return nil +} + +// DeleteStorefront removes the storefront landing page resources. 
+func DeleteStorefront(cfg *config.Config) error { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return nil + } + + for _, resource := range []string{ + "httproute/tunnel-storefront", + "service/tunnel-storefront", + "deployment/tunnel-storefront", + "configmap/tunnel-storefront", + } { + cmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "delete", resource, + "-n", storefrontNamespace, + "--ignore-not-found", + ) + _ = cmd.Run() // best-effort cleanup + } + return nil +} + func parseQuickTunnelURL(logs string) (string, bool) { // Quick tunnel logs print a random *.trycloudflare.com URL. re := regexp.MustCompile(`https://[a-z0-9-]+\.trycloudflare\.com`) diff --git a/internal/x402/bdd_integration_test.go b/internal/x402/bdd_integration_test.go index 8a41ae37..b17190f5 100644 --- a/internal/x402/bdd_integration_test.go +++ b/internal/x402/bdd_integration_test.go @@ -162,7 +162,7 @@ func TestMain(m *testing.M) { // Wait for the obol-agent pod to be Running. 
log.Println(" Waiting for obol-agent pod...") - if err := waitForAnyPod(kubectlBin, kubeconfigPath, "openclaw-obol-agent", + if err := waitForAnyPod(kubectlBin, kubeconfigPath, "openclaw-default", []string{"app=openclaw", "app.kubernetes.io/name=openclaw"}, 300*time.Second); err != nil { teardown(obolBin) log.Fatalf("obol-agent not ready: %v", err) @@ -299,7 +299,7 @@ func ensureExistingClusterBootstrap(obolBin, kubectlBin, kubeconfig string) erro if err := waitForPod(kubectlBin, kubeconfig, "x402", "app=x402-verifier", 120*time.Second); err != nil { return fmt.Errorf("x402-verifier not ready: %w", err) } - if err := waitForAnyPod(kubectlBin, kubeconfig, "openclaw-obol-agent", + if err := waitForAnyPod(kubectlBin, kubeconfig, "openclaw-default", []string{"app=openclaw", "app.kubernetes.io/name=openclaw"}, 180*time.Second); err != nil { return fmt.Errorf("obol-agent not ready: %w", err) } @@ -393,7 +393,7 @@ func waitForServiceOfferReady(kubectlBin, kubeconfig, name, namespace string, ti // This simulates the heartbeat cron firing. func triggerReconciliation(kubectlBin, kubeconfig string) { out, err := kubectl.Output(kubectlBin, kubeconfig, - "exec", "-i", "-n", "openclaw-obol-agent", "deploy/openclaw", "-c", "openclaw", + "exec", "-i", "-n", "openclaw-default", "deploy/openclaw", "-c", "openclaw", "--", "python3", "/data/.openclaw/skills/sell/scripts/monetize.py", "process", "--all") if err != nil { log.Printf(" manual reconciliation error: %v\n%s", err, out) diff --git a/internal/x402/setup.go b/internal/x402/setup.go index 6bc77696..b9d644e0 100644 --- a/internal/x402/setup.go +++ b/internal/x402/setup.go @@ -169,7 +169,7 @@ roleRef: subjects: - kind: ServiceAccount name: openclaw - namespace: openclaw-obol-agent + namespace: openclaw-default `) // EnsureVerifier deploys the x402 verifier subsystem if it doesn't exist. 
From 3a09375c1546f7e027229db37e76ca42f117cd20 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 17 Mar 2026 11:12:24 +0800 Subject: [PATCH 02/11] security: restrict frontend HTTPRoute to local-only via hostname binding The frontend catch-all `/` HTTPRoute had no hostname restriction, meaning the entire UI (dashboard, sell modal, settings) was publicly accessible through the Cloudflare tunnel. Add `hostnames: ["obol.stack"]` to match the eRPC route pattern already in this branch. Also add CLAUDE.md guardrails documenting the local-only vs public route split and explicit NEVER rules to prevent future regressions. --- CLAUDE.md | 18 +++++++++++++++++- internal/embed/infrastructure/helmfile.yaml | 2 ++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index a3b81238..1de55441 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -37,7 +37,7 @@ Integration tests use `//go:build integration` and skip gracefully when prerequi **Design**: Deployment-centric (unique namespaces via petnames), local-first (k3d), XDG-compliant, two-stage templating (CLI flags → Go templates → Helmfile → K8s). -**Routing**: Traefik + Kubernetes Gateway API. GatewayClass `traefik`, Gateway `traefik-gateway` in `traefik` ns. Routes: `/` → frontend, `/rpc` → eRPC, `/services/<name>/*` → x402 ForwardAuth → upstream, `/.well-known/agent-registration.json` → ERC-8004 httpd, `/ethereum-<id>/execution|beacon`. +**Routing**: Traefik + Kubernetes Gateway API. GatewayClass `traefik`, Gateway `traefik-gateway` in `traefik` ns. Local-only routes (restricted to `hostnames: ["obol.stack"]`): `/` → frontend, `/rpc` → eRPC. Public routes (accessible via tunnel, no hostname restriction): `/services/<name>/*` → x402 ForwardAuth → upstream, `/.well-known/agent-registration.json` → ERC-8004 httpd, `/skill.md` → service catalog. Tunnel hostname gets a storefront landing page at `/`.
NEVER remove hostname restrictions from frontend or eRPC HTTPRoutes — exposing the frontend/RPC to the public internet is a critical security flaw. **Config**: `Config{ConfigDir, DataDir, BinDir}`. Precedence: `OBOL_CONFIG_DIR` > `XDG_CONFIG_HOME/obol` > `~/.config/obol`. `OBOL_DEVELOPMENT=true` → `.workspace/` dirs. All K8s tools auto-set `KUBECONFIG=$OBOL_CONFIG_DIR/kubeconfig.yaml`. @@ -156,6 +156,22 @@ Skills = SKILL.md + optional scripts/references, embedded in `obol` binary (`int 4. **ExternalName services** — don't work with Traefik Gateway API, use ClusterIP + Endpoints 5. **eRPC `eth_call` cache** — default TTL is 10s for unfinalized reads, so `buy.py balance` can lag behind an already-settled paid request for a few seconds +### Security: Tunnel Exposure + +The Cloudflare tunnel exposes the cluster to the public internet. Only x402-gated endpoints and discovery metadata should be reachable via the tunnel hostname. Internal services (frontend, eRPC, LiteLLM, monitoring) MUST have `hostnames: ["obol.stack"]` on their HTTPRoutes to restrict them to local access. 
+ +**NEVER**: +- Remove `hostnames` restrictions from frontend or eRPC HTTPRoutes +- Create HTTPRoutes without `hostnames` for internal services +- Expose the frontend UI, Prometheus/monitoring, or LiteLLM admin to the tunnel +- Run `obol stack down` or `obol stack purge` unless explicitly asked + +**Public routes** (no hostname restriction, intentional): +- `/services/*` — x402 payment-gated, safe by design +- `/.well-known/agent-registration.json` — ERC-8004 discovery +- `/skill.md` — machine-readable service catalog +- `/` on tunnel hostname — static storefront landing page (busybox httpd) + ## Key Packages | Package | Key Files | Role | diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index 7eab9ee7..05ff5f27 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -253,6 +253,8 @@ releases: name: obol-frontend namespace: obol-frontend spec: + hostnames: + - "obol.stack" parentRefs: - name: traefik-gateway namespace: traefik From fa0779198caaf9d47e413eb282ef34b8dc2adc5c Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 17 Mar 2026 14:48:02 +0800 Subject: [PATCH 03/11] fix: address security and routing issues from PR #267 review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four fixes for the sell-inference cluster routing introduced in #267: 1. Security: bind gateway to 127.0.0.1 when NoPaymentGate=true so only cluster traffic (via K8s Service+Endpoints bridge) can reach the unpaid listener — no host/LAN exposure. 2. Critical: use parsed --listen port in Service, Endpoints, and ServiceOffer spec instead of hardcoded 8402. Non-default ports now work correctly. 3. k3s support: resolveHostIP() now checks DetectExistingBackend() for k3s and returns 127.0.0.1, matching the existing ollamaHostIPForBackend() strategy in internal/stack. 4. 
Migration: keep "obol-agent" as default instance ID to preserve existing openclaw-obol-agent namespaces on upgrade. Avoids orphaned deployments when upgrading from pre-#267 installs. Also bumps frontend to v0.1.13-rc.1. --- cmd/obol/sell.go | 34 ++++++++++++++----- internal/agent/agent.go | 2 +- .../values/obol-frontend.yaml.gotmpl | 2 +- internal/openclaw/openclaw.go | 8 ++--- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/cmd/obol/sell.go b/cmd/obol/sell.go index 54452d66..36978a82 100644 --- a/cmd/obol/sell.go +++ b/cmd/obol/sell.go @@ -9,6 +9,7 @@ import ( "os" "os/signal" "runtime" + "strconv" "strings" "syscall" @@ -18,6 +19,7 @@ import ( "github.com/ObolNetwork/obol-stack/internal/inference" "github.com/ObolNetwork/obol-stack/internal/kubectl" "github.com/ObolNetwork/obol-stack/internal/schemas" + "github.com/ObolNetwork/obol-stack/internal/stack" "github.com/ObolNetwork/obol-stack/internal/tee" "github.com/ObolNetwork/obol-stack/internal/tunnel" x402verifier "github.com/ObolNetwork/obol-stack/internal/x402" @@ -231,6 +233,11 @@ Examples: port = listenAddr[idx+1:] } + // Bind to loopback only — the cluster reaches us via the + // K8s Service+Endpoints bridge; there is no reason to expose + // the unpaid gateway on all interfaces. + d.ListenAddr = "127.0.0.1:" + port + // Create a K8s Service + Endpoints pointing to the host. svcNs := "llm" // co-locate with LiteLLM for simplicity if err := createHostService(cfg, name, svcNs, port); err != nil { @@ -1119,11 +1126,13 @@ func formatInferencePriceSummary(d *inference.Deployment) string { // Kubernetes Endpoints require an IP address, not a hostname. We resolve the // host IP using the same strategy as ollamaHostIPForBackend in internal/stack. 
func createHostService(cfg *config.Config, name, ns, port string) error { - hostIP, err := resolveHostIP() + hostIP, err := resolveHostIP(cfg) if err != nil { return fmt.Errorf("cannot resolve host IP for cluster routing: %w", err) } + portNum, _ := strconv.Atoi(port) + svc := map[string]interface{}{ "apiVersion": "v1", "kind": "Service", @@ -1133,7 +1142,7 @@ func createHostService(cfg *config.Config, name, ns, port string) error { }, "spec": map[string]interface{}{ "ports": []map[string]interface{}{ - {"port": 8402, "targetPort": 8402, "protocol": "TCP"}, + {"port": portNum, "targetPort": portNum, "protocol": "TCP"}, }, }, } @@ -1150,7 +1159,7 @@ func createHostService(cfg *config.Config, name, ns, port string) error { {"ip": hostIP}, }, "ports": []map[string]interface{}{ - {"port": 8402, "protocol": "TCP"}, + {"port": portNum, "protocol": "TCP"}, }, }, }, @@ -1165,10 +1174,16 @@ func createHostService(cfg *config.Config, name, ns, port string) error { return nil } -// resolveHostIP returns the Docker host IP reachable from k3d containers. -// Same resolution strategy as stack.ollamaHostIPForBackend. -func resolveHostIP() (string, error) { - // Try DNS resolution of host.docker.internal (macOS) or host.k3d.internal (Linux). +// resolveHostIP returns the host IP reachable from cluster containers. +// For k3s (bare-metal) the host is localhost; for k3d the host is +// reachable via Docker networking. +func resolveHostIP(cfg *config.Config) (string, error) { + // Check if this is a k3s (bare-metal) backend — host is localhost. + if backend := stack.DetectExistingBackend(cfg); backend == stack.BackendK3s { + return "127.0.0.1", nil + } + + // k3d / Docker: try DNS resolution of host.docker.internal or host.k3d.internal. 
for _, host := range []string{"host.docker.internal", "host.k3d.internal"} { if addrs, err := net.LookupHost(host); err == nil && len(addrs) > 0 { return addrs[0], nil @@ -1188,18 +1203,19 @@ func resolveHostIP() (string, error) { } } } - return "", fmt.Errorf("cannot determine Docker host IP; ensure Docker is running") + return "", fmt.Errorf("cannot determine host IP; ensure Docker is running or using k3s backend") } // buildInferenceServiceOfferSpec builds a ServiceOffer spec for a host-side // inference gateway routed through the cluster's x402 flow. func buildInferenceServiceOfferSpec(d *inference.Deployment, pt schemas.PriceTable, ns, port string) map[string]interface{} { + portNum, _ := strconv.Atoi(port) spec := map[string]interface{}{ "type": "inference", "upstream": map[string]interface{}{ "service": d.Name, "namespace": ns, - "port": 8402, + "port": portNum, "healthPath": "/health", }, "payment": map[string]interface{}{ diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 63f8ae94..63b6f602 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -13,7 +13,7 @@ import ( // DefaultInstanceID is the canonical OpenClaw instance that runs both // user-facing inference and agent-mode monetize/heartbeat reconciliation. -const DefaultInstanceID = "default" +const DefaultInstanceID = "obol-agent" // Init patches the default OpenClaw instance with agent capabilities: // monetize RBAC bindings and HEARTBEAT.md for periodic reconciliation. 
diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index 5d9d7235..043efb8b 100644 --- a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -35,7 +35,7 @@ image: repository: obolnetwork/obol-stack-front-end pullPolicy: IfNotPresent - tag: "v0.1.12" + tag: "v0.1.13-rc.1" service: type: ClusterIP diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index 96db3382..17a21260 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -76,11 +76,11 @@ type OnboardOptions struct { func SetupDefault(cfg *config.Config, u *ui.UI) error { // Check whether the default deployment already exists (re-sync path). // If it does, proceed unconditionally — the overlay was already written. - deploymentDir := DeploymentPath(cfg, "default") + deploymentDir := DeploymentPath(cfg, "obol-agent") if _, err := os.Stat(deploymentDir); err == nil { // Existing deployment — always re-sync regardless of Ollama status. 
return Onboard(cfg, OnboardOptions{ - ID: "default", + ID: "obol-agent", Sync: true, IsDefault: true, AgentMode: true, @@ -117,7 +117,7 @@ func SetupDefault(cfg *config.Config, u *ui.UI) error { } return Onboard(cfg, OnboardOptions{ - ID: "default", + ID: "obol-agent", Sync: true, IsDefault: true, AgentMode: true, @@ -129,7 +129,7 @@ func SetupDefault(cfg *config.Config, u *ui.UI) error { func Onboard(cfg *config.Config, opts OnboardOptions, u *ui.UI) error { id := opts.ID if opts.IsDefault { - id = "default" + id = "obol-agent" } if id == "" { id = petname.Generate(2, "-") From 0b91ec269dcfd98b88d30c70a0a6ecaba6f70816 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 17 Mar 2026 14:55:18 +0800 Subject: [PATCH 04/11] chore: bump OpenClaw to v2026.3.13-1 --- internal/openclaw/openclaw.go | 2 +- obolup.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go index 17a21260..a2896afd 100644 --- a/internal/openclaw/openclaw.go +++ b/internal/openclaw/openclaw.go @@ -48,7 +48,7 @@ const ( // openclawImageTag overrides the chart's default image tag. // Must match the version in OPENCLAW_VERSION (without "v" prefix). - openclawImageTag = "2026.3.11" + openclawImageTag = "2026.3.13-1" // remoteSignerChartVersion pins the remote-signer Helm chart version. 
// renovate: datasource=helm depName=remote-signer registryUrl=https://obolnetwork.github.io/helm-charts/ diff --git a/obolup.sh b/obolup.sh index c11ca4d0..f2ed6ae3 100755 --- a/obolup.sh +++ b/obolup.sh @@ -60,7 +60,7 @@ readonly K3D_VERSION="5.8.3" readonly HELMFILE_VERSION="1.2.3" readonly K9S_VERSION="0.50.18" readonly HELM_DIFF_VERSION="3.14.1" -readonly OPENCLAW_VERSION="2026.3.11" +readonly OPENCLAW_VERSION="2026.3.13-1" # Repository URL for building from source readonly OBOL_REPO_URL="git@github.com:ObolNetwork/obol-stack.git" From ee92f932cd2ee6f78e4d54e1439fb65e3646392e Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 17 Mar 2026 16:19:49 +0800 Subject: [PATCH 05/11] feat: auto-configure cloud providers during stack up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When ~/.openclaw/openclaw.json specifies a cloud model as the agent's primary (e.g. anthropic/claude-sonnet-4-6), autoConfigureLLM() now detects the provider and API key from the environment (or .env in dev mode) and configures LiteLLM before OpenClaw setup runs. This makes agent chat work out of the box without a separate `obol model setup`. 
Changes: - internal/stack: add autoConfigureCloudProviders() with env + .env key resolution (dev-mode only for .env) - internal/model: export ProviderFromModelName(), ProviderEnvVar(); add HasProviderConfigured(), LoadDotEnv() - cmd/obol/model: update defaults — claude-sonnet-4-6, gpt-4.1 - internal/model: update WellKnownModels with current flagship models (claude-opus-4-6, gpt-5.4, gpt-4.1, o4-mini) - obolup.sh: add check_agent_model_api_key() to warn users before cluster start if a required API key is missing --- cmd/obol/model.go | 4 +- internal/model/model.go | 88 +++++++++++++++++++++++++++----- internal/model/model_test.go | 77 ++++++++++++++++++++++++++-- internal/stack/stack.go | 97 ++++++++++++++++++++++++++++-------- obolup.sh | 43 ++++++++++++++++ 5 files changed, 271 insertions(+), 38 deletions(-) diff --git a/cmd/obol/model.go b/cmd/obol/model.go index 1a9a5909..c52199a4 100644 --- a/cmd/obol/model.go +++ b/cmd/obol/model.go @@ -145,9 +145,9 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m // Sensible defaults switch provider { case "anthropic": - models = []string{"claude-sonnet-4-5-20250929"} + models = []string{"claude-sonnet-4-6"} case "openai": - models = []string{"gpt-4o"} + models = []string{"gpt-4.1"} } } diff --git a/internal/model/model.go b/internal/model/model.go index 362d8db9..1ad61558 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -93,6 +93,69 @@ func HasConfiguredModels(cfg *config.Config) bool { return false } +// HasProviderConfigured returns true if LiteLLM already has at least one +// model entry for the given provider (e.g., "anthropic", "openai"). 
+func HasProviderConfigured(cfg *config.Config, provider string) bool { + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + + raw, err := kubectl.Output(kubectlBinary, kubeconfigPath, + "get", "configmap", configMapName, "-n", namespace, "-o", "jsonpath={.data.config\\.yaml}") + if err != nil { + return false + } + + var litellmConfig LiteLLMConfig + if err := yaml.Unmarshal([]byte(raw), &litellmConfig); err != nil { + return false + } + + for _, entry := range litellmConfig.ModelList { + // Check wildcard entries like "anthropic/*" + if entry.ModelName == provider+"/*" { + return true + } + // Check if the model's litellm_params.model starts with "provider/" + if strings.HasPrefix(entry.LiteLLMParams.Model, provider+"/") { + return true + } + // Check via model name inference + if ProviderFromModelName(entry.ModelName) == provider { + return true + } + } + return false +} + +// LoadDotEnv reads KEY=value pairs from a .env file. +// Returns an empty map if the file doesn't exist or is unreadable. +// Skips comments (#) and blank lines. Does not call os.Setenv. +func LoadDotEnv(path string) map[string]string { + result := make(map[string]string) + data, err := os.ReadFile(path) + if err != nil { + return result + } + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + idx := strings.IndexByte(line, '=') + if idx < 1 { + continue + } + key := strings.TrimSpace(line[:idx]) + val := strings.TrimSpace(line[idx+1:]) + // Strip surrounding quotes + if len(val) >= 2 && ((val[0] == '"' && val[len(val)-1] == '"') || (val[0] == '\'' && val[len(val)-1] == '\'')) { + val = val[1 : len(val)-1] + } + result[key] = val + } + return result +} + // ConfigureLiteLLM adds a provider to the LiteLLM gateway. // For cloud providers, it patches the Secret with the API key and adds // the model to config.yaml. 
For Ollama, it discovers local models and adds them. @@ -105,7 +168,7 @@ func ConfigureLiteLLM(cfg *config.Config, u *ui.UI, provider, apiKey string, mod } // 1. Patch Secret with API key (if cloud provider) - envVar := providerEnvVar(provider) + envVar := ProviderEnvVar(provider) if envVar != "" && apiKey != "" { u.Infof("Setting %s API key", provider) patchJSON := fmt.Sprintf(`{"stringData":{"%s":"%s"}}`, envVar, apiKey) @@ -546,7 +609,7 @@ func expandWildcard(provider string, liveModels []string) []string { if len(liveModels) > 0 { var matched []string for _, m := range liveModels { - p := detectProviderFromModelName(m) + p := ProviderFromModelName(m) if p == provider { matched = append(matched, m) } @@ -562,12 +625,12 @@ func expandWildcard(provider string, liveModels []string) []string { return nil } -// detectProviderFromModelName infers the provider from a model name string. -func detectProviderFromModelName(name string) string { +// ProviderFromModelName infers the provider from a model name string. +func ProviderFromModelName(name string) string { if strings.Contains(name, "claude") { return "anthropic" } - if strings.HasPrefix(name, "gpt") || strings.HasPrefix(name, "o1") || strings.HasPrefix(name, "o3") { + if strings.HasPrefix(name, "gpt") || strings.HasPrefix(name, "o1") || strings.HasPrefix(name, "o3") || strings.HasPrefix(name, "o4") { return "openai" } return "" @@ -575,8 +638,8 @@ func detectProviderFromModelName(name string) string { // --- Internal helpers --- -// providerEnvVar returns the env var name for a provider's API key. -func providerEnvVar(provider string) string { +// ProviderEnvVar returns the env var name for a provider's API key. +func ProviderEnvVar(provider string) string { for _, p := range knownProviders { if p.ID == provider { return p.EnvVar @@ -590,16 +653,17 @@ func providerEnvVar(provider string) string { // and the LiteLLM pod is not reachable for a live /v1/models query. 
var WellKnownModels = map[string][]string{ "anthropic": { + "claude-opus-4-6", "claude-sonnet-4-6", - "claude-opus-4", + "claude-haiku-4-5-20251001", "claude-sonnet-4-5-20250929", - "claude-haiku-3-5-20241022", }, "openai": { - "gpt-4o", - "gpt-4o-mini", + "gpt-5.4", + "gpt-4.1", + "gpt-4.1-mini", + "o4-mini", "o3", - "o3-mini", }, } diff --git a/internal/model/model_test.go b/internal/model/model_test.go index a69eaa5b..c0d8d604 100644 --- a/internal/model/model_test.go +++ b/internal/model/model_test.go @@ -5,6 +5,8 @@ import ( "fmt" "net/http" "net/http/httptest" + "os" + "path/filepath" "strings" "testing" ) @@ -201,20 +203,87 @@ general_settings: } func TestProviderEnvVar(t *testing.T) { - if got := providerEnvVar("anthropic"); got != "ANTHROPIC_API_KEY" { + if got := ProviderEnvVar("anthropic"); got != "ANTHROPIC_API_KEY" { t.Errorf("got %q, want ANTHROPIC_API_KEY", got) } - if got := providerEnvVar("openai"); got != "OPENAI_API_KEY" { + if got := ProviderEnvVar("openai"); got != "OPENAI_API_KEY" { t.Errorf("got %q, want OPENAI_API_KEY", got) } - if got := providerEnvVar("ollama"); got != "" { + if got := ProviderEnvVar("ollama"); got != "" { t.Errorf("got %q, want empty string for ollama", got) } - if got := providerEnvVar("custom_thing"); got != "CUSTOM_THING_API_KEY" { + if got := ProviderEnvVar("custom_thing"); got != "CUSTOM_THING_API_KEY" { t.Errorf("got %q, want CUSTOM_THING_API_KEY", got) } } +func TestProviderFromModelName(t *testing.T) { + tests := []struct { + name string + model string + expected string + }{ + {"anthropic claude", "claude-sonnet-4-6", "anthropic"}, + {"anthropic full", "claude-opus-4", "anthropic"}, + {"openai gpt", "gpt-4o", "openai"}, + {"openai o3", "o3-mini", "openai"}, + {"ollama model", "qwen3.5:9b", ""}, + {"unknown", "llama-3.2", ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := ProviderFromModelName(tt.model); got != tt.expected { + t.Errorf("ProviderFromModelName(%q) = %q, want %q", 
tt.model, got, tt.expected) + } + }) + } +} + +func TestLoadDotEnv(t *testing.T) { + t.Run("happy path", func(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, ".env") + os.WriteFile(path, []byte("FOO=bar\nBAZ=qux\n"), 0o644) + m := LoadDotEnv(path) + if m["FOO"] != "bar" { + t.Errorf("FOO = %q, want bar", m["FOO"]) + } + if m["BAZ"] != "qux" { + t.Errorf("BAZ = %q, want qux", m["BAZ"]) + } + }) + + t.Run("missing file", func(t *testing.T) { + m := LoadDotEnv("/nonexistent/.env") + if len(m) != 0 { + t.Errorf("expected empty map, got %v", m) + } + }) + + t.Run("comments and blanks", func(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, ".env") + os.WriteFile(path, []byte("# comment\n\nKEY=val\n"), 0o644) + m := LoadDotEnv(path) + if len(m) != 1 || m["KEY"] != "val" { + t.Errorf("expected {KEY:val}, got %v", m) + } + }) + + t.Run("quoted values", func(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, ".env") + os.WriteFile(path, []byte(`KEY="hello world"`+"\n"+`KEY2='single'`+"\n"), 0o644) + m := LoadDotEnv(path) + if m["KEY"] != "hello world" { + t.Errorf("KEY = %q, want 'hello world'", m["KEY"]) + } + if m["KEY2"] != "single" { + t.Errorf("KEY2 = %q, want 'single'", m["KEY2"]) + } + }) +} + func TestValidateCustomEndpoint(t *testing.T) { t.Run("full validation success", func(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { diff --git a/internal/stack/stack.go b/internal/stack/stack.go index e5046c90..a8348864 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -480,37 +480,94 @@ func syncDefaults(cfg *config.Config, u *ui.UI, kubeconfigPath string, dataDir s return nil } -// autoConfigureLLM detects the host Ollama and auto-configures LiteLLM with -// available models so that inference works out of the box. Skipped silently -// if Ollama is unreachable, has no models, or LiteLLM already has non-paid -// models configured. 
+// autoConfigureLLM detects host Ollama and imported cloud providers, then +// auto-configures LiteLLM so inference works out of the box. func autoConfigureLLM(cfg *config.Config, u *ui.UI) { + // --- Ollama auto-configuration --- ollamaModels, err := model.ListOllamaModels() - if err != nil || len(ollamaModels) == 0 { - // Ollama not running or no models — skip silently. + if err == nil && len(ollamaModels) > 0 && !model.HasConfiguredModels(cfg) { + u.Blank() + u.Infof("Ollama detected with %d model(s) — auto-configuring LiteLLM", len(ollamaModels)) + + var names []string + for _, m := range ollamaModels { + name := m.Name + if strings.HasSuffix(name, ":latest") { + name = strings.TrimSuffix(name, ":latest") + } + names = append(names, name) + } + + if err := model.ConfigureLiteLLM(cfg, u, "ollama", "", names); err != nil { + u.Warnf("Auto-configure LiteLLM failed: %v", err) + u.Dim(" Run 'obol model setup' to configure manually.") + } + } + + // --- Cloud provider auto-configuration from ~/.openclaw --- + autoConfigureCloudProviders(cfg, u) +} + +// autoConfigureCloudProviders reads the imported ~/.openclaw config and, if a +// cloud model is the agent's primary model, auto-configures LiteLLM with the +// matching provider when an API key is available in the environment (or .env +// in dev mode). +func autoConfigureCloudProviders(cfg *config.Config, u *ui.UI) { + imported, err := openclaw.DetectExistingConfig() + if err != nil || imported == nil { return } - // Check if LiteLLM already has real models (not just the paid/* catch-all). - if model.HasConfiguredModels(cfg) { + agentModel := imported.AgentModel + if agentModel == "" { return } - u.Blank() - u.Infof("Ollama detected with %d model(s) — auto-configuring LiteLLM", len(ollamaModels)) + // Extract provider and model name from "anthropic/claude-sonnet-4-6". 
+ provider, modelName := "", agentModel + if i := strings.Index(agentModel, "/"); i >= 0 { + provider = agentModel[:i] + modelName = agentModel[i+1:] + } + if provider == "" { + provider = model.ProviderFromModelName(agentModel) + } - var names []string - for _, m := range ollamaModels { - name := m.Name - if strings.HasSuffix(name, ":latest") { - name = strings.TrimSuffix(name, ":latest") - } - names = append(names, name) + if provider == "" || provider == "ollama" { + return } - if err := model.ConfigureLiteLLM(cfg, u, "ollama", "", names); err != nil { - u.Warnf("Auto-configure LiteLLM failed: %v", err) - u.Dim(" Run 'obol model setup' to configure manually.") + // Already configured — skip. + if model.HasProviderConfigured(cfg, provider) { + return + } + + envVar := model.ProviderEnvVar(provider) + if envVar == "" { + return + } + + // Resolve API key: environment first, .env in dev mode only. + apiKey := os.Getenv(envVar) + if apiKey == "" && os.Getenv("OBOL_DEVELOPMENT") == "true" { + dotEnv := model.LoadDotEnv(filepath.Join(".", ".env")) + apiKey = dotEnv[envVar] + } + + if apiKey == "" { + u.Blank() + u.Warnf("Agent model %s detected but %s is not set", agentModel, envVar) + u.Dim(fmt.Sprintf(" Set it in your environment: export %s=...", envVar)) + u.Dim(fmt.Sprintf(" Or configure after startup: obol model setup --provider %s", provider)) + return + } + + u.Blank() + u.Infof("Cloud model %s detected — auto-configuring LiteLLM with %s provider", agentModel, provider) + + if err := model.ConfigureLiteLLM(cfg, u, provider, apiKey, []string{modelName}); err != nil { + u.Warnf("Auto-configure LiteLLM for %s failed: %v", provider, err) + u.Dim(fmt.Sprintf(" Run 'obol model setup --provider %s' to configure manually.", provider)) } } diff --git a/obolup.sh b/obolup.sh index f2ed6ae3..b8ec7078 100755 --- a/obolup.sh +++ b/obolup.sh @@ -60,6 +60,8 @@ readonly K3D_VERSION="5.8.3" readonly HELMFILE_VERSION="1.2.3" readonly K9S_VERSION="0.50.18" readonly 
HELM_DIFF_VERSION="3.14.1" +# Must match internal/openclaw/OPENCLAW_VERSION (without "v" prefix). +# Tested by TestOpenClawVersionConsistency. readonly OPENCLAW_VERSION="2026.3.13-1" # Repository URL for building from source @@ -1494,6 +1496,44 @@ configure_path() { } # Print post-install instructions +# Check if ~/.openclaw/openclaw.json specifies a cloud model that needs an API key. +# Prints guidance before the "start cluster" prompt so the user can set the key first. +check_agent_model_api_key() { + local config_file="$HOME/.openclaw/openclaw.json" + [[ -f "$config_file" ]] || return 0 + + # Extract agents.defaults.model.primary (e.g., "anthropic/claude-sonnet-4-6") + local primary_model="" + if command_exists python3; then + primary_model=$(python3 -c " +import json, sys +try: + d = json.load(open(sys.argv[1])) + print(d.get('agents',{}).get('defaults',{}).get('model',{}).get('primary','')) +except: pass +" "$config_file" 2>/dev/null) + fi + + [[ -n "$primary_model" ]] || return 0 + + # Determine provider and required env var (names are usually provider-prefixed, e.g. "openai/gpt-4.1") + local provider="" env_var="" + case "$primary_model" in + anthropic/*|*claude*) provider="anthropic"; env_var="ANTHROPIC_API_KEY" ;; + openai/*|gpt*|o1*|o3*|o4*) provider="openai"; env_var="OPENAI_API_KEY" ;; + *) return 0 ;; + esac + + echo "" + if [[ -n "${!env_var:-}" ]]; then + log_success "$env_var detected for $primary_model" + else + log_warn "Your agent uses $primary_model but $env_var is not set." + log_dim " Set it before starting: export $env_var=..." + log_dim " Or configure after startup: obol model setup --provider $provider" + fi +} + print_instructions() { local install_mode="$1" @@ -1505,6 +1545,9 @@ print_instructions() { fi echo "" + # Check if the agent's primary model requires a cloud API key.
+ check_agent_model_api_key + # Check if we can prompt the user for bootstrap (works with curl | bash via /dev/tty) if [[ -c /dev/tty ]] && [[ -f "$OBOL_BIN_DIR/obol" ]]; then echo "" From a5d39940de01da89998f68c8a4a22e79d1d75295 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 17 Mar 2026 16:37:18 +0800 Subject: [PATCH 06/11] perf: batch LiteLLM provider config into single restart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split ConfigureLiteLLM into PatchLiteLLMProvider (config-only) and RestartLiteLLM (restart+wait). autoConfigureLLM now patches Ollama and cloud providers first, then does one restart — halving startup time when both are configured. --- internal/model/model.go | 22 +++++++++++++-- internal/stack/stack.go | 59 ++++++++++++++++++++++++++--------------- 2 files changed, 58 insertions(+), 23 deletions(-) diff --git a/internal/model/model.go b/internal/model/model.go index 1ad61558..a4247527 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -159,7 +159,19 @@ func LoadDotEnv(path string) map[string]string { // ConfigureLiteLLM adds a provider to the LiteLLM gateway. // For cloud providers, it patches the Secret with the API key and adds // the model to config.yaml. For Ollama, it discovers local models and adds them. +// Restarts the deployment after patching. Use PatchLiteLLMProvider + +// RestartLiteLLM to batch multiple providers with a single restart. func ConfigureLiteLLM(cfg *config.Config, u *ui.UI, provider, apiKey string, models []string) error { + if err := PatchLiteLLMProvider(cfg, u, provider, apiKey, models); err != nil { + return err + } + return RestartLiteLLM(cfg, u, provider) +} + +// PatchLiteLLMProvider patches the LiteLLM Secret (API key) and ConfigMap +// (model_list) for a provider without restarting the deployment. Call +// RestartLiteLLM afterwards (once, after batching multiple providers). 
+func PatchLiteLLMProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, models []string) error { kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") @@ -191,14 +203,20 @@ func ConfigureLiteLLM(cfg *config.Config, u *ui.UI, provider, apiKey string, mod return fmt.Errorf("failed to update LiteLLM config: %w", err) } - // 4. Restart deployment + return nil +} + +// RestartLiteLLM restarts the LiteLLM deployment and waits for rollout. +func RestartLiteLLM(cfg *config.Config, u *ui.UI, provider string) error { + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + u.Info("Restarting LiteLLM") if err := kubectl.Run(kubectlBinary, kubeconfigPath, "rollout", "restart", fmt.Sprintf("deployment/%s", deployName), "-n", namespace); err != nil { return fmt.Errorf("failed to restart LiteLLM: %w", err) } - // 5. Wait for rollout if err := kubectl.Run(kubectlBinary, kubeconfigPath, "rollout", "status", fmt.Sprintf("deployment/%s", deployName), "-n", namespace, "--timeout=90s"); err != nil { diff --git a/internal/stack/stack.go b/internal/stack/stack.go index a8348864..adb350d2 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -482,12 +482,15 @@ func syncDefaults(cfg *config.Config, u *ui.UI, kubeconfigPath string, dataDir s // autoConfigureLLM detects host Ollama and imported cloud providers, then // auto-configures LiteLLM so inference works out of the box. +// Patches all providers first, then does a single restart. 
func autoConfigureLLM(cfg *config.Config, u *ui.UI) { - // --- Ollama auto-configuration --- + var configured []string // provider names that were patched + + // --- Ollama --- ollamaModels, err := model.ListOllamaModels() if err == nil && len(ollamaModels) > 0 && !model.HasConfiguredModels(cfg) { u.Blank() - u.Infof("Ollama detected with %d model(s) — auto-configuring LiteLLM", len(ollamaModels)) + u.Infof("Ollama detected with %d model(s)", len(ollamaModels)) var names []string for _, m := range ollamaModels { @@ -498,29 +501,40 @@ func autoConfigureLLM(cfg *config.Config, u *ui.UI) { names = append(names, name) } - if err := model.ConfigureLiteLLM(cfg, u, "ollama", "", names); err != nil { - u.Warnf("Auto-configure LiteLLM failed: %v", err) - u.Dim(" Run 'obol model setup' to configure manually.") + if err := model.PatchLiteLLMProvider(cfg, u, "ollama", "", names); err != nil { + u.Warnf("Auto-configure Ollama failed: %v", err) + } else { + configured = append(configured, "ollama") } } - // --- Cloud provider auto-configuration from ~/.openclaw --- - autoConfigureCloudProviders(cfg, u) + // --- Cloud provider from ~/.openclaw --- + if cloudProvider := autoDetectCloudProvider(cfg, u); cloudProvider != "" { + configured = append(configured, cloudProvider) + } + + // --- Single restart for all providers --- + if len(configured) > 0 { + label := strings.Join(configured, " + ") + if err := model.RestartLiteLLM(cfg, u, label); err != nil { + u.Warnf("LiteLLM restart failed: %v", err) + u.Dim(" Run 'obol model setup' to configure manually.") + } + } } -// autoConfigureCloudProviders reads the imported ~/.openclaw config and, if a -// cloud model is the agent's primary model, auto-configures LiteLLM with the -// matching provider when an API key is available in the environment (or .env -// in dev mode). 
-func autoConfigureCloudProviders(cfg *config.Config, u *ui.UI) { +// autoDetectCloudProvider reads ~/.openclaw config, resolves the cloud +// provider API key, and patches LiteLLM (without restart). Returns the +// provider name on success, or "" if nothing was configured. +func autoDetectCloudProvider(cfg *config.Config, u *ui.UI) string { imported, err := openclaw.DetectExistingConfig() if err != nil || imported == nil { - return + return "" } agentModel := imported.AgentModel if agentModel == "" { - return + return "" } // Extract provider and model name from "anthropic/claude-sonnet-4-6". @@ -534,17 +548,17 @@ func autoConfigureCloudProviders(cfg *config.Config, u *ui.UI) { } if provider == "" || provider == "ollama" { - return + return "" } // Already configured — skip. if model.HasProviderConfigured(cfg, provider) { - return + return "" } envVar := model.ProviderEnvVar(provider) if envVar == "" { - return + return "" } // Resolve API key: environment first, .env in dev mode only. 
@@ -559,16 +573,19 @@ func autoConfigureCloudProviders(cfg *config.Config, u *ui.UI) { u.Warnf("Agent model %s detected but %s is not set", agentModel, envVar) u.Dim(fmt.Sprintf(" Set it in your environment: export %s=...", envVar)) u.Dim(fmt.Sprintf(" Or configure after startup: obol model setup --provider %s", provider)) - return + return "" } u.Blank() - u.Infof("Cloud model %s detected — auto-configuring LiteLLM with %s provider", agentModel, provider) + u.Infof("Cloud model %s detected — configuring %s provider", agentModel, provider) - if err := model.ConfigureLiteLLM(cfg, u, provider, apiKey, []string{modelName}); err != nil { - u.Warnf("Auto-configure LiteLLM for %s failed: %v", provider, err) + if err := model.PatchLiteLLMProvider(cfg, u, provider, apiKey, []string{modelName}); err != nil { + u.Warnf("Auto-configure %s failed: %v", provider, err) u.Dim(fmt.Sprintf(" Run 'obol model setup --provider %s' to configure manually.", provider)) + return "" } + + return provider } // localImage describes a Docker image built from source in this repo. From 9d73fa23f73067dead7cd926e0238b063a027e75 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 17 Mar 2026 16:43:25 +0800 Subject: [PATCH 07/11] ux: prompt for cloud API key interactively in obolup.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of printing a warning that users miss, prompt for the API key during setup when a cloud model is detected in ~/.openclaw config. The key is exported so the subsequent obol bootstrap → stack up → autoConfigureLLM picks it up automatically. Falls back to a warning in non-interactive mode. Inspired by hermes-agent's interactive setup wizard pattern. 
--- obolup.sh | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/obolup.sh b/obolup.sh index b8ec7078..6b237142 100755 --- a/obolup.sh +++ b/obolup.sh @@ -1497,7 +1497,8 @@ configure_path() { # Print post-install instructions # Check if ~/.openclaw/openclaw.json specifies a cloud model that needs an API key. -# Prints guidance before the "start cluster" prompt so the user can set the key first. +# If the key is missing and we have a TTY, prompt for it interactively and export +# it so the subsequent obol bootstrap / stack up picks it up via autoConfigureLLM. check_agent_model_api_key() { local config_file="$HOME/.openclaw/openclaw.json" [[ -f "$config_file" ]] || return 0 @@ -1517,18 +1518,35 @@ except: pass [[ -n "$primary_model" ]] || return 0 # Determine provider and required env var - local provider="" env_var="" + local provider="" env_var="" provider_name="" case "$primary_model" in - *claude*) provider="anthropic"; env_var="ANTHROPIC_API_KEY" ;; - gpt*|o1*|o3*) provider="openai"; env_var="OPENAI_API_KEY" ;; + *claude*) provider="anthropic"; env_var="ANTHROPIC_API_KEY"; provider_name="Anthropic" ;; + gpt*|o1*|o3*|o4*) provider="openai"; env_var="OPENAI_API_KEY"; provider_name="OpenAI" ;; *) return 0 ;; esac echo "" if [[ -n "${!env_var:-}" ]]; then log_success "$env_var detected for $primary_model" + return 0 + fi + + # Interactive: prompt for the API key (like hermes-agent's setup wizard) + if [[ -c /dev/tty ]]; then + log_info "Your agent uses $primary_model ($provider_name)" + echo "" + local api_key="" + read -r -p " $provider_name API key ($env_var): " api_key Date: Tue, 17 Mar 2026 17:09:42 +0800 Subject: [PATCH 08/11] feat: detect CLAUDE_CODE_OAUTH_TOKEN as Anthropic API key fallback obolup.sh check_agent_model_api_key() now checks CLAUDE_CODE_OAUTH_TOKEN when ANTHROPIC_API_KEY is missing, so developers with Claude Code subscriptions skip the interactive prompt. 
Also documents the LLM auto-configuration flow and OpenClaw version management in CLAUDE.md. Closes #272 (Unit 3) --- CLAUDE.md | 11 +++++++++++ obolup.sh | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index 1de55441..e0ed19a9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -107,6 +107,8 @@ k3d: 1 server, ports 80:80 + 8080:80 + 443:443 + 8443:443, `rancher/k3s:v1.35.1- **LiteLLM gateway** (`llm` ns, port 4000): OpenAI-compatible proxy routing to Ollama/Anthropic/OpenAI. ConfigMap `litellm-config` (YAML config.yaml with model_list), Secret `litellm-secrets` (master key + API keys). Auto-configured with Ollama models during `obol stack up` (no manual `obol model setup` needed). `ConfigureLiteLLM()` patches config + Secret + restarts. Custom endpoints: `obol model setup custom --name --endpoint --model` (validates before adding). Paid remote inference stays on vanilla LiteLLM with a static route `paid/* -> openai/* -> http://127.0.0.1:8402`; no LiteLLM fork is required. OpenClaw always routes through LiteLLM (openai provider slot), never native providers; `dangerouslyDisableDeviceAuth` is enabled for Traefik-proxied access. +**Auto-configuration**: During `obol stack up`, `autoConfigureLLM()` detects host Ollama models and the cloud provider of the agent model imported from `~/.openclaw`, patches the LiteLLM config for each with `PatchLiteLLMProvider()`, then restarts LiteLLM once with `RestartLiteLLM()`, so agent chat works immediately without manual `obol model setup`. During install, `check_agent_model_api_key()` in `obolup.sh` reads the agent model from `~/.openclaw/openclaw.json`, resolves the API key from the environment (`ANTHROPIC_API_KEY`, falling back to `CLAUDE_CODE_OAUTH_TOKEN`, for Anthropic; `OPENAI_API_KEY` for OpenAI), prompts for it on the TTY when it is missing, and exports it for downstream tools. + **Per-instance overlay**: `buildLiteLLMRoutedOverlay()` reuses "ollama" provider slot pointing at `litellm.llm.svc:4000/v1` with `api: openai-completions`. App → litellm:4000 → routes by model name → actual API. ## Standalone Inference Gateway @@ -148,6 +150,15 @@ Skills = SKILL.md + optional scripts/references, embedded in `obol` binary (`int 4.
**`OBOL_DEVELOPMENT=true`** — required for `obol stack up` to auto-build local images (x402-verifier, x402-buyer) 5. **Root-owned PVCs** — `-f` flag required to remove in `obol stack purge` +### OpenClaw Version Management + +Three places pin the OpenClaw version — all must agree: +1. `internal/openclaw/OPENCLAW_VERSION` — source of truth (Renovate watches, CI reads) +2. `internal/openclaw/openclaw.go` — `openclawImageTag` constant +3. `obolup.sh` — `OPENCLAW_VERSION` shell constant for standalone installs + +`TestOpenClawVersionConsistency` in `internal/openclaw/version_test.go` catches drift. + ### Pitfalls 1. **Kubeconfig port drift** — k3d API port can change between restarts. Fix: `k3d kubeconfig write -o .workspace/config/kubeconfig.yaml --overwrite` diff --git a/obolup.sh b/obolup.sh index 6b237142..28ade1e1 100755 --- a/obolup.sh +++ b/obolup.sh @@ -1531,6 +1531,13 @@ except: pass return 0 fi + # Anthropic-specific fallback: Claude Code subscription token + if [[ "$provider" == "anthropic" && -n "${CLAUDE_CODE_OAUTH_TOKEN:-}" ]]; then + export ANTHROPIC_API_KEY="$CLAUDE_CODE_OAUTH_TOKEN" + log_success "Claude Code subscription detected (CLAUDE_CODE_OAUTH_TOKEN)" + return 0 + fi + # Interactive: prompt for the API key (like hermes-agent's setup wizard) if [[ -c /dev/tty ]]; then log_info "Your agent uses $primary_model ($provider_name)" From e9ed07c549b7234f1c93490c42b5f703bf118cfa Mon Sep 17 00:00:00 2001 From: bussyjd Date: Tue, 17 Mar 2026 17:05:59 +0800 Subject: [PATCH 09/11] feat: detect existing credentials in interactive model setup menu When `obol model setup` runs without --provider, the interactive menu now checks the environment for existing API keys and Ollama availability, showing detection badges next to each provider. If the user picks a provider with a detected credential, they are offered the option to reuse it instead of being prompted for a new key. 
Detected sources: - Anthropic: ANTHROPIC_API_KEY, CLAUDE_CODE_OAUTH_TOKEN - OpenAI: OPENAI_API_KEY - Ollama: reachable with N model(s) available The flag-based path (--provider, --api-key) is unchanged. Closes #272 (Unit 2) --- cmd/obol/model.go | 49 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/cmd/obol/model.go b/cmd/obol/model.go index c52199a4..5e1840db 100644 --- a/cmd/obol/model.go +++ b/cmd/obol/model.go @@ -60,10 +60,15 @@ func modelSetupCommand(cfg *config.Config) *cli.Command { // Interactive mode if flags not provided if provider == "" { + creds := detectCredentials() providers, _ := model.GetAvailableProviders(cfg) options := make([]string, len(providers)) for i, p := range providers { - options[i] = fmt.Sprintf("%s (%s)", p.Name, p.ID) + label := fmt.Sprintf("%s (%s)", p.Name, p.ID) + if det, ok := creds[p.ID]; ok { + label += fmt.Sprintf(" — detected: %s", det.source) + } + options[i] = label } idx, err := u.Select("Select a provider:", options, 0) @@ -71,6 +76,14 @@ func modelSetupCommand(cfg *config.Config) *cli.Command { return err } provider = providers[idx].ID + + // If a credential was detected for the chosen provider, offer to use it + if det, ok := creds[provider]; ok && det.key != "" && apiKey == "" { + u.Infof("%s API key detected (%s)", providers[idx].Name, det.source) + if u.Confirm("Use detected credential?", true) { + apiKey = det.key + } + } } // Provider-specific flow @@ -370,6 +383,40 @@ func providerInfo(id string) model.ProviderInfo { return model.ProviderInfo{ID: id, Name: id} } +// detectedCredential describes a credential found in the environment. +type detectedCredential struct { + key string // the actual API key value (empty for Ollama) + source string // human-readable description of where it was found +} + +// detectCredentials checks the environment for existing provider credentials. +// It returns a map of provider ID to detected credential info. 
Only providers +// with a detected credential appear in the map. +func detectCredentials() map[string]detectedCredential { + creds := make(map[string]detectedCredential) + + // Anthropic: check ANTHROPIC_API_KEY, then CLAUDE_CODE_OAUTH_TOKEN + if key := os.Getenv("ANTHROPIC_API_KEY"); key != "" { + creds["anthropic"] = detectedCredential{key: key, source: "ANTHROPIC_API_KEY"} + } else if key := os.Getenv("CLAUDE_CODE_OAUTH_TOKEN"); key != "" { + creds["anthropic"] = detectedCredential{key: key, source: "CLAUDE_CODE_OAUTH_TOKEN"} + } + + // OpenAI: check OPENAI_API_KEY + if key := os.Getenv("OPENAI_API_KEY"); key != "" { + creds["openai"] = detectedCredential{key: key, source: "OPENAI_API_KEY"} + } + + // Ollama: check if reachable with models + if ollamaModels, err := model.ListOllamaModels(); err == nil && len(ollamaModels) > 0 { + creds["ollama"] = detectedCredential{ + source: fmt.Sprintf("%d model(s) available", len(ollamaModels)), + } + } + + return creds +} + // promptModelPull interactively asks the user which Ollama model to pull. 
func promptModelPull() (string, error) { type suggestion struct { From 7f4ab6bd3beffba3b61e36fce503d83e398c8560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= Date: Fri, 20 Mar 2026 11:37:16 +0000 Subject: [PATCH 10/11] Change agent default --- .../base/templates/obol-agent-monetize-rbac.yaml | 8 ++++---- internal/tunnel/tunnel.go | 2 +- internal/x402/bdd_integration_test.go | 6 +++--- internal/x402/setup.go | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml b/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml index 72d7fc38..cc08b09b 100644 --- a/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml +++ b/internal/embed/infrastructure/base/templates/obol-agent-monetize-rbac.yaml @@ -98,7 +98,7 @@ roleRef: subjects: - kind: ServiceAccount name: openclaw - namespace: openclaw-default + namespace: openclaw-obol-agent --- #------------------------------------------------------------------------------ @@ -115,7 +115,7 @@ roleRef: subjects: - kind: ServiceAccount name: openclaw - namespace: openclaw-default + namespace: openclaw-obol-agent --- @@ -151,7 +151,7 @@ roleRef: subjects: - kind: ServiceAccount name: openclaw - namespace: openclaw-default + namespace: openclaw-obol-agent --- #------------------------------------------------------------------------------ @@ -186,4 +186,4 @@ roleRef: subjects: - kind: ServiceAccount name: openclaw - namespace: openclaw-default + namespace: openclaw-obol-agent diff --git a/internal/tunnel/tunnel.go b/internal/tunnel/tunnel.go index 02fefe94..4927c048 100644 --- a/internal/tunnel/tunnel.go +++ b/internal/tunnel/tunnel.go @@ -102,7 +102,7 @@ func InjectBaseURL(cfg *config.Config, tunnelURL string) error { cmd := exec.Command(kubectlPath, "--kubeconfig", kubeconfigPath, "set", "env", "deployment/openclaw", - "-n", "openclaw-default", + "-n", "openclaw-obol-agent", 
fmt.Sprintf("AGENT_BASE_URL=%s", strings.TrimRight(tunnelURL, "/")), ) return cmd.Run() diff --git a/internal/x402/bdd_integration_test.go b/internal/x402/bdd_integration_test.go index b17190f5..8a41ae37 100644 --- a/internal/x402/bdd_integration_test.go +++ b/internal/x402/bdd_integration_test.go @@ -162,7 +162,7 @@ func TestMain(m *testing.M) { // Wait for the obol-agent pod to be Running. log.Println(" Waiting for obol-agent pod...") - if err := waitForAnyPod(kubectlBin, kubeconfigPath, "openclaw-default", + if err := waitForAnyPod(kubectlBin, kubeconfigPath, "openclaw-obol-agent", []string{"app=openclaw", "app.kubernetes.io/name=openclaw"}, 300*time.Second); err != nil { teardown(obolBin) log.Fatalf("obol-agent not ready: %v", err) @@ -299,7 +299,7 @@ func ensureExistingClusterBootstrap(obolBin, kubectlBin, kubeconfig string) erro if err := waitForPod(kubectlBin, kubeconfig, "x402", "app=x402-verifier", 120*time.Second); err != nil { return fmt.Errorf("x402-verifier not ready: %w", err) } - if err := waitForAnyPod(kubectlBin, kubeconfig, "openclaw-default", + if err := waitForAnyPod(kubectlBin, kubeconfig, "openclaw-obol-agent", []string{"app=openclaw", "app.kubernetes.io/name=openclaw"}, 180*time.Second); err != nil { return fmt.Errorf("obol-agent not ready: %w", err) } @@ -393,7 +393,7 @@ func waitForServiceOfferReady(kubectlBin, kubeconfig, name, namespace string, ti // This simulates the heartbeat cron firing. 
func triggerReconciliation(kubectlBin, kubeconfig string) { out, err := kubectl.Output(kubectlBin, kubeconfig, - "exec", "-i", "-n", "openclaw-default", "deploy/openclaw", "-c", "openclaw", + "exec", "-i", "-n", "openclaw-obol-agent", "deploy/openclaw", "-c", "openclaw", "--", "python3", "/data/.openclaw/skills/sell/scripts/monetize.py", "process", "--all") if err != nil { log.Printf(" manual reconciliation error: %v\n%s", err, out) diff --git a/internal/x402/setup.go b/internal/x402/setup.go index b9d644e0..6bc77696 100644 --- a/internal/x402/setup.go +++ b/internal/x402/setup.go @@ -169,7 +169,7 @@ roleRef: subjects: - kind: ServiceAccount name: openclaw - namespace: openclaw-default + namespace: openclaw-obol-agent `) // EnsureVerifier deploys the x402 verifier subsystem if it doesn't exist. From f2b465273407db1a9915af6c59b89eebcc3aedd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ois=C3=ADn=20Kyne?= Date: Fri, 20 Mar 2026 11:39:13 +0000 Subject: [PATCH 11/11] Missing file --- internal/tunnel/agent.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/tunnel/agent.go b/internal/tunnel/agent.go index 03b90d1f..3656ea7d 100644 --- a/internal/tunnel/agent.go +++ b/internal/tunnel/agent.go @@ -10,7 +10,7 @@ import ( "github.com/ObolNetwork/obol-stack/internal/config" ) -const agentDeploymentID = "default" +const agentDeploymentID = "obol-agent" // SyncAgentBaseURL patches AGENT_BASE_URL in the obol-agent's values-obol.yaml // and runs helmfile sync to apply the change. It is a no-op if the obol-agent