From c830bfedbbb59117cbd4eedbf08b657442b187f2 Mon Sep 17 00:00:00 2001
From: Fabian Wiesel
Date: Thu, 12 Mar 2026 10:12:00 +0100
Subject: [PATCH 1/3] Extract registerWithManager from SetupWithManager for
 testability

Split SetupWithManager into two methods for controllers with OpenStack
dependencies: registerWithManager handles only the controller-runtime
registration, while SetupWithManager acquires OpenStack clients first,
then delegates to registerWithManager. This allows integration tests to
inject fake OpenStack clients and call registerWithManager directly.
---
 internal/controller/aggregates_controller.go   | 14 ++++++++++----
 internal/controller/decomission_controller.go  | 14 ++++++++++----
 internal/controller/eviction_controller.go     | 14 ++++++++++----
 .../hypervisor_maintenance_controller.go       | 16 +++++++++++-----
 internal/controller/onboarding_controller.go   | 14 ++++++++++----
 internal/controller/traits_controller.go       | 14 ++++++++++----
 6 files changed, 61 insertions(+), 25 deletions(-)

diff --git a/internal/controller/aggregates_controller.go b/internal/controller/aggregates_controller.go
index e3a3a71..d816c6e 100644
--- a/internal/controller/aggregates_controller.go
+++ b/internal/controller/aggregates_controller.go
@@ -208,6 +208,15 @@ func slicesEqualUnordered(a, b []string) bool {
 	return true
 }
 
+// registerWithManager registers the controller with the Manager without acquiring OpenStack clients.
+// This is useful for testing where clients are injected directly.
+func (ac *AggregatesController) registerWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		Named(AggregatesControllerName).
+		For(&kvmv1.Hypervisor{}, builder.WithPredicates(utils.LifecycleEnabledPredicate)).
+		Complete(ac)
+}
+
 // SetupWithManager sets up the controller with the Manager.
 func (ac *AggregatesController) SetupWithManager(mgr ctrl.Manager) error {
 	ctx := context.Background()
@@ -218,8 +227,5 @@ func (ac *AggregatesController) SetupWithManager(mgr ctrl.Manager) error {
 		return err
 	}
 
-	return ctrl.NewControllerManagedBy(mgr).
-		Named(AggregatesControllerName).
-		For(&kvmv1.Hypervisor{}, builder.WithPredicates(utils.LifecycleEnabledPredicate)).
-		Complete(ac)
+	return ac.registerWithManager(mgr)
 }
diff --git a/internal/controller/decomission_controller.go b/internal/controller/decomission_controller.go
index 1bbd18a..14ee69d 100644
--- a/internal/controller/decomission_controller.go
+++ b/internal/controller/decomission_controller.go
@@ -178,6 +178,15 @@ func (r *NodeDecommissionReconciler) markOffboarded(ctx context.Context, hv *kvm
 	return nil
 }
 
+// registerWithManager registers the controller with the Manager without acquiring OpenStack clients.
+// This is useful for testing where clients are injected directly.
+func (r *NodeDecommissionReconciler) registerWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		Named(DecommissionControllerName).
+		For(&kvmv1.Hypervisor{}).
+		Complete(r)
+}
+
 // SetupWithManager sets up the controller with the Manager.
 func (r *NodeDecommissionReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	ctx := context.Background()
@@ -195,8 +204,5 @@ func (r *NodeDecommissionReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	}
 	r.placementClient.Microversion = "1.39" // yoga, or later
 
-	return ctrl.NewControllerManagedBy(mgr).
-		Named(DecommissionControllerName).
-		For(&kvmv1.Hypervisor{}).
-		Complete(r)
+	return r.registerWithManager(mgr)
 }
diff --git a/internal/controller/eviction_controller.go b/internal/controller/eviction_controller.go
index b8fc678..2dedd18 100644
--- a/internal/controller/eviction_controller.go
+++ b/internal/controller/eviction_controller.go
@@ -391,6 +391,15 @@ func (r *EvictionReconciler) coldMigrate(ctx context.Context, uuid string, evict
 	return nil
 }
 
+// registerWithManager registers the controller with the Manager without acquiring OpenStack clients.
+// This is useful for testing where clients are injected directly.
+func (r *EvictionReconciler) registerWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		Named(EvictionControllerName).
+		For(&kvmv1.Eviction{}).
+		Complete(r)
+}
+
 // SetupWithManager sets up the controller with the Manager.
 func (r *EvictionReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	ctx := context.Background()
@@ -401,8 +410,5 @@ func (r *EvictionReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	}
 	r.computeClient.Microversion = "2.90" // Xena (or later)
 
-	return ctrl.NewControllerManagedBy(mgr).
-		Named(EvictionControllerName).
-		For(&kvmv1.Eviction{}).
-		Complete(r)
+	return r.registerWithManager(mgr)
 }
diff --git a/internal/controller/hypervisor_maintenance_controller.go b/internal/controller/hypervisor_maintenance_controller.go
index eeaee0a..b41d6c7 100644
--- a/internal/controller/hypervisor_maintenance_controller.go
+++ b/internal/controller/hypervisor_maintenance_controller.go
@@ -256,6 +256,16 @@ func (hec *HypervisorMaintenanceController) ensureEviction(ctx context.Context,
 	}
 }
 
+// registerWithManager registers the controller with the Manager without acquiring OpenStack clients.
+// This is useful for testing where clients are injected directly.
+func (hec *HypervisorMaintenanceController) registerWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		Named(HypervisorMaintenanceControllerName).
+		For(&kvmv1.Hypervisor{}).
+		Owns(&kvmv1.Eviction{}). // trigger Reconcile whenever an owned Eviction is created/updated/deleted
+		Complete(hec)
+}
+
 // SetupWithManager sets up the controller with the Manager.
 func (hec *HypervisorMaintenanceController) SetupWithManager(mgr ctrl.Manager) error {
 	ctx := context.Background()
@@ -267,9 +277,5 @@ func (hec *HypervisorMaintenanceController) SetupWithManager(mgr ctrl.Manager) e
 	}
 	hec.computeClient.Microversion = "2.90" // Xena (or later)
 
-	return ctrl.NewControllerManagedBy(mgr).
-		Named(HypervisorMaintenanceControllerName).
-		For(&kvmv1.Hypervisor{}).
-		Owns(&kvmv1.Eviction{}). // trigger Reconcile whenever an Own-ed eviction is created/updated/deleted
-		Complete(hec)
+	return hec.registerWithManager(mgr)
 }
diff --git a/internal/controller/onboarding_controller.go b/internal/controller/onboarding_controller.go
index b743624..74b449a 100644
--- a/internal/controller/onboarding_controller.go
+++ b/internal/controller/onboarding_controller.go
@@ -613,6 +613,15 @@ func (r *OnboardingController) patchStatus(ctx context.Context, hv, base *kvmv1.
 		k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(OnboardingControllerName))
 }
 
+// registerWithManager registers the controller with the Manager without acquiring OpenStack clients.
+// This is useful for testing where clients are injected directly.
+func (r *OnboardingController) registerWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		Named(OnboardingControllerName).
+		For(&kvmv1.Hypervisor{}).
+		Complete(r)
+}
+
 // SetupWithManager sets up the controller with the Manager.
 func (r *OnboardingController) SetupWithManager(mgr ctrl.Manager) error {
 	ctx := context.Background()
@@ -644,8 +653,5 @@ func (r *OnboardingController) SetupWithManager(mgr ctrl.Manager) error {
 	}
 	r.testNetworkClient.ResourceBase = fmt.Sprintf("%vv2.0/", r.testNetworkClient.Endpoint)
 
-	return ctrl.NewControllerManagedBy(mgr).
-		Named(OnboardingControllerName).
-		For(&kvmv1.Hypervisor{}).
-		Complete(r)
+	return r.registerWithManager(mgr)
 }
diff --git a/internal/controller/traits_controller.go b/internal/controller/traits_controller.go
index 260efa8..cace12e 100644
--- a/internal/controller/traits_controller.go
+++ b/internal/controller/traits_controller.go
@@ -181,6 +181,15 @@ func getTraitCondition(err error, msg string) metav1.Condition {
 	}
 }
 
+// registerWithManager registers the controller with the Manager without acquiring OpenStack clients.
+// This is useful for testing where clients are injected directly.
+func (tc *TraitsController) registerWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		Named(TraitsControllerName).
+		For(&kvmv1.Hypervisor{}).
+		Complete(tc)
+}
+
 // SetupWithManager sets up the controller with the Manager.
 func (tc *TraitsController) SetupWithManager(mgr ctrl.Manager) error {
 	ctx := context.Background()
@@ -192,8 +201,5 @@ func (tc *TraitsController) SetupWithManager(mgr ctrl.Manager) error {
 	}
 	tc.serviceClient.Microversion = "1.39" // yoga, or later
 
-	return ctrl.NewControllerManagedBy(mgr).
-		Named(TraitsControllerName).
-		For(&kvmv1.Hypervisor{}).
-		Complete(tc)
+	return tc.registerWithManager(mgr)
 }

From 6e389f2dfbd7d0bf64820be5c4d959ceb8b5a703 Mon Sep 17 00:00:00 2001
From: Fabian Wiesel
Date: Thu, 12 Mar 2026 12:46:51 +0100
Subject: [PATCH 2/3] Add integration test for full onboarding lifecycle

Test all controllers running together in a real ctrl.Manager with
envtest and fake OpenStack HTTP servers. Covers two scenarios:

- SkipTests=true (fast path, ~3s)
- SkipTests=false (full test VM validation, ~60s)

Verifies the complete Node -> Hypervisor -> onboarding -> Ready=True
flow with stateful mock handlers and a catch-all failure on unhandled
OpenStack requests.
---
 go.mod                                  |   2 +-
 internal/controller/integration_test.go | 791 ++++++++++++++++++++++++
 2 files changed, 792 insertions(+), 1 deletion(-)
 create mode 100644 internal/controller/integration_test.go

diff --git a/go.mod b/go.mod
index 6953213..23dd885 100644
--- a/go.mod
+++ b/go.mod
@@ -54,7 +54,7 @@ require (
 	github.com/google/gnostic-models v0.7.0 // indirect
 	github.com/google/go-cmp v0.7.0 // indirect
 	github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83 // indirect
-	github.com/google/uuid v1.6.0 // indirect
+	github.com/google/uuid v1.6.0
 	github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
diff --git a/internal/controller/integration_test.go b/internal/controller/integration_test.go
new file mode 100644
index 0000000..c3ec096
--- /dev/null
+++ b/internal/controller/integration_test.go
@@ -0,0 +1,791 @@
+/*
+SPDX-FileCopyrightText: Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors
+SPDX-License-Identifier: Apache-2.0
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controller
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"slices"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/gophercloud/gophercloud/v2/testhelper"
+	thclient "github.com/gophercloud/gophercloud/v2/testhelper/client"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/meta"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/kubernetes/scheme"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/config"
+	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
+
+	kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
+)
+
+// ---------------------------------------------------------------------------
+// Stateful mock state for OpenStack endpoints
+// ---------------------------------------------------------------------------
+
+type mockAggregate struct {
+	ID    int
+	Name  string
+	UUID  string
+	Hosts []string
+}
+
+type mockServer struct {
+	ID     string
+	Name   string
+	Status string
+}
+
+type mockState struct {
+	mu sync.Mutex
+
+	// Nova hypervisor identity
+	hypervisorID string
+	serviceID    string
+
+	// Nova service state
+	serviceEnabled    bool
+	serviceForcedDown bool
+
+	// Aggregates (keyed by ID)
+	aggregates map[int]*mockAggregate
+
+	// Placement traits
+	traits       []string
+	rpGeneration int
+
+	// Test VMs (keyed by server ID)
+	servers map[string]*mockServer
+
+	// HA service
+	haEnabled bool
+
+	// K8s client for dynamic lookups (e.g. Hypervisor UID)
+	k8sClient client.Client
+	hvName    string
+}
+
+func newMockState(k8sClient client.Client, hvName string) *mockState {
+	return &mockState{
+		hypervisorID:      "hv-uuid-1234",
+		serviceID:         "service-id-1",
+		serviceEnabled:    false,
+		serviceForcedDown: true,
+		aggregates: map[int]*mockAggregate{
+			1: {ID: 1, Name: "test-az", UUID: "az-uuid-1", Hosts: []string{}},
+			2: {ID: 2, Name: testAggregateName, UUID: "test-agg-uuid-2", Hosts: []string{}},
+			3: {ID: 3, Name: "prod-aggregate", UUID: "prod-uuid-3", Hosts: []string{}},
+		},
+		traits:       []string{"HW_CPU_X86_VMX"},
+		rpGeneration: 1,
+		servers:      map[string]*mockServer{},
+		haEnabled:    false,
+		k8sClient:    k8sClient,
+		hvName:       hvName,
+	}
+}
+
+// ---------------------------------------------------------------------------
+// Helper: JSON writing
+// ---------------------------------------------------------------------------
+
+func writeJSON(w http.ResponseWriter, code int, body string) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(code)
+	_, _ = fmt.Fprint(w, body)
+}
+
+// ---------------------------------------------------------------------------
+// Mock handler registration
+// ---------------------------------------------------------------------------
+
+func (s *mockState) registerHandlers(mux *http.ServeMux) {
+	// Catch-all: fail on any unhandled request
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		defer GinkgoRecover()
+		body, err := io.ReadAll(r.Body)
+		Expect(err).NotTo(HaveOccurred())
+		Fail(fmt.Sprintf("Unhandled request to fake server: %s %s body=%s", r.Method, r.URL.String(), string(body)))
+	})
+
+	// --- Nova: Hypervisors ---
+	mux.HandleFunc("GET /os-hypervisors/detail", s.handleListHypervisors)
+
+	// --- Nova: Services ---
+	mux.HandleFunc("PUT /os-services/service-id-1", s.handleUpdateService)
+
+	// --- Nova: Aggregates ---
+	mux.HandleFunc("GET /os-aggregates", s.handleListAggregates)
+	mux.HandleFunc("POST /os-aggregates/1/action", s.handleAggregateAction(1))
+	mux.HandleFunc("POST /os-aggregates/2/action", s.handleAggregateAction(2))
+	mux.HandleFunc("POST /os-aggregates/3/action", s.handleAggregateAction(3))
+
+	// --- Placement: Traits ---
+	mux.HandleFunc("GET /resource_providers/hv-uuid-1234/traits", s.handleGetTraits)
+	mux.HandleFunc("PUT /resource_providers/hv-uuid-1234/traits", s.handlePutTraits)
+
+	// --- Nova: Flavors (test compute client) ---
+	mux.HandleFunc("GET /flavors/detail", s.handleListFlavors)
+
+	// --- Glance: Images (test image client, no ResourceBase so path is /images) ---
+	mux.HandleFunc("GET /images", s.handleListImages)
+
+	// --- Neutron: Networks (test network client, no ResourceBase so path is /networks) ---
+	mux.HandleFunc("GET /networks", s.handleListNetworks)
+
+	// --- Nova: Servers ---
+	mux.HandleFunc("POST /servers", s.handleCreateServer)
+	mux.HandleFunc("GET /servers/detail", s.handleListServersDetail)
+	mux.HandleFunc("GET /servers", s.handleListServers)
+
+	// Dynamic server routes: individual server operations (GET, DELETE, POST action).
+ mux.HandleFunc("GET /servers/", s.handleServerGet) + mux.HandleFunc("DELETE /servers/", s.handleServerDelete) + mux.HandleFunc("POST /servers/", s.handleServerAction) + + // --- HA Service --- + mux.HandleFunc("POST /instance-ha", s.handleInstanceHA) +} + +// --- Handler implementations --- + +func (s *mockState) handleListHypervisors(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + hostname := s.hvName + body := fmt.Sprintf(`{ + "hypervisors": [{ + "service": {"host": "compute-host", "id": "%s", "disabled_reason": ""}, + "cpu_info": {"arch": "x86_64", "model": "Nehalem", "vendor": "Intel", "features": ["pge"], "topology": {"cores": 1, "threads": 1, "sockets": 4}}, + "current_workload": 0, + "status": "enabled", + "state": "up", + "disk_available_least": 0, + "host_ip": "1.1.1.1", + "free_disk_gb": 1028, + "free_ram_mb": 7680, + "hypervisor_hostname": %q, + "hypervisor_type": "fake", + "hypervisor_version": 2002000, + "id": "%s", + "local_gb": 1028, + "local_gb_used": 0, + "memory_mb": 8192, + "memory_mb_used": 512, + "running_vms": 0, + "vcpus": 1, + "vcpus_used": 0 + }] + }`, s.serviceID, hostname, s.hypervisorID) + writeJSON(w, http.StatusOK, body) +} + +func (s *mockState) handleUpdateService(w http.ResponseWriter, r *http.Request) { + defer GinkgoRecover() + s.mu.Lock() + defer s.mu.Unlock() + + bodyBytes, err := io.ReadAll(r.Body) + Expect(err).NotTo(HaveOccurred()) + + var req map[string]any + Expect(json.Unmarshal(bodyBytes, &req)).To(Succeed()) + + if status, ok := req["status"].(string); ok { + s.serviceEnabled = (status == "enabled") + } + if fd, ok := req["forced_down"].(bool); ok { + s.serviceForcedDown = fd + } + + status := "disabled" + if s.serviceEnabled { + status = "enabled" + } + writeJSON(w, http.StatusOK, fmt.Sprintf(`{"service": {"id": "%s", "status": "%s"}}`, s.serviceID, status)) +} + +func (s *mockState) handleListAggregates(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + var parts []string + for _, agg := range s.aggregates { + hostsJSON, err := json.Marshal(agg.Hosts) + Expect(err).NotTo(HaveOccurred()) + parts = append(parts, fmt.Sprintf(`{ + "id": %d, + "name": %q, + "uuid": %q, + "hosts": %s, + "availability_zone": null, + "metadata": {} + }`, agg.ID, agg.Name, agg.UUID, string(hostsJSON))) + } + writeJSON(w, http.StatusOK, fmt.Sprintf(`{"aggregates": [%s]}`, strings.Join(parts, ","))) +} + +func (s *mockState) handleAggregateAction(aggID int) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + defer GinkgoRecover() + s.mu.Lock() + defer s.mu.Unlock() + + bodyBytes, err := io.ReadAll(r.Body) + Expect(err).NotTo(HaveOccurred()) + + var req map[string]map[string]string + Expect(json.Unmarshal(bodyBytes, &req)).To(Succeed()) + + agg, exists := s.aggregates[aggID] + Expect(exists).To(BeTrue(), "aggregate %d not found", aggID) + + if addHost, ok := req["add_host"]; ok { + host := addHost["host"] + if !slices.Contains(agg.Hosts, host) { + agg.Hosts = append(agg.Hosts, host) + } + } + + if removeHost, ok := req["remove_host"]; ok { + host := removeHost["host"] + newHosts := make([]string, 0, len(agg.Hosts)) + for _, h := range agg.Hosts { + if h != host { + newHosts = append(newHosts, h) + } + } + agg.Hosts = newHosts + } + + hostsJSON, err := json.Marshal(agg.Hosts) + Expect(err).NotTo(HaveOccurred()) + writeJSON(w, http.StatusOK, fmt.Sprintf(`{ + "aggregate": { + "id": %d, + "name": %q, + "uuid": %q, + "hosts": %s, + "availability_zone": null, + "metadata": {} + } + }`, 
agg.ID, agg.Name, agg.UUID, string(hostsJSON))) + } +} + +func (s *mockState) handleGetTraits(w http.ResponseWriter, _ *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + traitsJSON, err := json.Marshal(s.traits) + Expect(err).NotTo(HaveOccurred()) + writeJSON(w, http.StatusOK, fmt.Sprintf(`{ + "traits": %s, + "resource_provider_generation": %d + }`, string(traitsJSON), s.rpGeneration)) +} + +func (s *mockState) handlePutTraits(w http.ResponseWriter, r *http.Request) { + defer GinkgoRecover() + s.mu.Lock() + defer s.mu.Unlock() + + bodyBytes, err := io.ReadAll(r.Body) + Expect(err).NotTo(HaveOccurred()) + + var req struct { + Traits []string `json:"traits"` + ResourceProviderGeneration int `json:"resource_provider_generation"` + } + Expect(json.Unmarshal(bodyBytes, &req)).To(Succeed()) + + s.traits = req.Traits + s.rpGeneration++ + + traitsJSON, err := json.Marshal(s.traits) + Expect(err).NotTo(HaveOccurred()) + writeJSON(w, http.StatusOK, fmt.Sprintf(`{ + "traits": %s, + "resource_provider_generation": %d + }`, string(traitsJSON), s.rpGeneration)) +} + +func (s *mockState) handleListFlavors(w http.ResponseWriter, _ *http.Request) { + writeJSON(w, http.StatusOK, `{ + "flavors": [{ + "id": "flavor-1", + "name": "c_k_c2_m2_v2", + "ram": 2048, + "vcpus": 2, + "disk": 0, + "swap": 0, + "OS-FLV-DISABLED:disabled": false, + "OS-FLV-EXT-DATA:ephemeral": 0, + "os-flavor-access:is_public": true, + "rxtx_factor": 1.0, + "description": null, + "extra_specs": {}, + "links": [] + }] + }`) +} + +func (s *mockState) handleListImages(w http.ResponseWriter, _ *http.Request) { + writeJSON(w, http.StatusOK, `{ + "images": [{ + "id": "image-1", + "name": "cirros-kvm", + "status": "active", + "visibility": "public", + "container_format": "bare", + "disk_format": "qcow2", + "min_disk": 0, + "min_ram": 0, + "size": 13167616, + "created_at": "2024-01-01T00:00:00Z", + "updated_at": "2024-01-01T00:00:00Z", + "self": "/v2/images/image-1", + "file": "/v2/images/image-1/file", + "schema": "/v2/schemas/image" + }] + }`) +} + +func (s *mockState) handleListNetworks(w http.ResponseWriter, _ *http.Request) { + writeJSON(w, http.StatusOK, `{ + "networks": [{ + "id": "network-1", + "name": "test-net", + "admin_state_up": true, + "shared": false, + "status": "ACTIVE", + "subnets": [], + "tenant_id": "project-id", + "project_id": "project-id" + }] + }`) +} + +func (s *mockState) handleCreateServer(w http.ResponseWriter, r *http.Request) { + defer GinkgoRecover() + s.mu.Lock() + defer s.mu.Unlock() + + bodyBytes, err := io.ReadAll(r.Body) + Expect(err).NotTo(HaveOccurred()) + + var req struct { + Server struct { + Name string `json:"name"` + } `json:"server"` + } + Expect(json.Unmarshal(bodyBytes, &req)).To(Succeed()) + + serverID := "server-" + uuid.New().String()[:8] + srv := &mockServer{ + ID: serverID, + Name: req.Server.Name, + Status: "BUILD", + } + s.servers[serverID] = srv + + writeJSON(w, http.StatusAccepted, fmt.Sprintf(`{ + "server": { + "id": "%s", + "name": "%s", + "status": "BUILD" + } + }`, srv.ID, srv.Name)) +} + +func (s *mockState) handleListServersDetail(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + nameFilter := r.URL.Query().Get("name") + var parts []string + for _, srv := range s.servers { + if nameFilter != "" && !strings.HasPrefix(srv.Name, nameFilter) { + continue + } + // Transition BUILD -> ACTIVE on list (simulates async boot completing) + if srv.Status == "BUILD" { + srv.Status = "ACTIVE" + } + parts = append(parts, fmt.Sprintf(`{ + "id": "%s", + "name": 
"%s", + "status": "%s" + }`, srv.ID, srv.Name, srv.Status)) + } + writeJSON(w, http.StatusOK, fmt.Sprintf(`{"servers": [%s], "servers_links": []}`, strings.Join(parts, ","))) +} + +func (s *mockState) handleListServers(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + nameFilter := r.URL.Query().Get("name") + var parts []string + for _, srv := range s.servers { + if nameFilter != "" && !strings.HasPrefix(srv.Name, nameFilter) { + continue + } + parts = append(parts, fmt.Sprintf(`{ + "id": "%s", + "name": "%s", + "status": "%s" + }`, srv.ID, srv.Name, srv.Status)) + } + writeJSON(w, http.StatusOK, fmt.Sprintf(`{"servers": [%s], "servers_links": []}`, strings.Join(parts, ","))) +} + +func (s *mockState) handleServerGet(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + parts := strings.Split(strings.TrimPrefix(r.URL.Path, "/servers/"), "/") + serverID := parts[0] + + srv, exists := s.servers[serverID] + if !exists { + writeJSON(w, http.StatusNotFound, `{"itemNotFound": {"message": "server not found"}}`) + return + } + + writeJSON(w, http.StatusOK, fmt.Sprintf(`{ + "server": { + "id": "%s", + "name": "%s", + "status": "%s" + } + }`, srv.ID, srv.Name, srv.Status)) +} + +func (s *mockState) handleServerDelete(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + parts := strings.Split(strings.TrimPrefix(r.URL.Path, "/servers/"), "/") + serverID := parts[0] + + if _, exists := s.servers[serverID]; !exists { + w.WriteHeader(http.StatusNotFound) + return + } + delete(s.servers, serverID) + w.WriteHeader(http.StatusNoContent) +} + +func (s *mockState) handleServerAction(w http.ResponseWriter, r *http.Request) { + defer GinkgoRecover() + s.mu.Lock() + defer s.mu.Unlock() + + pathParts := strings.Split(strings.TrimPrefix(r.URL.Path, "/servers/"), "/") + serverID := pathParts[0] + + srv, exists := s.servers[serverID] + if !exists { + writeJSON(w, http.StatusNotFound, `{"itemNotFound": {"message": "server not found"}}`) + return + } + + bodyBytes, err := io.ReadAll(r.Body) + Expect(err).NotTo(HaveOccurred()) + + var req map[string]any + Expect(json.Unmarshal(bodyBytes, &req)).To(Succeed()) + + if _, ok := req["os-getConsoleOutput"]; ok { + // Return console output containing the server name (magic string) + writeJSON(w, http.StatusOK, fmt.Sprintf(`{"output": "FAKE CONSOLE OUTPUT\n%s\nDONE\n"}`, srv.Name)) + return + } + + // Unhandled action + Fail(fmt.Sprintf("Unhandled server action for %s: %s", serverID, string(bodyBytes))) +} + +func (s *mockState) handleInstanceHA(w http.ResponseWriter, _ *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + s.haEnabled = true + writeJSON(w, http.StatusOK, `{}`) +} + +// --------------------------------------------------------------------------- +// Integration test +// --------------------------------------------------------------------------- + +var _ = Describe("Integration: Full Onboarding Lifecycle", func() { + const ( + eventuallyTimeout = 2 * time.Minute + pollingInterval = 500 * time.Millisecond + ) + + // Shared helper to get the Hypervisor + getHypervisor := func(ctx context.Context, name string) func() (*kvmv1.Hypervisor, error) { + return func() (*kvmv1.Hypervisor, error) { + hv := &kvmv1.Hypervisor{} + err := k8sClient.Get(ctx, types.NamespacedName{Name: name}, hv) + return hv, err + } + } + + setupAndRun := func(nodeName, lifecycleLabel string) { + var ( + fakeServer testhelper.FakeServer + state *mockState + mgrCancel context.CancelFunc + ) + + 
+		BeforeEach(func(ctx SpecContext) {
+			By("Setting up the OpenStack fake server")
+			fakeServer = testhelper.SetupHTTP()
+			DeferCleanup(fakeServer.Teardown)
+
+			// Point HA service URL at our fake server
+			os.Setenv("KVM_HA_SERVICE_URL", fakeServer.Endpoint()+"instance-ha")
+			DeferCleanup(func() { os.Unsetenv("KVM_HA_SERVICE_URL") })
+
+			// Create stateful mock
+			state = newMockState(k8sClient, nodeName)
+			state.registerHandlers(fakeServer.Mux)
+
+			By("Creating a controller manager with all controllers")
+			skipNameValidation := true
+			mgr, err := ctrl.NewManager(cfg, ctrl.Options{
+				Scheme: scheme.Scheme,
+				Metrics: metricsserver.Options{
+					BindAddress: "0", // disable metrics server
+				},
+				Controller: config.Controller{
+					SkipNameValidation: &skipNameValidation,
+				},
+			})
+			Expect(err).NotTo(HaveOccurred())
+
+			// Build service clients pointing at the fake server
+			computeClient := thclient.ServiceClient(fakeServer)
+			testComputeClient := thclient.ServiceClient(fakeServer)
+			testImageClient := thclient.ServiceClient(fakeServer)
+			testNetworkClient := thclient.ServiceClient(fakeServer)
+			placementClient := thclient.ServiceClient(fakeServer)
+
+			// -- Controllers without OpenStack deps --
+			hvCtrl := &HypervisorController{Client: mgr.GetClient(), Scheme: mgr.GetScheme()}
+			Expect(hvCtrl.SetupWithManager(mgr)).To(Succeed())
+
+			gardenerCtrl := &GardenerNodeLifecycleController{Client: mgr.GetClient(), Scheme: mgr.GetScheme()}
+			Expect(gardenerCtrl.SetupWithManager(mgr, "default")).To(Succeed())
+
+			// -- Controllers with OpenStack deps (use registerWithManager) --
+			onboardingCtrl := &OnboardingController{
+				Client:            mgr.GetClient(),
+				Scheme:            mgr.GetScheme(),
+				computeClient:     computeClient,
+				testComputeClient: testComputeClient,
+				testImageClient:   testImageClient,
+				testNetworkClient: testNetworkClient,
+			}
+			Expect(onboardingCtrl.registerWithManager(mgr)).To(Succeed())
+
+			aggregatesCtrl := &AggregatesController{
+				Client:        mgr.GetClient(),
+				Scheme:        mgr.GetScheme(),
+				computeClient: computeClient,
+			}
+			Expect(aggregatesCtrl.registerWithManager(mgr)).To(Succeed())
+
+			traitsCtrl := &TraitsController{
+				Client:        mgr.GetClient(),
+				Scheme:        mgr.GetScheme(),
+				serviceClient: placementClient,
+			}
+			Expect(traitsCtrl.registerWithManager(mgr)).To(Succeed())
+
+			maintenanceCtrl := &HypervisorMaintenanceController{
+				Client:        mgr.GetClient(),
+				Scheme:        mgr.GetScheme(),
+				computeClient: computeClient,
+			}
+			Expect(maintenanceCtrl.registerWithManager(mgr)).To(Succeed())
+
+			decommissionCtrl := &NodeDecommissionReconciler{
+				Client:          mgr.GetClient(),
+				Scheme:          mgr.GetScheme(),
+				computeClient:   computeClient,
+				placementClient: placementClient,
+			}
+			Expect(decommissionCtrl.registerWithManager(mgr)).To(Succeed())
+
+			evictionCtrl := &EvictionReconciler{
+				Client:        mgr.GetClient(),
+				Scheme:        mgr.GetScheme(),
+				computeClient: computeClient,
+			}
+			Expect(evictionCtrl.registerWithManager(mgr)).To(Succeed())
+
+			By("Starting the manager")
+			var mgrCtx context.Context
+			mgrCtx, mgrCancel = context.WithCancel(context.Background())
+			go func() {
+				defer GinkgoRecover()
+				Expect(mgr.Start(mgrCtx)).To(Succeed())
+			}()
+
+			// Wait for caches to sync
+			Eventually(func() bool {
+				return mgr.GetCache().WaitForCacheSync(mgrCtx)
+			}).WithTimeout(30 * time.Second).WithPolling(100 * time.Millisecond).Should(BeTrue())
+
+			By("Creating the Node to trigger the lifecycle")
+			node := &corev1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: nodeName,
+					Labels: map[string]string{
+						labelHypervisor: "kvm",
"cobaltcore.cloud.sap/node-hypervisor-lifecycle": lifecycleLabel, + corev1.LabelTopologyZone: "test-az", + corev1.LabelTopologyRegion: "test-region", + corev1.LabelHostname: nodeName, + }, + Annotations: map[string]string{ + "nova.openstack.cloud.sap/aggregates": "prod-aggregate", + "nova.openstack.cloud.sap/custom-traits": "CUSTOM_INTEG_TEST", + }, + }, + } + Expect(k8sClient.Create(ctx, node)).To(Succeed()) + DeferCleanup(func(ctx SpecContext) { + Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, node))).To(Succeed()) + }) + }) + + AfterEach(func() { + By("Stopping the manager") + if mgrCancel != nil { + mgrCancel() + } + + // Cleanup the Hypervisor CRD (may have been created by the HypervisorController) + hv := &kvmv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: nodeName}} + Expect(client.IgnoreNotFound(k8sClient.Delete(context.Background(), hv))).To(Succeed()) + }) + + It("should complete the full onboarding lifecycle to Ready=True", func(ctx SpecContext) { + By("1. Waiting for Hypervisor CRD to be created by HypervisorController") + Eventually(getHypervisor(ctx, nodeName)). + WithTimeout(eventuallyTimeout).WithPolling(pollingInterval). + Should(SatisfyAll( + HaveField("Name", nodeName), + HaveField("Spec.LifecycleEnabled", true), + )) + + By("2. Waiting for HypervisorID and ServiceID to be populated") + Eventually(func(g Gomega) { + hv, err := getHypervisor(ctx, nodeName)() + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(hv.Status.HypervisorID).NotTo(BeEmpty()) + g.Expect(hv.Status.ServiceID).NotTo(BeEmpty()) + }).WithTimeout(eventuallyTimeout).WithPolling(pollingInterval).Should(Succeed()) + + By("3. Waiting for final state: Ready=True, Onboarding=False/Succeeded") + // Controllers race through phases quickly: Initial → Testing → Handover → Succeeded. + // Intermediate phases may be too transient to observe, so we assert on the final state. + Eventually(func(g Gomega) { + hv, err := getHypervisor(ctx, nodeName)() + g.Expect(err).NotTo(HaveOccurred()) + + readyCond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeReady) + g.Expect(readyCond).NotTo(BeNil()) + g.Expect(readyCond.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(readyCond.Reason).To(Equal(kvmv1.ConditionReasonReadyReady)) + + onboardingCond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) + g.Expect(onboardingCond).NotTo(BeNil()) + g.Expect(onboardingCond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(onboardingCond.Reason).To(Equal(kvmv1.ConditionReasonSucceeded)) + + aggCond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeAggregatesUpdated) + g.Expect(aggCond).NotTo(BeNil()) + g.Expect(aggCond.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(aggCond.Reason).To(Equal(kvmv1.ConditionReasonSucceeded)) + + traitsCond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeTraitsUpdated) + g.Expect(traitsCond).NotTo(BeNil()) + g.Expect(traitsCond.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(traitsCond.Reason).To(Equal(kvmv1.ConditionReasonSucceeded)) + }).WithTimeout(eventuallyTimeout).WithPolling(pollingInterval).Should(Succeed()) + + By("4. 
Verifying final mock state") + state.mu.Lock() + defer state.mu.Unlock() + + // Nova service was enabled and forced_down cleared + Expect(state.serviceEnabled).To(BeTrue(), "Nova service should be enabled") + Expect(state.serviceForcedDown).To(BeFalse(), "Nova service should not be forced down") + + // Instance HA was registered + Expect(state.haEnabled).To(BeTrue(), "Instance HA should be enabled") + + // All test servers cleaned up + Expect(state.servers).To(BeEmpty(), "All test servers should have been cleaned up") + + // Custom trait was applied to placement + Expect(state.traits).To(ContainElement("CUSTOM_INTEG_TEST"), "Custom trait should be applied") + + // Final aggregates: host in spec aggregates (test-az + prod-aggregate), + // removed from test aggregate + for _, agg := range state.aggregates { + switch agg.Name { + case "test-az": + Expect(agg.Hosts).To(ContainElement(nodeName), "host should be in zone aggregate") + case "prod-aggregate": + Expect(agg.Hosts).To(ContainElement(nodeName), "host should be in prod aggregate") + case testAggregateName: + Expect(agg.Hosts).NotTo(ContainElement(nodeName), "host should be removed from test aggregate") + } + } + }) + } + + Context("with SkipTests=true (skip-tests label)", func() { + setupAndRun("integ-skip-tests-hv", "skip-tests") + }) + + Context("with SkipTests=false (full smoke test VM flow)", func() { + setupAndRun("integ-full-test-hv", "") + }) +}) From 7daa16b306b5abb8adc0aaa9a33915aa1bdc6c3a Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Thu, 12 Mar 2026 13:43:20 +0100 Subject: [PATCH 3/3] Make onboarding requeue interval configurable for faster integration tests Add requeueInterval field to OnboardingController with getRequeueInterval() fallback to defaultWaitTime (1m) when unset. Integration test sets 1s requeue interval, reducing test runtime from ~60s to ~15s. 
---
 internal/controller/integration_test.go      |  1 +
 internal/controller/onboarding_controller.go | 26 +++++++++++++-------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/internal/controller/integration_test.go b/internal/controller/integration_test.go
index c3ec096..5bd981c 100644
--- a/internal/controller/integration_test.go
+++ b/internal/controller/integration_test.go
@@ -618,6 +618,7 @@ var _ = Describe("Integration: Full Onboarding Lifecycle", func() {
 				testComputeClient: testComputeClient,
 				testImageClient:   testImageClient,
 				testNetworkClient: testNetworkClient,
+				requeueInterval:   1 * time.Second,
 			}
 			Expect(onboardingCtrl.registerWithManager(mgr)).To(Succeed())
 
diff --git a/internal/controller/onboarding_controller.go b/internal/controller/onboarding_controller.go
index 74b449a..f67e13f 100644
--- a/internal/controller/onboarding_controller.go
+++ b/internal/controller/onboarding_controller.go
@@ -67,6 +67,14 @@ type OnboardingController struct {
 	testComputeClient *gophercloud.ServiceClient
 	testImageClient   *gophercloud.ServiceClient
 	testNetworkClient *gophercloud.ServiceClient
+	requeueInterval   time.Duration
+}
+
+func (r *OnboardingController) getRequeueInterval() time.Duration {
+	if r.requeueInterval > 0 {
+		return r.requeueInterval
+	}
+	return defaultWaitTime
 }
 
 // +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors,verbs=get;list;watch;patch
@@ -97,7 +105,7 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request)
 	if hv.Status.HypervisorID == "" || hv.Status.ServiceID == "" {
 		if err := r.ensureNovaProperties(ctx, hv); err != nil {
 			if errors.Is(err, errRequeue) {
-				return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
+				return ctrl.Result{RequeueAfter: r.getRequeueInterval()}, nil
 			}
 			return ctrl.Result{}, err
 		}
@@ -246,7 +254,7 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis
 	if err != nil {
 		// should not happened
 		log.Error(err, "failed to get test instance, instance vanished", "id", id)
-		return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
+		return ctrl.Result{RequeueAfter: r.getRequeueInterval()}, nil
 	}
 
 	base := hv.DeepCopy()
@@ -266,7 +274,7 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis
 			log.Error(err, "failed to delete test instance", "id", id)
 		}
 
-		return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
+		return ctrl.Result{RequeueAfter: r.getRequeueInterval()}, nil
 	case "ACTIVE":
 		consoleOutput, err := servers.
 			ShowConsoleOutput(ctx, r.testComputeClient, server.ID, servers.ShowConsoleOutputOpts{Length: 11}).
@@ -282,11 +290,11 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis
 			if err := r.patchStatus(ctx, hv, base); err != nil {
 				return ctrl.Result{}, err
 			}
-			return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
+			return ctrl.Result{RequeueAfter: r.getRequeueInterval()}, nil
 		}
 
 		if !strings.Contains(consoleOutput, server.Name) {
-			return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
+			return ctrl.Result{RequeueAfter: r.getRequeueInterval()}, nil
 		}
 
 		if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil {
@@ -300,12 +308,12 @@
 			if err := r.patchStatus(ctx, hv, base); err != nil {
 				return ctrl.Result{}, err
 			}
-			return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
+			return ctrl.Result{RequeueAfter: r.getRequeueInterval()}, nil
 		}
 
 		return r.completeOnboarding(ctx, host, hv)
 
 	default:
-		return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
+		return ctrl.Result{RequeueAfter: r.getRequeueInterval()}, nil
 	}
 }
@@ -318,11 +326,11 @@
 	// We're waiting for aggregates and traits controllers to sync
 	if !meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeAggregatesUpdated) {
 		log.Info("waiting for aggregates to be updated", "condition", kvmv1.ConditionTypeAggregatesUpdated)
-		return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
+		return ctrl.Result{RequeueAfter: r.getRequeueInterval()}, nil
 	}
 	if !meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeTraitsUpdated) {
 		log.Info("waiting for traits to be updated", "condition", kvmv1.ConditionTypeTraitsUpdated)
-		return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
+		return ctrl.Result{RequeueAfter: r.getRequeueInterval()}, nil
 	}
 
 	// Aggregates and traits have been synced, mark onboarding as complete