Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion deploy-scripts/lib/sentinel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,11 @@ install_sentinel_instance() {
# This enables the sentinel to include ownerReferences from the Kubernetes resource
# in the message data sent to adapters, which is required for nodepools management
if [[ "${resource_type}" == "nodepools" ]]; then
helm_cmd+=(--set "config.messageData.owner_references=resource.owner_references")
helm_cmd+=(
--set "config.messageData.owner_references.id=resource.owner_references.id"
--set "config.messageData.owner_references.href=resource.owner_references.href"
--set "config.messageData.owner_references.kind=resource.owner_references.kind"
)
fi

log_info "Executing: ${helm_cmd[*]}"
Expand Down
184 changes: 184 additions & 0 deletions e2e/cluster/concurrent_creation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
package cluster

import (
"context"
"fmt"
"sync"

"github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability

"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi"
"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client"
"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper"
"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels"
)

const concurrentClusterCount = 5

var _ = ginkgo.Describe("[Suite: cluster][concurrent] System can process concurrent cluster creations without resource conflicts",
	ginkgo.Label(labels.Tier1),
	func() {
		var h *helper.Helper
		// clusterIDs accumulates every successfully created cluster so that
		// AfterEach can clean them up even if the spec fails partway through.
		var clusterIDs []string

		ginkgo.BeforeEach(func() {
			h = helper.New()
			clusterIDs = nil
		})

		ginkgo.It("should create multiple clusters concurrently and all reach Ready state with isolated resources",
			func(ctx context.Context) {
				ginkgo.By(fmt.Sprintf("Submit %d cluster creation requests simultaneously", concurrentClusterCount))

				// One result slot per goroutine; each goroutine writes only its
				// own index, so no mutex is required.
				type clusterResult struct {
					id   string
					name string
					err  error
				}

				results := make([]clusterResult, concurrentClusterCount)
				var wg sync.WaitGroup
				wg.Add(concurrentClusterCount)

				for i := 0; i < concurrentClusterCount; i++ {
					go func(idx int) {
						defer wg.Done()
						// GinkgoRecover must be deferred in every goroutine that
						// can trip an assertion so Ginkgo reports the failure
						// instead of crashing the suite.
						defer ginkgo.GinkgoRecover()

						cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json"))
						if err != nil {
							results[idx] = clusterResult{err: fmt.Errorf("failed to create cluster %d: %w", idx, err)}
							return
						}
						if cluster.Id == nil {
							results[idx] = clusterResult{err: fmt.Errorf("cluster %d has nil ID", idx)}
							return
						}
						results[idx] = clusterResult{
							id:   *cluster.Id,
							name: cluster.Name,
						}
					}(i)
				}
				wg.Wait()

				// Record every successfully created cluster BEFORE asserting.
				// Previously IDs were appended inside the assertion loop, so the
				// first failed creation aborted the spec and later successful
				// clusters were never registered for cleanup, leaking them.
				for _, r := range results {
					if r.err == nil && r.id != "" {
						clusterIDs = append(clusterIDs, r.id)
					}
				}

				// Verify all creations succeeded
				for i, r := range results {
					Expect(r.err).NotTo(HaveOccurred(), "cluster creation %d failed", i)
					Expect(r.id).NotTo(BeEmpty(), "cluster %d should have a non-empty ID", i)
					ginkgo.GinkgoWriter.Printf("Created cluster %d: ID=%s, Name=%s\n", i, r.id, r.name)
				}

				// Verify all cluster IDs are unique
				idSet := make(map[string]bool, len(clusterIDs))
				for _, id := range clusterIDs {
					Expect(idSet[id]).To(BeFalse(), "duplicate cluster ID detected: %s", id)
					idSet[id] = true
				}

				ginkgo.By("Wait for all clusters to reach Ready=True and Available=True")
				for i, clusterID := range clusterIDs {
					ginkgo.GinkgoWriter.Printf("Waiting for cluster %d (%s) to become Ready...\n", i, clusterID)
					err := h.WaitForClusterCondition(
						ctx,
						clusterID,
						client.ConditionTypeReady,
						openapi.ResourceConditionStatusTrue,
						h.Cfg.Timeouts.Cluster.Ready,
					)
					Expect(err).NotTo(HaveOccurred(), "cluster %d (%s) should reach Ready=True", i, clusterID)

					cluster, err := h.Client.GetCluster(ctx, clusterID)
					Expect(err).NotTo(HaveOccurred(), "failed to get cluster %d (%s)", i, clusterID)

					hasAvailable := h.HasResourceCondition(cluster.Status.Conditions,
						client.ConditionTypeAvailable, openapi.ResourceConditionStatusTrue)
					Expect(hasAvailable).To(BeTrue(),
						"cluster %d (%s) should have Available=True", i, clusterID)

					ginkgo.GinkgoWriter.Printf("Cluster %d (%s) reached Ready=True, Available=True\n", i, clusterID)
				}

				ginkgo.By("Verify each cluster has isolated Kubernetes resources (separate namespaces)")
				for i, clusterID := range clusterIDs {
					expectedLabels := map[string]string{
						"hyperfleet.io/cluster-id": clusterID,
					}
					err := h.VerifyNamespaceActive(ctx, clusterID, expectedLabels, nil)
					Expect(err).NotTo(HaveOccurred(),
						"cluster %d (%s) should have its own active namespace", i, clusterID)
					ginkgo.GinkgoWriter.Printf("Cluster %d (%s) has isolated namespace\n", i, clusterID)
				}

				ginkgo.By("Verify all adapter statuses are complete for each cluster")
				for i, clusterID := range clusterIDs {
					// Poll with a Gomega-scoped Eventually: assertions inside use
					// g.Expect so a transient miss retries instead of failing.
					Eventually(func(g Gomega) {
						statuses, err := h.Client.GetClusterStatuses(ctx, clusterID)
						g.Expect(err).NotTo(HaveOccurred(), "failed to get cluster statuses for cluster %d (%s)", i, clusterID)
						g.Expect(statuses.Items).NotTo(BeEmpty(), "cluster %d (%s) should have adapter statuses", i, clusterID)

						// Build adapter status map
						adapterMap := make(map[string]openapi.AdapterStatus)
						for _, adapter := range statuses.Items {
							adapterMap[adapter.Adapter] = adapter
						}

						// Verify each required adapter has completed successfully
						for _, requiredAdapter := range h.Cfg.Adapters.Cluster {
							adapter, exists := adapterMap[requiredAdapter]
							g.Expect(exists).To(BeTrue(),
								"cluster %d (%s): required adapter %s should be present", i, clusterID, requiredAdapter)

							g.Expect(h.HasAdapterCondition(adapter.Conditions,
								client.ConditionTypeApplied, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
								"cluster %d (%s): adapter %s should have Applied=True", i, clusterID, requiredAdapter)

							g.Expect(h.HasAdapterCondition(adapter.Conditions,
								client.ConditionTypeAvailable, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
								"cluster %d (%s): adapter %s should have Available=True", i, clusterID, requiredAdapter)

							g.Expect(h.HasAdapterCondition(adapter.Conditions,
								client.ConditionTypeHealth, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
								"cluster %d (%s): adapter %s should have Health=True", i, clusterID, requiredAdapter)
						}
					}, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed())

					ginkgo.GinkgoWriter.Printf("Cluster %d (%s) has all adapter statuses complete\n", i, clusterID)
				}

				ginkgo.GinkgoWriter.Printf("Successfully validated %d concurrent cluster creations with resource isolation\n", concurrentClusterCount)
			})

		ginkgo.AfterEach(func(ctx context.Context) {
			if h == nil || len(clusterIDs) == 0 {
				return
			}

			ginkgo.By(fmt.Sprintf("Cleaning up %d test clusters", len(clusterIDs)))
			var cleanupErrors []error
			for _, clusterID := range clusterIDs {
				// Wait for cluster Ready before cleanup to prevent namespace deletion conflicts
				// Without this, adapters may still be creating resources during cleanup
				err := h.WaitForClusterCondition(
					ctx,
					clusterID,
					client.ConditionTypeReady,
					openapi.ResourceConditionStatusTrue,
					h.Cfg.Timeouts.Cluster.Ready,
				)
				if err != nil {
					// Best effort: proceed with cleanup anyway, but surface the warning.
					ginkgo.GinkgoWriter.Printf("WARNING: cluster %s did not reach Ready state before cleanup: %v\n", clusterID, err)
				}

				ginkgo.By("cleaning up cluster " + clusterID)
				if err := h.CleanupTestCluster(ctx, clusterID); err != nil {
					// Collect instead of failing immediately so every cluster
					// gets a cleanup attempt.
					ginkgo.GinkgoWriter.Printf("ERROR: failed to cleanup cluster %s: %v\n", clusterID, err)
					cleanupErrors = append(cleanupErrors, err)
				}
			}
			Expect(cleanupErrors).To(BeEmpty(), "some clusters failed to cleanup")
		})
	},
)
201 changes: 201 additions & 0 deletions e2e/nodepool/concurrent_creation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
package nodepool

import (
"context"
"fmt"
"sync"

"github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability

"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi"
"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client"
"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper"
"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels"
)

const concurrentNodePoolCount = 3

var _ = ginkgo.Describe("[Suite: nodepool][concurrent] Multiple nodepools can coexist under same cluster without conflicts",
ginkgo.Label(labels.Tier1),
func() {
var h *helper.Helper
var clusterID string
var nodepoolIDs []string

ginkgo.BeforeEach(func(ctx context.Context) {
h = helper.New()
nodepoolIDs = nil

// Get or create a cluster for nodepool tests
var err error
clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json"))
Expect(err).NotTo(HaveOccurred(), "failed to get test cluster")
ginkgo.GinkgoWriter.Printf("Using cluster ID: %s\n", clusterID)
})

ginkgo.It("should create multiple nodepools under the same cluster and all reach Ready state with isolated resources",
func(ctx context.Context) {
ginkgo.By(fmt.Sprintf("Submit %d nodepool creation requests simultaneously", concurrentNodePoolCount))

type nodepoolResult struct {
id string
name string
err error
}

results := make([]nodepoolResult, concurrentNodePoolCount)
var wg sync.WaitGroup
wg.Add(concurrentNodePoolCount)

for i := 0; i < concurrentNodePoolCount; i++ {
go func(idx int) {
defer wg.Done()
defer ginkgo.GinkgoRecover()

nodepool, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json"))
if err != nil {
results[idx] = nodepoolResult{err: fmt.Errorf("failed to create nodepool %d: %w", idx, err)}
return
}
if nodepool.Id == nil {
results[idx] = nodepoolResult{err: fmt.Errorf("nodepool %d has nil ID", idx)}
return
}
results[idx] = nodepoolResult{
id: *nodepool.Id,
name: nodepool.Name,
}
}(i)
}
wg.Wait()

// Verify all creations succeeded and collect IDs
for i, r := range results {
Expect(r.err).NotTo(HaveOccurred(), "nodepool creation %d failed", i)
Expect(r.id).NotTo(BeEmpty(), "nodepool %d should have a non-empty ID", i)
nodepoolIDs = append(nodepoolIDs, r.id)
ginkgo.GinkgoWriter.Printf("Created nodepool %d: ID=%s, Name=%s\n", i, r.id, r.name)
}

// Verify all nodepool IDs are unique
idSet := make(map[string]bool, len(nodepoolIDs))
for _, id := range nodepoolIDs {
Expect(idSet[id]).To(BeFalse(), "duplicate nodepool ID detected: %s", id)
idSet[id] = true
}

ginkgo.By("Verify all nodepools appear in the list API")
nodepoolList, err := h.Client.ListNodePools(ctx, clusterID)
Expect(err).NotTo(HaveOccurred(), "failed to list nodepools")

listedIDs := make(map[string]bool)
for _, np := range nodepoolList.Items {
if np.Id != nil {
listedIDs[*np.Id] = true
}
}
for i, npID := range nodepoolIDs {
Expect(listedIDs[npID]).To(BeTrue(),
"nodepool %d (%s) should appear in the list API", i, npID)
}
ginkgo.GinkgoWriter.Printf("All %d nodepools found in list API\n", concurrentNodePoolCount)

ginkgo.By("Wait for all nodepools to reach Ready=True and Available=True")
for i, npID := range nodepoolIDs {
ginkgo.GinkgoWriter.Printf("Waiting for nodepool %d (%s) to become Ready...\n", i, npID)
err := h.WaitForNodePoolCondition(
ctx,
clusterID,
npID,
client.ConditionTypeReady,
openapi.ResourceConditionStatusTrue,
h.Cfg.Timeouts.NodePool.Ready,
)
Expect(err).NotTo(HaveOccurred(), "nodepool %d (%s) should reach Ready=True", i, npID)

np, err := h.Client.GetNodePool(ctx, clusterID, npID)
Expect(err).NotTo(HaveOccurred(), "failed to get nodepool %d (%s)", i, npID)

hasAvailable := h.HasResourceCondition(np.Status.Conditions,
client.ConditionTypeAvailable, openapi.ResourceConditionStatusTrue)
Expect(hasAvailable).To(BeTrue(),
"nodepool %d (%s) should have Available=True", i, npID)

ginkgo.GinkgoWriter.Printf("Nodepool %d (%s) reached Ready=True, Available=True\n", i, npID)
}

ginkgo.By("Verify Kubernetes resources are isolated per nodepool")
for i, npID := range nodepoolIDs {
expectedLabels := map[string]string{
"hyperfleet.io/cluster-id": clusterID,
"hyperfleet.io/nodepool-id": npID,
}
Eventually(func() error {
return h.VerifyConfigMap(ctx, clusterID, expectedLabels, nil)
}, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed(),
"nodepool %d (%s) should have its own configmap resource", i, npID)
ginkgo.GinkgoWriter.Printf("Nodepool %d (%s) has isolated K8s resources\n", i, npID)
}

ginkgo.By("Verify all adapter statuses are complete for each nodepool")
for i, npID := range nodepoolIDs {
Eventually(func(g Gomega) {
statuses, err := h.Client.GetNodePoolStatuses(ctx, clusterID, npID)
g.Expect(err).NotTo(HaveOccurred(), "failed to get nodepool statuses for nodepool %d (%s)", i, npID)
g.Expect(statuses.Items).NotTo(BeEmpty(), "nodepool %d (%s) should have adapter statuses", i, npID)

adapterMap := make(map[string]openapi.AdapterStatus)
for _, adapter := range statuses.Items {
adapterMap[adapter.Adapter] = adapter
}

for _, requiredAdapter := range h.Cfg.Adapters.NodePool {
adapter, exists := adapterMap[requiredAdapter]
g.Expect(exists).To(BeTrue(),
"nodepool %d (%s): required adapter %s should be present", i, npID, requiredAdapter)

g.Expect(h.HasAdapterCondition(adapter.Conditions,
client.ConditionTypeApplied, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
"nodepool %d (%s): adapter %s should have Applied=True", i, npID, requiredAdapter)

g.Expect(h.HasAdapterCondition(adapter.Conditions,
client.ConditionTypeAvailable, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
"nodepool %d (%s): adapter %s should have Available=True", i, npID, requiredAdapter)

g.Expect(h.HasAdapterCondition(adapter.Conditions,
client.ConditionTypeHealth, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
"nodepool %d (%s): adapter %s should have Health=True", i, npID, requiredAdapter)
}
}, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed())

ginkgo.GinkgoWriter.Printf("Nodepool %d (%s) has all adapter statuses complete\n", i, npID)
}

ginkgo.GinkgoWriter.Printf("Successfully validated %d nodepools coexisting under cluster %s with resource isolation\n",
concurrentNodePoolCount, clusterID)
})

ginkgo.AfterEach(func(ctx context.Context) {
if h == nil || clusterID == "" {
return
}

ginkgo.By("Verify final cluster state to ensure Ready before cleanup")
err := h.WaitForClusterCondition(
ctx,
clusterID,
client.ConditionTypeReady,
openapi.ResourceConditionStatusTrue,
h.Cfg.Timeouts.Cluster.Ready,
)
if err != nil {
ginkgo.GinkgoWriter.Printf("WARNING: cluster %s did not reach Ready state before cleanup: %v\n", clusterID, err)
}

ginkgo.By("cleaning up test cluster " + clusterID)
err = h.CleanupTestCluster(ctx, clusterID)
Expect(err).NotTo(HaveOccurred(), "failed to cleanup cluster %s", clusterID)
})
Comment on lines +179 to +199
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check what CleanupTestCluster does
ast-grep --pattern 'func ($_ *Helper) CleanupTestCluster($$$) {
  $$$
}'

Repository: openshift-hyperfleet/hyperfleet-e2e

Length of output: 61


🏁 Script executed:

#!/bin/bash
# Search for CleanupTestCluster function definition
rg "CleanupTestCluster" -A 10 --type go

Repository: openshift-hyperfleet/hyperfleet-e2e

Length of output: 4202


🏁 Script executed:

#!/bin/bash
# Also check the full concurrent_creation.go file to understand context
wc -l e2e/nodepool/concurrent_creation.go

Repository: openshift-hyperfleet/hyperfleet-e2e

Length of output: 118


🏁 Script executed:

#!/bin/bash
# Look at the full AfterEach and surrounding code in the test file
cat -n e2e/nodepool/concurrent_creation.go | head -220 | tail -60

Repository: openshift-hyperfleet/hyperfleet-e2e

Length of output: 3028


Nodepool resources not explicitly cleaned up before cluster deletion.

The test creates and collects nodepoolIDs but the AfterEach only calls CleanupTestCluster, which is a temporary workaround that deletes the Kubernetes namespace using client-go—it does not delete nodepool API resources. Explicitly delete the created nodepools during cleanup to prevent orphaned API resources.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@e2e/nodepool/concurrent_creation.go` around lines 179 - 199, The AfterEach
cleanup currently only calls h.CleanupTestCluster and may leave nodepool API
resources orphaned; update the ginkgo.AfterEach to explicitly iterate the
collected nodepoolIDs and call the handler's nodepool deletion method for each
(e.g., use h.DeleteNodepool(ctx, clusterID, nodepoolID) or the appropriate h
method) before calling h.CleanupTestCluster, checking errors with
Expect(err).NotTo(HaveOccurred()) for each deletion; keep the existing
WaitForClusterCondition and CleanupTestCluster calls but ensure nodepool
deletions run first to remove nodepool API resources.

},
)
Loading