From d1c93b56141ec593b5aa59231935d9be1c612d36 Mon Sep 17 00:00:00 2001
From: Tongtong Zhou <tzhou@redhat.com>
Date: Tue, 24 Mar 2026 13:48:53 +0800
Subject: [PATCH 1/2] HYPERFLEET-757 HYPERFLEET-758 - test: automate concurrent
 processing E2E tests

---
 deploy-scripts/lib/sentinel.sh                |   6 +-
 e2e/cluster/concurrent_creation.go            | 180 ++++++++++++++++
 e2e/nodepool/concurrent_creation.go           | 201 ++++++++++++++++++
 .../testcases/concurrent-processing.md        |  17 +-
 4 files changed, 395 insertions(+), 9 deletions(-)
 create mode 100644 e2e/cluster/concurrent_creation.go
 create mode 100644 e2e/nodepool/concurrent_creation.go

diff --git a/deploy-scripts/lib/sentinel.sh b/deploy-scripts/lib/sentinel.sh
index 1127db0..b04da41 100755
--- a/deploy-scripts/lib/sentinel.sh
+++ b/deploy-scripts/lib/sentinel.sh
@@ -64,7 +64,11 @@ install_sentinel_instance() {
     # This enables the sentinel to include ownerReferences from the Kubernetes resource
     # in the message data sent to adapters, which is required for nodepools management
     if [[ "${resource_type}" == "nodepools" ]]; then
-        helm_cmd+=(--set "config.messageData.owner_references=resource.owner_references")
+        helm_cmd+=(
+            --set "config.messageData.owner_references.id=resource.owner_references.id"
+            --set "config.messageData.owner_references.href=resource.owner_references.href"
+            --set "config.messageData.owner_references.kind=resource.owner_references.kind"
+        )
     fi
 
     log_info "Executing: ${helm_cmd[*]}"
diff --git a/e2e/cluster/concurrent_creation.go b/e2e/cluster/concurrent_creation.go
new file mode 100644
index 0000000..b3fc8c4
--- /dev/null
+++ b/e2e/cluster/concurrent_creation.go
@@ -0,0 +1,192 @@
+package cluster
+
+import (
+	"context"
+	"fmt"
+	"sync"
+
+	"github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability
+
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi"
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client"
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper"
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels"
+)
+
+// concurrentClusterCount is the number of cluster creation requests issued in parallel.
+const concurrentClusterCount = 5
+
+// This spec submits concurrentClusterCount cluster creation requests at once and checks that
+// every cluster gets a unique ID, reaches Ready=True and Available=True, has an active
+// namespace labeled with its cluster ID, and has all required adapter statuses reported True.
+var _ = ginkgo.Describe("[Suite: cluster][concurrent] System can process concurrent cluster creations without resource conflicts",
+	ginkgo.Label(labels.Tier1),
+	func() {
+		var h *helper.Helper
+		var clusterIDs []string
+
+		ginkgo.BeforeEach(func() {
+			h = helper.New()
+			clusterIDs = nil
+		})
+
+		ginkgo.It("should create multiple clusters concurrently and all reach Ready state with isolated resources",
+			func(ctx context.Context) {
+				ginkgo.By(fmt.Sprintf("Submit %d cluster creation requests simultaneously", concurrentClusterCount))
+
+				type clusterResult struct {
+					id   string
+					name string
+					err  error
+				}
+
+				// Each goroutine writes only to its own slot of results, so no locking is needed
+				results := make([]clusterResult, concurrentClusterCount)
+				var wg sync.WaitGroup
+				wg.Add(concurrentClusterCount)
+
+				for i := 0; i < concurrentClusterCount; i++ {
+					go func(idx int) {
+						defer wg.Done()
+						defer ginkgo.GinkgoRecover()
+
+						cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json"))
+						if err != nil {
+							results[idx] = clusterResult{err: fmt.Errorf("failed to create cluster %d: %w", idx, err)}
+							return
+						}
+						if cluster.Id == nil {
+							results[idx] = clusterResult{err: fmt.Errorf("cluster %d has nil ID", idx)}
+							return
+						}
+						results[idx] = clusterResult{
+							id:   *cluster.Id,
+							name: cluster.Name,
+						}
+					}(i)
+				}
+				wg.Wait()
+
+				// Record every created cluster before asserting: a failed Expect aborts the spec,
+				// and AfterEach only cleans up the IDs stored in clusterIDs
+				for _, r := range results {
+					if r.id != "" {
+						clusterIDs = append(clusterIDs, r.id)
+					}
+				}
+
+				// Verify all creations succeeded
+				for i, r := range results {
+					Expect(r.err).NotTo(HaveOccurred(), "cluster creation %d failed", i)
+					Expect(r.id).NotTo(BeEmpty(), "cluster %d should have a non-empty ID", i)
+					ginkgo.GinkgoWriter.Printf("Created cluster %d: ID=%s, Name=%s\n", i, r.id, r.name)
+				}
+
+				// Verify all cluster IDs are unique
+				idSet := make(map[string]bool, len(clusterIDs))
+				for _, id := range clusterIDs {
+					Expect(idSet[id]).To(BeFalse(), "duplicate cluster ID detected: %s", id)
+					idSet[id] = true
+				}
+
+				ginkgo.By("Wait for all clusters to reach Ready=True and Available=True")
+				for i, clusterID := range clusterIDs {
+					ginkgo.GinkgoWriter.Printf("Waiting for cluster %d (%s) to become Ready...\n", i, clusterID)
+					err := h.WaitForClusterCondition(
+						ctx,
+						clusterID,
+						client.ConditionTypeReady,
+						openapi.ResourceConditionStatusTrue,
+						h.Cfg.Timeouts.Cluster.Ready,
+					)
+					Expect(err).NotTo(HaveOccurred(), "cluster %d (%s) should reach Ready=True", i, clusterID)
+
+					cluster, err := h.Client.GetCluster(ctx, clusterID)
+					Expect(err).NotTo(HaveOccurred(), "failed to get cluster %d (%s)", i, clusterID)
+
+					hasAvailable := h.HasResourceCondition(cluster.Status.Conditions,
+						client.ConditionTypeAvailable, openapi.ResourceConditionStatusTrue)
+					Expect(hasAvailable).To(BeTrue(),
+						"cluster %d (%s) should have Available=True", i, clusterID)
+
+					ginkgo.GinkgoWriter.Printf("Cluster %d (%s) reached Ready=True, Available=True\n", i, clusterID)
+				}
+
+				ginkgo.By("Verify each cluster has isolated Kubernetes resources (separate namespaces)")
+				for i, clusterID := range clusterIDs {
+					expectedLabels := map[string]string{
+						"hyperfleet.io/cluster-id": clusterID,
+					}
+					err := h.VerifyNamespaceActive(ctx, clusterID, expectedLabels, nil)
+					Expect(err).NotTo(HaveOccurred(),
+						"cluster %d (%s) should have its own active namespace", i, clusterID)
+					ginkgo.GinkgoWriter.Printf("Cluster %d (%s) has isolated namespace\n", i, clusterID)
+				}
+
+				ginkgo.By("Verify all adapter statuses are complete for each cluster")
+				for i, clusterID := range clusterIDs {
+					Eventually(func(g Gomega) {
+						statuses, err := h.Client.GetClusterStatuses(ctx, clusterID)
+						g.Expect(err).NotTo(HaveOccurred(), "failed to get cluster statuses for cluster %d (%s)", i, clusterID)
+						g.Expect(statuses.Items).NotTo(BeEmpty(), "cluster %d (%s) should have adapter statuses", i, clusterID)
+
+						// Build adapter status map
+						adapterMap := make(map[string]openapi.AdapterStatus)
+						for _, adapter := range statuses.Items {
+							adapterMap[adapter.Adapter] = adapter
+						}
+
+						// Verify each required adapter has completed successfully
+						for _, requiredAdapter := range h.Cfg.Adapters.Cluster {
+							adapter, exists := adapterMap[requiredAdapter]
+							g.Expect(exists).To(BeTrue(),
+								"cluster %d (%s): required adapter %s should be present", i, clusterID, requiredAdapter)
+
+							g.Expect(h.HasAdapterCondition(adapter.Conditions,
+								client.ConditionTypeApplied, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
+								"cluster %d (%s): adapter %s should have Applied=True", i, clusterID, requiredAdapter)
+
+							g.Expect(h.HasAdapterCondition(adapter.Conditions,
+								client.ConditionTypeAvailable, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
+								"cluster %d (%s): adapter %s should have Available=True", i, clusterID, requiredAdapter)
+
+							g.Expect(h.HasAdapterCondition(adapter.Conditions,
+								client.ConditionTypeHealth, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
+								"cluster %d (%s): adapter %s should have Health=True", i, clusterID, requiredAdapter)
+						}
+					}, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed())
+
+					ginkgo.GinkgoWriter.Printf("Cluster %d (%s) has all adapter statuses complete\n", i, clusterID)
+				}
+
+				ginkgo.GinkgoWriter.Printf("Successfully validated %d concurrent cluster creations with resource isolation\n", concurrentClusterCount)
+			})
+
+		ginkgo.AfterEach(func(ctx context.Context) {
+			if h == nil || len(clusterIDs) == 0 {
+				return
+			}
+
+			ginkgo.By(fmt.Sprintf("Cleaning up %d test clusters", len(clusterIDs)))
+			for _, clusterID := range clusterIDs {
+				// Wait for cluster Ready before cleanup to prevent namespace deletion conflicts
+				// Without this, adapters may still be creating resources during cleanup
+				err := h.WaitForClusterCondition(
+					ctx,
+					clusterID,
+					client.ConditionTypeReady,
+					openapi.ResourceConditionStatusTrue,
+					h.Cfg.Timeouts.Cluster.Ready,
+				)
+				if err != nil {
+					ginkgo.GinkgoWriter.Printf("WARNING: cluster %s did not reach Ready state before cleanup: %v\n", clusterID, err)
+				}
+
+				ginkgo.By("cleaning up cluster " + clusterID)
+				err = h.CleanupTestCluster(ctx, clusterID)
+				Expect(err).NotTo(HaveOccurred(), "failed to cleanup cluster %s", clusterID)
+			}
+		})
+	},
+)
diff --git a/e2e/nodepool/concurrent_creation.go b/e2e/nodepool/concurrent_creation.go
new file mode 100644
index 0000000..22b05f4
--- /dev/null
+++ b/e2e/nodepool/concurrent_creation.go
@@ -0,0 +1,202 @@
+package nodepool
+
+import (
+	"context"
+	"fmt"
+	"sync"
+
+	"github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability
+
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi"
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client"
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper"
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels"
+)
+
+// concurrentNodePoolCount is how many nodepools the spec creates in parallel under one cluster.
+const concurrentNodePoolCount = 3
+
+// The spec below fires concurrentNodePoolCount nodepool creations at one cluster and then checks
+// that every nodepool is listed, reaches Ready=True and Available=True, has a configmap carrying
+// its cluster/nodepool labels, and has all required adapter statuses reported True.
+var _ = ginkgo.Describe("[Suite: nodepool][concurrent] Multiple nodepools can coexist under same cluster without conflicts",
+	ginkgo.Label(labels.Tier1),
+	func() {
+		var (
+			h           *helper.Helper
+			clusterID   string
+			nodepoolIDs []string
+		)
+
+		ginkgo.BeforeEach(func(ctx context.Context) {
+			h = helper.New()
+			nodepoolIDs = nil
+
+			// Obtain the parent cluster that the nodepools will be attached to
+			id, err := h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json"))
+			clusterID = id
+			Expect(err).NotTo(HaveOccurred(), "failed to get test cluster")
+			ginkgo.GinkgoWriter.Printf("Using cluster ID: %s\n", clusterID)
+		})
+
+		ginkgo.It("should create multiple nodepools under the same cluster and all reach Ready state with isolated resources",
+			func(ctx context.Context) {
+				ginkgo.By(fmt.Sprintf("Submit %d nodepool creation requests simultaneously", concurrentNodePoolCount))
+
+				type nodepoolResult struct {
+					id   string
+					name string
+					err  error
+				}
+
+				// createOne sends a single creation request and turns the response into a result
+				createOne := func(slot int) nodepoolResult {
+					created, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json"))
+					switch {
+					case err != nil:
+						return nodepoolResult{err: fmt.Errorf("failed to create nodepool %d: %w", slot, err)}
+					case created.Id == nil:
+						return nodepoolResult{err: fmt.Errorf("nodepool %d has nil ID", slot)}
+					}
+					return nodepoolResult{id: *created.Id, name: created.Name}
+				}
+
+				// Every goroutine fills only its own slot, so the slice needs no lock
+				outcomes := make([]nodepoolResult, concurrentNodePoolCount)
+				var pending sync.WaitGroup
+				pending.Add(len(outcomes))
+				for slot := range outcomes {
+					go func(slot int) {
+						defer pending.Done()
+						defer ginkgo.GinkgoRecover()
+						outcomes[slot] = createOne(slot)
+					}(slot)
+				}
+				pending.Wait()
+
+				// Every request must have succeeded; keep the IDs in request order
+				for n, res := range outcomes {
+					Expect(res.err).NotTo(HaveOccurred(), "nodepool creation %d failed", n)
+					Expect(res.id).NotTo(BeEmpty(), "nodepool %d should have a non-empty ID", n)
+					nodepoolIDs = append(nodepoolIDs, res.id)
+					ginkgo.GinkgoWriter.Printf("Created nodepool %d: ID=%s, Name=%s\n", n, res.id, res.name)
+				}
+
+				// No two nodepools may share an ID
+				seen := make(map[string]bool, len(nodepoolIDs))
+				for _, id := range nodepoolIDs {
+					_, dup := seen[id]
+					Expect(dup).To(BeFalse(), "duplicate nodepool ID detected: %s", id)
+					seen[id] = true
+				}
+
+				ginkgo.By("Verify all nodepools appear in the list API")
+				listing, err := h.Client.ListNodePools(ctx, clusterID)
+				Expect(err).NotTo(HaveOccurred(), "failed to list nodepools")
+
+				listed := make(map[string]bool)
+				for _, item := range listing.Items {
+					if item.Id == nil {
+						continue
+					}
+					listed[*item.Id] = true
+				}
+				for n, npID := range nodepoolIDs {
+					Expect(listed[npID]).To(BeTrue(),
+						"nodepool %d (%s) should appear in the list API", n, npID)
+				}
+				ginkgo.GinkgoWriter.Printf("All %d nodepools found in list API\n", concurrentNodePoolCount)
+
+				ginkgo.By("Wait for all nodepools to reach Ready=True and Available=True")
+				for n, npID := range nodepoolIDs {
+					ginkgo.GinkgoWriter.Printf("Waiting for nodepool %d (%s) to become Ready...\n", n, npID)
+					waitErr := h.WaitForNodePoolCondition(ctx, clusterID, npID,
+						client.ConditionTypeReady, openapi.ResourceConditionStatusTrue, h.Cfg.Timeouts.NodePool.Ready)
+					Expect(waitErr).NotTo(HaveOccurred(), "nodepool %d (%s) should reach Ready=True", n, npID)
+
+					current, getErr := h.Client.GetNodePool(ctx, clusterID, npID)
+					Expect(getErr).NotTo(HaveOccurred(), "failed to get nodepool %d (%s)", n, npID)
+
+					Expect(h.HasResourceCondition(current.Status.Conditions,
+						client.ConditionTypeAvailable, openapi.ResourceConditionStatusTrue)).To(BeTrue(),
+						"nodepool %d (%s) should have Available=True", n, npID)
+
+					ginkgo.GinkgoWriter.Printf("Nodepool %d (%s) reached Ready=True, Available=True\n", n, npID)
+				}
+
+				ginkgo.By("Verify Kubernetes resources are isolated per nodepool")
+				for n, npID := range nodepoolIDs {
+					wantLabels := map[string]string{
+						"hyperfleet.io/cluster-id":  clusterID,
+						"hyperfleet.io/nodepool-id": npID,
+					}
+					configMapPresent := func() error {
+						return h.VerifyConfigMap(ctx, clusterID, wantLabels, nil)
+					}
+					Eventually(configMapPresent, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed(),
+						"nodepool %d (%s) should have its own configmap resource", n, npID)
+					ginkgo.GinkgoWriter.Printf("Nodepool %d (%s) has isolated K8s resources\n", n, npID)
+				}
+
+				ginkgo.By("Verify all adapter statuses are complete for each nodepool")
+				for n, npID := range nodepoolIDs {
+					adaptersComplete := func(g Gomega) {
+						statuses, err := h.Client.GetNodePoolStatuses(ctx, clusterID, npID)
+						g.Expect(err).NotTo(HaveOccurred(), "failed to get nodepool statuses for nodepool %d (%s)", n, npID)
+						g.Expect(statuses.Items).NotTo(BeEmpty(), "nodepool %d (%s) should have adapter statuses", n, npID)
+
+						// Index the reported statuses by adapter name
+						byName := make(map[string]openapi.AdapterStatus)
+						for _, st := range statuses.Items {
+							byName[st.Adapter] = st
+						}
+
+						for _, required := range h.Cfg.Adapters.NodePool {
+							st, found := byName[required]
+							g.Expect(found).To(BeTrue(),
+								"nodepool %d (%s): required adapter %s should be present", n, npID, required)
+
+							applied := h.HasAdapterCondition(st.Conditions,
+								client.ConditionTypeApplied, openapi.AdapterConditionStatusTrue)
+							g.Expect(applied).To(BeTrue(),
+								"nodepool %d (%s): adapter %s should have Applied=True", n, npID, required)
+
+							available := h.HasAdapterCondition(st.Conditions,
+								client.ConditionTypeAvailable, openapi.AdapterConditionStatusTrue)
+							g.Expect(available).To(BeTrue(),
+								"nodepool %d (%s): adapter %s should have Available=True", n, npID, required)
+
+							healthy := h.HasAdapterCondition(st.Conditions,
+								client.ConditionTypeHealth, openapi.AdapterConditionStatusTrue)
+							g.Expect(healthy).To(BeTrue(),
+								"nodepool %d (%s): adapter %s should have Health=True", n, npID, required)
+						}
+					}
+					Eventually(adaptersComplete, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed())
+
+					ginkgo.GinkgoWriter.Printf("Nodepool %d (%s) has all adapter statuses complete\n", n, npID)
+				}
+
+				ginkgo.GinkgoWriter.Printf("Successfully validated %d nodepools coexisting under cluster %s with resource isolation\n",
+					concurrentNodePoolCount, clusterID)
+			})
+
+		ginkgo.AfterEach(func(ctx context.Context) {
+			if h == nil || clusterID == "" {
+				return
+			}
+
+			ginkgo.By("Verify final cluster state to ensure Ready before cleanup")
+			waitErr := h.WaitForClusterCondition(ctx, clusterID,
+				client.ConditionTypeReady, openapi.ResourceConditionStatusTrue, h.Cfg.Timeouts.Cluster.Ready)
+			if waitErr != nil {
+				ginkgo.GinkgoWriter.Printf("WARNING: cluster %s did not reach Ready state before cleanup: %v\n", clusterID, waitErr)
+			}
+
+			ginkgo.By("cleaning up test cluster " + clusterID)
+			cleanupErr := h.CleanupTestCluster(ctx, clusterID)
+			Expect(cleanupErr).NotTo(HaveOccurred(), "failed to cleanup cluster %s", clusterID)
+		})
+	},
+)
diff --git a/test-design/testcases/concurrent-processing.md b/test-design/testcases/concurrent-processing.md
index 259d197..17f7d88 100644
--- a/test-design/testcases/concurrent-processing.md
+++ b/test-design/testcases/concurrent-processing.md
@@ -19,11 +19,11 @@ This test validates that the system can handle multiple cluster creation request
 |-----------|-----------|
 | **Pos/Neg** | Positive |
 | **Priority** | Tier1 |
-| **Status** | Draft |
-| **Automation** | Not Automated |
+| **Status** | Automated |
+| **Automation** | Automated |
 | **Version** | MVP |
 | **Created** | 2026-02-11 |
-| **Updated** | 2026-02-11 |
+| **Updated** | 2026-03-20 |
 
 
 ---
@@ -116,11 +116,11 @@ This test validates that multiple nodepools can be created under the same cluste
 |-----------|---------------|
 | **Pos/Neg** | Positive      |
 | **Priority** | Tier1         |
-| **Status** | Draft         |
-| **Automation** | Not Automated |
+| **Status** | Automated     |
+| **Automation** | Automated     |
 | **Version** | MVP           |
 | **Created** | 2026-02-11    |
-| **Updated** | 2026-03-04    |
+| **Updated** | 2026-03-24    |
 
 
 ---
@@ -139,13 +139,14 @@ This test validates that multiple nodepools can be created under the same cluste
 
 #### Step 1: Create multiple nodepools under the same cluster
 **Action:**
-- Submit 3 POST requests to create NodePool resources (each call generates a unique name via `{{.Random}}` template):
+- Submit 3 POST requests in parallel to create NodePool resources (each call generates a unique name via `{{.Random}}` template):
 ```bash
 for i in 1 2 3; do
   curl -X POST ${API_URL}/api/hyperfleet/v1/clusters/{cluster_id}/nodepools \
     -H "Content-Type: application/json" \
-    -d @testdata/payloads/nodepools/nodepool-request.json
+    -d @testdata/payloads/nodepools/nodepool-request.json &
 done
+wait
 ```
 
 **Expected Result:**

From 62468e23b08dd33f01ef38422ec2cbbf9c27b4a2 Mon Sep 17 00:00:00 2001
From: Tongtong Zhou <tzhou@redhat.com>
Date: Tue, 24 Mar 2026 14:05:08 +0800
Subject: [PATCH 2/2] ensure all clusters are cleaned up even if one fails

---
 e2e/cluster/concurrent_creation.go | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/e2e/cluster/concurrent_creation.go b/e2e/cluster/concurrent_creation.go
index b3fc8c4..3d924b4 100644
--- a/e2e/cluster/concurrent_creation.go
+++ b/e2e/cluster/concurrent_creation.go
@@ -157,6 +157,7 @@ var _ = ginkgo.Describe("[Suite: cluster][concurrent] System can process concurr
 			}
 
 			ginkgo.By(fmt.Sprintf("Cleaning up %d test clusters", len(clusterIDs)))
+			var cleanupErrors []error
 			for _, clusterID := range clusterIDs {
 				// Wait for cluster Ready before cleanup to prevent namespace deletion conflicts
 				// Without this, adapters may still be creating resources during cleanup
@@ -172,9 +173,12 @@ var _ = ginkgo.Describe("[Suite: cluster][concurrent] System can process concurr
 				}
 
 				ginkgo.By("cleaning up cluster " + clusterID)
-				err = h.CleanupTestCluster(ctx, clusterID)
-				Expect(err).NotTo(HaveOccurred(), "failed to cleanup cluster %s", clusterID)
+				if err := h.CleanupTestCluster(ctx, clusterID); err != nil {
+					ginkgo.GinkgoWriter.Printf("ERROR: failed to cleanup cluster %s: %v\n", clusterID, err)
+					cleanupErrors = append(cleanupErrors, err)
+				}
 			}
+			Expect(cleanupErrors).To(BeEmpty(), "some clusters failed to cleanup")
 		})
 	},
 )