From d1c93b56141ec593b5aa59231935d9be1c612d36 Mon Sep 17 00:00:00 2001
From: Tongtong Zhou
Date: Tue, 24 Mar 2026 13:48:53 +0800
Subject: [PATCH 1/2] HYPERFLEET-757 HYPERFLEET-758 - test: automate concurrent processing E2E tests

---
Review note (ignored by git am):
- e2e/cluster/concurrent_creation.go now records every created cluster ID
  before asserting on the creation results. Previously the IDs were appended
  inside the assertion loop, so one failed creation aborted the spec before
  the remaining, successfully created clusters were registered for cleanup.
- Diffstat, hunk lengths and the PATCH 2/2 hunk offsets account for the 8
  added lines. The blob ids on the "index" lines predate this edit, and the
  indentation of context lines in sentinel.sh and concurrent-processing.md was
  reconstructed; regenerate with `git format-patch` or apply with
  `git am --ignore-whitespace`.

 deploy-scripts/lib/sentinel.sh                |   6 +-
 e2e/cluster/concurrent_creation.go            | 188 ++++++++++++++++
 e2e/nodepool/concurrent_creation.go           | 201 ++++++++++++++++++
 .../testcases/concurrent-processing.md        |  17 +-
 4 files changed, 403 insertions(+), 9 deletions(-)
 create mode 100644 e2e/cluster/concurrent_creation.go
 create mode 100644 e2e/nodepool/concurrent_creation.go

diff --git a/deploy-scripts/lib/sentinel.sh b/deploy-scripts/lib/sentinel.sh
index 1127db0..b04da41 100755
--- a/deploy-scripts/lib/sentinel.sh
+++ b/deploy-scripts/lib/sentinel.sh
@@ -64,7 +64,11 @@ install_sentinel_instance() {
     # This enables the sentinel to include ownerReferences from the Kubernetes resource
     # in the message data sent to adapters, which is required for nodepools management
     if [[ "${resource_type}" == "nodepools" ]]; then
-        helm_cmd+=(--set "config.messageData.owner_references=resource.owner_references")
+        helm_cmd+=(
+            --set "config.messageData.owner_references.id=resource.owner_references.id"
+            --set "config.messageData.owner_references.href=resource.owner_references.href"
+            --set "config.messageData.owner_references.kind=resource.owner_references.kind"
+        )
     fi
 
     log_info "Executing: ${helm_cmd[*]}"
diff --git a/e2e/cluster/concurrent_creation.go b/e2e/cluster/concurrent_creation.go
new file mode 100644
index 0000000..b3fc8c4
--- /dev/null
+++ b/e2e/cluster/concurrent_creation.go
@@ -0,0 +1,188 @@
+package cluster
+
+import (
+	"context"
+	"fmt"
+	"sync"
+
+	"github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability
+
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi"
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client"
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper"
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels"
+)
+
+const concurrentClusterCount = 5
+
+var _ = ginkgo.Describe("[Suite: cluster][concurrent] System can process concurrent cluster creations without resource conflicts",
+	ginkgo.Label(labels.Tier1),
+	func() {
+		var h *helper.Helper
+		var clusterIDs []string
+
+		ginkgo.BeforeEach(func() {
+			h = helper.New()
+			clusterIDs = nil
+		})
+
+		ginkgo.It("should create multiple clusters concurrently and all reach Ready state with isolated resources",
+			func(ctx context.Context) {
+				ginkgo.By(fmt.Sprintf("Submit %d cluster creation requests simultaneously", concurrentClusterCount))
+
+				type clusterResult struct {
+					id   string
+					name string
+					err  error
+				}
+
+				results := make([]clusterResult, concurrentClusterCount)
+				var wg sync.WaitGroup
+				wg.Add(concurrentClusterCount)
+
+				for i := 0; i < concurrentClusterCount; i++ {
+					go func(idx int) {
+						defer wg.Done()
+						defer ginkgo.GinkgoRecover()
+
+						cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json"))
+						if err != nil {
+							results[idx] = clusterResult{err: fmt.Errorf("failed to create cluster %d: %w", idx, err)}
+							return
+						}
+						if cluster.Id == nil {
+							results[idx] = clusterResult{err: fmt.Errorf("cluster %d has nil ID", idx)}
+							return
+						}
+						results[idx] = clusterResult{
+							id:   *cluster.Id,
+							name: cluster.Name,
+						}
+					}(i)
+				}
+				wg.Wait()
+
+				// Register every created cluster for cleanup BEFORE asserting on the results:
+				// Expect aborts the spec at the first failed creation, and AfterEach only
+				// cleans up IDs recorded in clusterIDs, so successful creations must not be skipped.
+				for _, r := range results {
+					if r.id != "" {
+						clusterIDs = append(clusterIDs, r.id)
+					}
+				}
+
+				// Verify all creations succeeded
+				for i, r := range results {
+					Expect(r.err).NotTo(HaveOccurred(), "cluster creation %d failed", i)
+					Expect(r.id).NotTo(BeEmpty(), "cluster %d should have a non-empty ID", i)
+					ginkgo.GinkgoWriter.Printf("Created cluster %d: ID=%s, Name=%s\n", i, r.id, r.name)
+				}
+
+				// Verify all cluster IDs are unique
+				idSet := make(map[string]bool, len(clusterIDs))
+				for _, id := range clusterIDs {
+					Expect(idSet[id]).To(BeFalse(), "duplicate cluster ID detected: %s", id)
+					idSet[id] = true
+				}
+
+				ginkgo.By("Wait for all clusters to reach Ready=True and Available=True")
+				for i, clusterID := range clusterIDs {
+					ginkgo.GinkgoWriter.Printf("Waiting for cluster %d (%s) to become Ready...\n", i, clusterID)
+					err := h.WaitForClusterCondition(
+						ctx,
+						clusterID,
+						client.ConditionTypeReady,
+						openapi.ResourceConditionStatusTrue,
+						h.Cfg.Timeouts.Cluster.Ready,
+					)
+					Expect(err).NotTo(HaveOccurred(), "cluster %d (%s) should reach Ready=True", i, clusterID)
+
+					cluster, err := h.Client.GetCluster(ctx, clusterID)
+					Expect(err).NotTo(HaveOccurred(), "failed to get cluster %d (%s)", i, clusterID)
+
+					hasAvailable := h.HasResourceCondition(cluster.Status.Conditions,
+						client.ConditionTypeAvailable, openapi.ResourceConditionStatusTrue)
+					Expect(hasAvailable).To(BeTrue(),
+						"cluster %d (%s) should have Available=True", i, clusterID)
+
+					ginkgo.GinkgoWriter.Printf("Cluster %d (%s) reached Ready=True, Available=True\n", i, clusterID)
+				}
+
+				ginkgo.By("Verify each cluster has isolated Kubernetes resources (separate namespaces)")
+				for i, clusterID := range clusterIDs {
+					expectedLabels := map[string]string{
+						"hyperfleet.io/cluster-id": clusterID,
+					}
+					err := h.VerifyNamespaceActive(ctx, clusterID, expectedLabels, nil)
+					Expect(err).NotTo(HaveOccurred(),
+						"cluster %d (%s) should have its own active namespace", i, clusterID)
+					ginkgo.GinkgoWriter.Printf("Cluster %d (%s) has isolated namespace\n", i, clusterID)
+				}
+
+				ginkgo.By("Verify all adapter statuses are complete for each cluster")
+				for i, clusterID := range clusterIDs {
+					Eventually(func(g Gomega) {
+						statuses, err := h.Client.GetClusterStatuses(ctx, clusterID)
+						g.Expect(err).NotTo(HaveOccurred(), "failed to get cluster statuses for cluster %d (%s)", i, clusterID)
+						g.Expect(statuses.Items).NotTo(BeEmpty(), "cluster %d (%s) should have adapter statuses", i, clusterID)
+
+						// Build adapter status map
+						adapterMap := make(map[string]openapi.AdapterStatus)
+						for _, adapter := range statuses.Items {
+							adapterMap[adapter.Adapter] = adapter
+						}
+
+						// Verify each required adapter has completed successfully
+						for _, requiredAdapter := range h.Cfg.Adapters.Cluster {
+							adapter, exists := adapterMap[requiredAdapter]
+							g.Expect(exists).To(BeTrue(),
+								"cluster %d (%s): required adapter %s should be present", i, clusterID, requiredAdapter)
+
+							g.Expect(h.HasAdapterCondition(adapter.Conditions,
+								client.ConditionTypeApplied, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
+								"cluster %d (%s): adapter %s should have Applied=True", i, clusterID, requiredAdapter)
+
+							g.Expect(h.HasAdapterCondition(adapter.Conditions,
+								client.ConditionTypeAvailable, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
+								"cluster %d (%s): adapter %s should have Available=True", i, clusterID, requiredAdapter)
+
+							g.Expect(h.HasAdapterCondition(adapter.Conditions,
+								client.ConditionTypeHealth, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
+								"cluster %d (%s): adapter %s should have Health=True", i, clusterID, requiredAdapter)
+						}
+					}, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed())
+
+					ginkgo.GinkgoWriter.Printf("Cluster %d (%s) has all adapter statuses complete\n", i, clusterID)
+				}
+
+				ginkgo.GinkgoWriter.Printf("Successfully validated %d concurrent cluster creations with resource isolation\n", concurrentClusterCount)
+			})
+
+		ginkgo.AfterEach(func(ctx context.Context) {
+			if h == nil || len(clusterIDs) == 0 {
+				return
+			}
+
+			ginkgo.By(fmt.Sprintf("Cleaning up %d test clusters", len(clusterIDs)))
+			for _, clusterID := range clusterIDs {
+				// Wait for cluster Ready before cleanup to prevent namespace deletion conflicts
+				// Without this, adapters may still be creating resources during cleanup
+				err := h.WaitForClusterCondition(
+					ctx,
+					clusterID,
+					client.ConditionTypeReady,
+					openapi.ResourceConditionStatusTrue,
+					h.Cfg.Timeouts.Cluster.Ready,
+				)
+				if err != nil {
+					ginkgo.GinkgoWriter.Printf("WARNING: cluster %s did not reach Ready state before cleanup: %v\n", clusterID, err)
+				}
+
+				ginkgo.By("cleaning up cluster " + clusterID)
+				err = h.CleanupTestCluster(ctx, clusterID)
+				Expect(err).NotTo(HaveOccurred(), "failed to cleanup cluster %s", clusterID)
+			}
+		})
+	},
+)
diff --git a/e2e/nodepool/concurrent_creation.go b/e2e/nodepool/concurrent_creation.go
new file mode 100644
index 0000000..22b05f4
--- /dev/null
+++ b/e2e/nodepool/concurrent_creation.go
@@ -0,0 +1,201 @@
+package nodepool
+
+import (
+	"context"
+	"fmt"
+	"sync"
+
+	"github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability
+
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi"
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client"
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper"
+	"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels"
+)
+
+const concurrentNodePoolCount = 3
+
+var _ = ginkgo.Describe("[Suite: nodepool][concurrent] Multiple nodepools can coexist under same cluster without conflicts",
+	ginkgo.Label(labels.Tier1),
+	func() {
+		var h *helper.Helper
+		var clusterID string
+		var nodepoolIDs []string
+
+		ginkgo.BeforeEach(func(ctx context.Context) {
+			h = helper.New()
+			nodepoolIDs = nil
+
+			// Get or create a cluster for nodepool tests
+			var err error
+			clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json"))
+			Expect(err).NotTo(HaveOccurred(), "failed to get test cluster")
+			ginkgo.GinkgoWriter.Printf("Using cluster ID: %s\n", clusterID)
+		})
+
+		ginkgo.It("should create multiple nodepools under the same cluster and all reach Ready state with isolated resources",
+			func(ctx context.Context) {
+				ginkgo.By(fmt.Sprintf("Submit %d nodepool creation requests simultaneously", concurrentNodePoolCount))
+
+				type nodepoolResult struct {
+					id   string
+					name string
+					err  error
+				}
+
+				results := make([]nodepoolResult, concurrentNodePoolCount)
+				var wg sync.WaitGroup
+				wg.Add(concurrentNodePoolCount)
+
+				for i := 0; i < concurrentNodePoolCount; i++ {
+					go func(idx int) {
+						defer wg.Done()
+						defer ginkgo.GinkgoRecover()
+
+						nodepool, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json"))
+						if err != nil {
+							results[idx] = nodepoolResult{err: fmt.Errorf("failed to create nodepool %d: %w", idx, err)}
+							return
+						}
+						if nodepool.Id == nil {
+							results[idx] = nodepoolResult{err: fmt.Errorf("nodepool %d has nil ID", idx)}
+							return
+						}
+						results[idx] = nodepoolResult{
+							id:   *nodepool.Id,
+							name: nodepool.Name,
+						}
+					}(i)
+				}
+				wg.Wait()
+
+				// Verify all creations succeeded and collect IDs
+				for i, r := range results {
+					Expect(r.err).NotTo(HaveOccurred(), "nodepool creation %d failed", i)
+					Expect(r.id).NotTo(BeEmpty(), "nodepool %d should have a non-empty ID", i)
+					nodepoolIDs = append(nodepoolIDs, r.id)
+					ginkgo.GinkgoWriter.Printf("Created nodepool %d: ID=%s, Name=%s\n", i, r.id, r.name)
+				}
+
+				// Verify all nodepool IDs are unique
+				idSet := make(map[string]bool, len(nodepoolIDs))
+				for _, id := range nodepoolIDs {
+					Expect(idSet[id]).To(BeFalse(), "duplicate nodepool ID detected: %s", id)
+					idSet[id] = true
+				}
+
+				ginkgo.By("Verify all nodepools appear in the list API")
+				nodepoolList, err := h.Client.ListNodePools(ctx, clusterID)
+				Expect(err).NotTo(HaveOccurred(), "failed to list nodepools")
+
+				listedIDs := make(map[string]bool)
+				for _, np := range nodepoolList.Items {
+					if np.Id != nil {
+						listedIDs[*np.Id] = true
+					}
+				}
+				for i, npID := range nodepoolIDs {
+					Expect(listedIDs[npID]).To(BeTrue(),
+						"nodepool %d (%s) should appear in the list API", i, npID)
+				}
+				ginkgo.GinkgoWriter.Printf("All %d nodepools found in list API\n", concurrentNodePoolCount)
+
+				ginkgo.By("Wait for all nodepools to reach Ready=True and Available=True")
+				for i, npID := range nodepoolIDs {
+					ginkgo.GinkgoWriter.Printf("Waiting for nodepool %d (%s) to become Ready...\n", i, npID)
+					err := h.WaitForNodePoolCondition(
+						ctx,
+						clusterID,
+						npID,
+						client.ConditionTypeReady,
+						openapi.ResourceConditionStatusTrue,
+						h.Cfg.Timeouts.NodePool.Ready,
+					)
+					Expect(err).NotTo(HaveOccurred(), "nodepool %d (%s) should reach Ready=True", i, npID)
+
+					np, err := h.Client.GetNodePool(ctx, clusterID, npID)
+					Expect(err).NotTo(HaveOccurred(), "failed to get nodepool %d (%s)", i, npID)
+
+					hasAvailable := h.HasResourceCondition(np.Status.Conditions,
+						client.ConditionTypeAvailable, openapi.ResourceConditionStatusTrue)
+					Expect(hasAvailable).To(BeTrue(),
+						"nodepool %d (%s) should have Available=True", i, npID)
+
+					ginkgo.GinkgoWriter.Printf("Nodepool %d (%s) reached Ready=True, Available=True\n", i, npID)
+				}
+
+				ginkgo.By("Verify Kubernetes resources are isolated per nodepool")
+				for i, npID := range nodepoolIDs {
+					expectedLabels := map[string]string{
+						"hyperfleet.io/cluster-id":  clusterID,
+						"hyperfleet.io/nodepool-id": npID,
+					}
+					Eventually(func() error {
+						return h.VerifyConfigMap(ctx, clusterID, expectedLabels, nil)
+					}, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed(),
+						"nodepool %d (%s) should have its own configmap resource", i, npID)
+					ginkgo.GinkgoWriter.Printf("Nodepool %d (%s) has isolated K8s resources\n", i, npID)
+				}
+
+				ginkgo.By("Verify all adapter statuses are complete for each nodepool")
+				for i, npID := range nodepoolIDs {
+					Eventually(func(g Gomega) {
+						statuses, err := h.Client.GetNodePoolStatuses(ctx, clusterID, npID)
+						g.Expect(err).NotTo(HaveOccurred(), "failed to get nodepool statuses for nodepool %d (%s)", i, npID)
+						g.Expect(statuses.Items).NotTo(BeEmpty(), "nodepool %d (%s) should have adapter statuses", i, npID)
+
+						adapterMap := make(map[string]openapi.AdapterStatus)
+						for _, adapter := range statuses.Items {
+							adapterMap[adapter.Adapter] = adapter
+						}
+
+						for _, requiredAdapter := range h.Cfg.Adapters.NodePool {
+							adapter, exists := adapterMap[requiredAdapter]
+							g.Expect(exists).To(BeTrue(),
+								"nodepool %d (%s): required adapter %s should be present", i, npID, requiredAdapter)
+
+							g.Expect(h.HasAdapterCondition(adapter.Conditions,
+								client.ConditionTypeApplied, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
+								"nodepool %d (%s): adapter %s should have Applied=True", i, npID, requiredAdapter)
+
+							g.Expect(h.HasAdapterCondition(adapter.Conditions,
+								client.ConditionTypeAvailable, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
+								"nodepool %d (%s): adapter %s should have Available=True", i, npID, requiredAdapter)
+
+							g.Expect(h.HasAdapterCondition(adapter.Conditions,
+								client.ConditionTypeHealth, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
+								"nodepool %d (%s): adapter %s should have Health=True", i, npID, requiredAdapter)
+						}
+					}, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed())
+
+					ginkgo.GinkgoWriter.Printf("Nodepool %d (%s) has all adapter statuses complete\n", i, npID)
+				}
+
+				ginkgo.GinkgoWriter.Printf("Successfully validated %d nodepools coexisting under cluster %s with resource isolation\n",
+					concurrentNodePoolCount, clusterID)
+			})
+
+		ginkgo.AfterEach(func(ctx context.Context) {
+			if h == nil || clusterID == "" {
+				return
+			}
+
+			ginkgo.By("Verify final cluster state to ensure Ready before cleanup")
+			err := h.WaitForClusterCondition(
+				ctx,
+				clusterID,
+				client.ConditionTypeReady,
+				openapi.ResourceConditionStatusTrue,
+				h.Cfg.Timeouts.Cluster.Ready,
+			)
+			if err != nil {
+				ginkgo.GinkgoWriter.Printf("WARNING: cluster %s did not reach Ready state before cleanup: %v\n", clusterID, err)
+			}
+
+			ginkgo.By("cleaning up test cluster " + clusterID)
+			err = h.CleanupTestCluster(ctx, clusterID)
+			Expect(err).NotTo(HaveOccurred(), "failed to cleanup cluster %s", clusterID)
+		})
+	},
+)
diff --git a/test-design/testcases/concurrent-processing.md b/test-design/testcases/concurrent-processing.md
index 259d197..17f7d88 100644
--- a/test-design/testcases/concurrent-processing.md
+++ b/test-design/testcases/concurrent-processing.md
@@ -19,11 +19,11 @@ This test validates that the system can handle multiple cluster creation request
 |-----------|-----------|
 | **Pos/Neg** | Positive |
 | **Priority** | Tier1 |
-| **Status** | Draft |
-| **Automation** | Not Automated |
+| **Status** | Automated |
+| **Automation** | Automated |
 | **Version** | MVP |
 | **Created** | 2026-02-11 |
-| **Updated** | 2026-02-11 |
+| **Updated** | 2026-03-20 |
 
 ---
 
@@ -116,11 +116,11 @@ This test validates that multiple nodepools can be created under the same cluste
 |-----------|---------------|
 | **Pos/Neg** | Positive |
 | **Priority** | Tier1 |
-| **Status** | Draft |
-| **Automation** | Not Automated |
+| **Status** | Automated |
+| **Automation** | Automated |
 | **Version** | MVP |
 | **Created** | 2026-02-11 |
-| **Updated** | 2026-03-04 |
+| **Updated** | 2026-03-24 |
 
 ---
 
@@ -139,13 +139,14 @@ This test validates that multiple nodepools can be created under the same cluste
 #### Step 1: Create multiple nodepools under the same cluster
 
 **Action:**
-- Submit 3 POST requests to create NodePool resources (each call generates a unique name via `{{.Random}}` template):
+- Submit 3 POST requests in parallel to create NodePool resources (each call generates a unique name via `{{.Random}}` template):
 ```bash
 for i in 1 2 3; do
   curl -X POST ${API_URL}/api/hyperfleet/v1/clusters/{cluster_id}/nodepools \
     -H "Content-Type: application/json" \
-    -d @testdata/payloads/nodepools/nodepool-request.json
+    -d @testdata/payloads/nodepools/nodepool-request.json &
 done
+wait
 ```
 
 **Expected Result:**

From 62468e23b08dd33f01ef38422ec2cbbf9c27b4a2 Mon Sep 17 00:00:00 2001
From: Tongtong Zhou
Date: Tue, 24 Mar 2026 14:05:08 +0800
Subject: [PATCH 2/2] ensure all clusters are cleaned up even if one fails

---
 e2e/cluster/concurrent_creation.go | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/e2e/cluster/concurrent_creation.go b/e2e/cluster/concurrent_creation.go
index b3fc8c4..3d924b4 100644
--- a/e2e/cluster/concurrent_creation.go
+++ b/e2e/cluster/concurrent_creation.go
@@ -165,6 +165,7 @@ var _ = ginkgo.Describe("[Suite: cluster][concurrent] System can process concurr
 			}
 
 			ginkgo.By(fmt.Sprintf("Cleaning up %d test clusters", len(clusterIDs)))
+			var cleanupErrors []error
 			for _, clusterID := range clusterIDs {
 				// Wait for cluster Ready before cleanup to prevent namespace deletion conflicts
 				// Without this, adapters may still be creating resources during cleanup
@@ -180,9 +181,12 @@ var _ = ginkgo.Describe("[Suite: cluster][concurrent] System can process concurr
 				}
 
 				ginkgo.By("cleaning up cluster " + clusterID)
-				err = h.CleanupTestCluster(ctx, clusterID)
-				Expect(err).NotTo(HaveOccurred(), "failed to cleanup cluster %s", clusterID)
+				if err := h.CleanupTestCluster(ctx, clusterID); err != nil {
+					ginkgo.GinkgoWriter.Printf("ERROR: failed to cleanup cluster %s: %v\n", clusterID, err)
+					cleanupErrors = append(cleanupErrors, err)
+				}
 			}
+			Expect(cleanupErrors).To(BeEmpty(), "some clusters failed to cleanup")
 		})
 	},
 )