Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion deploy-scripts/lib/sentinel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,11 @@ install_sentinel_instance() {
# This enables the sentinel to include ownerReferences from the Kubernetes resource
# in the message data sent to adapters, which is required for nodepools management
if [[ "${resource_type}" == "nodepools" ]]; then
helm_cmd+=(--set "config.messageData.owner_references=resource.owner_references")
helm_cmd+=(
--set "config.messageData.owner_references.id=resource.owner_references.id"
--set "config.messageData.owner_references.href=resource.owner_references.href"
--set "config.messageData.owner_references.kind=resource.owner_references.kind"
)
fi

log_info "Executing: ${helm_cmd[*]}"
Expand Down
184 changes: 184 additions & 0 deletions e2e/cluster/concurrent_creation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
package cluster

import (
"context"
"fmt"
"sync"

"github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability

"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi"
"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client"
"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper"
"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels"
)

const concurrentClusterCount = 5

var _ = ginkgo.Describe("[Suite: cluster][concurrent] System can process concurrent cluster creations without resource conflicts",
	ginkgo.Label(labels.Tier1),
	func() {
		var h *helper.Helper
		// clusterIDs accumulates every successfully created cluster so that
		// AfterEach can clean them up even if the spec fails partway through.
		var clusterIDs []string

		ginkgo.BeforeEach(func() {
			h = helper.New()
			clusterIDs = nil
		})

		ginkgo.It("should create multiple clusters concurrently and all reach Ready state with isolated resources",
			func(ctx context.Context) {
				ginkgo.By(fmt.Sprintf("Submit %d cluster creation requests simultaneously", concurrentClusterCount))

				// One result slot per goroutine; each goroutine writes only its
				// own index, so no mutex is required.
				type clusterResult struct {
					id   string
					name string
					err  error
				}

				results := make([]clusterResult, concurrentClusterCount)
				var wg sync.WaitGroup
				wg.Add(concurrentClusterCount)

				for i := 0; i < concurrentClusterCount; i++ {
					go func(idx int) {
						defer wg.Done()
						// GinkgoRecover must be deferred in every goroutine that
						// can trip an assertion so Ginkgo reports the failure
						// instead of crashing the suite.
						defer ginkgo.GinkgoRecover()

						cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json"))
						if err != nil {
							results[idx] = clusterResult{err: fmt.Errorf("failed to create cluster %d: %w", idx, err)}
							return
						}
						if cluster.Id == nil {
							results[idx] = clusterResult{err: fmt.Errorf("cluster %d has nil ID", idx)}
							return
						}
						results[idx] = clusterResult{
							id:   *cluster.Id,
							name: cluster.Name,
						}
					}(i)
				}
				wg.Wait()

				// Record every successfully created cluster BEFORE asserting.
				// Previously IDs were appended inside the assertion loop, so the
				// first failed creation aborted the spec and later successful
				// clusters were never registered for cleanup, leaking them.
				for _, r := range results {
					if r.err == nil && r.id != "" {
						clusterIDs = append(clusterIDs, r.id)
					}
				}

				// Verify all creations succeeded
				for i, r := range results {
					Expect(r.err).NotTo(HaveOccurred(), "cluster creation %d failed", i)
					Expect(r.id).NotTo(BeEmpty(), "cluster %d should have a non-empty ID", i)
					ginkgo.GinkgoWriter.Printf("Created cluster %d: ID=%s, Name=%s\n", i, r.id, r.name)
				}

				// Verify all cluster IDs are unique
				idSet := make(map[string]bool, len(clusterIDs))
				for _, id := range clusterIDs {
					Expect(idSet[id]).To(BeFalse(), "duplicate cluster ID detected: %s", id)
					idSet[id] = true
				}

				ginkgo.By("Wait for all clusters to reach Ready=True and Available=True")
				for i, clusterID := range clusterIDs {
					ginkgo.GinkgoWriter.Printf("Waiting for cluster %d (%s) to become Ready...\n", i, clusterID)
					err := h.WaitForClusterCondition(
						ctx,
						clusterID,
						client.ConditionTypeReady,
						openapi.ResourceConditionStatusTrue,
						h.Cfg.Timeouts.Cluster.Ready,
					)
					Expect(err).NotTo(HaveOccurred(), "cluster %d (%s) should reach Ready=True", i, clusterID)

					cluster, err := h.Client.GetCluster(ctx, clusterID)
					Expect(err).NotTo(HaveOccurred(), "failed to get cluster %d (%s)", i, clusterID)

					hasAvailable := h.HasResourceCondition(cluster.Status.Conditions,
						client.ConditionTypeAvailable, openapi.ResourceConditionStatusTrue)
					Expect(hasAvailable).To(BeTrue(),
						"cluster %d (%s) should have Available=True", i, clusterID)

					ginkgo.GinkgoWriter.Printf("Cluster %d (%s) reached Ready=True, Available=True\n", i, clusterID)
				}

				ginkgo.By("Verify each cluster has isolated Kubernetes resources (separate namespaces)")
				for i, clusterID := range clusterIDs {
					expectedLabels := map[string]string{
						"hyperfleet.io/cluster-id": clusterID,
					}
					err := h.VerifyNamespaceActive(ctx, clusterID, expectedLabels, nil)
					Expect(err).NotTo(HaveOccurred(),
						"cluster %d (%s) should have its own active namespace", i, clusterID)
					ginkgo.GinkgoWriter.Printf("Cluster %d (%s) has isolated namespace\n", i, clusterID)
				}

				ginkgo.By("Verify all adapter statuses are complete for each cluster")
				for i, clusterID := range clusterIDs {
					// Poll with a Gomega-scoped Eventually: assertions inside use
					// g.Expect so a transient miss retries instead of failing.
					Eventually(func(g Gomega) {
						statuses, err := h.Client.GetClusterStatuses(ctx, clusterID)
						g.Expect(err).NotTo(HaveOccurred(), "failed to get cluster statuses for cluster %d (%s)", i, clusterID)
						g.Expect(statuses.Items).NotTo(BeEmpty(), "cluster %d (%s) should have adapter statuses", i, clusterID)

						// Build adapter status map
						adapterMap := make(map[string]openapi.AdapterStatus)
						for _, adapter := range statuses.Items {
							adapterMap[adapter.Adapter] = adapter
						}

						// Verify each required adapter has completed successfully
						for _, requiredAdapter := range h.Cfg.Adapters.Cluster {
							adapter, exists := adapterMap[requiredAdapter]
							g.Expect(exists).To(BeTrue(),
								"cluster %d (%s): required adapter %s should be present", i, clusterID, requiredAdapter)

							g.Expect(h.HasAdapterCondition(adapter.Conditions,
								client.ConditionTypeApplied, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
								"cluster %d (%s): adapter %s should have Applied=True", i, clusterID, requiredAdapter)

							g.Expect(h.HasAdapterCondition(adapter.Conditions,
								client.ConditionTypeAvailable, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
								"cluster %d (%s): adapter %s should have Available=True", i, clusterID, requiredAdapter)

							g.Expect(h.HasAdapterCondition(adapter.Conditions,
								client.ConditionTypeHealth, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
								"cluster %d (%s): adapter %s should have Health=True", i, clusterID, requiredAdapter)
						}
					}, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed())

					ginkgo.GinkgoWriter.Printf("Cluster %d (%s) has all adapter statuses complete\n", i, clusterID)
				}

				ginkgo.GinkgoWriter.Printf("Successfully validated %d concurrent cluster creations with resource isolation\n", concurrentClusterCount)
			})

		ginkgo.AfterEach(func(ctx context.Context) {
			if h == nil || len(clusterIDs) == 0 {
				return
			}

			ginkgo.By(fmt.Sprintf("Cleaning up %d test clusters", len(clusterIDs)))
			var cleanupErrors []error
			for _, clusterID := range clusterIDs {
				// Wait for cluster Ready before cleanup to prevent namespace deletion conflicts
				// Without this, adapters may still be creating resources during cleanup
				err := h.WaitForClusterCondition(
					ctx,
					clusterID,
					client.ConditionTypeReady,
					openapi.ResourceConditionStatusTrue,
					h.Cfg.Timeouts.Cluster.Ready,
				)
				if err != nil {
					// Best effort: proceed with cleanup anyway, but surface the warning.
					ginkgo.GinkgoWriter.Printf("WARNING: cluster %s did not reach Ready state before cleanup: %v\n", clusterID, err)
				}

				ginkgo.By("cleaning up cluster " + clusterID)
				if err := h.CleanupTestCluster(ctx, clusterID); err != nil {
					// Collect instead of failing immediately so every cluster
					// gets a cleanup attempt.
					ginkgo.GinkgoWriter.Printf("ERROR: failed to cleanup cluster %s: %v\n", clusterID, err)
					cleanupErrors = append(cleanupErrors, err)
				}
			}
			Expect(cleanupErrors).To(BeEmpty(), "some clusters failed to cleanup")
		})
	},
)
201 changes: 201 additions & 0 deletions e2e/nodepool/concurrent_creation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
package nodepool

import (
"context"
"fmt"
"sync"

"github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability

"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi"
"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client"
"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper"
"github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels"
)

const concurrentNodePoolCount = 3

var _ = ginkgo.Describe("[Suite: nodepool][concurrent] Multiple nodepools can coexist under same cluster without conflicts",
ginkgo.Label(labels.Tier1),
func() {
var h *helper.Helper
var clusterID string
var nodepoolIDs []string

ginkgo.BeforeEach(func(ctx context.Context) {
h = helper.New()
nodepoolIDs = nil

// Get or create a cluster for nodepool tests
var err error
clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json"))
Expect(err).NotTo(HaveOccurred(), "failed to get test cluster")
ginkgo.GinkgoWriter.Printf("Using cluster ID: %s\n", clusterID)
})

ginkgo.It("should create multiple nodepools under the same cluster and all reach Ready state with isolated resources",
func(ctx context.Context) {
ginkgo.By(fmt.Sprintf("Submit %d nodepool creation requests simultaneously", concurrentNodePoolCount))

type nodepoolResult struct {
id string
name string
err error
}

results := make([]nodepoolResult, concurrentNodePoolCount)
var wg sync.WaitGroup
wg.Add(concurrentNodePoolCount)

for i := 0; i < concurrentNodePoolCount; i++ {
go func(idx int) {
defer wg.Done()
defer ginkgo.GinkgoRecover()

nodepool, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json"))
if err != nil {
results[idx] = nodepoolResult{err: fmt.Errorf("failed to create nodepool %d: %w", idx, err)}
return
}
if nodepool.Id == nil {
results[idx] = nodepoolResult{err: fmt.Errorf("nodepool %d has nil ID", idx)}
return
}
results[idx] = nodepoolResult{
id: *nodepool.Id,
name: nodepool.Name,
}
}(i)
}
wg.Wait()

// Verify all creations succeeded and collect IDs
for i, r := range results {
Expect(r.err).NotTo(HaveOccurred(), "nodepool creation %d failed", i)
Expect(r.id).NotTo(BeEmpty(), "nodepool %d should have a non-empty ID", i)
nodepoolIDs = append(nodepoolIDs, r.id)
ginkgo.GinkgoWriter.Printf("Created nodepool %d: ID=%s, Name=%s\n", i, r.id, r.name)
}

// Verify all nodepool IDs are unique
idSet := make(map[string]bool, len(nodepoolIDs))
for _, id := range nodepoolIDs {
Expect(idSet[id]).To(BeFalse(), "duplicate nodepool ID detected: %s", id)
idSet[id] = true
}

ginkgo.By("Verify all nodepools appear in the list API")
nodepoolList, err := h.Client.ListNodePools(ctx, clusterID)
Expect(err).NotTo(HaveOccurred(), "failed to list nodepools")

listedIDs := make(map[string]bool)
for _, np := range nodepoolList.Items {
if np.Id != nil {
listedIDs[*np.Id] = true
}
}
for i, npID := range nodepoolIDs {
Expect(listedIDs[npID]).To(BeTrue(),
"nodepool %d (%s) should appear in the list API", i, npID)
}
ginkgo.GinkgoWriter.Printf("All %d nodepools found in list API\n", concurrentNodePoolCount)

ginkgo.By("Wait for all nodepools to reach Ready=True and Available=True")
for i, npID := range nodepoolIDs {
ginkgo.GinkgoWriter.Printf("Waiting for nodepool %d (%s) to become Ready...\n", i, npID)
err := h.WaitForNodePoolCondition(
ctx,
clusterID,
npID,
client.ConditionTypeReady,
openapi.ResourceConditionStatusTrue,
h.Cfg.Timeouts.NodePool.Ready,
)
Expect(err).NotTo(HaveOccurred(), "nodepool %d (%s) should reach Ready=True", i, npID)

np, err := h.Client.GetNodePool(ctx, clusterID, npID)
Expect(err).NotTo(HaveOccurred(), "failed to get nodepool %d (%s)", i, npID)

hasAvailable := h.HasResourceCondition(np.Status.Conditions,
client.ConditionTypeAvailable, openapi.ResourceConditionStatusTrue)
Expect(hasAvailable).To(BeTrue(),
"nodepool %d (%s) should have Available=True", i, npID)

ginkgo.GinkgoWriter.Printf("Nodepool %d (%s) reached Ready=True, Available=True\n", i, npID)
}

ginkgo.By("Verify Kubernetes resources are isolated per nodepool")
for i, npID := range nodepoolIDs {
expectedLabels := map[string]string{
"hyperfleet.io/cluster-id": clusterID,
"hyperfleet.io/nodepool-id": npID,
}
Eventually(func() error {
return h.VerifyConfigMap(ctx, clusterID, expectedLabels, nil)
}, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed(),
"nodepool %d (%s) should have its own configmap resource", i, npID)
ginkgo.GinkgoWriter.Printf("Nodepool %d (%s) has isolated K8s resources\n", i, npID)
}

ginkgo.By("Verify all adapter statuses are complete for each nodepool")
for i, npID := range nodepoolIDs {
Eventually(func(g Gomega) {
statuses, err := h.Client.GetNodePoolStatuses(ctx, clusterID, npID)
g.Expect(err).NotTo(HaveOccurred(), "failed to get nodepool statuses for nodepool %d (%s)", i, npID)
g.Expect(statuses.Items).NotTo(BeEmpty(), "nodepool %d (%s) should have adapter statuses", i, npID)

adapterMap := make(map[string]openapi.AdapterStatus)
for _, adapter := range statuses.Items {
adapterMap[adapter.Adapter] = adapter
}

for _, requiredAdapter := range h.Cfg.Adapters.NodePool {
adapter, exists := adapterMap[requiredAdapter]
g.Expect(exists).To(BeTrue(),
"nodepool %d (%s): required adapter %s should be present", i, npID, requiredAdapter)

g.Expect(h.HasAdapterCondition(adapter.Conditions,
client.ConditionTypeApplied, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
"nodepool %d (%s): adapter %s should have Applied=True", i, npID, requiredAdapter)

g.Expect(h.HasAdapterCondition(adapter.Conditions,
client.ConditionTypeAvailable, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
"nodepool %d (%s): adapter %s should have Available=True", i, npID, requiredAdapter)

g.Expect(h.HasAdapterCondition(adapter.Conditions,
client.ConditionTypeHealth, openapi.AdapterConditionStatusTrue)).To(BeTrue(),
"nodepool %d (%s): adapter %s should have Health=True", i, npID, requiredAdapter)
}
}, h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval).Should(Succeed())

ginkgo.GinkgoWriter.Printf("Nodepool %d (%s) has all adapter statuses complete\n", i, npID)
}

ginkgo.GinkgoWriter.Printf("Successfully validated %d nodepools coexisting under cluster %s with resource isolation\n",
concurrentNodePoolCount, clusterID)
})

ginkgo.AfterEach(func(ctx context.Context) {
if h == nil || clusterID == "" {
return
}

ginkgo.By("Verify final cluster state to ensure Ready before cleanup")
err := h.WaitForClusterCondition(
ctx,
clusterID,
client.ConditionTypeReady,
openapi.ResourceConditionStatusTrue,
h.Cfg.Timeouts.Cluster.Ready,
)
if err != nil {
ginkgo.GinkgoWriter.Printf("WARNING: cluster %s did not reach Ready state before cleanup: %v\n", clusterID, err)
}

ginkgo.By("cleaning up test cluster " + clusterID)
err = h.CleanupTestCluster(ctx, clusterID)
Expect(err).NotTo(HaveOccurred(), "failed to cleanup cluster %s", clusterID)
})
Comment on lines +179 to +199
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check what CleanupTestCluster does
ast-grep --pattern 'func ($_ *Helper) CleanupTestCluster($$$) {
  $$$
}'

Repository: openshift-hyperfleet/hyperfleet-e2e

Length of output: 61


🏁 Script executed:

#!/bin/bash
# Search for CleanupTestCluster function definition
rg "CleanupTestCluster" -A 10 --type go

Repository: openshift-hyperfleet/hyperfleet-e2e

Length of output: 4202


🏁 Script executed:

#!/bin/bash
# Also check the full concurrent_creation.go file to understand context
wc -l e2e/nodepool/concurrent_creation.go

Repository: openshift-hyperfleet/hyperfleet-e2e

Length of output: 118


🏁 Script executed:

#!/bin/bash
# Look at the full AfterEach and surrounding code in the test file
cat -n e2e/nodepool/concurrent_creation.go | head -220 | tail -60

Repository: openshift-hyperfleet/hyperfleet-e2e

Length of output: 3028


Nodepool resources not explicitly cleaned up before cluster deletion.

The test creates and collects nodepoolIDs but the AfterEach only calls CleanupTestCluster, which is a temporary workaround that deletes the Kubernetes namespace using client-go—it does not delete nodepool API resources. Explicitly delete the created nodepools during cleanup to prevent orphaned API resources.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@e2e/nodepool/concurrent_creation.go` around lines 179 - 199, The AfterEach
cleanup currently only calls h.CleanupTestCluster and may leave nodepool API
resources orphaned; update the ginkgo.AfterEach to explicitly iterate the
collected nodepoolIDs and call the handler's nodepool deletion method for each
(e.g., use h.DeleteNodepool(ctx, clusterID, nodepoolID) or the appropriate h
method) before calling h.CleanupTestCluster, checking errors with
Expect(err).NotTo(HaveOccurred()) for each deletion; keep the existing
WaitForClusterCondition and CleanupTestCluster calls but ensure nodepool
deletions run first to remove nodepool API resources.

},
)
Loading