From 1227376b8a5ecbd5e8a6e444f017644deace4585 Mon Sep 17 00:00:00 2001 From: Wilfried Goesgens Date: Tue, 10 Feb 2026 15:29:05 +0100 Subject: [PATCH 1/6] workaround: give the cluster time to restore the hot backup --- tests/test_backup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_backup.py b/tests/test_backup.py index 1fea7ecd..12de2791 100644 --- a/tests/test_backup.py +++ b/tests/test_backup.py @@ -1,6 +1,7 @@ import time import pytest +import time from packaging import version from arango.errno import DATABASE_NOT_FOUND, FILE_NOT_FOUND, FORBIDDEN, HTTP_NOT_FOUND @@ -110,6 +111,7 @@ def test_backup_management(sys_db, bad_db, cluster, skip_tests, db_version): # Test restore backup. result = sys_db.backup.restore(backup_id_foo) + time.sleep(10) assert isinstance(result, dict) # Wait for restore to complete From 8ffdefc62c62fb0c30298e77a3d75e6f260eb9cb Mon Sep 17 00:00:00 2001 From: Wilfried Goesgens Date: Thu, 26 Feb 2026 15:23:08 +0100 Subject: [PATCH 2/6] implement waiting for cluster resilience after hotbackup restore --- tests/test_backup.py | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/tests/test_backup.py b/tests/test_backup.py index 12de2791..b42c93ae 100644 --- a/tests/test_backup.py +++ b/tests/test_backup.py @@ -15,6 +15,41 @@ ) from tests.helpers import assert_raises +def wait_for_cluster_resilient(sys_db): + firstExec = True + collectionsInSync = True + attempts = 100 + while not collectionsInSync and attempts > 0: + collectionsInSync = True + countInSync = 0 + countStillWaiting = 0 + cols = sys_db.replication.cluster_inventory(include_system=True) + print(cols) + if cols is None: + collectionsInSync = False + time.sleep(1) + attempts -= 1 + continue + for col in cols: + collectionsInSync = collectionsInSync and col.allInSync + if not col.allInSync: + countStillWaiting += 1 + else: + countInSync+= 1 + + if not collectionsInSync: + time.sleep(1) + if attempts % 50 == 0: 
+ print(cols) + print(f"Amount of collection in sync: {countInSync}. Still not in sync: {countStillWaiting}") + if firstExec: + firstExec = False + if countInSync + countStillWaiting > 100: + attempts = Math.round((countInSync + countStillWaiting) * 0.8); + print("Set attempts to {attempts}") + attempts -= 1; + if attempts == 0: + raise Exception("collections didn't come in sync!") def test_backup_management(sys_db, bad_db, cluster, skip_tests, db_version): if "enterprise" in skip_tests: @@ -111,11 +146,13 @@ def test_backup_management(sys_db, bad_db, cluster, skip_tests, db_version): # Test restore backup. result = sys_db.backup.restore(backup_id_foo) - time.sleep(10) assert isinstance(result, dict) # Wait for restore to complete - time.sleep(10) + if cluster: + wait_for_cluster_resilient(sys_db) + else: + time.sleep(10) # Test restore backup with bad database. with assert_raises(BackupRestoreError) as err: From fbc4df89f529fa9ca59971caf5da39745e5cd423 Mon Sep 17 00:00:00 2001 From: Alex Petenchea Date: Sun, 1 Mar 2026 22:10:14 +0800 Subject: [PATCH 3/6] Adapting the wait function --- tests/test_backup.py | 74 +++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/tests/test_backup.py b/tests/test_backup.py index b42c93ae..7b045313 100644 --- a/tests/test_backup.py +++ b/tests/test_backup.py @@ -1,7 +1,6 @@ import time import pytest -import time from packaging import version from arango.errno import DATABASE_NOT_FOUND, FILE_NOT_FOUND, FORBIDDEN, HTTP_NOT_FOUND @@ -12,44 +11,45 @@ BackupGetError, BackupRestoreError, BackupUploadError, + ReplicationClusterInventoryError, ) from tests.helpers import assert_raises + def wait_for_cluster_resilient(sys_db): - firstExec = True - collectionsInSync = True - attempts = 100 - while not collectionsInSync and attempts > 0: - collectionsInSync = True - countInSync = 0 - countStillWaiting = 0 - cols = sys_db.replication.cluster_inventory(include_system=True) - print(cols) - if 
cols is None: - collectionsInSync = False - time.sleep(1) - attempts -= 1 - continue + collections_in_sync = True + max_attempts = 100 + + while not collections_in_sync and max_attempts > 0: + collections_in_sync = True + count_in_sync = 0 + count_still_waiting = 0 + + try: + cols = sys_db.replication.cluster_inventory(include_system=True) + except ReplicationClusterInventoryError: + time.sleep(1) + max_attempts -= 1 + continue + for col in cols: - collectionsInSync = collectionsInSync and col.allInSync - if not col.allInSync: - countStillWaiting += 1 - else: - countInSync+= 1 - - if not collectionsInSync: - time.sleep(1) - if attempts % 50 == 0: - print(cols) - print(f"Amount of collection in sync: {countInSync}. Still not in sync: {countStillWaiting}") - if firstExec: - firstExec = False - if countInSync + countStillWaiting > 100: - attempts = Math.round((countInSync + countStillWaiting) * 0.8); - print("Set attempts to {attempts}") - attempts -= 1; - if attempts == 0: - raise Exception("collections didn't come in sync!") + collections_in_sync = collections_in_sync and col["all_in_sync"] + if not col["all_in_sync"]: + count_still_waiting += 1 + else: + count_in_sync += 1 + + if not collections_in_sync: + if max_attempts % 50 == 0: + print(cols) + print(f"In sync: {collections_in_sync}") + print(f"Still not in sync: {count_still_waiting}") + + max_attempts -= 1 + + if not collections_in_sync: + raise Exception("Collections didn't come in sync!") + def test_backup_management(sys_db, bad_db, cluster, skip_tests, db_version): if "enterprise" in skip_tests: @@ -148,11 +148,7 @@ def test_backup_management(sys_db, bad_db, cluster, skip_tests, db_version): result = sys_db.backup.restore(backup_id_foo) assert isinstance(result, dict) - # Wait for restore to complete - if cluster: - wait_for_cluster_resilient(sys_db) - else: - time.sleep(10) + wait_for_cluster_resilient(sys_db) # Test restore backup with bad database. 
with assert_raises(BackupRestoreError) as err: From 2f54bd9460d6780b1e8e0ee49096166825b13e92 Mon Sep 17 00:00:00 2001 From: Alex Petenchea Date: Sun, 1 Mar 2026 22:50:02 +0800 Subject: [PATCH 4/6] Adapting the wait function --- tests/test_backup.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test_backup.py b/tests/test_backup.py index 7b045313..51c6b965 100644 --- a/tests/test_backup.py +++ b/tests/test_backup.py @@ -17,7 +17,7 @@ def wait_for_cluster_resilient(sys_db): - collections_in_sync = True + collections_in_sync = False max_attempts = 100 while not collections_in_sync and max_attempts > 0: @@ -26,24 +26,26 @@ def wait_for_cluster_resilient(sys_db): count_still_waiting = 0 try: - cols = sys_db.replication.cluster_inventory(include_system=True) + inventory = sys_db.replication.cluster_inventory(include_system=True) except ReplicationClusterInventoryError: + print("Failed to get cluster inventory, retrying...") time.sleep(1) max_attempts -= 1 continue - for col in cols: - collections_in_sync = collections_in_sync and col["all_in_sync"] + for col in inventory["collections"]: if not col["all_in_sync"]: count_still_waiting += 1 + collections_in_sync = False else: count_in_sync += 1 if not collections_in_sync: if max_attempts % 50 == 0: - print(cols) + print(inventory) print(f"In sync: {collections_in_sync}") print(f"Still not in sync: {count_still_waiting}") + time.sleep(1) max_attempts -= 1 From c200f7f4998b236bf1c6dd9b8b7fb437f979bbc8 Mon Sep 17 00:00:00 2001 From: Alex Petenchea Date: Sun, 1 Mar 2026 23:03:17 +0800 Subject: [PATCH 5/6] Adapting the wait function --- tests/test_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_backup.py b/tests/test_backup.py index 51c6b965..56efaf58 100644 --- a/tests/test_backup.py +++ b/tests/test_backup.py @@ -21,7 +21,6 @@ def wait_for_cluster_resilient(sys_db): max_attempts = 100 while not collections_in_sync and max_attempts > 0: - 
collections_in_sync = True count_in_sync = 0 count_still_waiting = 0 @@ -33,6 +32,7 @@ def wait_for_cluster_resilient(sys_db): max_attempts -= 1 continue + collections_in_sync = True for col in inventory["collections"]: if not col["all_in_sync"]: count_still_waiting += 1 From 9bab94966a3286e312dab7244e31d6e23940d005 Mon Sep 17 00:00:00 2001 From: Alex Petenchea Date: Sun, 1 Mar 2026 23:15:17 +0800 Subject: [PATCH 6/6] Adapting the wait function --- tests/test_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_backup.py b/tests/test_backup.py index 56efaf58..150b9e16 100644 --- a/tests/test_backup.py +++ b/tests/test_backup.py @@ -43,7 +43,7 @@ def wait_for_cluster_resilient(sys_db): if not collections_in_sync: if max_attempts % 50 == 0: print(inventory) - print(f"In sync: {collections_in_sync}") + print(f"In sync: {count_in_sync}") print(f"Still not in sync: {count_still_waiting}") time.sleep(1)