From 89402dfd65b0577e03d7edeb00b026c823f68619 Mon Sep 17 00:00:00 2001 From: saber Date: Thu, 19 Mar 2026 12:55:28 +0100 Subject: [PATCH 1/2] Add Azure Blob Storage driver support Implements AzureBlobDriver following the same pattern as MinioDriver, allowing media files to be synced to Azure Blob Storage containers. Changes: - drivers.py: Add DriverType.AZURE enum value and AzureBlobDriver class using azure-storage-blob SDK; supports optional blob_path_prefix - config.py: Add AZURE to supported driver check and validate azure_blob config block (account_name, account_key, container required) - config.yaml.default: Add azure_blob config section with all options - Pipfile: Add azure-storage-blob ~=12.0 dependency - test/conftest.py: Add TEST_AZURE_STORAGE_* env vars and reset defaults - test/test_sync.py: Add test_azure_blob_backend covering invalid config, basic upload verification, and blob_path_prefix behaviour Co-Authored-By: Claude Sonnet 4.6 --- Pipfile | 1 + config.py | 12 +++++ config.yaml.default | 8 +++- drivers.py | 46 ++++++++++++++++++ test/conftest.py | 7 +++ test/test_sync.py | 113 +++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 185 insertions(+), 2 deletions(-) diff --git a/Pipfile b/Pipfile index 3552b96..dd4be42 100644 --- a/Pipfile +++ b/Pipfile @@ -12,6 +12,7 @@ minio = "~=7.1" mergin-client = "==0.9.3" dynaconf = {extras = ["ini"],version = "~=3.1"} google-api-python-client = "==2.24" +azure-storage-blob = "~=12.0" [requires] python_version = "3" diff --git a/config.py b/config.py index ca379ab..a53b772 100644 --- a/config.py +++ b/config.py @@ -33,6 +33,7 @@ def validate_config(config): config.driver == DriverType.LOCAL or config.driver == DriverType.MINIO or config.driver == DriverType.GOOGLE_DRIVE + or config.driver == DriverType.AZURE ): raise ConfigError("Config error: Unsupported driver") @@ -78,6 +79,17 @@ def validate_config(config): ): raise ConfigError("Config error: Incorrect GoogleDrive driver settings") + if 
config.driver == DriverType.AZURE and not ( + hasattr(config, "azure_blob") + and hasattr(config.azure_blob, "account_name") + and hasattr(config.azure_blob, "account_key") + and hasattr(config.azure_blob, "container") + and config.azure_blob.account_name + and config.azure_blob.account_key + and config.azure_blob.container + ): + raise ConfigError("Config error: Incorrect Azure Blob Storage driver settings") + def update_config_path( path_param: str, diff --git a/config.yaml.default b/config.yaml.default index d72d66f..0599bdd 100644 --- a/config.yaml.default +++ b/config.yaml.default @@ -25,10 +25,16 @@ minio: bucket_subpath: google_drive: - service_account_file: + service_account_file: folder: share_with: +azure_blob: + account_name: + account_key: + container: + blob_path_prefix: + references: - file: survey.gpkg table: notes diff --git a/drivers.py b/drivers.py index 0ec20bf..4ee7df4 100644 --- a/drivers.py +++ b/drivers.py @@ -21,11 +21,15 @@ from googleapiclient.discovery import build, Resource from googleapiclient.http import MediaFileUpload +from azure.storage.blob import BlobServiceClient +from azure.core.exceptions import AzureError + class DriverType(enum.Enum): LOCAL = "local" MINIO = "minio" GOOGLE_DRIVE = "google_drive" + AZURE = "azure" def __eq__(self, value): if isinstance(value, str): @@ -282,6 +286,46 @@ def _get_share_with(self, config_google_drive) -> typing.List[str]: return emails_to_share_with +class AzureBlobDriver(Driver): + """Driver to handle connection to Azure Blob Storage""" + + def __init__(self, config): + super(AzureBlobDriver, self).__init__(config) + + try: + self.account_name = config.azure_blob.account_name + connection_string = ( + f"DefaultEndpointsProtocol=https;" + f"AccountName={self.account_name};" + f"AccountKey={config.azure_blob.account_key};" + f"EndpointSuffix=core.windows.net" + ) + service_client = BlobServiceClient.from_connection_string(connection_string) + self.container = config.azure_blob.container + 
container_client = service_client.get_container_client(self.container) + if not container_client.exists(): + container_client.create_container() + self.client = container_client + + self.blob_path_prefix = None + if hasattr(config.azure_blob, "blob_path_prefix"): + if config.azure_blob.blob_path_prefix: + self.blob_path_prefix = config.azure_blob.blob_path_prefix + + except AzureError as e: + raise DriverError("Azure Blob Storage driver init error: " + str(e)) + + def upload_file(self, src: str, obj_path: str) -> str: + if self.blob_path_prefix: + obj_path = f"{self.blob_path_prefix}/{obj_path}" + try: + with open(src, "rb") as data: + self.client.upload_blob(name=obj_path, data=data, overwrite=True) + except AzureError as e: + raise DriverError("Azure Blob Storage driver error: " + str(e)) + return f"https://{self.account_name}.blob.core.windows.net/{self.container}/{obj_path}" + + def create_driver(config): """Create driver object based on type defined in config""" driver = None @@ -291,4 +335,6 @@ def create_driver(config): driver = MinioDriver(config) elif config.driver == DriverType.GOOGLE_DRIVE: driver = GoogleDriveDriver(config) + elif config.driver == DriverType.AZURE: + driver = AzureBlobDriver(config) return driver diff --git a/test/conftest.py b/test/conftest.py index 8f9af2a..962e567 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -20,6 +20,9 @@ GOOGLE_DRIVE_SERVICE_ACCOUNT_FILE = os.environ.get( "TEST_GOOGLE_DRIVE_SERVICE_ACCOUNT_FILE" ) +AZURE_STORAGE_ACCOUNT_NAME = os.environ.get("TEST_AZURE_STORAGE_ACCOUNT_NAME") +AZURE_STORAGE_ACCOUNT_KEY = os.environ.get("TEST_AZURE_STORAGE_ACCOUNT_KEY") +AZURE_STORAGE_CONTAINER = os.environ.get("TEST_AZURE_STORAGE_CONTAINER") @pytest.fixture(scope="function") @@ -49,6 +52,10 @@ def setup_config(): "MINIO__BUCKET_SUBPATH": "", "MINIO__SECURE": False, "MINIO__REGION": "", + "AZURE_BLOB__ACCOUNT_NAME": "", + "AZURE_BLOB__ACCOUNT_KEY": "", + "AZURE_BLOB__CONTAINER": "", + "AZURE_BLOB__BLOB_PATH_PREFIX": "", } 
) diff --git a/test/test_sync.py b/test/test_sync.py index a9cfe00..95cebd1 100644 --- a/test/test_sync.py +++ b/test/test_sync.py @@ -11,7 +11,7 @@ import shutil import sqlite3 -from drivers import MinioDriver, LocalDriver, GoogleDriveDriver +from drivers import MinioDriver, LocalDriver, GoogleDriveDriver, AzureBlobDriver from media_sync import ( main, config, @@ -33,6 +33,9 @@ MINIO_SECRET_KEY, GOOGLE_DRIVE_SERVICE_ACCOUNT_FILE, GOOGLE_DRIVE_FOLDER, + AZURE_STORAGE_ACCOUNT_NAME, + AZURE_STORAGE_ACCOUNT_KEY, + AZURE_STORAGE_CONTAINER, cleanup, prepare_mergin_project, ) @@ -634,3 +637,111 @@ def test_google_drive_backend(mc): # files in mergin project still exist (copy mode) assert os.path.exists(os.path.join(work_project_dir, "img1.png")) assert os.path.exists(os.path.join(work_project_dir, "images", "img2.jpg")) + + +def test_azure_blob_backend(mc): + """Test media sync connected to Azure Blob Storage backend (needs valid Azure credentials)""" + project_name = "mediasync_test_azure" + full_project_name = WORKSPACE + "/" + project_name + work_project_dir = os.path.join(TMP_DIR, project_name + "_work") + + cleanup(mc, full_project_name, [work_project_dir]) + prepare_mergin_project(mc, full_project_name) + + # invalid config - missing required fields + config.update( + { + "MERGIN__USERNAME": API_USER, + "MERGIN__PASSWORD": USER_PWD, + "MERGIN__URL": SERVER_URL, + "MERGIN__PROJECT_NAME": full_project_name, + "PROJECT_WORKING_DIR": work_project_dir, + "OPERATION_MODE": "copy", + "REFERENCES": [ + { + "file": None, + "table": None, + "local_path_column": None, + "driver_path_column": None, + } + ], + "DRIVER": "azure", + "AZURE_BLOB__ACCOUNT_NAME": AZURE_STORAGE_ACCOUNT_NAME, + "AZURE_BLOB__ACCOUNT_KEY": "", + "AZURE_BLOB__CONTAINER": AZURE_STORAGE_CONTAINER, + } + ) + + with pytest.raises(ConfigError): + validate_config(config) + + # patch config to fit testing purposes + config.update( + { + "MERGIN__USERNAME": API_USER, + "MERGIN__PASSWORD": USER_PWD, + 
"MERGIN__URL": SERVER_URL, + "MERGIN__PROJECT_NAME": full_project_name, + "PROJECT_WORKING_DIR": work_project_dir, + "OPERATION_MODE": "copy", + "REFERENCES": [ + { + "file": None, + "table": None, + "local_path_column": None, + "driver_path_column": None, + } + ], + "DRIVER": "azure", + "AZURE_BLOB__ACCOUNT_NAME": AZURE_STORAGE_ACCOUNT_NAME, + "AZURE_BLOB__ACCOUNT_KEY": AZURE_STORAGE_ACCOUNT_KEY, + "AZURE_BLOB__CONTAINER": AZURE_STORAGE_CONTAINER, + } + ) + + main() + + # verify files were uploaded to Azure Blob Storage + driver = AzureBlobDriver(config) + blob_names = [b.name for b in driver.client.list_blobs()] + assert "img1.png" in blob_names + assert "images/img2.jpg" in blob_names + + # files in mergin project still exist (copy mode) + assert os.path.exists(os.path.join(work_project_dir, "img1.png")) + assert os.path.exists(os.path.join(work_project_dir, "images", "img2.jpg")) + + # test with blob_path_prefix + cleanup(mc, full_project_name, [work_project_dir]) + prepare_mergin_project(mc, full_project_name) + + config.update( + { + "MERGIN__USERNAME": API_USER, + "MERGIN__PASSWORD": USER_PWD, + "MERGIN__URL": SERVER_URL, + "MERGIN__PROJECT_NAME": full_project_name, + "PROJECT_WORKING_DIR": work_project_dir, + "OPERATION_MODE": "copy", + "REFERENCES": [ + { + "file": None, + "table": None, + "local_path_column": None, + "driver_path_column": None, + } + ], + "DRIVER": "azure", + "AZURE_BLOB__ACCOUNT_NAME": AZURE_STORAGE_ACCOUNT_NAME, + "AZURE_BLOB__ACCOUNT_KEY": AZURE_STORAGE_ACCOUNT_KEY, + "AZURE_BLOB__CONTAINER": AZURE_STORAGE_CONTAINER, + "AZURE_BLOB__BLOB_PATH_PREFIX": "subPath", + } + ) + + main() + + driver = AzureBlobDriver(config) + blob_names = [b.name for b in driver.client.list_blobs()] + assert "subPath/img1.png" in blob_names + assert "subPath/images/img2.jpg" in blob_names From 4c6b4dff5fa0e549f145cb1835c92107d237fae1 Mon Sep 17 00:00:00 2001 From: saber Date: Thu, 19 Mar 2026 13:25:53 +0100 Subject: [PATCH 2/2] docs: add Azure Blob Storage 
setup instructions to README Co-Authored-By: Claude Sonnet 4.6 --- README.md | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 51e2522..60be4f7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Mergin Maps Media Sync -Sync media files from Mergin Maps projects to other storage backends. Currently, supported backend are MinIO (S3-like) backend, Google Drive and local drive (mostly used for testing). +Sync media files from Mergin Maps projects to other storage backends. Currently, supported backends are MinIO (S3-like), Azure Blob Storage, Google Drive and local drive (mostly used for testing). Sync works in two modes, in COPY mode, where media files are only copied to external drive and MOVE mode, where files are subsequently removed from Mergin Maps project (on cloud). @@ -68,6 +68,37 @@ docker run -it \ The specification of `MINIO__BUCKET_SUBPATH` is optional and can be skipped if the files should be stored directly in `MINIO__BUCKET`. +#### Using Azure Blob Storage backend + +You will need an Azure Storage account. Retrieve the **account name** and one of the **account keys** from the Azure Portal under _Storage account → Access keys_. + +```shell +docker run -it \ + --name mergin-media-sync \ + -e MERGIN__USERNAME=john \ + -e MERGIN__PASSWORD=myStrongPassword \ + -e MERGIN__PROJECT_NAME=john/my_project \ + -e DRIVER=azure \ + -e AZURE_BLOB__ACCOUNT_NAME=mystorageaccount \ + -e AZURE_BLOB__ACCOUNT_KEY=base64encodedkey== \ + -e AZURE_BLOB__CONTAINER=my-container \ + lutraconsulting/mergin-media-sync python3 media_sync_daemon.py +``` + +The container is created automatically if it does not already exist. Uploaded files are accessible at: +``` +https://<account_name>.blob.core.windows.net/<container>/<blob_path> +``` + +`AZURE_BLOB__BLOB_PATH_PREFIX` is optional. When set, all blobs are placed under that prefix inside the container (e.g. 
`AZURE_BLOB__BLOB_PATH_PREFIX=myproject` stores files at `myproject/img1.png`). + +| Environment variable | Required | Description | +|---|---|---| +| `AZURE_BLOB__ACCOUNT_NAME` | yes | Azure Storage account name | +| `AZURE_BLOB__ACCOUNT_KEY` | yes | Storage account access key (found under _Access keys_ in the portal) | +| `AZURE_BLOB__CONTAINER` | yes | Blob container name (created automatically if missing) | +| `AZURE_BLOB__BLOB_PATH_PREFIX` | no | Optional path prefix for all uploaded blobs | + #### Using Google Drive backend For setup instructions and more details, please refer to our [Google Drive guide](./docs/google-drive-setup.md). @@ -136,6 +167,10 @@ To run automatic tests: export TEST_MINIO_URL="localhost:9000" export TEST_MINIO_ACCESS_KEY=EXAMPLE export TEST_MINIO_SECRET_KEY=EXAMPLEKEY + # Azure Blob Storage backend tests (optional) + export TEST_AZURE_STORAGE_ACCOUNT_NAME=<account_name> + export TEST_AZURE_STORAGE_ACCOUNT_KEY=<account_key> + export TEST_AZURE_STORAGE_CONTAINER=<container> pipenv run pytest test/ ```