feat: Support Actor schema storages with Alias mechanism #797
    @@ -5,6 +5,8 @@
     from logging import getLogger
     from typing import TYPE_CHECKING, ClassVar, Literal, overload

    +from propcache import cached_property
    +
     from apify_client import ApifyClientAsync

     from ._utils import hash_api_base_url_and_token

    @@ -139,7 +141,6 @@ def __init__(
             self._storage_type = storage_type
             self._alias = alias
             self._configuration = configuration
    -        self._additional_cache_key = hash_api_base_url_and_token(configuration)

         async def __aenter__(self) -> AliasResolver:
             """Context manager to prevent race condition in alias creation."""

    @@ -183,15 +184,7 @@ async def _get_alias_map(cls, configuration: Configuration) -> dict[str, str]:
             default_kvs_client = await cls._get_default_kvs_client(configuration)

             record = await default_kvs_client.get_record(cls._ALIAS_MAPPING_KEY)

    -        # get_record can return {key: ..., value: ..., content_type: ...}
    -        if isinstance(record, dict):
    -            if 'value' in record and isinstance(record['value'], dict):
    -                cls._alias_map = record['value']
    -            else:
    -                cls._alias_map = record
    -        else:
    -            cls._alias_map = dict[str, str]()
    +        cls._alias_map = record.get('value', {}) if record else {}

             return cls._alias_map

    @@ -201,6 +194,18 @@ async def resolve_id(self) -> str | None:
             Returns:
                 Storage id if it exists, None otherwise.
             """
    +        # First try to find the alias in the configuration mapping to avoid any API calls.
    +        # This mapping is maintained by the Apify platform and does not have to be maintained in the default KVS.
    +        if self._configuration.actor_storages and self._alias != 'default':
Member: Question: I don't understand why we're treating 'default' differently.

Contributor (author): The way to access the default storage is without an alias. Loading up the default aliases from the configuration mapping would allow an alternative way to access the default storage through a 'default' alias. In Python that is generally frowned upon.

Member: Ah, sometimes I wish JavaScript was like this :D
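For context on this exchange, here is a minimal sketch of the two access patterns being contrasted, based on the `Actor.open_dataset` usage shown in the test fixture further down this page (the diff resumes below):

```python
from apify import Actor


async def main() -> None:
    async with Actor:
        # The default dataset is opened without an alias; per the comment above,
        # it is intentionally not resolvable through a 'default' alias.
        default_dataset = await Actor.open_dataset()

        # A named alias is resolved via the platform-provided configuration
        # mapping first, with the default KVS record as a fallback.
        custom_dataset = await Actor.open_dataset(alias='custom')
```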
    +            storage_maps = {
    +                'Dataset': self._configuration.actor_storages.datasets,
    +                'KeyValueStore': self._configuration.actor_storages.key_value_stores,
    +                'RequestQueue': self._configuration.actor_storages.request_queues,
    +            }
    +            if storage_id := storage_maps.get(self._storage_type, {}).get(self._alias):
    +                return storage_id
    +
    +        # Fallback to the mapping saved in the default KVS
             return (await self._get_alias_map(self._configuration)).get(self._storage_key, None)

         async def store_mapping(self, storage_id: str) -> None:

    @@ -220,30 +225,21 @@ async def store_mapping(self, storage_id: str) -> None:
             try:
                 record = await default_kvs_client.get_record(self._ALIAS_MAPPING_KEY)

    -            # get_record can return {key: ..., value: ..., content_type: ...}
    -            if isinstance(record, dict) and 'value' in record:
    -                record = record['value']
    -
    -            # Update or create the record with the new alias mapping
    -            if isinstance(record, dict):
    -                record[self._storage_key] = storage_id
    -            else:
    -                record = {self._storage_key: storage_id}
    +            value = record.get('value', {}) if record else {}
    +            value[self._storage_key] = storage_id

                 # Store the mapping back in the KVS.
    -            await default_kvs_client.set_record(self._ALIAS_MAPPING_KEY, record)
    +            await default_kvs_client.set_record(key=self._ALIAS_MAPPING_KEY, value=value)
             except Exception as exc:
                 logger.warning(f'Error storing alias mapping for {self._alias}: {exc}')

    -    @property
    +    @cached_property
         def _storage_key(self) -> str:
             """Get a unique storage key used for storing the alias in the mapping."""
             return self._ALIAS_STORAGE_KEY_SEPARATOR.join(
                 [
                     self._storage_type,
                     self._alias,
    -                self._additional_cache_key,
    +                hash_api_base_url_and_token(self._configuration),
                 ]
             )
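One detail worth spelling out from the hunks above is the shape of the record returned by `get_record` and why the one-line replacement covers it. A minimal sketch; the record key and the separator used in the example mapping key are placeholders, not the actual `_ALIAS_MAPPING_KEY` or `_ALIAS_STORAGE_KEY_SEPARATOR` values:

```python
# Per the removed comment in the diff, get_record can return a wrapper like
# {'key': ..., 'value': ..., 'content_type': ...}, or nothing when the record
# does not exist yet.
def extract_alias_map(record: dict | None) -> dict[str, str]:
    # Mirrors `record.get('value', {}) if record else {}` from the diff.
    return record.get('value', {}) if record else {}


# Hypothetical examples (the real storage keys come from _storage_key):
assert extract_alias_map(None) == {}
assert extract_alias_map({'key': 'alias-mapping', 'value': {'Dataset|custom|abcd': 'ds-id'}}) == {
    'Dataset|custom|abcd': 'ds-id'
}
```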
    @@ -0,0 +1,24 @@
    {
      "actorSpecification": 1,
      "version": "0.0",
      "storages": {
        "datasets": {
          "default": {
            "actorSpecification": 1,
            "fields": {
              "properties": {
                "id": { "type": "string" }
              }
            }
          },
          "custom": {
            "actorSpecification": 1,
            "fields": {
              "properties": {
                "id": { "type": "string" }
              }
            }
          }
        }
      }
    }
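For illustration, the mapping that ends up in `Actor.configuration.actor_storages` for a schema like this might look roughly as follows. The env var name and the JSON shape are taken from the review discussion at the bottom of this page and from the attribute names in the diff (`datasets`, `key_value_stores`, `request_queues`); the ids are made up:

```python
import json

# Hypothetical value of the ACTOR_STORAGES_JSON env var provided by the platform.
raw = json.dumps({
    'datasets': {'default': 'aB1cD2eF3gH4iJ5kL', 'custom': 'mN6oP7qR8sT9uV0wX'},
    'keyValueStores': {'default': 'yZ1aB2cD3eF4gH5iJ'},
    'requestQueues': {'default': 'kL6mN7oP8qR9sT0uV'},
})

actor_storages = json.loads(raw)

# With such a mapping, the fixture's open_dataset(alias='custom') call should
# resolve straight to the 'custom' dataset id, without touching the default KVS.
assert actor_storages['datasets']['custom'] == 'mN6oP7qR8sT9uV0wX'
```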
    @@ -0,0 +1,7 @@
    from apify import Actor


    async def main() -> None:
        async with Actor:
            assert Actor.configuration.actor_storages
            assert (await Actor.open_dataset(alias='custom')).id == Actor.configuration.actor_storages.datasets['custom']
Member: Suggestion: Would it also make sense to check that ...

Contributor (author): That should probably be tested at the platform level. If the env vars are inconsistent (the default id in the mapping env var differs from the one in the dedicated env var), our code can't do much about that.
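If the truncated suggestion above is about the default storage id, a guess at what the extra assertion could look like (hypothetical, not part of the PR):

```python
from apify import Actor


async def main() -> None:
    async with Actor:
        assert Actor.configuration.actor_storages
        # Hypothetical extra check: the id of the default dataset should also
        # match the platform-provided mapping.
        assert (await Actor.open_dataset()).id == Actor.configuration.actor_storages.datasets['default']
```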
    @@ -0,0 +1,26 @@
    from __future__ import annotations

    from pathlib import Path
    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from ..conftest import MakeActorFunction, RunActorFunction

    _ACTOR_SOURCE_DIR = Path(__file__).parent / 'actor_source'


    def read_actor_source(filename: str) -> str:
        return (_ACTOR_SOURCE_DIR / filename).read_text()


    async def test_configuration_storages(make_actor: MakeActorFunction, run_actor: RunActorFunction) -> None:
        actor = await make_actor(
            label='schema_storages',
            source_files={
                'src/main.py': read_actor_source('main.py'),
                '.actor/actor.json': read_actor_source('actor.json'),
            },
        )
        run_result = await run_actor(actor)

        assert run_result.status == 'SUCCEEDED'
This change closes #762, but the issue description specifies the platform-provided env var name as ACTOR_STORAGE_IDS (an object with datasets, keyValueStores, requestQueues). The new field only declares alias='actor_storages_json' (env ACTOR_STORAGES_JSON). If the platform actually uses ACTOR_STORAGE_IDS, configuration loading will silently miss the mapping. Consider supporting ACTOR_STORAGE_IDS via validation_alias=AliasChoices(...), keeping backward compatibility if ACTOR_STORAGES_JSON is intentional.

The implementation on the platform is ACTOR_STORAGES_JSON now; if it changes, the code should reflect it as well.
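A sketch of the reviewer's validation_alias suggestion, assuming the field lives on a pydantic-settings model; the actual Configuration class and the type backing actor_storages are not shown in this extract, so a plain dict stands in for it here:

```python
from pydantic import AliasChoices, Field
from pydantic_settings import BaseSettings


class ConfigurationSketch(BaseSettings):
    # Accept either env var name: ACTOR_STORAGES_JSON is what the platform
    # sends today (per the reply above), ACTOR_STORAGE_IDS is the name used
    # in issue #762. Env var matching in pydantic-settings is case-insensitive
    # by default, and dict-typed fields are parsed from JSON.
    actor_storages: dict | None = Field(
        default=None,
        validation_alias=AliasChoices('actor_storages_json', 'actor_storage_ids'),
    )
```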