From ae6a437deedec4d0896fe61b246945bea7a3e325 Mon Sep 17 00:00:00 2001 From: Jesus Orosco Date: Wed, 11 Feb 2026 07:54:45 -0800 Subject: [PATCH 1/5] provider --- src/datacustomcode/client.py | 72 ++++++++----------- src/datacustomcode/config.py | 20 +++++- src/datacustomcode/config.yaml | 2 +- src/datacustomcode/proxy/base.py | 2 +- src/datacustomcode/proxy/client/base.py | 9 ++- .../proxy/client/local_proxy_client.py | 6 +- 6 files changed, 58 insertions(+), 53 deletions(-) diff --git a/src/datacustomcode/client.py b/src/datacustomcode/client.py index d1a1138..32d907e 100644 --- a/src/datacustomcode/client.py +++ b/src/datacustomcode/client.py @@ -15,6 +15,7 @@ from __future__ import annotations from enum import Enum +import pprint from typing import ( TYPE_CHECKING, ClassVar, @@ -107,7 +108,7 @@ class Client: _reader: BaseDataCloudReader _writer: BaseDataCloudWriter _file: DefaultFindFilePath - _proxy: BaseProxyClient + _proxy: Optional[BaseProxyClient] _data_layer_history: dict[DataCloudObjectType, set[str]] def __new__( @@ -117,18 +118,30 @@ def __new__( proxy: Optional[BaseProxyClient] = None, spark_provider: Optional["BaseSparkSessionProvider"] = None, ) -> Client: + print("Chuy client start 2 config:") + pprint.pprint(str(config), indent=4) + if cls._instance is None: cls._instance = super().__new__(cls) - spark = None + print("Chuy client here") + # Initialize Readers and Writers from config # and/or provided reader and writer if reader is None or writer is None: # We need a spark because we will initialize readers and writers if config.spark_config is None: - raise ValueError( - "Spark config is required when reader/writer is not provided" + # Assume BYOC Function + # cls._instance._reader = None + # cls._instance._writer = None + cls._instance._file = DefaultFindFilePath() + # cls._instance._data_layer_history = None + cls._instance._proxy = ( + config.proxy_config.to_object() # type: ignore + if config.proxy_config is not None + else None ) + return 
cls._instance provider: BaseSparkSessionProvider if spark_provider is not None: @@ -139,22 +152,6 @@ def __new__( provider = DefaultSparkSessionProvider() spark = provider.get_session(config.spark_config) - elif ( - proxy is None - and config.proxy_config is not None - and config.spark_config is not None - ): - # Both reader and writer provided; we still need spark for proxy init - provider = ( - spark_provider - if spark_provider is not None - else ( - config.spark_provider_config.to_object() - if config.spark_provider_config is not None - else DefaultSparkSessionProvider() - ) - ) - spark = provider.get_session(config.spark_config) if config.reader_config is None and reader is None: raise ValueError( @@ -163,28 +160,9 @@ def __new__( elif reader is None or ( config.reader_config is not None and config.reader_config.force ): - if config.proxy_config is None: - raise ValueError( - "Proxy config is required when reader is built from config" - ) - assert ( - spark is not None - ) # set in "reader is None or writer is None" branch - assert config.reader_config is not None # ensured by branch condition - proxy_init = config.proxy_config.to_object(spark) - - reader_init = config.reader_config.to_object(spark) + reader_init = config.reader_config.to_object(spark) # type: ignore else: reader_init = reader - if proxy is not None: - proxy_init = proxy - elif config.proxy_config is None: - raise ValueError("Proxy config is required when reader is provided") - else: - assert ( - spark is not None - ) # set in "both provided; proxy from config" branch - proxy_init = config.proxy_config.to_object(spark) if config.writer_config is None and writer is None: raise ValueError( "Writer config is required when writer is not provided" @@ -192,15 +170,19 @@ def __new__( elif writer is None or ( config.writer_config is not None and config.writer_config.force ): - assert spark is not None # set when reader or writer from config - assert config.writer_config is not None # ensured by branch 
condition - writer_init = config.writer_config.to_object(spark) + writer_init = config.writer_config.to_object(spark) # type: ignore else: writer_init = writer + proxy_init: Optional["BaseProxyClient"] = None + if proxy is not None: + proxy_init = proxy + elif config.proxy_config is not None: + proxy_init = config.proxy_config.to_object() # type: ignore + cls._instance._reader = reader_init cls._instance._writer = writer_init - cls._instance._file = DefaultFindFilePath() cls._instance._proxy = proxy_init + cls._instance._file = DefaultFindFilePath() cls._instance._data_layer_history = { DataCloudObjectType.DLO: set(), DataCloudObjectType.DMO: set(), @@ -260,6 +242,8 @@ def write_to_dmo( return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs) def call_llm_gateway(self, LLM_MODEL_ID: str, prompt: str, maxTokens: int) -> str: + if self._proxy is None: + raise ValueError("No proxy configured; set proxy or proxy_config") return self._proxy.call_llm_gateway(LLM_MODEL_ID, prompt, maxTokens) def find_file_path(self, file_name: str) -> Path: diff --git a/src/datacustomcode/config.py b/src/datacustomcode/config.py index b1edfc4..602e182 100644 --- a/src/datacustomcode/config.py +++ b/src/datacustomcode/config.py @@ -38,6 +38,7 @@ from datacustomcode.io.base import BaseDataAccessLayer from datacustomcode.io.reader.base import BaseDataCloudReader # noqa: TCH001 from datacustomcode.io.writer.base import BaseDataCloudWriter # noqa: TCH001 +from datacustomcode.proxy.base import BaseProxyAccessLayer from datacustomcode.proxy.client.base import BaseProxyClient # noqa: TCH001 from datacustomcode.spark.base import BaseSparkSessionProvider @@ -93,6 +94,23 @@ class SparkConfig(ForceableConfig): _P = TypeVar("_P", bound=BaseSparkSessionProvider) +_PX = TypeVar("_PX", bound=BaseProxyAccessLayer) + + +class ProxyAccessLayerObjectConfig(ForceableConfig, Generic[_PX]): + """Config for proxy clients that take no constructor args (e.g. 
no spark).""" + + model_config = ConfigDict(validate_default=True, extra="forbid") + type_base: ClassVar[Type[BaseProxyAccessLayer]] = BaseProxyAccessLayer + type_config_name: str = Field( + description="CONFIG_NAME of the proxy client (e.g. 'LocalProxyClient').", + ) + options: dict[str, Any] = Field(default_factory=dict) + + def to_object(self) -> _PX: + type_ = self.type_base.subclass_from_config_name(self.type_config_name) + return cast(_PX, type_(**self.options)) + class SparkProviderConfig(ForceableConfig, Generic[_P]): model_config = ConfigDict(validate_default=True, extra="forbid") @@ -110,7 +128,7 @@ def to_object(self) -> _P: class ClientConfig(BaseModel): reader_config: Union[AccessLayerObjectConfig[BaseDataCloudReader], None] = None writer_config: Union[AccessLayerObjectConfig[BaseDataCloudWriter], None] = None - proxy_config: Union[AccessLayerObjectConfig[BaseProxyClient], None] = None + proxy_config: Union[ProxyAccessLayerObjectConfig[BaseProxyClient], None] = None spark_config: Union[SparkConfig, None] = None spark_provider_config: Union[ SparkProviderConfig[BaseSparkSessionProvider], None diff --git a/src/datacustomcode/config.yaml b/src/datacustomcode/config.yaml index 0267b6f..190d0b7 100644 --- a/src/datacustomcode/config.yaml +++ b/src/datacustomcode/config.yaml @@ -19,6 +19,6 @@ spark_config: spark.driver.extraJavaOptions: -Djava.security.manager=allow proxy_config: - type_config_name: LocalProxyClientProvider + type_config_name: LocalProxyClient options: credentials_profile: default diff --git a/src/datacustomcode/proxy/base.py b/src/datacustomcode/proxy/base.py index cba92f6..71cf314 100644 --- a/src/datacustomcode/proxy/base.py +++ b/src/datacustomcode/proxy/base.py @@ -19,6 +19,6 @@ from datacustomcode.mixin import UserExtendableNamedConfigMixin -class BaseDataAccessLayer(ABC, UserExtendableNamedConfigMixin): +class BaseProxyAccessLayer(ABC, UserExtendableNamedConfigMixin): def __init__(self): pass diff --git 
a/src/datacustomcode/proxy/client/base.py b/src/datacustomcode/proxy/client/base.py index 3c4a56b..5c840a0 100644 --- a/src/datacustomcode/proxy/client/base.py +++ b/src/datacustomcode/proxy/client/base.py @@ -16,13 +16,12 @@ from abc import abstractmethod -from datacustomcode.io.base import BaseDataAccessLayer +from datacustomcode.proxy.base import BaseProxyAccessLayer -class BaseProxyClient(BaseDataAccessLayer): - def __init__(self, spark=None, **kwargs): - if spark is not None: - super().__init__(spark) +class BaseProxyClient(BaseProxyAccessLayer): + def __init__(self): + pass @abstractmethod def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str: ... diff --git a/src/datacustomcode/proxy/client/local_proxy_client.py b/src/datacustomcode/proxy/client/local_proxy_client.py index 2c2f962..6156fe9 100644 --- a/src/datacustomcode/proxy/client/local_proxy_client.py +++ b/src/datacustomcode/proxy/client/local_proxy_client.py @@ -20,7 +20,11 @@ class LocalProxyClientProvider(BaseProxyClient): """Default proxy client provider.""" - CONFIG_NAME = "LocalProxyClientProvider" + CONFIG_NAME = "LocalProxyClient" + + def __init__(self, credentials_profile: str = "default", **kwargs: object) -> None: + super().__init__() + self.credentials_profile = credentials_profile def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str: return f"Hello, thanks for using {llmModelId}. 
So many tokens: {maxTokens}" From 9b64711c089b29fed9421e95066a244b9ec6d29f Mon Sep 17 00:00:00 2001 From: Jesus Orosco Date: Wed, 11 Feb 2026 16:11:17 -0800 Subject: [PATCH 2/5] remove local proxy client, just client --- src/datacustomcode/__init__.py | 2 +- src/datacustomcode/client.py | 47 ++++++++++--------- src/datacustomcode/config.yaml | 2 +- .../proxy/client/local_proxy_client.py | 30 ------------ 4 files changed, 27 insertions(+), 54 deletions(-) delete mode 100644 src/datacustomcode/proxy/client/local_proxy_client.py diff --git a/src/datacustomcode/__init__.py b/src/datacustomcode/__init__.py index fdb0679..c3b6309 100644 --- a/src/datacustomcode/__init__.py +++ b/src/datacustomcode/__init__.py @@ -17,7 +17,7 @@ from datacustomcode.credentials import AuthType, Credentials from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader from datacustomcode.io.writer.print import PrintDataCloudWriter -from datacustomcode.proxy.client.local_proxy_client import LocalProxyClientProvider +from datacustomcode.proxy.client.client import LocalProxyClientProvider __all__ = [ "AuthType", diff --git a/src/datacustomcode/client.py b/src/datacustomcode/client.py index 32d907e..5ad04a9 100644 --- a/src/datacustomcode/client.py +++ b/src/datacustomcode/client.py @@ -15,7 +15,7 @@ from __future__ import annotations from enum import Enum -import pprint +import importlib from typing import ( TYPE_CHECKING, ClassVar, @@ -110,6 +110,7 @@ class Client: _file: DefaultFindFilePath _proxy: Optional[BaseProxyClient] _data_layer_history: dict[DataCloudObjectType, set[str]] + _code_type: str def __new__( cls, @@ -117,31 +118,23 @@ def __new__( writer: Optional["BaseDataCloudWriter"] = None, proxy: Optional[BaseProxyClient] = None, spark_provider: Optional["BaseSparkSessionProvider"] = None, + code_type: str = "script", ) -> Client: - print("Chuy client start 2 config:") - pprint.pprint(str(config), indent=4) + print(f"Chuy client new client: {code_type}") + if "function" in 
code_type: + print("Chuy111 client new function client") + return cls._new_function_client() if cls._instance is None: cls._instance = super().__new__(cls) - - print("Chuy client here") - # Initialize Readers and Writers from config # and/or provided reader and writer if reader is None or writer is None: # We need a spark because we will initialize readers and writers if config.spark_config is None: - # Assume BYOC Function - # cls._instance._reader = None - # cls._instance._writer = None - cls._instance._file = DefaultFindFilePath() - # cls._instance._data_layer_history = None - cls._instance._proxy = ( - config.proxy_config.to_object() # type: ignore - if config.proxy_config is not None - else None + raise ValueError( + "Spark config is required when reader/writer is not provided" ) - return cls._instance provider: BaseSparkSessionProvider if spark_provider is not None: @@ -173,15 +166,9 @@ def __new__( writer_init = config.writer_config.to_object(spark) # type: ignore else: writer_init = writer - proxy_init: Optional["BaseProxyClient"] = None - if proxy is not None: - proxy_init = proxy - elif config.proxy_config is not None: - proxy_init = config.proxy_config.to_object() # type: ignore cls._instance._reader = reader_init cls._instance._writer = writer_init - cls._instance._proxy = proxy_init cls._instance._file = DefaultFindFilePath() cls._instance._data_layer_history = { DataCloudObjectType.DLO: set(), @@ -191,6 +178,22 @@ def __new__( raise ValueError("Cannot set reader or writer after client is initialized") return cls._instance + @classmethod + def _new_function_client(cls) -> Client: + print(f"Chuy config: {config}") + + importlib.import_module( + "datacustomcoderemote.proxy.client.client.ProxyClientProvider" + ) + + cls._instance = super().__new__(cls) + cls._instance._proxy = ( + config.proxy_config.to_object() # type: ignore + if config.proxy_config is not None + else None + ) + return cls._instance + def read_dlo(self, name: str) -> PySparkDataFrame: 
"""Read a DLO from Data Cloud. diff --git a/src/datacustomcode/config.yaml b/src/datacustomcode/config.yaml index 190d0b7..0267b6f 100644 --- a/src/datacustomcode/config.yaml +++ b/src/datacustomcode/config.yaml @@ -19,6 +19,6 @@ spark_config: spark.driver.extraJavaOptions: -Djava.security.manager=allow proxy_config: - type_config_name: LocalProxyClient + type_config_name: LocalProxyClientProvider options: credentials_profile: default diff --git a/src/datacustomcode/proxy/client/local_proxy_client.py b/src/datacustomcode/proxy/client/local_proxy_client.py deleted file mode 100644 index 6156fe9..0000000 --- a/src/datacustomcode/proxy/client/local_proxy_client.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2025, Salesforce, Inc. -# SPDX-License-Identifier: Apache-2 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import annotations - -from datacustomcode.proxy.client.base import BaseProxyClient - - -class LocalProxyClientProvider(BaseProxyClient): - """Default proxy client provider.""" - - CONFIG_NAME = "LocalProxyClient" - - def __init__(self, credentials_profile: str = "default", **kwargs: object) -> None: - super().__init__() - self.credentials_profile = credentials_profile - - def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str: - return f"Hello, thanks for using {llmModelId}. 
So many tokens: {maxTokens}" From 0e2a2f132dd17d4fb012ba916e233ff4f90846c5 Mon Sep 17 00:00:00 2001 From: Jesus Orosco Date: Thu, 12 Feb 2026 13:38:39 -0800 Subject: [PATCH 3/5] finally working --- src/datacustomcode/__init__.py | 4 ++- src/datacustomcode/client.py | 20 ++++++++----- src/datacustomcode/config.py | 8 +++++ .../proxy/client/LocalProxyClientProvider.py | 30 +++++++++++++++++++ src/datacustomcode/run.py | 13 ++++---- 5 files changed, 62 insertions(+), 13 deletions(-) create mode 100644 src/datacustomcode/proxy/client/LocalProxyClientProvider.py diff --git a/src/datacustomcode/__init__.py b/src/datacustomcode/__init__.py index c3b6309..2662e74 100644 --- a/src/datacustomcode/__init__.py +++ b/src/datacustomcode/__init__.py @@ -17,7 +17,9 @@ from datacustomcode.credentials import AuthType, Credentials from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader from datacustomcode.io.writer.print import PrintDataCloudWriter -from datacustomcode.proxy.client.client import LocalProxyClientProvider +from datacustomcode.proxy.client.LocalProxyClientProvider import ( + LocalProxyClientProvider, +) __all__ = [ "AuthType", diff --git a/src/datacustomcode/client.py b/src/datacustomcode/client.py index 5ad04a9..da309a2 100644 --- a/src/datacustomcode/client.py +++ b/src/datacustomcode/client.py @@ -120,9 +120,7 @@ def __new__( spark_provider: Optional["BaseSparkSessionProvider"] = None, code_type: str = "script", ) -> Client: - print(f"Chuy client new client: {code_type}") if "function" in code_type: - print("Chuy111 client new function client") return cls._new_function_client() if cls._instance is None: @@ -180,11 +178,19 @@ def __new__( @classmethod def _new_function_client(cls) -> Client: - print(f"Chuy config: {config}") - - importlib.import_module( - "datacustomcoderemote.proxy.client.client.ProxyClientProvider" - ) + for dependency in config.dependencies: + try: + importlib.import_module(dependency) + except ModuleNotFoundError as exc: + try: + if 
"." in dependency: + module_name, object_name = dependency.rsplit(".", 1) + module = importlib.import_module(module_name) + getattr(module, object_name) + else: + raise exc + except AttributeError as inner_exc: + raise inner_exc from exc cls._instance = super().__new__(cls) cls._instance._proxy = ( diff --git a/src/datacustomcode/config.py b/src/datacustomcode/config.py index 602e182..5fad70a 100644 --- a/src/datacustomcode/config.py +++ b/src/datacustomcode/config.py @@ -133,6 +133,13 @@ class ClientConfig(BaseModel): spark_provider_config: Union[ SparkProviderConfig[BaseSparkSessionProvider], None ] = None + dependencies: list[str] = Field( + default_factory=list, + description=""" + Extra modules to import before running the entrypoint + (merged with --dependencies from CLI). + """, + ) def update(self, other: ClientConfig) -> ClientConfig: """Merge this ClientConfig with another, respecting force flags. @@ -161,6 +168,7 @@ def merge( self.spark_provider_config = merge( self.spark_provider_config, other.spark_provider_config ) + self.dependencies = list(dict.fromkeys(self.dependencies + other.dependencies)) return self def load(self, config_path: str) -> ClientConfig: diff --git a/src/datacustomcode/proxy/client/LocalProxyClientProvider.py b/src/datacustomcode/proxy/client/LocalProxyClientProvider.py new file mode 100644 index 0000000..34856fd --- /dev/null +++ b/src/datacustomcode/proxy/client/LocalProxyClientProvider.py @@ -0,0 +1,30 @@ +# Copyright (c) 2025, Salesforce, Inc. +# SPDX-License-Identifier: Apache-2 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +from datacustomcode.proxy.client.base import BaseProxyClient + + +class LocalProxyClientProvider(BaseProxyClient): + """Default proxy client provider.""" + + CONFIG_NAME = "LocalProxyClientProvider" + + def __init__(self, credentials_profile: str = "default", **kwargs: object) -> None: + super().__init__() + self.credentials_profile = credentials_profile + + def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str: + return f"Hello, thanks for using {llmModelId}. So many tokens: {maxTokens}" diff --git a/src/datacustomcode/run.py b/src/datacustomcode/run.py index 0e4e0ff..5f0beab 100644 --- a/src/datacustomcode/run.py +++ b/src/datacustomcode/run.py @@ -52,6 +52,13 @@ def run_entrypoint( """ add_py_folder(entrypoint) + # Load config file (so we can merge config.dependencies with CLI deps) + if config_file: + config.load(config_file) + + # Merge dependencies from config and CLI (config first, then CLI, deduped) + merged_dependencies = list(dict.fromkeys(config.dependencies + list(dependencies))) + # Read dataspace from config.json (required) entrypoint_dir = os.path.dirname(entrypoint) config_json_path = os.path.join(entrypoint_dir, "config.json") @@ -81,10 +88,6 @@ def run_entrypoint( f"Please ensure config.json contains a 'dataspace' field." 
) - # Load config file first - if config_file: - config.load(config_file) - # Add dataspace to reader and writer config options _set_config_option(config.reader_config, "dataspace", dataspace) _set_config_option(config.writer_config, "dataspace", dataspace) @@ -92,7 +95,7 @@ def run_entrypoint( if profile != "default": _set_config_option(config.reader_config, "credentials_profile", profile) _set_config_option(config.writer_config, "credentials_profile", profile) - for dependency in dependencies: + for dependency in merged_dependencies: try: importlib.import_module(dependency) except ModuleNotFoundError as exc: From aa03c6a6d6f7bc42506b2de472cd8c637e4ad943 Mon Sep 17 00:00:00 2001 From: Jesus Orosco Date: Thu, 12 Feb 2026 14:44:33 -0800 Subject: [PATCH 4/5] unnecessary --- src/datacustomcode/proxy/client/LocalProxyClientProvider.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/datacustomcode/proxy/client/LocalProxyClientProvider.py b/src/datacustomcode/proxy/client/LocalProxyClientProvider.py index 34856fd..515db00 100644 --- a/src/datacustomcode/proxy/client/LocalProxyClientProvider.py +++ b/src/datacustomcode/proxy/client/LocalProxyClientProvider.py @@ -22,9 +22,8 @@ class LocalProxyClientProvider(BaseProxyClient): CONFIG_NAME = "LocalProxyClientProvider" - def __init__(self, credentials_profile: str = "default", **kwargs: object) -> None: - super().__init__() - self.credentials_profile = credentials_profile + def __init__(self, **kwargs: object) -> None: + pass def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str: return f"Hello, thanks for using {llmModelId}. 
So many tokens: {maxTokens}" From 92a0c502ab9ff92ed67dadeeaf5f1982464343ef Mon Sep 17 00:00:00 2001 From: Jesus Orosco Date: Fri, 13 Feb 2026 14:22:16 -0800 Subject: [PATCH 5/5] unwind some recent unnecessary changes --- src/datacustomcode/client.py | 15 --------------- src/datacustomcode/config.py | 8 -------- src/datacustomcode/run.py | 13 +++++-------- 3 files changed, 5 insertions(+), 31 deletions(-) diff --git a/src/datacustomcode/client.py b/src/datacustomcode/client.py index da309a2..01aed31 100644 --- a/src/datacustomcode/client.py +++ b/src/datacustomcode/client.py @@ -15,7 +15,6 @@ from __future__ import annotations from enum import Enum -import importlib from typing import ( TYPE_CHECKING, ClassVar, @@ -178,20 +177,6 @@ def __new__( @classmethod def _new_function_client(cls) -> Client: - for dependency in config.dependencies: - try: - importlib.import_module(dependency) - except ModuleNotFoundError as exc: - try: - if "." in dependency: - module_name, object_name = dependency.rsplit(".", 1) - module = importlib.import_module(module_name) - getattr(module, object_name) - else: - raise exc - except AttributeError as inner_exc: - raise inner_exc from exc - cls._instance = super().__new__(cls) cls._instance._proxy = ( config.proxy_config.to_object() # type: ignore diff --git a/src/datacustomcode/config.py b/src/datacustomcode/config.py index 5fad70a..602e182 100644 --- a/src/datacustomcode/config.py +++ b/src/datacustomcode/config.py @@ -133,13 +133,6 @@ class ClientConfig(BaseModel): spark_provider_config: Union[ SparkProviderConfig[BaseSparkSessionProvider], None ] = None - dependencies: list[str] = Field( - default_factory=list, - description=""" - Extra modules to import before running the entrypoint - (merged with --dependencies from CLI). - """, - ) def update(self, other: ClientConfig) -> ClientConfig: """Merge this ClientConfig with another, respecting force flags. 
@@ -168,7 +161,6 @@ def merge( self.spark_provider_config = merge( self.spark_provider_config, other.spark_provider_config ) - self.dependencies = list(dict.fromkeys(self.dependencies + other.dependencies)) return self def load(self, config_path: str) -> ClientConfig: diff --git a/src/datacustomcode/run.py b/src/datacustomcode/run.py index 5f0beab..0e4e0ff 100644 --- a/src/datacustomcode/run.py +++ b/src/datacustomcode/run.py @@ -52,13 +52,6 @@ def run_entrypoint( """ add_py_folder(entrypoint) - # Load config file (so we can merge config.dependencies with CLI deps) - if config_file: - config.load(config_file) - - # Merge dependencies from config and CLI (config first, then CLI, deduped) - merged_dependencies = list(dict.fromkeys(config.dependencies + list(dependencies))) - # Read dataspace from config.json (required) entrypoint_dir = os.path.dirname(entrypoint) config_json_path = os.path.join(entrypoint_dir, "config.json") @@ -88,6 +81,10 @@ def run_entrypoint( f"Please ensure config.json contains a 'dataspace' field." ) + # Load config file first + if config_file: + config.load(config_file) + # Add dataspace to reader and writer config options _set_config_option(config.reader_config, "dataspace", dataspace) _set_config_option(config.writer_config, "dataspace", dataspace) @@ -95,7 +92,7 @@ def run_entrypoint( if profile != "default": _set_config_option(config.reader_config, "credentials_profile", profile) _set_config_option(config.writer_config, "credentials_profile", profile) - for dependency in merged_dependencies: + for dependency in dependencies: try: importlib.import_module(dependency) except ModuleNotFoundError as exc: