diff --git a/src/alignrl/eval.py b/src/alignrl/eval.py index 85170eb..750565f 100644 --- a/src/alignrl/eval.py +++ b/src/alignrl/eval.py @@ -48,9 +48,9 @@ def _resolve_preset(self) -> EvalConfig: raise ValueError( f"Unknown preset {self.preset!r}. Available: {', '.join(BENCHMARK_PRESETS)}" ) - self.tasks = BENCHMARK_PRESETS[self.preset] + self.tasks = list(BENCHMARK_PRESETS[self.preset]) else: - self.tasks = BENCHMARK_PRESETS["core"] + self.tasks = list(BENCHMARK_PRESETS["core"]) return self diff --git a/tests/test_eval.py b/tests/test_eval.py index 531ea89..9d5481f 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -47,6 +47,12 @@ def test_unknown_preset_raises(self) -> None: with pytest.raises(ValueError, match="Unknown preset"): EvalConfig(preset="nonexistent") + def test_preset_tasks_not_aliased_to_shared_dict(self) -> None: + cfg = EvalConfig(preset="reasoning") + original = list(BENCHMARK_PRESETS["reasoning"]) + cfg.tasks.append("should_not_leak") + assert BENCHMARK_PRESETS["reasoning"] == original + class TestParseResults: def test_parses_lm_eval_output(self) -> None: