From e48af7204257889a063c5f3cbcf87b50ac9b2df3 Mon Sep 17 00:00:00 2001
From: Samanvya Tripathi <tripathisamanvya@gmail.com>
Date: Wed, 25 Mar 2026 21:28:59 -0400
Subject: [PATCH] fix(eval): copy preset list to prevent aliasing mutation

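The preset resolver assigned BENCHMARK_PRESETS[key] directly to
self.tasks, so config.tasks aliased the list stored in the shared
preset dict. Mutating config.tasks in place (e.g. with append)
therefore changed that preset for every later config created in the
same process. The resolver now assigns a shallow copy made with
list(), in both the named-preset branch and the default "core" branch.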

Fixes #31
---
 src/alignrl/eval.py | 4 ++--
 tests/test_eval.py  | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/alignrl/eval.py b/src/alignrl/eval.py
index 85170eb..750565f 100644
--- a/src/alignrl/eval.py
+++ b/src/alignrl/eval.py
@@ -48,9 +48,9 @@ def _resolve_preset(self) -> EvalConfig:
                 raise ValueError(
                     f"Unknown preset {self.preset!r}. Available: {', '.join(BENCHMARK_PRESETS)}"
                 )
-            self.tasks = BENCHMARK_PRESETS[self.preset]
+            self.tasks = list(BENCHMARK_PRESETS[self.preset])
         else:
-            self.tasks = BENCHMARK_PRESETS["core"]
+            self.tasks = list(BENCHMARK_PRESETS["core"])
         return self
 
 
diff --git a/tests/test_eval.py b/tests/test_eval.py
index 531ea89..9d5481f 100644
--- a/tests/test_eval.py
+++ b/tests/test_eval.py
@@ -47,6 +47,12 @@ def test_unknown_preset_raises(self) -> None:
         with pytest.raises(ValueError, match="Unknown preset"):
             EvalConfig(preset="nonexistent")
 
+    def test_preset_tasks_not_aliased_to_shared_dict(self) -> None:
+        cfg = EvalConfig(preset="reasoning")
+        original = list(BENCHMARK_PRESETS["reasoning"])
+        cfg.tasks.append("should_not_leak")
+        assert BENCHMARK_PRESETS["reasoning"] == original
+
 
 class TestParseResults:
     def test_parses_lm_eval_output(self) -> None: