From a7e7482ced1a7c88314636f37c314aa999726dfe Mon Sep 17 00:00:00 2001 From: Cristian Pufu Date: Tue, 24 Feb 2026 09:54:18 +0200 Subject: [PATCH] feat: add evaluations UI with eval sets, runs, evaluators management - Add eval data models, service layer, and REST/WS routes for eval sets, runs, and evaluators - Add frontend eval pages: eval set detail with I/O and evaluators tabs, eval run results with score/I/O/logs tabs and trace tree - Add evaluators management with create/edit forms, category filtering, and card-based layout - Restructure sidebar: shared header with activity bar, section-specific content panels - Add resizable split-pane layout with slide in/out animations and drag handle - Route-driven item selection for eval runs (#/evals/runs/:id/:itemName) - Auto-select latest run or first eval set on navigation - Bump version to 0.0.61 Co-Authored-By: Claude Opus 4.6 --- pyproject.toml | 3 +- src/uipath/dev/models/eval_data.py | 125 +++ src/uipath/dev/server/__init__.py | 30 + src/uipath/dev/server/app.py | 4 + src/uipath/dev/server/frontend/src/App.tsx | 285 ++++- .../server/frontend/src/api/eval-client.ts | 112 ++ .../src/components/chat/ChatInput.tsx | 5 +- .../src/components/chat/ChatInterrupt.tsx | 21 +- .../src/components/chat/ChatMessage.tsx | 6 +- .../src/components/debug/DebugControls.tsx | 6 +- .../components/evals/CreateEvalSetView.tsx | 165 +++ .../src/components/evals/EvalRunResults.tsx | 680 ++++++++++++ .../src/components/evals/EvalSetDetail.tsx | 483 +++++++++ .../src/components/evals/EvalsSidebar.tsx | 141 +++ .../evaluators/CreateEvaluatorView.tsx | 272 +++++ .../components/evaluators/EvaluatorDetail.tsx | 559 ++++++++++ .../evaluators/EvaluatorsSidebar.tsx | 107 ++ .../src/components/layout/ActivityBar.tsx | 98 ++ .../src/components/layout/DebugSidebar.tsx | 63 ++ .../src/components/layout/SidePanel.tsx | 23 + .../src/components/layout/Sidebar.tsx | 4 +- .../src/components/layout/StatusBar.tsx | 14 +- 
.../src/components/runs/AddToEvalModal.tsx | 266 +++++ .../src/components/runs/NewRunPanel.tsx | 8 +- .../src/components/runs/RunDetailsPanel.tsx | 84 +- .../src/components/runs/RunHistoryItem.tsx | 133 ++- .../src/components/runs/SetupView.tsx | 18 +- .../src/components/shared/DataSection.tsx | 48 + .../src/components/shared/ToastContainer.tsx | 58 + .../src/components/traces/SpanDetails.tsx | 16 +- .../src/components/traces/TraceTree.tsx | 35 +- .../server/frontend/src/hooks/useHashRoute.ts | 113 +- .../server/frontend/src/store/useEvalStore.ts | 109 ++ .../frontend/src/store/useToastStore.ts | 30 + .../server/frontend/src/store/useWebSocket.ts | 28 +- .../dev/server/frontend/src/styles/global.css | 98 +- .../dev/server/frontend/src/types/eval.ts | 70 ++ .../dev/server/frontend/src/types/ws.ts | 12 +- .../dev/server/frontend/tsconfig.tsbuildinfo | 2 +- src/uipath/dev/server/routes/evals.py | 132 +++ src/uipath/dev/server/routes/evaluators.py | 327 ++++++ src/uipath/dev/server/serializers.py | 29 +- ...anel-CeP3-CFA.js => ChatPanel-DL8mXt7H.js} | 6 +- .../server/static/assets/index-BL2n_TWc.css | 1 - .../server/static/assets/index-CQlfl4ed.js | 42 - .../server/static/assets/index-DigEqtwO.css | 1 + .../server/static/assets/index-XG1MkD32.js | 72 ++ src/uipath/dev/server/static/index.html | 4 +- src/uipath/dev/server/ws/manager.py | 47 + src/uipath/dev/server/ws/protocol.py | 3 + src/uipath/dev/services/eval_service.py | 430 ++++++++ src/uipath/dev/services/run_service.py | 35 +- uv.lock | 988 ++++++++++++++++-- 53 files changed, 6082 insertions(+), 369 deletions(-) create mode 100644 src/uipath/dev/models/eval_data.py create mode 100644 src/uipath/dev/server/frontend/src/api/eval-client.ts create mode 100644 src/uipath/dev/server/frontend/src/components/evals/CreateEvalSetView.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/evals/EvalRunResults.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/evals/EvalSetDetail.tsx create 
mode 100644 src/uipath/dev/server/frontend/src/components/evals/EvalsSidebar.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/evaluators/CreateEvaluatorView.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/evaluators/EvaluatorDetail.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/evaluators/EvaluatorsSidebar.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/layout/ActivityBar.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/layout/DebugSidebar.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/layout/SidePanel.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/runs/AddToEvalModal.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/shared/DataSection.tsx create mode 100644 src/uipath/dev/server/frontend/src/components/shared/ToastContainer.tsx create mode 100644 src/uipath/dev/server/frontend/src/store/useEvalStore.ts create mode 100644 src/uipath/dev/server/frontend/src/store/useToastStore.ts create mode 100644 src/uipath/dev/server/frontend/src/types/eval.ts create mode 100644 src/uipath/dev/server/routes/evals.py create mode 100644 src/uipath/dev/server/routes/evaluators.py rename src/uipath/dev/server/static/assets/{ChatPanel-CeP3-CFA.js => ChatPanel-DL8mXt7H.js} (96%) delete mode 100644 src/uipath/dev/server/static/assets/index-BL2n_TWc.css delete mode 100644 src/uipath/dev/server/static/assets/index-CQlfl4ed.js create mode 100644 src/uipath/dev/server/static/assets/index-DigEqtwO.css create mode 100644 src/uipath/dev/server/static/assets/index-XG1MkD32.js create mode 100644 src/uipath/dev/services/eval_service.py diff --git a/pyproject.toml b/pyproject.toml index b33bfdf..636ed25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath-dev" -version = "0.0.60" +version = "0.0.61" description = "UiPath Developer Console" readme = { file = "README.md", content-type = 
"text/markdown" } requires-python = ">=3.11" @@ -10,6 +10,7 @@ dependencies = [ "pyperclip>=1.11.0, <2.0.0", "fastapi>=0.128.8", "uvicorn[standard]>=0.40.0", + "uipath" ] classifiers = [ "Intended Audience :: Developers", diff --git a/src/uipath/dev/models/eval_data.py b/src/uipath/dev/models/eval_data.py new file mode 100644 index 0000000..e592d83 --- /dev/null +++ b/src/uipath/dev/models/eval_data.py @@ -0,0 +1,125 @@ +"""Data models for evaluation runs.""" + +from __future__ import annotations + +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any + + +@dataclass +class EvalSetInfo: + """Summary of a discovered evaluation set.""" + + id: str + name: str + eval_count: int + evaluator_ids: list[str] + + +@dataclass +class EvalItemResult: + """Result of evaluating a single item.""" + + name: str + inputs: dict[str, Any] = field(default_factory=dict) + expected_output: Any = None + scores: dict[str, float] = field(default_factory=dict) + overall_score: float = 0.0 + output: Any = None + justifications: dict[str, str] = field(default_factory=dict) + duration_ms: float | None = None + status: str = "pending" # pending | running | completed | failed + traces: list[dict[str, Any]] = field(default_factory=list) + + +@dataclass +class EvalRunState: + """Full state of an eval run.""" + + id: str = field(default_factory=lambda: str(uuid.uuid4())) + eval_set_id: str = "" + eval_set_name: str = "" + status: str = "pending" # pending | running | completed | failed + progress_completed: int = 0 + progress_total: int = 0 + overall_score: float | None = None + evaluator_scores: dict[str, float] = field(default_factory=dict) + results: list[EvalItemResult] = field(default_factory=list) + start_time: datetime | None = None + end_time: datetime | None = None + + def to_summary(self) -> dict[str, Any]: + """Serialize to summary dict (no per-item results).""" + return { + "id": self.id, + "eval_set_id": 
self.eval_set_id, + "eval_set_name": self.eval_set_name, + "status": self.status, + "progress_completed": self.progress_completed, + "progress_total": self.progress_total, + "overall_score": self.overall_score, + "evaluator_scores": self.evaluator_scores, + "start_time": self.start_time.isoformat() if self.start_time else None, + "end_time": self.end_time.isoformat() if self.end_time else None, + } + + def to_detail(self) -> dict[str, Any]: + """Serialize to detail dict (includes per-item results).""" + base = self.to_summary() + base["results"] = [ + { + "name": r.name, + "inputs": r.inputs, + "expected_output": r.expected_output, + "scores": r.scores, + "overall_score": r.overall_score, + "output": str(r.output) + if isinstance(r.output, Exception) + else r.output, + "justifications": r.justifications, + "duration_ms": r.duration_ms, + "status": r.status, + "traces": r.traces, + } + for r in self.results + ] + return base + + def start(self) -> None: + """Mark run as started.""" + self.status = "running" + self.start_time = datetime.now(timezone.utc) + + def complete(self) -> None: + """Mark run as completed, computing final scores.""" + self.status = "completed" + self.end_time = datetime.now(timezone.utc) + self._compute_scores() + + def fail(self) -> None: + """Mark run as failed.""" + self.status = "failed" + self.end_time = datetime.now(timezone.utc) + + def _compute_scores(self) -> None: + """Compute overall and per-evaluator scores from item results.""" + completed = [r for r in self.results if r.status == "completed"] + if not completed: + self.overall_score = 0.0 + return + + # Per-evaluator averages + evaluator_totals: dict[str, list[float]] = {} + for r in completed: + for ev_id, score in r.scores.items(): + evaluator_totals.setdefault(ev_id, []).append(score) + + self.evaluator_scores = { + ev_id: sum(scores) / len(scores) + for ev_id, scores in evaluator_totals.items() + } + + # Overall = average of item overall_scores + self.overall_score = 
sum(r.overall_score for r in completed) / len(completed) diff --git a/src/uipath/dev/server/__init__.py b/src/uipath/dev/server/__init__.py index 2e3ba31..f34e90a 100644 --- a/src/uipath/dev/server/__init__.py +++ b/src/uipath/dev/server/__init__.py @@ -24,8 +24,10 @@ StateData, TraceData, ) +from uipath.dev.models.eval_data import EvalItemResult, EvalRunState from uipath.dev.models.execution import ExecutionRun from uipath.dev.server.debug_bridge import WebDebugBridge +from uipath.dev.services.eval_service import EvalService from uipath.dev.services.run_service import RunService logger = logging.getLogger(__name__) @@ -86,6 +88,14 @@ def __init__( on_run_removed=self.connection_manager.remove_run_subscriptions, ) + self.eval_service = EvalService( + runtime_factory=self.runtime_factory, + trace_manager=self.trace_manager, + on_eval_run_created=self._on_eval_run_created, + on_eval_run_progress=self._on_eval_run_progress, + on_eval_run_completed=self._on_eval_run_completed, + ) + def create_app(self) -> Any: """Create and return a FastAPI application.""" from uipath.dev.server.app import create_app @@ -231,6 +241,26 @@ def _on_state(self, state_data: StateData) -> None: """Broadcast state transition to subscribed WebSocket clients.""" self.connection_manager.broadcast_state(state_data) + def _on_eval_run_created(self, run: EvalRunState) -> None: + """Broadcast eval run created to all connected clients.""" + self.connection_manager.broadcast_eval_run_created(run) + + def _on_eval_run_progress( + self, + run_id: str, + completed: int, + total: int, + item_result: EvalItemResult | None, + ) -> None: + """Broadcast eval run progress to all connected clients.""" + self.connection_manager.broadcast_eval_run_progress( + run_id, completed, total, item_result + ) + + def _on_eval_run_completed(self, run: EvalRunState) -> None: + """Broadcast eval run completed to all connected clients.""" + self.connection_manager.broadcast_eval_run_completed(run) + @staticmethod def 
_find_free_port(host: str, start_port: int, max_attempts: int = 100) -> int: """Find a free port starting from *start_port*. diff --git a/src/uipath/dev/server/app.py b/src/uipath/dev/server/app.py index d608338..209a801 100644 --- a/src/uipath/dev/server/app.py +++ b/src/uipath/dev/server/app.py @@ -150,6 +150,8 @@ async def _config(): # Register routes from uipath.dev.server.routes.entrypoints import router as entrypoints_router + from uipath.dev.server.routes.evals import router as evals_router + from uipath.dev.server.routes.evaluators import router as evaluators_router from uipath.dev.server.routes.graph import router as graph_router from uipath.dev.server.routes.reload import router as reload_router from uipath.dev.server.routes.runs import router as runs_router @@ -166,6 +168,8 @@ async def _config(): app.include_router(runs_router, prefix="/api") app.include_router(graph_router, prefix="/api") app.include_router(reload_router, prefix="/api") + app.include_router(evaluators_router, prefix="/api") + app.include_router(evals_router, prefix="/api") app.include_router(ws_router) # Auto-build frontend if source is available and build is stale diff --git a/src/uipath/dev/server/frontend/src/App.tsx b/src/uipath/dev/server/frontend/src/App.tsx index 8249df3..9e37a5b 100644 --- a/src/uipath/dev/server/frontend/src/App.tsx +++ b/src/uipath/dev/server/frontend/src/App.tsx @@ -6,13 +6,25 @@ import { useWebSocket } from "./store/useWebSocket"; import { listRuns, listEntrypoints, getRun } from "./api/client"; import type { RunDetail } from "./types/run"; import { useHashRoute } from "./hooks/useHashRoute"; +import type { Section } from "./hooks/useHashRoute"; import { useIsMobile } from "./hooks/useIsMobile"; -import Sidebar from "./components/layout/Sidebar"; +import ActivityBar from "./components/layout/ActivityBar"; +import DebugSidebar from "./components/layout/DebugSidebar"; import StatusBar from "./components/layout/StatusBar"; import NewRunPanel from 
"./components/runs/NewRunPanel"; import SetupView from "./components/runs/SetupView"; import RunDetailsPanel from "./components/runs/RunDetailsPanel"; import ReloadToast from "./components/shared/ReloadToast"; +import ToastContainer from "./components/shared/ToastContainer"; +import { useEvalStore } from "./store/useEvalStore"; +import { listEvalSets, listEvaluators, listEvalRuns, listLocalEvaluators } from "./api/eval-client"; +import EvalsSidebar from "./components/evals/EvalsSidebar"; +import EvalSetDetail from "./components/evals/EvalSetDetail"; +import EvalRunResults from "./components/evals/EvalRunResults"; +import CreateEvalSetView from "./components/evals/CreateEvalSetView"; +import EvaluatorsSidebar from "./components/evaluators/EvaluatorsSidebar"; +import EvaluatorsView from "./components/evaluators/EvaluatorDetail"; +import CreateEvaluatorView from "./components/evaluators/CreateEvaluatorView"; export default function App() { const ws = useWebSocket(); @@ -33,14 +45,30 @@ export default function App() { setActiveNode, removeActiveNode, } = useRunStore(); - const { view, runId: routeRunId, setupEntrypoint, setupMode, navigate } = useHashRoute(); + const { + section, + view, + runId: routeRunId, + setupEntrypoint, + setupMode, + evalCreating, + evalSetId, + evalRunId, + evalRunItemName, + evaluatorCreateType, + evaluatorId, + evaluatorFilter, + navigate, + } = useHashRoute(); + + const { setEvalSets, setEvaluators, setLocalEvaluators, setEvalRuns } = useEvalStore(); // Sync route runId → store selection useEffect(() => { - if (view === "details" && routeRunId && routeRunId !== selectedRunId) { + if (section === "debug" && view === "details" && routeRunId && routeRunId !== selectedRunId) { selectRun(routeRunId); } - }, [view, routeRunId, selectedRunId, selectRun]); + }, [section, view, routeRunId, selectedRunId, selectRun]); // Load existing runs, entrypoints, auth status, and config on mount const initAuth = useAuthStore((s) => s.init); @@ -54,6 +82,49 @@ 
export default function App() { initConfig(); }, [setRuns, setEntrypoints, initAuth, initConfig]); + // Load eval data when switching to evals/evaluators section + useEffect(() => { + if (section === "evals") { + listEvalSets().then((sets) => setEvalSets(sets)).catch(console.error); + listEvalRuns().then((runs) => setEvalRuns(runs)).catch(console.error); + } + if (section === "evals" || section === "evaluators") { + listEvaluators().then((evs) => setEvaluators(evs)).catch(console.error); + listLocalEvaluators().then((evs) => setLocalEvaluators(evs)).catch(console.error); + } + }, [section, setEvalSets, setEvaluators, setLocalEvaluators, setEvalRuns]); + + // Auto-select latest run or first eval set when navigating to evals with no selection + const evalSets = useEvalStore((s) => s.evalSets); + const evalRuns = useEvalStore((s) => s.evalRuns); + useEffect(() => { + if (section !== "evals" || evalCreating || evalSetId || evalRunId) return; + // Pick latest run by start_time + const runs = Object.values(evalRuns).sort( + (a, b) => new Date(b.start_time ?? 0).getTime() - new Date(a.start_time ?? 0).getTime(), + ); + if (runs.length > 0) { + navigate(`#/evals/runs/${runs[0].id}`); + return; + } + // Fallback: first eval set + const sets = Object.values(evalSets); + if (sets.length > 0) { + navigate(`#/evals/sets/${sets[0].id}`); + } + }, [section, evalCreating, evalSetId, evalRunId, evalRuns, evalSets, navigate]); + + // Keyboard shortcuts + useEffect(() => { + const onKeyDown = (e: KeyboardEvent) => { + if (e.key === "Escape" && sidebarOpen) { + setSidebarOpen(false); + } + }; + window.addEventListener("keydown", onKeyDown); + return () => window.removeEventListener("keydown", onKeyDown); + }, [sidebarOpen]); + const selectedRun = selectedRunId ? 
runs[selectedRunId] : null; // Shared helper: apply a full run detail response to the store @@ -169,70 +240,188 @@ export default function App() { }, [selectedRunId, selectedRun?.status, applyRunDetail]); const handleRunCreated = (runId: string) => { - navigate(`#/runs/${runId}/traces`); + navigate(`#/debug/runs/${runId}/traces`); selectRun(runId); setSidebarOpen(false); }; const handleSelectRun = (runId: string) => { - navigate(`#/runs/${runId}/traces`); + navigate(`#/debug/runs/${runId}/traces`); selectRun(runId); setSidebarOpen(false); }; const handleNewRun = () => { - navigate("#/new"); + navigate("#/debug/new"); setSidebarOpen(false); }; - return ( -
-
- {/* Mobile hamburger button */} - {isMobile && !sidebarOpen && ( - - )} - { + if (s === "debug") navigate("#/debug/new"); + else if (s === "evals") navigate("#/evals"); + else if (s === "evaluators") navigate("#/evaluators"); + }; + + // --- Render main content based on section --- + const renderMainContent = () => { + if (section === "evals") { + if (evalCreating) return ; + if (evalRunId) return ; + if (evalSetId) return ; + return ; + } + + if (section === "evaluators") { + if (evaluatorCreateType) { + return ; + } + return ; + } + + // Debug section + if (view === "new") { + return ; + } + if (view === "setup" && setupEntrypoint && setupMode) { + return ( + setSidebarOpen(false)} /> -
- {view === "new" ? ( - - ) : view === "setup" && setupEntrypoint && setupMode ? ( - - ) : selectedRun ? ( - - ) : ( -
- Select a run or create a new one -
+ ); + } + if (selectedRun) { + return ; + } + return ( +
+ Select a run or create a new one +
+ ); + }; + + // --- Mobile layout --- + if (isMobile) { + return ( +
+
+ {!sidebarOpen && ( + + )} + {sidebarOpen && ( + <> +
setSidebarOpen(false)} + /> + + )} +
+ {renderMainContent()} +
+
+ + + +
+ ); + } + + // --- Desktop layout --- + return ( +
+
+ {/* Left aside: shared header + ActivityBar + section sidebar */} + +
+ {renderMainContent()}
+
); } diff --git a/src/uipath/dev/server/frontend/src/api/eval-client.ts b/src/uipath/dev/server/frontend/src/api/eval-client.ts new file mode 100644 index 0000000..63089b7 --- /dev/null +++ b/src/uipath/dev/server/frontend/src/api/eval-client.ts @@ -0,0 +1,112 @@ +import type { EvaluatorInfo, LocalEvaluator, EvalSetSummary, EvalSetDetail, EvalItem, EvalRunSummary, EvalRunDetail } from "../types/eval"; + +const BASE = "/api"; + +async function fetchJson(url: string, options?: RequestInit): Promise { + const res = await fetch(url, options); + if (!res.ok) { + let errorDetail; + try { + const body = await res.json(); + errorDetail = body.detail || res.statusText; + } catch { + errorDetail = res.statusText; + } + const error = new Error(`HTTP ${res.status}`); + (error as any).detail = errorDetail; + (error as any).status = res.status; + throw error; + } + return res.json(); +} + +export async function listEvaluators(): Promise { + return fetchJson(`${BASE}/evaluators`); +} + +export async function listEvalSets(): Promise { + return fetchJson(`${BASE}/eval-sets`); +} + +export async function createEvalSet(body: { + name: string; + evaluator_refs: string[]; +}): Promise { + return fetchJson(`${BASE}/eval-sets`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); +} + +export async function addEvalItem( + evalSetId: string, + item: { name: string; inputs: Record; expected_output: unknown }, +): Promise { + return fetchJson(`${BASE}/eval-sets/${encodeURIComponent(evalSetId)}/items`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(item), + }); +} + +export async function getEvalSet(id: string): Promise { + return fetchJson(`${BASE}/eval-sets/${encodeURIComponent(id)}`); +} + +export async function startEvalRun(evalSetId: string): Promise { + return fetchJson(`${BASE}/eval-sets/${encodeURIComponent(evalSetId)}/runs`, { + method: "POST", + }); +} + +export async function 
listEvalRuns(): Promise { + return fetchJson(`${BASE}/eval-runs`); +} + +export async function getEvalRun(id: string): Promise { + return fetchJson(`${BASE}/eval-runs/${encodeURIComponent(id)}`); +} + +export async function listLocalEvaluators(): Promise { + return fetchJson(`${BASE}/local-evaluators`); +} + +export async function createLocalEvaluator(body: { + name: string; + description: string; + evaluator_type_id: string; + config: Record; +}): Promise { + return fetchJson(`${BASE}/local-evaluators`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); +} + +export async function updateEvalSetEvaluators( + evalSetId: string, + evaluatorRefs: string[], +): Promise { + return fetchJson(`${BASE}/eval-sets/${encodeURIComponent(evalSetId)}/evaluators`, { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ evaluator_refs: evaluatorRefs }), + }); +} + +export async function updateLocalEvaluator( + id: string, + body: { + description?: string; + evaluator_type_id?: string; + config?: Record; + }, +): Promise { + return fetchJson(`${BASE}/local-evaluators/${encodeURIComponent(id)}`, { + method: "PUT", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); +} diff --git a/src/uipath/dev/server/frontend/src/components/chat/ChatInput.tsx b/src/uipath/dev/server/frontend/src/components/chat/ChatInput.tsx index 55a3d72..95ec9e5 100644 --- a/src/uipath/dev/server/frontend/src/components/chat/ChatInput.tsx +++ b/src/uipath/dev/server/frontend/src/components/chat/ChatInput.tsx @@ -36,13 +36,14 @@ export default function ChatInput({ onSend, disabled, placeholder }: Props) { onKeyDown={handleKeyDown} disabled={disabled} placeholder={placeholder ?? 
"Message..."} - className="flex-1 bg-transparent text-sm py-1 focus:outline-none disabled:opacity-40 placeholder:text-[var(--text-muted)]" + className="flex-1 bg-transparent text-sm py-1 disabled:opacity-40 placeholder:text-[var(--text-muted)]" style={{ color: "var(--text-primary)" }} /> +

+ ) : ( +
+ {localEvaluators.map((ev) => ( + + ))} +
+ )} +
+ + {/* Error */} + {error && ( +

{error}

+ )} + + {/* Create button */} + +
+
+ ); +} diff --git a/src/uipath/dev/server/frontend/src/components/evals/EvalRunResults.tsx b/src/uipath/dev/server/frontend/src/components/evals/EvalRunResults.tsx new file mode 100644 index 0000000..e1a334a --- /dev/null +++ b/src/uipath/dev/server/frontend/src/components/evals/EvalRunResults.tsx @@ -0,0 +1,680 @@ +import { useCallback, useEffect, useRef, useState } from "react"; +import { getEvalRun } from "../../api/eval-client"; +import { useEvalStore } from "../../store/useEvalStore"; +import { useHashRoute } from "../../hooks/useHashRoute"; +import type { EvalRunDetail, EvalItemResult } from "../../types/eval"; +import TraceTree from "../traces/TraceTree"; +import JsonHighlight from "../shared/JsonHighlight"; +import DataSection from "../shared/DataSection"; + +interface Props { + evalRunId: string; + itemName?: string | null; +} + +function formatScore(score: number | null): string { + if (score === null) return "-"; + return `${Math.round(score * 100)}%`; +} + +function scoreColor(score: number | null): string { + if (score === null) return "var(--text-muted)"; + const pct = score * 100; + if (pct >= 80) return "var(--success)"; + if (pct >= 50) return "var(--warning)"; + return "var(--error)"; +} + +function formatDuration(startTime: string | null, endTime: string | null): string { + if (!startTime) return "-"; + const start = new Date(startTime).getTime(); + const end = endTime ? 
new Date(endTime).getTime() : Date.now(); + const secs = Math.round((end - start) / 1000); + if (secs < 60) return `${secs}s`; + return `${Math.floor(secs / 60)}m ${secs % 60}s`; +} + +function stripEvaluatorSuffix(name: string): string { + return name.replace(/\s*Evaluator$/i, ""); +} + +const statusStyles: Record = { + pending: { color: "var(--text-muted)", bg: "var(--bg-tertiary)", label: "Pending" }, + running: { color: "var(--info)", bg: "rgba(59,130,246,0.1)", label: "Running" }, + completed: { color: "var(--success)", bg: "rgba(34,197,94,0.1)", label: "Completed" }, + failed: { color: "var(--error)", bg: "rgba(239,68,68,0.1)", label: "Failed" }, +}; + +export default function EvalRunResults({ evalRunId, itemName }: Props) { + const [detail, setDetail] = useState(null); + const [loading, setLoading] = useState(true); + const { navigate } = useHashRoute(); + + const selectedItemName = itemName ?? null; + + // Item list height (top panel, resizable like GraphPanel) + const [itemListHeight, setItemListHeight] = useState(220); + const containerRef = useRef(null); + const draggingRow = useRef(false); + + // Sidebar width (right panel, resizable like ChatPanel) + const [sidebarWidth, setSidebarWidth] = useState(() => { + const saved = localStorage.getItem("evalSidebarWidth"); + return saved ? parseInt(saved, 10) : 320; + }); + const [isDragging, setIsDragging] = useState(false); + const outerRef = useRef(null); + + useEffect(() => { + localStorage.setItem("evalSidebarWidth", String(sidebarWidth)); + }, [sidebarWidth]); + + const storeRun = useEvalStore((s) => s.evalRuns[evalRunId]); + const evaluators = useEvalStore((s) => s.evaluators); + + useEffect(() => { + setLoading(true); + getEvalRun(evalRunId) + .then((d) => { + setDetail(d); + // Auto-navigate to first item if none selected + if (!itemName) { + const first = d.results.find((r) => r.status === "completed") ?? 
d.results[0]; + if (first) navigate(`#/evals/runs/${evalRunId}/${encodeURIComponent(first.name)}`); + } + }) + .catch(console.error) + .finally(() => setLoading(false)); + }, [evalRunId]); + + // Re-fetch when store run status reaches terminal + useEffect(() => { + if (storeRun?.status === "completed" || storeRun?.status === "failed") { + getEvalRun(evalRunId).then(setDetail).catch(console.error); + } + }, [storeRun?.status, evalRunId]); + + // Auto-select first completed item as results come in (when no item is in route) + useEffect(() => { + if (itemName || !detail?.results) return; + const first = detail.results.find((r) => r.status === "completed") ?? detail.results[0]; + if (first) navigate(`#/evals/runs/${evalRunId}/${encodeURIComponent(first.name)}`); + }, [detail?.results]); + + // --- Row resize (item list height) --- + const onRowResizeStart = useCallback((e: React.MouseEvent | React.TouchEvent) => { + e.preventDefault(); + draggingRow.current = true; + + const startY = "touches" in e ? e.touches[0].clientY : e.clientY; + const startH = itemListHeight; + + const onMove = (ev: MouseEvent | TouchEvent) => { + if (!draggingRow.current) return; + const container = containerRef.current; + if (!container) return; + const clientY = "touches" in ev ? 
ev.touches[0].clientY : ev.clientY; + const maxH = container.clientHeight - 100; + const newH = Math.max(80, Math.min(maxH, startH + (clientY - startY))); + setItemListHeight(newH); + }; + + const onUp = () => { + draggingRow.current = false; + document.removeEventListener("mousemove", onMove); + document.removeEventListener("mouseup", onUp); + document.removeEventListener("touchmove", onMove); + document.removeEventListener("touchend", onUp); + document.body.style.cursor = ""; + document.body.style.userSelect = ""; + }; + + document.body.style.cursor = "row-resize"; + document.body.style.userSelect = "none"; + document.addEventListener("mousemove", onMove); + document.addEventListener("mouseup", onUp); + document.addEventListener("touchmove", onMove, { passive: false }); + document.addEventListener("touchend", onUp); + }, [itemListHeight]); + + // --- Sidebar col resize --- + const onSidebarResizeStart = useCallback((e: React.MouseEvent | React.TouchEvent) => { + e.preventDefault(); + setIsDragging(true); + + const startX = "touches" in e ? e.touches[0].clientX : e.clientX; + const startW = sidebarWidth; + + const onMove = (ev: MouseEvent | TouchEvent) => { + const container = outerRef.current; + if (!container) return; + const clientX = "touches" in ev ? 
ev.touches[0].clientX : ev.clientX; + const maxW = container.clientWidth - 300; + const newW = Math.max(280, Math.min(maxW, startW + (startX - clientX))); + setSidebarWidth(newW); + }; + + const onUp = () => { + setIsDragging(false); + document.removeEventListener("mousemove", onMove); + document.removeEventListener("mouseup", onUp); + document.removeEventListener("touchmove", onMove); + document.removeEventListener("touchend", onUp); + document.body.style.cursor = ""; + document.body.style.userSelect = ""; + }; + + document.body.style.cursor = "col-resize"; + document.body.style.userSelect = "none"; + document.addEventListener("mousemove", onMove); + document.addEventListener("mouseup", onUp); + document.addEventListener("touchmove", onMove, { passive: false }); + document.addEventListener("touchend", onUp); + }, [sidebarWidth]); + + if (loading) { + return ( +
+ Loading... +
+ ); + } + + if (!detail) { + return ( +
+ Eval run not found +
+ ); + } + + const run = storeRun ?? detail; + const status = statusStyles[run.status] ?? statusStyles.pending; + const isRunning = run.status === "running"; + const evaluatorIds = Object.keys(run.evaluator_scores ?? {}); + const selectedItem = detail.results.find((r) => r.name === selectedItemName) ?? null; + const selectedTraces = (selectedItem?.traces ?? []).map((t) => ({ ...t, run_id: "" })); + + return ( +
+ {/* Main content: item list (top) + traces (bottom) */} +
+ {/* Header bar */} +
+

+ {run.eval_set_name} +

+ + {status.label} + + + {formatScore(run.overall_score)} + + + {formatDuration(run.start_time, run.end_time)} + + {isRunning && ( +
+
+
0 ? (run.progress_completed / run.progress_total) * 100 : 0}%`, + background: "var(--info)", + }} + /> +
+ + {run.progress_completed}/{run.progress_total} + +
+ )} + {/* Per-evaluator scores inline */} + {evaluatorIds.length > 0 && ( +
+ {evaluatorIds.map((id) => { + const ev = evaluators.find((e) => e.id === id); + const score = run.evaluator_scores[id]; + return ( +
+ + {stripEvaluatorSuffix(ev?.name ?? id)} + +
+
+
+ + {formatScore(score)} + +
+ ); + })} +
+ )} +
+ + {/* Item list (resizable height, like GraphPanel) */} +
+ {/* Table header */} +
+ + Name + Score + {evaluatorIds.map((id) => { + const ev = evaluators.find((e) => e.id === id); + return ( + {stripEvaluatorSuffix(ev?.name ?? id)} + ); + })} + Time +
+ {/* Scrollable item rows */} +
+ {detail.results.map((item: EvalItemResult) => { + const isPending = item.status === "pending"; + const isFailed = item.status === "failed"; + const isSelected = item.name === selectedItemName; + return ( + + ); + })} + {detail.results.length === 0 && ( +
+ {isRunning ? "Waiting for results..." : "No results"} +
+ )} +
+
+ + {/* Row drag handle */} +
+ + {/* Trace tree (bottom, flex-1, like TraceTree in debug) */} +
+ {selectedItem && selectedTraces.length > 0 ? ( + + ) : ( +
+ {selectedItem?.status === "pending" ? "Pending..." : "No traces available"} +
+ )} +
+
+ + {/* Sidebar drag handle */} +
+ + {/* Right sidebar */} + +
+ ); +} + +type DetailTab = "score" | "io" | "logs"; + +const detailTabs: { id: DetailTab; label: string }[] = [ + { id: "score", label: "Score" }, + { id: "io", label: "I/O" }, + { id: "logs", label: "Logs" }, +]; + +function DetailsSidebar({ + width, + item, + evaluators, + isRunning, + isDragging, +}: { + width: number; + item: EvalItemResult | null; + evaluators: { id: string; name: string }[]; + isRunning: boolean; + isDragging: boolean; +}) { + const [tab, setTab] = useState("score"); + + const showSidebar = !!item; + + return ( +
+ {/* Tab bar */} +
+ {detailTabs.map((t) => ( + + ))} + {isRunning && ( + + Running... + + )} +
+ + {/* Content */} +
+ {!item ? null : item.status === "pending" ? ( +
+ Pending... +
+ ) : tab === "score" ? ( + + ) : tab === "io" ? ( + + ) : ( +
+ Logs coming soon +
+ )} +
+
+ ); +} + +function ScoreTab({ item, evaluators }: { item: EvalItemResult; evaluators: { id: string; name: string }[] }) { + const evalIds = Object.keys(item.scores); + + return ( +
+ {/* Overall */} +
+
+ + Overall + +
+
+
+
+ + {formatScore(item.overall_score)} + +
+
+
+ + {/* Per-evaluator scores */} + {evalIds.map((evId) => { + const ev = evaluators.find((e) => e.id === evId); + const score = item.scores[evId]; + const justification = item.justifications[evId]; + return ( +
+
+ + {ev?.name ?? evId} + +
+
+
+
+ + {formatScore(score)} + +
+
+ {justification && ( + + )} +
+ ); + })} +
+ ); +} + +function IOTab({ item }: { item: EvalItemResult }) { + const inputJson = JSON.stringify(item.inputs, null, 2); + const outputJson = typeof item.output === "string" ? item.output : JSON.stringify(item.output, null, 2); + const expectedJson = item.expected_output != null + ? (typeof item.expected_output === "string" ? item.expected_output : JSON.stringify(item.expected_output, null, 2)) + : null; + + return ( +
+ + + + + {expectedJson && ( + + + + )} + + + {(item.duration_ms / 1000).toFixed(2)}s + + ) : undefined} + > + + +
+  );
+}
+
+/**
+ * Try to parse `expected="..." actual="..."` from justification text into structured blocks.
+ *
+ * @param text - Justification text to inspect.
+ * @returns The captured `expected` and `actual` strings plus any trailing
+ *   `key=value` pairs (collected into `meta`), or `null` when the text does
+ *   not contain the `expected="..." actual="..."` pattern.
+ */
+function parseExpectedActual(text: string): { expected: string; actual: string; meta: Record<string, string> } | null {
+  const m = text.match(/expected="(.+?)"\s+actual="(.+?)"(.*)/s);
+  if (!m) return null;
+  // Parse trailing key=value pairs like matched_leaves=1.0 total_leaves=1.0
+  const meta: Record<string, string> = {};
+  const rest = m[3]?.trim() ?? "";
+  if (rest) {
+    for (const pair of rest.match(/(\w+)=([\S]+)/g) ?? []) {
+      // Split on the first "=" only, so the value may itself contain "=".
+      const eq = pair.indexOf("=");
+      meta[pair.slice(0, eq)] = pair.slice(eq + 1);
+    }
+  }
+  return { expected: m[1], actual: m[2], meta };
+}
+
+/**
+ * Rewrite Python-literal syntax into JSON syntax, token by token.
+ *
+ * String literals (single- or double-quoted) are re-emitted with double
+ * quotes and their contents are left alone; the bare words `True`, `False`
+ * and `None` outside of strings become `true`, `false` and `null`. Every
+ * other character is copied through unchanged, so the result is only
+ * "JSON-like" and the caller still has to validate it with `JSON.parse`.
+ *
+ * @param raw - Text that may be a Python repr of a dict/list/scalar.
+ * @returns The rewritten text.
+ * @throws SyntaxError when a string literal is never closed.
+ */
+function pythonLiteralToJson(raw: string): string {
+  let out = "";
+  let i = 0;
+  while (i < raw.length) {
+    const ch = raw[i];
+    if (ch === "'" || ch === '"') {
+      // Copy one string literal, re-quoting it with double quotes.
+      let j = i + 1;
+      let body = "";
+      while (j < raw.length && raw[j] !== ch) {
+        if (raw[j] === "\\" && j + 1 < raw.length) {
+          // \' is not a legal JSON escape; all other escapes are kept as written.
+          body += raw[j + 1] === "'" ? "'" : raw[j] + raw[j + 1];
+          j += 2;
+        } else {
+          // A bare " inside a single-quoted literal must be escaped for JSON.
+          body += raw[j] === '"' ? '\\"' : raw[j];
+          j += 1;
+        }
+      }
+      if (j >= raw.length) throw new SyntaxError("Unterminated string literal");
+      out += `"${body}"`;
+      i = j + 1;
+    } else if (/[A-Za-z_]/.test(ch)) {
+      // Read a whole bare word so keywords are only matched outside strings.
+      let j = i;
+      while (j < raw.length && /\w/.test(raw[j])) j += 1;
+      const word = raw.slice(i, j);
+      out += word === "True" ? "true" : word === "False" ? "false" : word === "None" ? "null" : word;
+      i = j;
+    } else {
+      out += ch;
+      i += 1;
+    }
+  }
+  return out;
+}
+
+/**
+ * Pretty-print a value that may be JSON or a Python dict repr.
+ *
+ * @param raw - The raw value text.
+ * @returns The value re-serialised as JSON with 2-space indentation, or `raw`
+ *   unchanged when it cannot be interpreted as JSON after conversion.
+ */
+function tryFormatValue(raw: string): string {
+  // Python dict repr → JSON: single quotes → double quotes, True/False/None → JSON equivalents.
+  // The conversion is done outside string literals only, so values such as
+  // 'None found' or "it's" keep their text instead of being rewritten or breaking the parse.
+  try {
+    const parsed: unknown = JSON.parse(pythonLiteralToJson(raw));
+    return JSON.stringify(parsed, null, 2);
+  } catch {
+    // Not JSON-like after all: show the text exactly as received.
+    return raw;
+  }
+}
+
+function JustificationBlock({ text }: { text: string }) {
+  const parsed = parseExpectedActual(text);
+
+  if (!parsed) {
+    // Fallback: plain text
+    return (
+
+
+ {text} +
+
+ ); + } + + const expected = tryFormatValue(parsed.expected); + const actual = tryFormatValue(parsed.actual); + const match = expected === actual; + + return ( +
+
+ {/* Expected */} +
+
+ Expected +
+
+            {expected}
+          
+
+ {/* Actual */} +
+
+ + Actual + + +
+
+            {actual}
+          
+
+
+ {Object.keys(parsed.meta).length > 0 && ( +
+ {Object.entries(parsed.meta).map(([k, v]) => ( + + {k.replace(/_/g, " ")}{" "} + {v} + + ))} +
+ )} +
+ ); +} diff --git a/src/uipath/dev/server/frontend/src/components/evals/EvalSetDetail.tsx b/src/uipath/dev/server/frontend/src/components/evals/EvalSetDetail.tsx new file mode 100644 index 0000000..099e234 --- /dev/null +++ b/src/uipath/dev/server/frontend/src/components/evals/EvalSetDetail.tsx @@ -0,0 +1,483 @@ +import { useCallback, useEffect, useRef, useState } from "react"; +import { getEvalSet, startEvalRun, updateEvalSetEvaluators } from "../../api/eval-client"; +import { useEvalStore } from "../../store/useEvalStore"; +import { useHashRoute } from "../../hooks/useHashRoute"; +import type { EvalSetDetail as EvalSetDetailType, EvalItem } from "../../types/eval"; +import JsonHighlight from "../shared/JsonHighlight"; +import DataSection from "../shared/DataSection"; + +interface Props { + evalSetId: string; +} + +function truncateJson(val: unknown, max = 60): string { + const s = typeof val === "string" ? val : JSON.stringify(val); + if (!s || s === "null") return "-"; + return s.length > max ? s.slice(0, max) + "..." 
: s; +} + +type SidebarTab = "io" | "evaluators"; + +export default function EvalSetDetail({ evalSetId }: Props) { + const [detail, setDetail] = useState(null); + const [loading, setLoading] = useState(true); + const [selectedItemName, setSelectedItemName] = useState(null); + const [runLoading, setRunLoading] = useState(false); + const [sidebarTab, setSidebarTab] = useState("io"); + const evaluators = useEvalStore((s) => s.evaluators); + const localEvaluators = useEvalStore((s) => s.localEvaluators); + const storeUpdateEvaluators = useEvalStore((s) => s.updateEvalSetEvaluators); + const upsertEvalRun = useEvalStore((s) => s.upsertEvalRun); + const { navigate } = useHashRoute(); + + // Evaluator edit popover state + const [editOpen, setEditOpen] = useState(false); + const [editRefs, setEditRefs] = useState>(new Set()); + const [editSaving, setEditSaving] = useState(false); + const popoverRef = useRef(null); + + // Sidebar width (resizable) + const [sidebarWidth, setSidebarWidth] = useState(() => { + const saved = localStorage.getItem("evalSetSidebarWidth"); + return saved ? 
parseInt(saved, 10) : 320; + }); + const [isDragging, setIsDragging] = useState(false); + const outerRef = useRef(null); + + useEffect(() => { + localStorage.setItem("evalSetSidebarWidth", String(sidebarWidth)); + }, [sidebarWidth]); + + useEffect(() => { + setLoading(true); + setSelectedItemName(null); + getEvalSet(evalSetId) + .then((d) => { + setDetail(d); + if (d.items.length > 0) setSelectedItemName(d.items[0].name); + }) + .catch(console.error) + .finally(() => setLoading(false)); + }, [evalSetId]); + + const handleRun = async () => { + setRunLoading(true); + try { + const run = await startEvalRun(evalSetId); + upsertEvalRun(run); + navigate(`#/evals/runs/${run.id}`); + } catch (err) { + console.error(err); + } finally { + setRunLoading(false); + } + }; + + // --- Evaluator edit popover --- + const openEditPopover = useCallback(() => { + if (detail) { + setEditRefs(new Set(detail.evaluator_ids)); + } + setEditOpen(true); + }, [detail]); + + const toggleEditRef = (id: string) => { + setEditRefs((prev) => { + const next = new Set(prev); + if (next.has(id)) next.delete(id); + else next.add(id); + return next; + }); + }; + + const handleSaveEvaluators = async () => { + if (!detail) return; + setEditSaving(true); + try { + const updated = await updateEvalSetEvaluators(evalSetId, Array.from(editRefs)); + setDetail(updated); + storeUpdateEvaluators(evalSetId, updated.evaluator_ids); + setEditOpen(false); + } catch (err) { + console.error(err); + } finally { + setEditSaving(false); + } + }; + + // Click-outside to close popover + useEffect(() => { + if (!editOpen) return; + const handler = (e: MouseEvent) => { + if (popoverRef.current && !popoverRef.current.contains(e.target as Node)) { + setEditOpen(false); + } + }; + document.addEventListener("mousedown", handler); + return () => document.removeEventListener("mousedown", handler); + }, [editOpen]); + + // --- Sidebar col resize --- + const onSidebarResizeStart = useCallback((e: React.MouseEvent | React.TouchEvent) 
=> { + e.preventDefault(); + setIsDragging(true); + + const startX = "touches" in e ? e.touches[0].clientX : e.clientX; + const startW = sidebarWidth; + + const onMove = (ev: MouseEvent | TouchEvent) => { + const container = outerRef.current; + if (!container) return; + const clientX = "touches" in ev ? ev.touches[0].clientX : ev.clientX; + const maxW = container.clientWidth - 300; + const newW = Math.max(280, Math.min(maxW, startW + (startX - clientX))); + setSidebarWidth(newW); + }; + + const onUp = () => { + setIsDragging(false); + document.removeEventListener("mousemove", onMove); + document.removeEventListener("mouseup", onUp); + document.removeEventListener("touchmove", onMove); + document.removeEventListener("touchend", onUp); + document.body.style.cursor = ""; + document.body.style.userSelect = ""; + }; + + document.body.style.cursor = "col-resize"; + document.body.style.userSelect = "none"; + document.addEventListener("mousemove", onMove); + document.addEventListener("mouseup", onUp); + document.addEventListener("touchmove", onMove, { passive: false }); + document.addEventListener("touchend", onUp); + }, [sidebarWidth]); + + if (loading) { + return ( +
+ Loading... +
+ ); + } + + if (!detail) { + return ( +
+ Eval set not found +
+ ); + } + + const selectedItem = detail.items.find((i) => i.name === selectedItemName) ?? null; + + return ( +
+ {/* Main content: header + item grid */} +
+ {/* Header bar */} +
+

+ {detail.name} +

+ + {detail.eval_count} items + +
+ + {detail.evaluator_ids.map((id) => { + const ev = evaluators.find((e) => e.id === id); + return ( + + {ev?.name ?? id} + + ); + })} + {editOpen && ( +
+
+ Evaluators +
+
+ {localEvaluators.length === 0 ? ( +
+ No evaluators available +
+ ) : ( + localEvaluators.map((ev) => ( + + )) + )} +
+
+ +
+
+ )} +
+ +
+ + {/* Table header */} +
+ Name + Input + Expected Behavior + Expected Output + Simulation Instr. +
+ + {/* Scrollable item rows */} +
+ {detail.items.map((item: EvalItem) => { + const isSelected = item.name === selectedItemName; + return ( + + ); + })} + {detail.items.length === 0 && ( +
+ No items in this eval set +
+ )} +
+
+ + {/* Sidebar drag handle */} +
+ + {/* Right sidebar */} +
+ {/* Tab bar */} +
+ {(["io", "evaluators"] as const).map((tab) => { + const active = sidebarTab === tab; + const label = tab === "io" ? "I/O" : "Evaluators"; + return ( + + ); + })} +
+ + {/* Content */} +
+ {selectedItem ? ( + sidebarTab === "io" ? ( + + ) : ( + + ) + ) : null} +
+
+
+ ); +} + +function ItemIOView({ item }: { item: EvalItem }) { + const inputJson = JSON.stringify(item.inputs, null, 2); + const expectedOutputJson = item.expected_output != null + ? (typeof item.expected_output === "string" ? item.expected_output : JSON.stringify(item.expected_output, null, 2)) + : null; + + return ( +
+ + + + + {item.expected_behavior && ( + +
+ {item.expected_behavior} +
+
+ )} + + {expectedOutputJson && ( + + + + )} + + {item.simulation_instructions && ( + +
+ {item.simulation_instructions} +
+
+ )} +
+ ); +} + +function ItemEvaluatorsView({ item, evaluators }: { item: EvalItem; evaluators: { id: string; name: string }[] }) { + return ( +
+ {item.evaluator_ids.length > 0 ? ( + <> + {item.evaluator_ids.map((evId) => { + const ev = evaluators.find((e) => e.id === evId); + const criteria = item.evaluation_criterias?.[evId]; + return ( +
+
+ + {ev?.name ?? evId} + + + {criteria ? "Custom criteria" : "Default criteria"} + +
+ {criteria && ( +
+                    {JSON.stringify(criteria, null, 2)}
+                  
+ )} +
+ ); + })} + + ) : ( +
+ No evaluators configured for this item +
+ )} +
+  );
+}
diff --git a/src/uipath/dev/server/frontend/src/components/evals/EvalsSidebar.tsx b/src/uipath/dev/server/frontend/src/components/evals/EvalsSidebar.tsx
new file mode 100644
index 0000000..a0a8792
--- /dev/null
+++ b/src/uipath/dev/server/frontend/src/components/evals/EvalsSidebar.tsx
@@ -0,0 +1,141 @@
+import { useEvalStore } from "../../store/useEvalStore";
+import { useHashRoute } from "../../hooks/useHashRoute";
+
+/**
+ * Format a 0..1 score as a whole-number percentage.
+ *
+ * @param score - Fractional score, or `null` when there is none.
+ * @returns e.g. `"85%"`, or `"-"` when no usable score is available.
+ */
+function formatScore(score: number | null): string {
+  // Guard `undefined` and NaN as well as `null` so a missing score never renders as "NaN%".
+  if (score == null || Number.isNaN(score)) return "-";
+  return `${Math.round(score * 100)}%`;
+}
+
+/**
+ * Pick the CSS colour variable used to display a score.
+ *
+ * @param score - Fractional score, or `null` when there is none.
+ * @returns `var(--success)` at 80% and above, `var(--warning)` at 50% and
+ *   above, `var(--error)` below that, and `var(--text-muted)` when no usable
+ *   score is available.
+ */
+function scoreColor(score: number | null): string {
+  // Same guard as formatScore: a missing score is muted, not an error.
+  if (score == null || Number.isNaN(score)) return "var(--text-muted)";
+  const pct = score * 100;
+  if (pct >= 80) return "var(--success)";
+  if (pct >= 50) return "var(--warning)";
+  return "var(--error)";
+}
+
+/** Colour and label for each run status key; lookups fall back to `pending` at the use site. */
+const statusIcons: Record<string, { color: string; label: string }> = {
+  pending: { color: "var(--text-muted)", label: "Pending" },
+  running: { color: "var(--info)", label: "Running" },
+  completed: { color: "var(--success)", label: "Completed" },
+  failed: { color: "var(--error)", label: "Failed" },
+};
+
+export default function EvalsSidebar() {
+  const evalSets = useEvalStore((s) => s.evalSets);
+  const evalRuns = useEvalStore((s) => s.evalRuns);
+  const { evalSetId, evalRunId, navigate } = useHashRoute();
+
+  const sets = Object.values(evalSets);
+  const runs = Object.values(evalRuns).sort(
+    (a, b) => new Date(b.start_time ?? 0).getTime() - new Date(a.start_time ?? 0).getTime(),
+  );
+
+  return (
+
+ {/* New Eval Set */} + + + {/* Eval Sets */} +
+ Eval Sets +
+ {sets.map((es) => { + const active = evalSetId === es.id; + return ( + + ); + })} + {sets.length === 0 && ( +

+ No eval sets yet +

+ )} + + {/* Run History */} +
+ History +
+ {runs.map((run) => { + const active = evalRunId === run.id; + const si = statusIcons[run.status] ?? statusIcons.pending; + return ( + + ); + })} + {runs.length === 0 && ( +

+ No eval runs yet +

+ )} +
+ ); +} diff --git a/src/uipath/dev/server/frontend/src/components/evaluators/CreateEvaluatorView.tsx b/src/uipath/dev/server/frontend/src/components/evaluators/CreateEvaluatorView.tsx new file mode 100644 index 0000000..9a4c76c --- /dev/null +++ b/src/uipath/dev/server/frontend/src/components/evaluators/CreateEvaluatorView.tsx @@ -0,0 +1,272 @@ +import { useEffect, useState } from "react"; +import { useEvalStore } from "../../store/useEvalStore"; +import { useHashRoute } from "../../hooks/useHashRoute"; +import { createLocalEvaluator } from "../../api/eval-client"; +import { typesByCategory, typeDefaults, getTypeFields, categoryLabel } from "./EvaluatorDetail"; + +const allCategories = ["deterministic", "llm", "tool"] as const; + +interface Props { + category: string; +} + +export default function CreateEvaluatorView({ category: initialCategory }: Props) { + const addLocalEvaluator = useEvalStore((s) => s.addLocalEvaluator); + const { navigate } = useHashRoute(); + + const isFixed = initialCategory !== "any"; + const [category, setCategory] = useState(isFixed ? initialCategory : "deterministic"); + const types = typesByCategory[category] ?? []; + const [name, setName] = useState(""); + const [description, setDescription] = useState(""); + const [typeId, setTypeId] = useState(types[0]?.id ?? ""); + const [targetOutputKey, setTargetOutputKey] = useState("*"); + const [prompt, setPrompt] = useState(""); + const [saving, setSaving] = useState(false); + const [error, setError] = useState(null); + const [descriptionTouched, setDescriptionTouched] = useState(false); + const [promptTouched, setPromptTouched] = useState(false); + + // Reset form when initial category prop changes + useEffect(() => { + const cat = isFixed ? initialCategory : "deterministic"; + setCategory(cat); + const t = typesByCategory[cat] ?? []; + const firstId = t[0]?.id ?? ""; + const defaults = typeDefaults[firstId]; + setName(""); + setDescription(defaults?.description ?? 
""); + setTypeId(firstId); + setTargetOutputKey("*"); + setPrompt(defaults?.prompt ?? ""); + setError(null); + setDescriptionTouched(false); + setPromptTouched(false); + }, [initialCategory, isFixed]); + + const handleCategoryChange = (newCat: string) => { + setCategory(newCat); + const t = typesByCategory[newCat] ?? []; + const firstId = t[0]?.id ?? ""; + const defaults = typeDefaults[firstId]; + setTypeId(firstId); + if (!descriptionTouched) setDescription(defaults?.description ?? ""); + if (!promptTouched) setPrompt(defaults?.prompt ?? ""); + }; + + const handleTypeChange = (newTypeId: string) => { + setTypeId(newTypeId); + const defaults = typeDefaults[newTypeId]; + if (defaults) { + if (!descriptionTouched) setDescription(defaults.description); + if (!promptTouched) setPrompt(defaults.prompt); + } + }; + + const fields = getTypeFields(typeId); + + const handleSubmit = async () => { + if (!name.trim()) { + setError("Name is required"); + return; + } + setSaving(true); + setError(null); + try { + const config: Record = {}; + if (fields.targetOutputKey) config.targetOutputKey = targetOutputKey; + if (fields.prompt && prompt.trim()) config.prompt = prompt; + + const result = await createLocalEvaluator({ + name: name.trim(), + description: description.trim(), + evaluator_type_id: typeId, + config, + }); + addLocalEvaluator(result); + navigate("#/evaluators"); + } catch (err: unknown) { + const detail = (err as { detail?: string })?.detail; + setError(detail ?? "Failed to create evaluator"); + } finally { + setSaving(false); + } + }; + + const inputStyle = { + background: "var(--bg-secondary)", + border: "1px solid var(--border)", + color: "var(--text-primary)", + }; + + return ( +
+
+
+ {/* Header */} +
+
+
+ + New Evaluator + +
+

+ Create an evaluator to score agent outputs +

+
+ + {/* Name */} +
+ + setName(e.target.value)} + placeholder="e.g. MyEvaluator" + className="w-full rounded-md px-3 py-2 text-xs" + style={inputStyle} + onKeyDown={(e) => { + if (e.key === "Enter" && name.trim()) handleSubmit(); + }} + /> +
+ + {/* Category */} +
+ + {isFixed ? ( +
+ {categoryLabel[category] ?? category} +
+ ) : ( + + )} +
+ + {/* Type */} +
+ + +
+ + {/* Description */} +
+ +