From 25517b0e7229912a79e8f8707a1629657891b03d Mon Sep 17 00:00:00 2001 From: totto Date: Sun, 1 Mar 2026 10:25:49 +0100 Subject: [PATCH 1/6] Add KCP manifest, TL;DR files, and benchmark results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - knowledge.yaml: 16-unit KCP v0.3 manifest with triggers, hints, and summary cross-links - docs/en/getting-started-tldr.mdx: architecture summary + minimal crew example - docs/en/concepts/agents-tasks-tldr.mdx: agent/task quick reference - docs/en/concepts/flows-tldr.mdx: Flow decorators and state management reference - docs/en/concepts/tools-memory-tldr.mdx: tools, memory, and knowledge/RAG reference - benchmark.py: reproducible tool-call efficiency benchmark (baseline vs KCP) - BENCHMARK.md: results — 76% reduction (123 → 30 tool calls across 8 queries) Co-Authored-By: Claude Sonnet 4.6 --- BENCHMARK.md | 36 +++ benchmark.py | 146 +++++++++++ docs/en/concepts/agents-tasks-tldr.mdx | 80 ++++++ docs/en/concepts/flows-tldr.mdx | 102 ++++++++ docs/en/concepts/tools-memory-tldr.mdx | 99 +++++++ docs/en/getting-started-tldr.mdx | 88 +++++++ knowledge.yaml | 342 +++++++++++++++++++++++++ 7 files changed, 893 insertions(+) create mode 100644 BENCHMARK.md create mode 100644 benchmark.py create mode 100644 docs/en/concepts/agents-tasks-tldr.mdx create mode 100644 docs/en/concepts/flows-tldr.mdx create mode 100644 docs/en/concepts/tools-memory-tldr.mdx create mode 100644 docs/en/getting-started-tldr.mdx create mode 100644 knowledge.yaml diff --git a/BENCHMARK.md b/BENCHMARK.md new file mode 100644 index 0000000000..1633f6f71b --- /dev/null +++ b/BENCHMARK.md @@ -0,0 +1,36 @@ +# KCP Benchmark Results — CrewAI + +## Summary + +**76% reduction in tool calls** when using the Knowledge Context Protocol (KCP) manifest compared to unguided repository exploration. 
+ +- Baseline total: **123 tool calls** +- KCP total: **30 tool calls** +- Saved: **93 tool calls** across 8 queries + +## Results Table + +| Query | Baseline | KCP | Saved | +| :---- | -------: | --: | ----: | +| What is the difference between Flows and Crews in CrewAI? | 14 | 2 | 12 | +| How do I create my first agent and assign it a task? | 7 | 3 | 4 | +| How do I create a custom tool for my agent? | 8 | 3 | 5 | +| How do I add memory to my crew? | 7 | 3 | 4 | +| Which LLM providers does CrewAI support? | 17 | 5 | 12 | +| How do I build a flow that triggers a crew? | 15 | 2 | 13 | +| How do I implement a hierarchical crew with a manager agent? | 22 | 9 | 13 | +| How do I add knowledge (RAG) to my crew? | 33 | 3 | 30 | +| **TOTAL** | **123** | **30** | **93** | + +## Methodology + +Each query was run twice against the CrewAI repository (`/src/totto/crewAI`): + +1. **Baseline**: The agent was told the repository path and instructed to explore it freely using `read_file`, `glob_files`, and `grep_content` tools to find the answer. +2. **KCP**: The agent was instructed to first read `knowledge.yaml`, match the query against unit triggers, and read only the files pointed to by matching units — preferring TL;DR summary files when available. + +Both runs used `claude-haiku-4-5-20251001` with `max_tokens=2048` and up to 20 turns. Tool call counts measure retrieval efficiency only (not answer quality). + +## Findings + +The KCP manifest delivered a **76% reduction in tool calls**, with the largest gains on broad or unfamiliar queries. The "knowledge (RAG)" query showed the most dramatic improvement (33 → 3 calls, 91% reduction): without KCP the agent recursively explored the docs directory; with KCP it read `knowledge.yaml`, matched the `rag crew` trigger directly to `tools-memory-tldr.mdx`, and answered immediately. 
The hierarchical crew query had one of the smallest relative gains and the highest remaining KCP cost (22 → 9, a 59% reduction), because the answer required reading the full `crews.mdx` and `tasks.mdx` even with guidance — demonstrating that KCP eliminates exploration overhead but cannot shrink inherently large source files. diff --git a/benchmark.py b/benchmark.py new file mode 100644 index 0000000000..c1c6a072ce --- /dev/null +++ b/benchmark.py @@ -0,0 +1,146 @@ +import anthropic +import os +import glob as glob_module +import subprocess + +client = anthropic.Anthropic() + +TOOLS = [ + { + "name": "read_file", + "description": "Read the content of a file", + "input_schema": { + "type": "object", + "properties": {"path": {"type": "string"}}, + "required": ["path"] + } + }, + { + "name": "glob_files", + "description": "Find files matching a pattern", + "input_schema": { + "type": "object", + "properties": { + "pattern": {"type": "string"}, + "base_dir": {"type": "string"} + }, + "required": ["pattern"] + } + }, + { + "name": "grep_content", + "description": "Search for text in files", + "input_schema": { + "type": "object", + "properties": { + "pattern": {"type": "string"}, + "path": {"type": "string"} + }, + "required": ["pattern", "path"] + } + } +] + +def execute_tool(tool_name, tool_input): + if tool_name == "read_file": + path = tool_input["path"] + try: + with open(path, 'r', encoding='utf-8', errors='replace') as f: + content = f.read() + if len(content) > 8000: + content = content[:8000] + "\n...[truncated]" + return content + except Exception as e: + return f"Error: {e}" + elif tool_name == "glob_files": + pattern = tool_input["pattern"] + base = tool_input.get("base_dir", "/src/totto/crewAI") + if not pattern.startswith("/"): + pattern = os.path.join(base, pattern) + matches = glob_module.glob(pattern, recursive=True) + return "\n".join(matches[:20]) if matches else "No files found" + elif tool_name == "grep_content": + pattern = tool_input["pattern"] + path = tool_input["path"] + try: + result = subprocess.run( 
["grep", "-r", "-l", "-m", "5", pattern, path], + capture_output=True, text=True, timeout=10 + ) + return result.stdout[:2000] if result.stdout else "No matches" + except Exception as e: + return f"Error: {e}" + return "Unknown tool" + +def run_agent(system_prompt, query, max_turns=20): + messages = [{"role": "user", "content": query}] + tool_count = 0 + for _ in range(max_turns): + response = client.messages.create( + model="claude-haiku-4-5-20251001", + max_tokens=2048, + system=system_prompt, + tools=TOOLS, + messages=messages + ) + tool_uses = [b for b in response.content if b.type == "tool_use"] + tool_count += len(tool_uses) + if response.stop_reason == "end_turn" or not tool_uses: + return "", tool_count + messages.append({"role": "assistant", "content": response.content}) + tool_results = [] + for tool_use in tool_uses: + result = execute_tool(tool_use.name, tool_use.input) + tool_results.append({ + "type": "tool_result", + "tool_use_id": tool_use.id, + "content": result + }) + messages.append({"role": "user", "content": tool_results}) + return "", tool_count + +BASELINE_PROMPT = """You are a helpful assistant answering questions about the CrewAI framework. +The repository is at /src/totto/crewAI. +Use the available tools to read files and find the answer. +Start by exploring the repository structure to understand where to find information.""" + +KCP_PROMPT = """You are a helpful assistant answering questions about the CrewAI framework. +The repository is at /src/totto/crewAI. +IMPORTANT: First read /src/totto/crewAI/knowledge.yaml to understand the repository structure. +Match the question to the triggers in knowledge.yaml and read only the files pointed to by matching units. 
+If a unit has summary_available: true, read the summary_unit file first (it's much smaller).""" + +QUERIES = [ + "What is the difference between Flows and Crews in CrewAI?", + "How do I create my first agent and assign it a task?", + "How do I create a custom tool for my agent?", + "How do I add memory to my crew?", + "Which LLM providers does CrewAI support?", + "How do I build a flow that triggers a crew?", + "How do I implement a hierarchical crew with a manager agent?", + "How do I add knowledge (RAG) to my crew?", +] + +if __name__ == "__main__": + print("CrewAI KCP Benchmark") + print("=" * 60) + results = [] + for i, query in enumerate(QUERIES): + print(f"\nQuery {i+1}: {query[:60]}...") + _, baseline = run_agent(BASELINE_PROMPT, query) + print(f" Baseline: {baseline} tool calls") + _, kcp = run_agent(KCP_PROMPT, query) + print(f" KCP: {kcp} tool calls") + results.append((query, baseline, kcp)) + + print("\n" + "=" * 60) + total_baseline = sum(r[1] for r in results) + total_kcp = sum(r[2] for r in results) + print(f"\n{'Query':<55} {'Base':>5} {'KCP':>5} {'Saved':>6}") + print("-" * 75) + for query, b, k in results: + print(f"{query[:55]:<55} {b:>5} {k:>5} {b-k:>6}") + print("-" * 75) + print(f"{'TOTAL':<55} {total_baseline:>5} {total_kcp:>5} {total_baseline-total_kcp:>6}") + pct = round((1 - total_kcp/total_baseline) * 100) if total_baseline > 0 else 0 + print(f"\nReduction: {pct}% fewer tool calls with KCP") diff --git a/docs/en/concepts/agents-tasks-tldr.mdx b/docs/en/concepts/agents-tasks-tldr.mdx new file mode 100644 index 0000000000..c3b40f95d0 --- /dev/null +++ b/docs/en/concepts/agents-tasks-tldr.mdx @@ -0,0 +1,80 @@ +--- +title: Agents & Tasks (TL;DR) +description: The 5 key agent attributes and how to define tasks — quick reference +icon: robot +--- + +## Agent: The 5 Key Attributes + +An `Agent` is an autonomous unit with a role, a goal, and the tools to get things done. 
+ +| Attribute | Parameter | What it does | +| :--- | :--- | :--- | +| **Role** | `role` | Defines the agent's function and expertise | +| **Goal** | `goal` | The individual objective guiding decisions | +| **Backstory** | `backstory` | Context and personality enriching interactions | +| **Tools** | `tools` | List of capabilities the agent can use (default: `[]`) | +| **LLM** | `llm` | The language model powering the agent (default: `gpt-4o`) | + +### Minimal Agent Example + +```python +from crewai import Agent + +researcher = Agent( + role="Research Analyst", + goal="Find accurate, up-to-date information on any topic", + backstory="An expert at gathering data from multiple sources and identifying key insights.", + tools=[], # add tools here, e.g. SerperDevTool() + verbose=True, # enable logs for debugging +) +``` + +## Task: Description + Expected Output + Agent + +A `Task` is a specific assignment given to an agent. Two fields are required; agent assignment is strongly recommended. + +| Attribute | Parameter | What it does | +| :--- | :--- | :--- | +| **Description** | `description` | What the agent must do | +| **Expected Output** | `expected_output` | What a successful completion looks like | +| **Agent** | `agent` | Which agent handles this task | +| **Context** | `context` | Other tasks whose outputs feed into this one | +| **Output File** | `output_file` | Save output to a file path | + +### Minimal Task Example + +```python +from crewai import Task + +research_task = Task( + description="Research the top 5 AI frameworks released in 2025 and summarize their key features.", + expected_output="A markdown list of 5 frameworks with name, key feature, and one-sentence summary.", + agent=researcher, +) +``` + +## Process Types: How Tasks Are Executed + +```python +from crewai import Crew, Process + +# Sequential (default): tasks run in order, output feeds into the next +crew = Crew(agents=[...], tasks=[...], process=Process.sequential) + +# Hierarchical: a manager 
LLM assigns tasks based on agent capabilities +crew = Crew( + agents=[...], + tasks=[...], + process=Process.hierarchical, + manager_llm="gpt-4o", # required for hierarchical +) +``` + +**Sequential** — use when tasks have a clear order and each builds on the previous. +**Hierarchical** — use when tasks should be dynamically assigned by a manager agent. + +## Full Reference + +- All agent attributes: [concepts/agents.mdx](/en/concepts/agents) +- All task attributes: [concepts/tasks.mdx](/en/concepts/tasks) diff --git a/docs/en/concepts/flows-tldr.mdx b/docs/en/concepts/flows-tldr.mdx new file mode 100644 index 0000000000..804fab83eb --- /dev/null +++ b/docs/en/concepts/flows-tldr.mdx @@ -0,0 +1,102 @@ +--- +title: Flows (TL;DR) +description: Event-driven workflows with state management — quick reference +icon: arrow-progress +--- + +## What is a Flow? + +A Flow is the control plane of your CrewAI application. It chains tasks together with state, conditional logic, and event-driven triggers. Crews run *inside* Flow steps when you need autonomous agent intelligence. 
+ +## Key Decorators + +| Decorator | Purpose | +| :--- | :--- | +| `@start()` | Entry point — runs when `flow.kickoff()` is called | +| `@listen(method)` | Runs after the specified method completes, receives its output | +| `@router(method)` | Routes execution to different branches based on return value | +| `@listen(and_(a, b))` | Runs only after **both** `a` and `b` complete | +| `@listen(or_(a, b))` | Runs when **either** `a` or `b` completes | + +## Minimal Flow + Crew Example + +```python +from crewai import Agent, Task, Crew +from crewai.flow.flow import Flow, start, listen + +class ResearchFlow(Flow): + # State is a dict accessible as self.state throughout the flow + model = "gpt-4o-mini" + + @start() + def get_topic(self): + # Set initial state + self.state["topic"] = "AI agent frameworks" + return self.state["topic"] + + @listen(get_topic) + def run_research_crew(self, topic): + # Spin up a Crew inside a Flow step + researcher = Agent( + role="Research Analyst", + goal=f"Research {topic} thoroughly", + backstory="Expert at finding and synthesizing information.", + ) + task = Task( + description=f"Research the latest developments in {topic}.", + expected_output="A 3-bullet summary of key findings.", + agent=researcher, + ) + crew = Crew(agents=[researcher], tasks=[task], verbose=False) + result = crew.kickoff() + self.state["research"] = str(result) + return str(result) + + @listen(run_research_crew) + def save_result(self, research): + # Flow step: save to file (plain Python, no agent needed) + with open("research_output.txt", "w") as f: + f.write(research) + print("Saved!") + return research + + +flow = ResearchFlow() +result = flow.kickoff() +``` + +## Routing Example + +```python +from crewai.flow.flow import Flow, start, listen, router + +class BranchingFlow(Flow): + @start() + def check_input(self): + return "short" # or "long" + + @router(check_input) + def route_by_length(self, result): + if result == "short": + return "handle_short" + return "handle_long" + + 
@listen("handle_short") + def short_path(self): + return "Quick answer" + + @listen("handle_long") + def long_path(self): + return "Detailed analysis" +``` + +## State Management + +- `self.state` is a dict persisted across all steps in the flow +- Every flow instance gets a unique UUID at `self.state["id"]` +- State is accessible in every `@start`, `@listen`, and `@router` method + +## Full Reference + +- Complete Flows docs: [concepts/flows.mdx](/en/concepts/flows) +- Step-by-step tutorial: [guides/flows/first-flow.mdx](/en/guides/flows/first-flow) diff --git a/docs/en/concepts/tools-memory-tldr.mdx b/docs/en/concepts/tools-memory-tldr.mdx new file mode 100644 index 0000000000..f840fa0ede --- /dev/null +++ b/docs/en/concepts/tools-memory-tldr.mdx @@ -0,0 +1,99 @@ +--- +title: Tools, Memory & Knowledge (TL;DR) +description: Giving agents capabilities, persistence, and domain knowledge — quick reference +icon: screwdriver-wrench +--- + +## Tools: Built-In Options + +Install: `pip install 'crewai[tools]'` + +Key built-in tools from `crewai_tools`: + +| Tool | What it does | +| :--- | :--- | +| `SerperDevTool` | Web search via Serper.dev API | +| `WebsiteSearchTool` | RAG search over a website | +| `ScrapeWebsiteTool` | Scrape full web page content | +| `FileReadTool` | Read local files (any format) | +| `DirectoryReadTool` | Read a directory structure | +| `CodeInterpreterTool` | Execute Python code | +| `PDFSearchTool` | RAG search within PDF documents | +| `CSVSearchTool` | RAG search within CSV files | +| `GithubSearchTool` | RAG search within GitHub repositories | +| `CodeDocsSearchTool` | RAG search over code documentation | + +## Creating a Custom Tool + +```python +from crewai.tools import tool + +@tool("Search Internal Database") +def search_db(query: str) -> str: + """Search our internal database for relevant records. 
Use this when + the user asks about internal company data.""" + # Your logic here + return f"Results for: {query}" + +# Attach to an agent +agent = Agent( + role="Data Analyst", + goal="Answer questions from internal data", + backstory="Expert at querying internal systems.", + tools=[search_db], +) +``` + +## Memory: Persistent Agent Knowledge + +CrewAI uses a unified `Memory` class. Enable it on a Crew with `memory=True`: + +```python +from crewai import Crew, Memory + +# Simple: use defaults +crew = Crew(agents=[...], tasks=[...], memory=True) + +# Custom: tune recency vs. semantic importance +memory = Memory(recency_weight=0.4, recency_half_life_days=14) +crew = Crew(agents=[...], tasks=[...], memory=memory) +``` + +Memory types (handled internally by the unified Memory class): +- **Short-term** — within a single crew run (context window) +- **Long-term** — persisted across runs (vector DB) +- **Entity** — structured facts about people, places, things +- **User** — per-user preferences and history + +## Knowledge Sources: RAG for Your Crew + +Knowledge gives agents access to external documents (PDFs, text, URLs) via RAG. + +```python +from crewai import Agent, Task, Crew, Process +from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource +from crewai.knowledge.source.pdf_knowledge_source import PDFKnowledgeSource + +# Text knowledge +text_source = StringKnowledgeSource( + content="Our refund policy: full refund within 30 days of purchase." +) + +# PDF knowledge (place files in ./knowledge/ directory) +pdf_source = PDFKnowledgeSource(file_paths=["company_handbook.pdf"]) + +crew = Crew( + agents=[agent], + tasks=[task], + process=Process.sequential, + knowledge_sources=[text_source, pdf_source], # attach here +) +``` + +Supported source types: `StringKnowledgeSource`, `PDFKnowledgeSource`, `CSVKnowledgeSource`, `CrewDoclingSource` (URLs + HTML). 
+ +## Full Reference + +- Tools: [concepts/tools.mdx](/en/concepts/tools) +- Memory: [concepts/memory.mdx](/en/concepts/memory) +- Knowledge/RAG: [concepts/knowledge.mdx](/en/concepts/knowledge) diff --git a/docs/en/getting-started-tldr.mdx b/docs/en/getting-started-tldr.mdx new file mode 100644 index 0000000000..926146b8a3 --- /dev/null +++ b/docs/en/getting-started-tldr.mdx @@ -0,0 +1,88 @@ +--- +title: Getting Started (TL;DR) +description: What CrewAI is and how to build your first crew in under 5 minutes +icon: bolt +--- + +## What is CrewAI? + +CrewAI is a framework for orchestrating autonomous AI agents using two complementary primitives: + +- **Flows** — event-driven workflows that manage state and control execution (the "backbone" of your app) +- **Crews** — teams of role-playing agents that collaborate on complex tasks (the "intelligence" within a Flow) + +For any production app, start with a Flow and use Crews inside it. + +## When to Use Flows vs. Crews + +| Use Case | Architecture | +| :--- | :--- | +| Simple automation / scripting | Single Flow with Python tasks | +| Complex research or content | Flow managing state → Crew performing research | +| Application backend | Flow handling requests → Crew generating content → Flow saving to DB | + +**Rule of thumb:** Flow = control plane. Crew = execution unit. Use both. + +## Installation + +```bash +pip install crewai +# With built-in tools (web search, file read, etc.) +pip install 'crewai[tools]' +``` + +Set your LLM API key: + +```bash +export OPENAI_API_KEY="your-key-here" +``` + +## Minimal Working Crew + +```python +from crewai import Agent, Task, Crew, Process + +# 1. 
Define agents with role, goal, backstory +researcher = Agent( + role="Research Analyst", + goal="Find the latest information on a given topic", + backstory="Expert at gathering and synthesizing information from multiple sources.", +) + +writer = Agent( + role="Content Writer", + goal="Write clear, engaging summaries", + backstory="Skilled communicator who turns research into readable content.", +) + +# 2. Define tasks with description and expected output +research_task = Task( + description="Research the top 3 trends in AI agents for 2025.", + expected_output="A bullet-point list of 3 trends with one-sentence explanations.", + agent=researcher, +) + +write_task = Task( + description="Write a 2-paragraph summary based on the research.", + expected_output="A concise 2-paragraph summary in plain English.", + agent=writer, +) + +# 3. Assemble the crew +crew = Crew( + agents=[researcher, writer], + tasks=[research_task, write_task], + process=Process.sequential, + verbose=True, +) + +# 4. Run it +result = crew.kickoff() +print(result) +``` + +## Next Steps + +- Full tutorial: [Build Your First Crew](/en/guides/crews/first-crew) +- In-depth quickstart: [Quickstart](/en/quickstart) +- Understanding the architecture: [Introduction](/en/introduction) diff --git a/knowledge.yaml b/knowledge.yaml new file mode 100644 index 0000000000..8445b1847d --- /dev/null +++ b/knowledge.yaml @@ -0,0 +1,342 @@ +kcp_version: "0.3" +id: crewai +description: "Framework for orchestrating role-playing, autonomous AI agents using Flows (control) and Crews (intelligence)" +entry_points: + primary: docs/en/introduction.mdx + architecture: docs/en/introduction.mdx + +units: + # ── Eager: load immediately ────────────────────────────────────────────────── + + - id: overview + path: docs/en/introduction.mdx + kind: architecture + intent: "Understand what CrewAI is, how Flows and Crews work together, and when to use each" + scope: overview + audience: [beginner, developer] + triggers: + - what is 
crewai + - flows vs crews + - when to use flows + - when to use crews + - crewai architecture + - overview + hints: + token_estimate: 1200 + load_strategy: eager + priority: high + + - id: getting-started-tldr + path: docs/en/getting-started-tldr.mdx + kind: tutorial + intent: "Get up and running with CrewAI — installation, first crew, and architecture summary" + scope: overview + audience: [beginner, developer] + triggers: + - what is crewai + - getting started + - install crewai + - pip install crewai + - first agent + - first crew + - hello world + - quickstart + - how to start + - minimal example + hints: + token_estimate: 600 + load_strategy: eager + priority: high + summary_of: quickstart + + # ── Core concepts: lazy ─────────────────────────────────────────────────────── + + - id: quickstart + path: docs/en/quickstart.mdx + kind: tutorial + intent: "Full step-by-step quickstart guide for creating your first CrewAI project" + scope: comprehensive + audience: [beginner, developer] + triggers: + - quickstart guide + - full quickstart + - step by step first crew + hints: + token_estimate: 4000 + load_strategy: lazy + priority: medium + summary_available: true + summary_unit: getting-started-tldr + + - id: agents-tasks-tldr + path: docs/en/concepts/agents-tasks-tldr.mdx + kind: reference + intent: "Quick reference for agent attributes (role/goal/backstory/tools/llm) and task structure" + scope: focused + audience: [developer] + triggers: + - create an agent + - agent role goal backstory + - agent attributes + - create a task + - task description + - expected output + - assign task to agent + - how to define an agent + - how to define a task + - sequential process + - hierarchical process + hints: + token_estimate: 700 + load_strategy: lazy + priority: high + summary_of: agents + + - id: agents + path: docs/en/concepts/agents.mdx + kind: reference + intent: "Complete agent attribute reference, YAML configuration, and advanced agent patterns" + scope: comprehensive 
+ audience: [developer, advanced] + triggers: + - full agent reference + - all agent attributes + - agent yaml configuration + - agent delegation + - agent memory + - agent reasoning + - agent code execution + hints: + token_estimate: 8000 + load_strategy: lazy + priority: medium + summary_available: true + summary_unit: agents-tasks-tldr + + - id: tasks + path: docs/en/concepts/tasks.mdx + kind: reference + intent: "Complete task attribute reference, context chaining, async execution, and output options" + scope: comprehensive + audience: [developer, advanced] + triggers: + - full task reference + - all task attributes + - task context + - task chaining + - async task + - task output file + - human input task + hints: + token_estimate: 11000 + load_strategy: lazy + priority: medium + summary_available: true + summary_unit: agents-tasks-tldr + + - id: crews + path: docs/en/concepts/crews.mdx + kind: reference + intent: "Crew configuration, process types, delegation, callbacks, and orchestration patterns" + scope: comprehensive + audience: [developer, advanced] + triggers: + - crew configuration + - how to assemble a crew + - crew attributes + - crew kickoff + - manager agent + - crew callbacks + - hierarchical crew + - delegation + hints: + token_estimate: 6000 + load_strategy: lazy + priority: medium + + - id: flows-tldr + path: docs/en/concepts/flows-tldr.mdx + kind: reference + intent: "Quick reference for Flow decorators (@start, @listen, @router) and state management" + scope: focused + audience: [developer] + triggers: + - flow decorators + - listen start router + - state management flow + - event driven workflow + - how to create a flow + - flow anatomy + - and_ or_ decorators + - flow state + hints: + token_estimate: 700 + load_strategy: lazy + priority: high + summary_of: flows + + - id: flows + path: docs/en/concepts/flows.mdx + kind: reference + intent: "Complete Flows reference: decorators, state, routing, async, flow-crew integration" + scope: 
comprehensive + audience: [developer, advanced] + triggers: + - full flows reference + - all flow decorators + - flow state management deep dive + - flow async + - flow persistence + - flow visualization + - flow plot + hints: + token_estimate: 10000 + load_strategy: lazy + priority: medium + summary_available: true + summary_unit: flows-tldr + + - id: tools-memory-tldr + path: docs/en/concepts/tools-memory-tldr.mdx + kind: reference + intent: "Quick reference for built-in tools, custom @tool decorator, memory types, and knowledge/RAG" + scope: focused + audience: [developer] + triggers: + - custom tool + - tool decorator + - built-in tools + - web search tool + - available tools list + - memory + - short term memory + - long term memory + - entity memory + - user memory + - knowledge source + - rag crew + - pdf knowledge + - add documents to crew + hints: + token_estimate: 700 + load_strategy: lazy + priority: high + summary_of: tools + + - id: tools + path: docs/en/concepts/tools.mdx + kind: reference + intent: "Complete tools reference: all built-in tools, custom tool creation, caching, async tools" + scope: comprehensive + audience: [developer, advanced] + triggers: + - full tools reference + - all crewai tools + - custom tool class + - BaseTool + - tool caching + - async tool + - langchain tool + hints: + token_estimate: 3500 + load_strategy: lazy + priority: medium + summary_available: true + summary_unit: tools-memory-tldr + + - id: memory + path: docs/en/concepts/memory.mdx + kind: reference + intent: "Memory system: unified Memory class, usage with Crews/Agents/Flows, storage backends" + scope: comprehensive + audience: [developer, advanced] + triggers: + - full memory reference + - memory class + - memory storage + - memory backends + - standalone memory + - memory in flow + - memory reset + - memory recall + hints: + token_estimate: 8000 + load_strategy: lazy + priority: medium + summary_available: true + summary_unit: tools-memory-tldr + + - id: llms + 
path: docs/en/concepts/llms.mdx + kind: reference + intent: "LLM provider configuration for all 60+ supported models (OpenAI, Anthropic, Ollama, Azure, etc.)" + scope: comprehensive + audience: [developer, advanced] + triggers: + - which llm + - llm providers + - openai + - anthropic claude + - ollama local model + - azure openai + - model configuration + - llm setup + - supported models + - change model + - groq + - gemini + - bedrock + hints: + token_estimate: 17000 + load_strategy: lazy + priority: medium + + - id: first-crew-guide + path: docs/en/guides/crews/first-crew.mdx + kind: tutorial + intent: "End-to-end tutorial: create a research crew with YAML config, agents, tasks, and crew setup" + scope: comprehensive + audience: [beginner, developer] + triggers: + - build first crew tutorial + - crew tutorial step by step + - research crew example + - crewai create crew cli + hints: + token_estimate: 4000 + load_strategy: lazy + priority: medium + + - id: first-flow-guide + path: docs/en/guides/flows/first-flow.mdx + kind: tutorial + intent: "End-to-end tutorial for building your first Flow with state, listeners, and crew integration" + scope: comprehensive + audience: [beginner, developer] + triggers: + - build first flow tutorial + - flow tutorial step by step + - flow with crew example + - how to build a flow + hints: + token_estimate: 6000 + load_strategy: lazy + priority: medium + + - id: mcp + path: docs/en/mcp/overview.mdx + kind: guide + intent: "Model Context Protocol (MCP) integration: connect MCP servers as tool sources for agents" + scope: comprehensive + audience: [developer, advanced] + triggers: + - mcp + - model context protocol + - mcp server + - mcp tools + - mcp integration + - connect external tools via mcp + hints: + token_estimate: 6000 + load_strategy: lazy + priority: low From c8262242d05cd881e416b9d10eaae9ba85a0dd82 Mon Sep 17 00:00:00 2001 From: totto Date: Sun, 1 Mar 2026 10:45:50 +0100 Subject: [PATCH 2/6] fix: use REPO_ROOT derived 
from __file__ instead of hardcoded path Replace hardcoded /src/totto/crewAI paths in benchmark.py with a REPO_ROOT variable derived from os.path.dirname(os.path.abspath(__file__)). This makes the benchmark script reproducible on any machine. Co-Authored-By: Claude Sonnet 4.6 --- benchmark.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/benchmark.py b/benchmark.py index c1c6a072ce..9307a3dc9c 100644 --- a/benchmark.py +++ b/benchmark.py @@ -5,6 +5,8 @@ client = anthropic.Anthropic() +REPO_ROOT = os.path.dirname(os.path.abspath(__file__)) + TOOLS = [ { "name": "read_file", @@ -54,7 +56,7 @@ def execute_tool(tool_name, tool_input): return f"Error: {e}" elif tool_name == "glob_files": pattern = tool_input["pattern"] - base = tool_input.get("base_dir", "/src/totto/crewAI") + base = tool_input.get("base_dir", REPO_ROOT) if not pattern.startswith("/"): pattern = os.path.join(base, pattern) matches = glob_module.glob(pattern, recursive=True) @@ -99,14 +101,14 @@ def run_agent(system_prompt, query, max_turns=20): messages.append({"role": "user", "content": tool_results}) return "", tool_count -BASELINE_PROMPT = """You are a helpful assistant answering questions about the CrewAI framework. -The repository is at /src/totto/crewAI. +BASELINE_PROMPT = f"""You are a helpful assistant answering questions about the CrewAI framework. +The repository is at {REPO_ROOT}. Use the available tools to read files and find the answer. Start by exploring the repository structure to understand where to find information.""" -KCP_PROMPT = """You are a helpful assistant answering questions about the CrewAI framework. -The repository is at /src/totto/crewAI. -IMPORTANT: First read /src/totto/crewAI/knowledge.yaml to understand the repository structure. +KCP_PROMPT = f"""You are a helpful assistant answering questions about the CrewAI framework. +The repository is at {REPO_ROOT}. 
+IMPORTANT: First read {REPO_ROOT}/knowledge.yaml to understand the repository structure. Match the question to the triggers in knowledge.yaml and read only the files pointed to by matching units. If a unit has summary_available: true, read the summary_unit file first (it's much smaller).""" From 28f79d16b8d0c921aee534a209163ff936efac17 Mon Sep 17 00:00:00 2001 From: totto Date: Sun, 1 Mar 2026 11:23:15 +0100 Subject: [PATCH 3/6] fix: remove hardcoded path from BENCHMARK.md; restrict file access to REPO_ROOT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BENCHMARK.md methodology section referenced /src/totto/crewAI — replaced with generic description. benchmark.py read_file and grep_content handlers now validate that requested paths fall within REPO_ROOT, preventing the agent from reading files outside the repository. Co-Authored-By: Claude Sonnet 4.6 --- BENCHMARK.md | 2 +- benchmark.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/BENCHMARK.md b/BENCHMARK.md index 1633f6f71b..6b77499bed 100644 --- a/BENCHMARK.md +++ b/BENCHMARK.md @@ -24,7 +24,7 @@ ## Methodology -Each query was run twice against the CrewAI repository (`/src/totto/crewAI`): +Each query was run twice against a local clone of the CrewAI repository: 1. **Baseline**: The agent was told the repository path and instructed to explore it freely using `read_file`, `glob_files`, and `grep_content` tools to find the answer. 2. **KCP**: The agent was instructed to first read `knowledge.yaml`, match the query against unit triggers, and read only the files pointed to by matching units — preferring TL;DR summary files when available. 
diff --git a/benchmark.py b/benchmark.py index 9307a3dc9c..661d5bf58c 100644 --- a/benchmark.py +++ b/benchmark.py @@ -46,6 +46,8 @@ def execute_tool(tool_name, tool_input): if tool_name == "read_file": path = tool_input["path"] + if not os.path.realpath(path).startswith(os.path.realpath(REPO_ROOT)): + return "Error: access denied — path is outside the repository" try: with open(path, 'r', encoding='utf-8', errors='replace') as f: content = f.read() @@ -64,6 +66,8 @@ def execute_tool(tool_name, tool_input): elif tool_name == "grep_content": pattern = tool_input["pattern"] path = tool_input["path"] + if not os.path.realpath(path).startswith(os.path.realpath(REPO_ROOT)): + return "Error: access denied — path is outside the repository" try: result = subprocess.run( ["grep", "-r", "-l", "-m", "5", pattern, path], From 04df41f88551cd1e282a552d1781c0bbefcccf18 Mon Sep 17 00:00:00 2001 From: totto Date: Sun, 1 Mar 2026 11:37:27 +0100 Subject: [PATCH 4/6] =?UTF-8?q?fix:=20harden=20path=20validation=20?= =?UTF-8?q?=E2=80=94=20use=20pathlib.relative=5Fto=20and=20restrict=20glob?= =?UTF-8?q?=5Ffiles?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous startswith() check compared raw string prefixes, so a sibling directory whose path merely begins with the repository path (e.g. /repo-name-suffix/ checked against a /repo-name root) would pass and allow access outside the repository. Replace it with pathlib.Path.relative_to(), which correctly checks containment. This also restricts glob_files to REPO_ROOT by filtering all results through the same _within_repo() helper — the handler previously had no path restriction at all.
Co-Authored-By: Claude Sonnet 4.6 --- benchmark.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/benchmark.py b/benchmark.py index 661d5bf58c..403bf8b2c1 100644 --- a/benchmark.py +++ b/benchmark.py @@ -2,10 +2,21 @@ import os import glob as glob_module import subprocess +from pathlib import Path client = anthropic.Anthropic() REPO_ROOT = os.path.dirname(os.path.abspath(__file__)) +_REPO_ROOT_REAL = Path(os.path.realpath(REPO_ROOT)) + + +def _within_repo(path: str) -> bool: + """Return True if path resolves to a location inside REPO_ROOT.""" + try: + Path(os.path.realpath(path)).relative_to(_REPO_ROOT_REAL) + return True + except (ValueError, OSError): + return False TOOLS = [ { @@ -46,7 +57,7 @@ def execute_tool(tool_name, tool_input): if tool_name == "read_file": path = tool_input["path"] - if not os.path.realpath(path).startswith(os.path.realpath(REPO_ROOT)): + if not _within_repo(path): return "Error: access denied — path is outside the repository" try: with open(path, 'r', encoding='utf-8', errors='replace') as f: @@ -58,15 +69,14 @@ def execute_tool(tool_name, tool_input): return f"Error: {e}" elif tool_name == "glob_files": pattern = tool_input["pattern"] - base = tool_input.get("base_dir", REPO_ROOT) if not pattern.startswith("/"): - pattern = os.path.join(base, pattern) - matches = glob_module.glob(pattern, recursive=True) + pattern = os.path.join(REPO_ROOT, pattern) + matches = [m for m in glob_module.glob(pattern, recursive=True) if _within_repo(m)] return "\n".join(matches[:20]) if matches else "No files found" elif tool_name == "grep_content": pattern = tool_input["pattern"] path = tool_input["path"] - if not os.path.realpath(path).startswith(os.path.realpath(REPO_ROOT)): + if not _within_repo(path): return "Error: access denied — path is outside the repository" try: result = subprocess.run( From 53636566f301e1339951ae619bed0e49f6e7048b Mon Sep 17 00:00:00 2001 From: totto Date: Sun, 1 Mar 2026 11:57:01 +0100 
Subject: [PATCH 5/6] fix: restore base_dir support with validation; use -e for grep pattern glob_files was silently discarding the declared base_dir parameter. Now reads base_dir from tool_input and validates it is within REPO_ROOT before using it (falls back to REPO_ROOT if not). grep_content now uses -e flag so LLM-provided patterns starting with '-' are treated as patterns rather than grep flags. Co-Authored-By: Claude Sonnet 4.6 --- benchmark.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/benchmark.py b/benchmark.py index 403bf8b2c1..9f69c29b7d 100644 --- a/benchmark.py +++ b/benchmark.py @@ -69,8 +69,11 @@ def execute_tool(tool_name, tool_input): return f"Error: {e}" elif tool_name == "glob_files": pattern = tool_input["pattern"] + base = tool_input.get("base_dir", REPO_ROOT) + if not _within_repo(base): + base = REPO_ROOT if not pattern.startswith("/"): - pattern = os.path.join(REPO_ROOT, pattern) + pattern = os.path.join(base, pattern) matches = [m for m in glob_module.glob(pattern, recursive=True) if _within_repo(m)] return "\n".join(matches[:20]) if matches else "No files found" elif tool_name == "grep_content": @@ -80,7 +83,7 @@ def execute_tool(tool_name, tool_input): return "Error: access denied — path is outside the repository" try: result = subprocess.run( - ["grep", "-r", "-l", "-m", "5", pattern, path], + ["grep", "-r", "-l", "-m", "5", "-e", pattern, path], capture_output=True, text=True, timeout=10 ) return result.stdout[:2000] if result.stdout else "No matches" From f5329323a47873151ebbf33aa4cd507aaf5d08d1 Mon Sep 17 00:00:00 2001 From: totto Date: Sun, 1 Mar 2026 12:19:08 +0100 Subject: [PATCH 6/6] fix(kcp): correct summary_of relationships for combined TL;DR units agents-tasks-tldr and tools-memory-tldr each cover two concepts but summary_of can only reference one unit ID. 
The previous manifest had summary_unit on both tasks and memory pointing to the combined TL;DRs, but those TL;DRs declared summary_of for only the first topic (agents, tools). A strict KCP agent could skip the TL;DR when starting from tasks or memory, defeating the optimisation. Fix: remove summary_available/summary_unit from tasks and memory. Instead add a relationships section with context links from each combined TL;DR to its secondary topic unit, so agents can still discover the TL;DR via the relationship graph without misusing the summary_of field. Co-Authored-By: Claude Sonnet 4.6 --- knowledge.yaml | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/knowledge.yaml b/knowledge.yaml index 8445b1847d..2377d959bc 100644 --- a/knowledge.yaml +++ b/knowledge.yaml @@ -131,8 +131,6 @@ units: token_estimate: 11000 load_strategy: lazy priority: medium - summary_available: true - summary_unit: agents-tasks-tldr - id: crews path: docs/en/concepts/crews.mdx @@ -263,8 +261,6 @@ units: token_estimate: 8000 load_strategy: lazy priority: medium - summary_available: true - summary_unit: tools-memory-tldr - id: llms path: docs/en/concepts/llms.mdx @@ -340,3 +336,18 @@ units: token_estimate: 6000 load_strategy: lazy priority: low + +relationships: + # agents-tasks-tldr is the declared summary for agents (summary_of: agents). + # It also covers tasks conceptually; link via context so agents discover it + # when starting from the tasks unit. + - from: agents-tasks-tldr + to: tasks + type: context + + # tools-memory-tldr is the declared summary for tools (summary_of: tools). + # It also covers memory conceptually; link via context so agents discover it + # when starting from the memory unit. + - from: tools-memory-tldr + to: memory + type: context