Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions pkg/runtime/event.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,17 +333,19 @@ func SessionTitle(sessionID, title string) Event {
type SessionSummaryEvent struct {
AgentContext

Type string `json:"type"`
SessionID string `json:"session_id"`
Summary string `json:"summary"`
Type string `json:"type"`
SessionID string `json:"session_id"`
Summary string `json:"summary"`
FirstKeptEntry int `json:"first_kept_entry,omitempty"`
}

func SessionSummary(sessionID, summary, agentName string) Event {
func SessionSummary(sessionID, summary, agentName string, firstKeptEntry int) Event {
return &SessionSummaryEvent{
Type: "session_summary",
SessionID: sessionID,
Summary: summary,
AgentContext: newAgentContext(agentName),
Type: "session_summary",
SessionID: sessionID,
Summary: summary,
FirstKeptEntry: firstKeptEntry,
AgentContext: newAgentContext(agentName),
}
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/runtime/persistent_runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func (r *PersistentRuntime) handleEvent(ctx context.Context, sess *session.Sessi
}

case *SessionSummaryEvent:
if err := r.sessionStore.AddSummary(ctx, e.SessionID, e.Summary); err != nil {
if err := r.sessionStore.AddSummary(ctx, e.SessionID, e.Summary, e.FirstKeptEntry); err != nil {
slog.Warn("Failed to persist summary", "session_id", e.SessionID, "error", err)
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/runtime/remote_runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ func (r *RemoteRuntime) Resume(ctx context.Context, req ResumeRequest) {
// Summarize generates a summary for the session
func (r *RemoteRuntime) Summarize(_ context.Context, sess *session.Session, _ string, events chan Event) {
slog.Debug("Summarize not yet implemented for remote runtime", "session_id", r.sessionID)
events <- SessionSummary(sess.ID, "Summary generation not yet implemented for remote runtime", r.currentAgent)
events <- SessionSummary(sess.ID, "Summary generation not yet implemented for remote runtime", r.currentAgent, 0)
}

func (r *RemoteRuntime) convertSessionMessages(sess *session.Session) []api.Message {
Expand Down
77 changes: 70 additions & 7 deletions pkg/runtime/session_compaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ import (

const maxSummaryTokens = 16_000

// maxKeepTokens is the maximum number of tokens to preserve from the end of
// the conversation during compaction. These recent messages are kept verbatim
// so the LLM can continue naturally after compaction.
const maxKeepTokens = 20_000

// doCompact runs compaction on a session and applies the result (events,
// persistence, token count updates). The agent is used to extract the
// conversation from the session and to obtain the model for summarization.
Expand All @@ -41,8 +46,8 @@ func (r *LocalRuntime) doCompact(ctx context.Context, sess *session.Session, a *

compactionAgent := agent.New("root", compaction.SystemPrompt, agent.WithModel(summaryModel))

// Compute the messages to compact.
messages := extractMessagesToCompact(sess, compactionAgent, int64(m.Limit.Context), additionalPrompt)
// Compute the messages to compact, keeping recent messages aside.
messages, firstKeptEntry := extractMessagesToCompact(sess, compactionAgent, int64(m.Limit.Context), additionalPrompt)

// Run the compaction.
compactionSession := session.New(
Expand Down Expand Up @@ -72,16 +77,21 @@ func (r *LocalRuntime) doCompact(ctx context.Context, sess *session.Session, a *
sess.InputTokens = compactionSession.OutputTokens
sess.OutputTokens = 0
sess.Messages = append(sess.Messages, session.Item{
Summary: summary,
Cost: compactionSession.TotalCost(),
Summary: summary,
FirstKeptEntry: firstKeptEntry,
Cost: compactionSession.TotalCost(),
})
_ = r.sessionStore.UpdateSession(ctx, sess)

slog.Debug("Generated session summary", "session_id", sess.ID, "summary_length", len(summary))
events <- SessionSummary(sess.ID, summary, a.Name())
events <- SessionSummary(sess.ID, summary, a.Name(), firstKeptEntry)
}

func extractMessagesToCompact(sess *session.Session, compactionAgent *agent.Agent, contextLimit int64, additionalPrompt string) []chat.Message {
// extractMessagesToCompact returns the messages to send to the compaction model
// and the index (into sess.Messages) of the first message that was kept aside.
// Recent messages (up to maxKeepTokens) are excluded from compaction so they
// can be preserved verbatim in the session after summarization.
func extractMessagesToCompact(sess *session.Session, compactionAgent *agent.Agent, contextLimit int64, additionalPrompt string) ([]chat.Message, int) {
// Add all the existing messages.
var messages []chat.Message
for _, msg := range sess.GetMessages(compactionAgent) {
Expand All @@ -95,6 +105,17 @@ func extractMessagesToCompact(sess *session.Session, compactionAgent *agent.Agen
messages = append(messages, msg)
}

// Split: keep the last N tokens of messages aside so the LLM retains
// recent context after compaction.
splitIdx := splitIndexForKeep(messages, maxKeepTokens)
messagesToCompact := messages[:splitIdx]
// Compute firstKeptEntry: index into sess.Messages of the first kept message.
// The kept messages start at splitIdx in the non-system filtered list. We
// need to map this back to the original sess.Messages index.
firstKeptEntry := mapToSessionIndex(sess, splitIdx)

messages = messagesToCompact

// Prepare the first (system) message.
systemPromptMessage := chat.Message{
Role: chat.MessageRoleSystem,
Expand Down Expand Up @@ -131,7 +152,49 @@ func extractMessagesToCompact(sess *session.Session, compactionAgent *agent.Agen
// Append the last (user) message.
messages = append(messages, userPromptMessage)

return messages
return messages, firstKeptEntry
}

// splitIndexForKeep returns the index that splits messages into [0:idx] (to
// compact) and [idx:] (to keep). It walks backwards accumulating tokens up to
// maxTokens, snapping to user/assistant boundaries.
func splitIndexForKeep(messages []chat.Message, maxTokens int64) int {
if len(messages) == 0 {
return 0
}

var tokens int64
// Walk from the end; find the earliest index whose suffix fits in maxTokens.
lastValidBoundary := len(messages)
for i := len(messages) - 1; i >= 0; i-- {
tokens += compaction.EstimateMessageTokens(&messages[i])
if tokens > maxTokens {
return lastValidBoundary
}
role := messages[i].Role
if role == chat.MessageRoleUser || role == chat.MessageRoleAssistant {
lastValidBoundary = i
}
}
// All messages fit within maxTokens — don't keep any aside (compact everything).
return len(messages)
}

// mapToSessionIndex maps an index in the non-system-filtered message list back
// to the corresponding index in sess.Messages. It counts only message items
// that are not system messages.
func mapToSessionIndex(sess *session.Session, filteredIdx int) int {
count := 0
for i, item := range sess.Messages {
if item.IsMessage() && item.Message.Message.Role != chat.MessageRoleSystem {
if count == filteredIdx {
return i
}
count++
}
}
// filteredIdx is past the end — no messages to keep.
return len(sess.Messages)
}

func firstMessageToKeep(messages []chat.Message, contextLimit int64) int {
Expand Down
170 changes: 169 additions & 1 deletion pkg/runtime/session_compaction_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package runtime

import (
"strings"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/docker/docker-agent/pkg/agent"
"github.com/docker/docker-agent/pkg/chat"
Expand Down Expand Up @@ -96,7 +98,7 @@ func TestExtractMessagesToCompact(t *testing.T) {
sess := session.New(session.WithMessages(tt.messages))

a := agent.New("test", "test prompt")
result := extractMessagesToCompact(sess, a, tt.contextLimit, tt.additionalPrompt)
result, _ := extractMessagesToCompact(sess, a, tt.contextLimit, tt.additionalPrompt)

assert.GreaterOrEqual(t, len(result), tt.wantConversationMsgCount+2)
assert.Equal(t, chat.MessageRoleSystem, result[0].Role)
Expand All @@ -121,3 +123,169 @@ func TestExtractMessagesToCompact(t *testing.T) {
})
}
}

func TestSplitIndexForKeep(t *testing.T) {
msg := func(role chat.MessageRole, content string) chat.Message {
return chat.Message{Role: role, Content: content}
}

tests := []struct {
name string
messages []chat.Message
maxTokens int64
wantSplit int // expected split index
}{
{
name: "empty messages",
messages: nil,
maxTokens: 1000,
wantSplit: 0,
},
{
name: "all messages fit in keep budget - compact everything",
messages: []chat.Message{
msg(chat.MessageRoleUser, "short"),
msg(chat.MessageRoleAssistant, "short"),
},
maxTokens: 100_000,
wantSplit: 2, // all fit → compact everything
},
{
name: "recent messages kept, older ones compacted",
messages: []chat.Message{
msg(chat.MessageRoleUser, strings.Repeat("a", 40000)), // ~10005 tokens
msg(chat.MessageRoleAssistant, strings.Repeat("b", 40000)), // ~10005 tokens
msg(chat.MessageRoleUser, strings.Repeat("c", 40000)), // ~10005 tokens
msg(chat.MessageRoleAssistant, strings.Repeat("d", 40000)), // ~10005 tokens
msg(chat.MessageRoleUser, strings.Repeat("e", 40000)), // ~10005 tokens
msg(chat.MessageRoleAssistant, strings.Repeat("f", 40000)), // ~10005 tokens
},
maxTokens: 20_100, // enough for exactly 2 messages
wantSplit: 4, // last 2 messages are kept
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := splitIndexForKeep(tt.messages, tt.maxTokens)
assert.Equal(t, tt.wantSplit, got)
})
}
}

func TestExtractMessagesToCompact_KeepsRecentMessages(t *testing.T) {
// Create a session with many messages, some large enough that the last
// ~20k tokens are kept aside.
var items []session.Item
for range 10 {
items = append(items, session.NewMessageItem(&session.Message{
Message: chat.Message{
Role: chat.MessageRoleUser,
Content: strings.Repeat("x", 20000), // ~5k tokens each
},
}), session.NewMessageItem(&session.Message{
Message: chat.Message{
Role: chat.MessageRoleAssistant,
Content: strings.Repeat("y", 20000), // ~5k tokens each
},
}))
}

sess := session.New(session.WithMessages(items))
a := agent.New("test", "test prompt")

result, firstKeptEntry := extractMessagesToCompact(sess, a, 200_000, "")

// The kept messages should not appear in the compaction result
// (only system + compacted messages + user prompt).
// Total: 20 messages × ~5k tokens = ~100k tokens.
// Keep budget: 20k tokens → ~4 messages kept.
// So compacted messages should be 20 - 4 = 16.
compactedMsgCount := len(result) - 2 // minus system and user prompt
assert.Less(t, compactedMsgCount, 20, "some messages should have been kept aside")
assert.Positive(t, compactedMsgCount, "some messages should be compacted")

// firstKeptEntry should point into sess.Messages
assert.Positive(t, firstKeptEntry, "firstKeptEntry should be > 0")
assert.Less(t, firstKeptEntry, len(sess.Messages), "firstKeptEntry should be within bounds")
}

func TestSessionGetMessages_WithFirstKeptEntry(t *testing.T) {
// Build a session with some messages, then add a summary with FirstKeptEntry.
items := []session.Item{
session.NewMessageItem(&session.Message{
Message: chat.Message{Role: chat.MessageRoleUser, Content: "m1"},
}),
session.NewMessageItem(&session.Message{
Message: chat.Message{Role: chat.MessageRoleAssistant, Content: "m2"},
}),
session.NewMessageItem(&session.Message{
Message: chat.Message{Role: chat.MessageRoleUser, Content: "m3"},
}),
session.NewMessageItem(&session.Message{
Message: chat.Message{Role: chat.MessageRoleAssistant, Content: "m4"},
}),
session.NewMessageItem(&session.Message{
Message: chat.Message{Role: chat.MessageRoleUser, Content: "m5"},
}),
}

// Add summary that says "first kept entry is index 3" (m4).
// So we expect: [system...] + [summary] + [m4, m5]
items = append(items, session.Item{
Summary: "This is a summary of m1-m3",
FirstKeptEntry: 3, // index of m4 in the Messages slice
})

sess := session.New(session.WithMessages(items))
a := agent.New("test", "test instruction")

messages := sess.GetMessages(a)

// Extract just the non-system messages
var conversationMessages []chat.Message
for _, msg := range messages {
if msg.Role != chat.MessageRoleSystem {
conversationMessages = append(conversationMessages, msg)
}
}

// Should have: summary (as user message), m4, m5
require.Len(t, conversationMessages, 3, "expected summary + 2 kept messages")
assert.Contains(t, conversationMessages[0].Content, "Session Summary:")
assert.Equal(t, "m4", conversationMessages[1].Content)
assert.Equal(t, "m5", conversationMessages[2].Content)
}

func TestSessionGetMessages_SummaryWithoutFirstKeptEntry(t *testing.T) {
// Backward compatibility: summary without FirstKeptEntry should work as before.
items := []session.Item{
session.NewMessageItem(&session.Message{
Message: chat.Message{Role: chat.MessageRoleUser, Content: "m1"},
}),
session.NewMessageItem(&session.Message{
Message: chat.Message{Role: chat.MessageRoleAssistant, Content: "m2"},
}),
{Summary: "This is a summary"},
session.NewMessageItem(&session.Message{
Message: chat.Message{Role: chat.MessageRoleUser, Content: "m3"},
}),
}

sess := session.New(session.WithMessages(items))
a := agent.New("test", "test instruction")

messages := sess.GetMessages(a)

var conversationMessages []chat.Message
for _, msg := range messages {
if msg.Role != chat.MessageRoleSystem {
conversationMessages = append(conversationMessages, msg)
}
}

// Should have: summary + m3 (messages after the summary)
require.Len(t, conversationMessages, 2)
assert.Contains(t, conversationMessages[0].Content, "Session Summary:")
assert.Equal(t, "m3", conversationMessages[1].Content)
}
6 changes: 6 additions & 0 deletions pkg/session/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,12 @@ func getAllMigrations() []Migration {
Description: "Drop the legacy messages JSON column now that all data lives in session_items",
UpSQL: `ALTER TABLE sessions DROP COLUMN messages`,
},
{
ID: 21,
Name: "021_add_first_kept_entry_column",
Description: "Add first_kept_entry column to session_items for compaction-preserved messages",
UpSQL: `ALTER TABLE session_items ADD COLUMN first_kept_entry INTEGER DEFAULT 0`,
},
}
}

Expand Down
Loading
Loading