Skip to content

Commit 79efcc5

Browse files
authored
context management cleanup (#807)
* added system prompt composer
* further clean up
* breakdown memory context building
* further clean up
* unify composition
* more cleanups
* updated compaction
* centralize tool selections
* minor clean up
* minor improvements
* updated cache performance
* fix issues
1 parent 48fe119 commit 79efcc5

21 files changed

+1360
-1019
lines changed

Packages/OsaurusCore/Models/API/OpenAIAPI.swift

Lines changed: 3 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -357,6 +357,8 @@ struct ChatCompletionRequest: Codable, Sendable {
357357
var cache_hint: String? = nil
358358
/// Model-specific options from the active ModelProfile (not serialized to JSON).
359359
var modelOptions: [String: ModelOptionValue]? = nil
360+
/// Static system prompt content for prefix cache building (not serialized to JSON).
361+
var staticPrefix: String? = nil
360362

361363
/// Resolved max tokens, preferring max_tokens then max_completion_tokens.
362364
var resolvedMaxTokens: Int? { max_tokens ?? max_completion_tokens }
@@ -385,6 +387,7 @@ struct ChatCompletionRequest: Codable, Sendable {
385387
cache_hint: cache_hint
386388
)
387389
copy.modelOptions = modelOptions
390+
copy.staticPrefix = staticPrefix
388391
return copy
389392
}
390393
}

Packages/OsaurusCore/Networking/HTTPHandler.swift

Lines changed: 11 additions & 17 deletions
Original file line number · Diff line number · Diff line change
@@ -1312,27 +1312,21 @@ final class HTTPHandler: ChannelInboundHandler, Sendable {
13121312
_ request: ChatCompletionRequest,
13131313
agentId: String?
13141314
) async -> ChatCompletionRequest {
1315-
guard let agentId, !agentId.isEmpty else { return request }
1315+
guard let agentId, !agentId.isEmpty,
1316+
let agentUUID = UUID(uuidString: agentId)
1317+
else { return request }
13161318

13171319
var enriched = request
1318-
1319-
if let agentUUID = UUID(uuidString: agentId) {
1320-
let agentPrompt = await MainActor.run {
1321-
SystemPromptBuilder.effectiveBasePrompt(
1322-
AgentManager.shared.effectiveSystemPrompt(for: agentUUID)
1323-
)
1324-
}
1325-
SystemPromptBuilder.injectSystemContent(agentPrompt, into: &enriched.messages)
1326-
}
1327-
13281320
let query = request.messages.last(where: { $0.role == "user" })?.content ?? ""
1329-
let memoryContext = await MemoryContextAssembler.assembleContext(
1330-
agentId: agentId,
1331-
config: MemoryConfigurationStore.load(),
1332-
query: query
1321+
let (hint, prefix) = await SystemPromptComposer.injectAgentContext(
1322+
agentId: agentUUID,
1323+
query: query,
1324+
into: &enriched.messages
13331325
)
1334-
SystemPromptBuilder.injectMemoryContext(memoryContext, into: &enriched.messages)
1335-
1326+
if enriched.cache_hint == nil {
1327+
enriched.cache_hint = hint
1328+
enriched.staticPrefix = prefix
1329+
}
13361330
return enriched
13371331
}
13381332

Packages/OsaurusCore/Services/Chat/ChatEngine.swift

Lines changed: 7 additions & 23 deletions
Original file line number · Diff line number · Diff line change
@@ -27,24 +27,6 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
2727
}
2828
struct EngineError: Error {}
2929

30-
private func enrichMessagesWithSystemPrompt(_ messages: [ChatMessage]) async -> [ChatMessage] {
31-
debugLog("[ChatEngine] enrichMessages: start count=\(messages.count)")
32-
if messages.contains(where: { $0.role == "system" }) {
33-
debugLog("[ChatEngine] enrichMessages: already has system, returning early")
34-
return messages
35-
}
36-
37-
let systemPrompt = await MainActor.run {
38-
ChatConfigurationStore.load().systemPrompt
39-
}
40-
debugLog("[ChatEngine] enrichMessages: got systemPrompt, injecting")
41-
42-
let effective = SystemPromptBuilder.effectiveBasePrompt(systemPrompt)
43-
var enriched = messages
44-
SystemPromptBuilder.injectSystemContent(effective, into: &enriched)
45-
return enriched
46-
}
47-
4830
/// Estimate input tokens from messages (rough heuristic: ~4 chars per token)
4931
private func estimateInputTokens(_ messages: [ChatMessage]) -> Int {
5032
let totalChars = messages.reduce(0) { sum, msg in
@@ -55,8 +37,8 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
5537

5638
func streamChat(request: ChatCompletionRequest) async throws -> AsyncThrowingStream<String, Error> {
5739
debugLog("[ChatEngine] streamChat: start model=\(request.model)")
58-
let messages = await enrichMessagesWithSystemPrompt(request.messages)
59-
debugLog("[ChatEngine] streamChat: enriched messages count=\(messages.count), fetching remote services")
40+
let messages = request.messages
41+
debugLog("[ChatEngine] streamChat: messages count=\(messages.count), fetching remote services")
6042
let temperature = request.temperature
6143
let maxTokens = request.max_tokens ?? 16384
6244
let repPenalty: Float? = {
@@ -72,7 +54,8 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
7254
repetitionPenalty: repPenalty,
7355
modelOptions: request.modelOptions ?? [:],
7456
sessionId: request.session_id,
75-
cacheHint: request.cache_hint
57+
cacheHint: request.cache_hint,
58+
staticPrefix: request.staticPrefix
7659
)
7760

7861
// Candidate services and installed models (injected for testability)
@@ -268,7 +251,7 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
268251

269252
func completeChat(request: ChatCompletionRequest) async throws -> ChatCompletionResponse {
270253
let startTime = Date()
271-
let messages = await enrichMessagesWithSystemPrompt(request.messages)
254+
let messages = request.messages
272255
let inputTokens = estimateInputTokens(messages)
273256
let temperature = request.temperature
274257
let maxTokens = request.max_tokens ?? 16384
@@ -284,7 +267,8 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
284267
repetitionPenalty: repPenalty2,
285268
modelOptions: request.modelOptions ?? [:],
286269
sessionId: request.session_id,
287-
cacheHint: request.cache_hint
270+
cacheHint: request.cache_hint,
271+
staticPrefix: request.staticPrefix
288272
)
289273

290274
let services = self.services

Packages/OsaurusCore/Services/Chat/ContextBudgetManager.swift

Lines changed: 103 additions & 49 deletions
Original file line number · Diff line number · Diff line change
@@ -9,46 +9,102 @@
99

1010
import Foundation
1111

12-
/// Per-category token breakdown for the context window, displayed in the
13-
/// context budget hover popover.
14-
public struct ContextTokenBreakdown: Equatable, Sendable {
15-
public var systemPrompt: Int = 0
16-
public var memory: Int = 0
17-
public var tools: Int = 0
18-
public var conversation: Int = 0
19-
public var input: Int = 0
20-
public var output: Int = 0
12+
/// Dynamic token breakdown for the context window, displayed in the
13+
/// context budget hover popover. Entries are derived from the composer's
14+
/// manifest sections rather than hardcoded fields.
15+
public struct ContextBreakdown: Equatable, Sendable {
16+
17+
public struct Entry: Identifiable, Equatable, Sendable {
18+
public let id: String
19+
public let label: String
20+
public var tokens: Int
21+
public let tint: Tint
22+
}
23+
24+
public enum Tint: String, Sendable {
25+
case purple, blue, orange, green, gray, cyan, teal, indigo
26+
}
27+
28+
/// Prompt sections + tools
29+
public var context: [Entry]
30+
/// Conversation + input + output
31+
public var messages: [Entry]
2132

2233
public var total: Int {
23-
systemPrompt + memory + tools + conversation + input + output
34+
context.reduce(0) { $0 + $1.tokens } + messages.reduce(0) { $0 + $1.tokens }
2435
}
2536

26-
public static let zero = ContextTokenBreakdown()
37+
public var allEntries: [Entry] { context + messages }
38+
39+
public static let zero = ContextBreakdown(context: [], messages: [])
40+
41+
/// Tint for a given prompt section ID.
42+
static func tint(for sectionId: String) -> Tint {
43+
switch sectionId {
44+
case "base": return .purple
45+
case "workMode": return .indigo
46+
case "sandbox": return .teal
47+
case "memory": return .blue
48+
case "preflight": return .cyan
49+
case "skills": return .orange
50+
default: return .gray
51+
}
52+
}
2753

28-
/// Non-zero categories with their display metadata.
29-
public var categories: [Category] {
30-
Category.all(from: self).filter { $0.tokens > 0 }
54+
/// Build a breakdown from a `ComposedContext` with optional message token counts.
55+
static func from(
56+
context composed: ComposedContext,
57+
conversationTokens: Int = 0,
58+
inputTokens: Int = 0,
59+
outputTokens: Int = 0
60+
) -> ContextBreakdown {
61+
.from(
62+
manifest: composed.manifest,
63+
toolTokens: composed.toolTokens,
64+
conversationTokens: conversationTokens,
65+
inputTokens: inputTokens,
66+
outputTokens: outputTokens
67+
)
3168
}
3269

33-
public struct Category: Identifiable {
34-
public let label: String
35-
public let tokens: Int
36-
public let tint: Tint
37-
public var id: String { label }
70+
/// Build a breakdown from a manifest + tool tokens.
71+
public static func from(
72+
manifest: PromptManifest,
73+
toolTokens: Int = 0,
74+
conversationTokens: Int = 0,
75+
inputTokens: Int = 0,
76+
outputTokens: Int = 0
77+
) -> ContextBreakdown {
78+
var ctx: [Entry] = manifest.sections
79+
.filter { $0.estimatedTokens > 0 }
80+
.map { Entry(id: $0.id, label: $0.label, tokens: $0.estimatedTokens, tint: tint(for: $0.id)) }
81+
if toolTokens > 0 {
82+
ctx.append(Entry(id: "tools", label: "Tools", tokens: toolTokens, tint: .orange))
83+
}
3884

39-
public enum Tint: String {
40-
case purple, blue, orange, green, gray, cyan
85+
var msgs: [Entry] = []
86+
if conversationTokens > 0 {
87+
msgs.append(Entry(id: "conversation", label: "Conversation", tokens: conversationTokens, tint: .gray))
4188
}
89+
if inputTokens > 0 { msgs.append(Entry(id: "input", label: "Input", tokens: inputTokens, tint: .cyan)) }
90+
if outputTokens > 0 { msgs.append(Entry(id: "output", label: "Output", tokens: outputTokens, tint: .green)) }
4291

43-
static func all(from b: ContextTokenBreakdown) -> [Category] {
44-
[
45-
Category(label: "System Prompt", tokens: b.systemPrompt, tint: .purple),
46-
Category(label: "Memory", tokens: b.memory, tint: .blue),
47-
Category(label: "Tools", tokens: b.tools, tint: .orange),
48-
Category(label: "Conversation", tokens: b.conversation, tint: .gray),
49-
Category(label: "Input", tokens: b.input, tint: .cyan),
50-
Category(label: "Output", tokens: b.output, tint: .green),
51-
]
92+
return ContextBreakdown(context: ctx, messages: msgs)
93+
}
94+
95+
/// Update the token count for an entry by ID, or append it if not present.
96+
public mutating func setTokens(
97+
for id: String,
98+
in group: WritableKeyPath<ContextBreakdown, [Entry]>,
99+
tokens: Int,
100+
label: String = "",
101+
tint: Tint = .gray
102+
) {
103+
if let idx = self[keyPath: group].firstIndex(where: { $0.id == id }) {
104+
let existing = self[keyPath: group][idx]
105+
self[keyPath: group][idx] = Entry(id: id, label: existing.label, tokens: tokens, tint: existing.tint)
106+
} else if tokens > 0 {
107+
self[keyPath: group].append(Entry(id: id, label: label, tokens: tokens, tint: tint))
52108
}
53109
}
54110
}
@@ -344,47 +400,45 @@ public struct ContextBudgetManager: Sendable {
344400
/// Tracks the active request's token breakdown during streaming/execution.
345401
///
346402
/// Both `ChatSession` and `WorkSession` own an instance. The lifecycle is:
347-
/// 1. `snapshot()` — after preflight, captures the actual system prompt, memory, and tool tokens
348-
/// 2. `updateConversation()` — at each agent-loop iteration, updates conversation tokens
349-
/// 3. `activeBreakdown()` — O(1) read returning the snapshot + live output tokens
403+
/// 1. `snapshot()` — captures context from ComposedContext or manifest
404+
/// 2. `updateConversation()` — at each agent-loop iteration, updates conversation + output tokens
405+
/// 3. `activeBreakdown()` — O(1) read returning the snapshot with live message tokens
350406
/// 4. `clear()` — on completion/error/cancellation
351407
@MainActor
352408
final class ContextBudgetTracker {
353-
private var breakdown: ContextTokenBreakdown?
409+
private var breakdown: ContextBreakdown?
354410
private var cumulativeOutputTokens: Int = 0
355411

356-
/// Snapshot the fixed components of the actual request context.
357-
func snapshot(systemPromptChars: Int, memoryTokens: Int, toolTokens: Int) {
358-
var bd = ContextTokenBreakdown()
359-
bd.systemPrompt = max(1, systemPromptChars / ContextBudgetManager.charsPerToken)
360-
bd.memory = memoryTokens
361-
bd.tools = toolTokens
362-
breakdown = bd
412+
/// Snapshot from a ComposedContext (chat path).
413+
func snapshot(context: ComposedContext) {
414+
breakdown = .from(context: context)
415+
}
416+
417+
/// Snapshot from a manifest + tool tokens (work path where ComposedContext isn't available).
418+
func snapshot(manifest: PromptManifest, toolTokens: Int) {
419+
breakdown = .from(manifest: manifest, toolTokens: toolTokens)
363420
}
364421

365422
/// Update conversation tokens at each agent-loop iteration start.
366-
/// Accumulates the finished turn's output before starting a new iteration
367-
/// so the `output` category reflects total model output across all turns.
368423
func updateConversation(tokens: Int, finishedOutputTurn: ChatTurn? = nil) {
369424
if let turn = finishedOutputTurn, turn.role == .assistant {
370425
cumulativeOutputTokens += ContextBudgetManager.estimateOutputTokens(for: turn)
371426
}
372-
breakdown?.conversation = tokens
427+
breakdown?.setTokens(for: "conversation", in: \.messages, tokens: tokens, label: "Conversation", tint: .gray)
373428
}
374429

375-
/// Returns the snapshot with live output tokens appended, or nil if
376-
/// no snapshot is active (caller falls back to full recomputation).
377-
func activeBreakdown(isActive: Bool, outputTurn: ChatTurn?) -> ContextTokenBreakdown? {
430+
/// Returns the snapshot with live output tokens, or nil if no snapshot is active.
431+
func activeBreakdown(isActive: Bool, outputTurn: ChatTurn?) -> ContextBreakdown? {
378432
guard var bd = breakdown, isActive else { return nil }
379433
var currentTurnOutput = 0
380434
if let turn = outputTurn, turn.role == .assistant {
381435
currentTurnOutput = ContextBudgetManager.estimateOutputTokens(for: turn)
382436
}
383-
bd.output = cumulativeOutputTokens + currentTurnOutput
437+
let totalOutput = cumulativeOutputTokens + currentTurnOutput
438+
bd.setTokens(for: "output", in: \.messages, tokens: totalOutput, label: "Output", tint: .green)
384439
return bd
385440
}
386441

387-
/// Clear the active snapshot. Next read falls back to full recomputation.
388442
func clear() {
389443
breakdown = nil
390444
cumulativeOutputTokens = 0

0 commit comments

Comments (0)