Skip to content

Commit 4000635

Browse files
committed
perf: optimize LLM orchestration with one-turn resolution and parallel prefetching
1 parent ef596af commit 4000635

File tree

3 files changed

+105
-27
lines changed

3 files changed

+105
-27
lines changed

agent.go

Lines changed: 72 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,6 +1150,23 @@ func (a *Agent) Generate(ctx context.Context, sessionID, userInput string) (any,
11501150
return "", errors.New("user input is empty")
11511151
}
11521152

1153+
// -------------------------------------------------------------
1154+
// PREFETCH: Start context retrieval and tool discovery in parallel
1155+
// -------------------------------------------------------------
1156+
var (
1157+
prefetchWG sync.WaitGroup
1158+
records []memory.MemoryRecord
1159+
)
1160+
1161+
prefetchWG.Add(1)
1162+
go func() {
1163+
defer prefetchWG.Done()
1164+
records, _ = a.retrieveContext(ctx, sessionID, userInput, a.contextLimit)
1165+
}()
1166+
1167+
// Warm the ToolSpecs cache while context retrieval runs; discovery is internally cached and thread-safe, so the result is discarded here.
1168+
_ = a.ToolSpecs()
1169+
11531170
// ---------------------------------------------
11541171
// 0. DIRECT TOOL INVOCATION (bypass everything)
11551172
// ---------------------------------------------
@@ -1202,7 +1219,8 @@ func (a *Agent) Generate(ctx context.Context, sessionID, userInput string) (any,
12021219
// ---------------------------------------------
12031220
// 4. TOOL ORCHESTRATOR (normal UTCP tools)
12041221
// ---------------------------------------------
1205-
if handled, output, err := a.toolOrchestrator(ctx, sessionID, userInput); handled {
1222+
prefetchWG.Wait() // Ensure memory is ready for orchestrator
1223+
if handled, output, err := a.toolOrchestrator(ctx, sessionID, userInput, records); handled {
12061224
if err != nil {
12071225
return "", err
12081226
}
@@ -1225,14 +1243,24 @@ func (a *Agent) Generate(ctx context.Context, sessionID, userInput string) (any,
12251243
// ---------------------------------------------
12261244
// 6. LLM COMPLETION
12271245
// ---------------------------------------------
1228-
prompt, err := a.buildPrompt(ctx, sessionID, userInput)
1229-
if err != nil {
1230-
return "", err
1231-
}
1246+
// Build LLM prompt without tools/subagents:
1247+
var sb strings.Builder
1248+
sb.Grow(4096)
1249+
1250+
sb.WriteString(a.systemPrompt)
1251+
sb.WriteString("\n\nConversation memory (TOON):\n")
1252+
sb.WriteString(a.renderMemory(records))
1253+
1254+
sb.WriteString("\n\nUser: ")
1255+
sb.WriteString(sanitizeInput(userInput))
1256+
sb.WriteString("\n\n")
1257+
1258+
prompt := sb.String()
12321259

12331260
files, _ := a.RetrieveAttachmentFiles(ctx, sessionID, a.contextLimit)
12341261

12351262
var completion any
1263+
var err error
12361264
if len(files) > 0 {
12371265
completion, err = a.model.GenerateWithFiles(ctx, prompt, files)
12381266
} else {
@@ -1429,6 +1457,7 @@ type ToolChoice struct {
14291457
ToolName string `json:"tool_name"`
14301458
Arguments map[string]any `json:"arguments"`
14311459
Reason string `json:"reason"`
1460+
Answer string `json:"answer"` // Added for one-turn resolution
14321461
}
14331462

14341463
// toolOrchestrator decides whether a UTCP tool (or a direct answer) handles userInput; it returns (handled, output, err).
@@ -1437,6 +1466,7 @@ func (a *Agent) toolOrchestrator(
14371466
ctx context.Context,
14381467
sessionID string,
14391468
userInput string,
1469+
records []memory.MemoryRecord,
14401470
) (bool, string, error) {
14411471

14421472
// FAST PATH: Skip LLM call for obvious non-tool queries
@@ -1476,31 +1506,40 @@ func (a *Agent) toolOrchestrator(
14761506
})
14771507
}
14781508

1479-
// Build tool selection prompt
1509+
// Build tool selection prompt with memory context
14801510
toolDesc := a.cachedToolPrompt(toolList)
1511+
memoryDesc := a.renderMemory(records)
14811512

14821513
choicePrompt := fmt.Sprintf(`
1483-
You are a UTCP tool selection engine.
1514+
You are a UTCP tool selection and planning engine.
14841515
1485-
A user asked:
1516+
USER REQUEST:
14861517
%q
14871518
1488-
You have access to these UTCP tools:
1519+
CONVERSATION MEMORY:
14891520
%s
14901521
1491-
Think step-by-step whether ANY tool should be used.
1522+
AVAILABLE UTCP TOOLS:
1523+
%s
1524+
1525+
OBJECTIVE:
1526+
Analyze if the user's request requires calling a tool or if it can be answered directly using conversational memory.
14921527
1493-
Return ONLY a JSON object EXACTLY like this:
1528+
RULES:
1529+
1. If a tool is needed, set "use_tool": true and provide "tool_name" and "arguments".
1530+
2. If NO tool is needed, set "use_tool": false and provide the final answer in "answer".
1531+
3. Use only the exact tool names provided.
14941532
1533+
Return ONLY a JSON object:
14951534
{
14961535
"use_tool": true|false,
14971536
"tool_name": "name or empty",
14981537
"arguments": { },
1499-
"stream": true|false
1538+
"answer": "Complete final answer if no tool is used",
1539+
"reason": "Short explanation"
15001540
}
15011541
1502-
Return ONLY JSON. No explanations.
1503-
`, userInput, toolDesc)
1542+
Return ONLY JSON.`, userInput, memoryDesc, toolDesc)
15041543

15051544
// Query LLM
15061545
raw, err := a.model.Generate(ctx, choicePrompt)
@@ -1522,6 +1561,10 @@ Return ONLY JSON. No explanations.
15221561
}
15231562

15241563
if !tc.UseTool {
1564+
if tc.Answer != "" {
1565+
a.storeMemory(sessionID, "assistant", tc.Answer, nil)
1566+
return true, tc.Answer, nil
1567+
}
15251568
return false, "", nil
15261569
}
15271570
if strings.TrimSpace(tc.ToolName) == "" {
@@ -1689,16 +1732,25 @@ func extractJSON(response string) string {
16891732
// This AVOIDS expensive LLM calls for obvious non-tool queries.
16901733
// EXTREMELY CONSERVATIVE: only filters very short inputs, greetings, and pure informational questions.
16911734
func (a *Agent) likelyNeedsToolCall(lowerInput string) bool {
1692-
// ONLY filter out EXPLICIT pure informational questions
1693-
// Examples: "what is X?", "explain Y", "why does Z"
1735+
// 0. Skip for very short inputs or greetings
1736+
if len(lowerInput) < 2 {
1737+
return false
1738+
}
1739+
greetings := []string{"hello", "hi", "hey", "good morning", "good afternoon", "thanks", "thank you"}
1740+
for _, g := range greetings {
1741+
if lowerInput == g || strings.HasPrefix(lowerInput, g+" ") || strings.HasPrefix(lowerInput, g+",") {
1742+
return false
1743+
}
1744+
}
16941745

1695-
// Check for pure question patterns WITHOUT any action words
1746+
// 1. Check for pure informational question patterns WITHOUT any action words
16961747
pureQuestionStarters := []string{
16971748
"what is ", "what are ", "what does ", "what's ",
16981749
"why is ", "why are ", "why does ", "why do ",
16991750
"who is ", "who are ", "who was ",
17001751
"when is ", "when was ", "when did ",
17011752
"where is ", "where are ", "where was ",
1753+
"how is ", "how are ", "how does ",
17021754
"explain ", "describe ", "define ",
17031755
"tell me about ", "tell me what ",
17041756
}
@@ -1711,7 +1763,9 @@ func (a *Agent) likelyNeedsToolCall(lowerInput string) bool {
17111763
strings.Contains(lowerInput, " get") ||
17121764
strings.Contains(lowerInput, " list") ||
17131765
strings.Contains(lowerInput, " show") ||
1714-
strings.Contains(lowerInput, " files")
1766+
strings.Contains(lowerInput, " files") ||
1767+
strings.Contains(lowerInput, " run") ||
1768+
strings.Contains(lowerInput, " exec")
17151769

17161770
if !hasActionWord {
17171771
// Pure informational question - skip tool orchestration

agent_stream.go

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ import (
55
"errors"
66
"fmt"
77
"strings"
8+
"sync"
89

10+
"github.com/Protocol-Lattice/go-agent/src/memory"
911
"github.com/Protocol-Lattice/go-agent/src/models"
1012
)
1113

@@ -17,6 +19,23 @@ func (a *Agent) GenerateStream(ctx context.Context, sessionID, userInput string)
1719
return nil, errors.New("user input is empty")
1820
}
1921

22+
// -------------------------------------------------------------
23+
// PREFETCH: Start context retrieval and tool discovery in parallel
24+
// -------------------------------------------------------------
25+
var (
26+
prefetchWG sync.WaitGroup
27+
records []memory.MemoryRecord
28+
)
29+
30+
prefetchWG.Add(1)
31+
go func() {
32+
defer prefetchWG.Done()
33+
records, _ = a.retrieveContext(ctx, sessionID, userInput, a.contextLimit)
34+
}()
35+
36+
// Warm the ToolSpecs cache while context retrieval runs; discovery is internally cached and thread-safe, so the result is discarded here.
37+
_ = a.ToolSpecs()
38+
2039
// Helper to wrap immediate result in a stream
2140
immediateStream := func(val any, err error) (<-chan models.StreamChunk, error) {
2241
ch := make(chan models.StreamChunk, 1)
@@ -58,7 +77,8 @@ func (a *Agent) GenerateStream(ctx context.Context, sessionID, userInput string)
5877
}
5978

6079
// 4. TOOL ORCHESTRATOR
61-
if handled, output, err := a.toolOrchestrator(ctx, sessionID, userInput); handled {
80+
prefetchWG.Wait()
81+
if handled, output, err := a.toolOrchestrator(ctx, sessionID, userInput, records); handled {
6282
return immediateStream(output, err)
6383
}
6484

@@ -71,13 +91,17 @@ func (a *Agent) GenerateStream(ctx context.Context, sessionID, userInput string)
7191
}
7292

7393
// 6. LLM COMPLETION (Streaming)
74-
prompt, err := a.buildPrompt(ctx, sessionID, userInput)
75-
if err != nil {
76-
return nil, err
77-
}
78-
79-
// Note: Currently GenerateStream does not support file attachments for streaming.
80-
// We proceed with text-only streaming.
94+
// Build prompt manually to use pre-fetched records
95+
var sb strings.Builder
96+
sb.Grow(4096)
97+
sb.WriteString(a.systemPrompt)
98+
sb.WriteString("\n\nConversation memory (TOON):\n")
99+
sb.WriteString(a.renderMemory(records))
100+
sb.WriteString("\n\nUser: ")
101+
sb.WriteString(sanitizeInput(userInput))
102+
sb.WriteString("\n\n")
103+
104+
prompt := sb.String()
81105

82106
stream, err := a.model.GenerateStream(ctx, prompt)
83107
if err != nil {

agent_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -799,7 +799,7 @@ func TestGenerate_ExecutesUTCPCalledTool(t *testing.T) {
799799
t.Fatalf("New returned error: %v", err)
800800
}
801801

802-
out, err := agent.Generate(ctx, "s1", "hello")
802+
out, err := agent.Generate(ctx, "s1", "echo something")
803803
if err != nil {
804804
t.Fatalf("Generate returned error: %v", err)
805805
}

0 commit comments

Comments
 (0)