Skip to content

Commit 6c69bf1

Browse files
committed
Semantic compression guide
1 parent f5619d7 commit 6c69bf1

File tree

5 files changed

+405
-0
lines changed

5 files changed

+405
-0
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package io.quarkiverse.langchain4j.samples.compression;
2+
3+
import jakarta.enterprise.context.ApplicationScoped;
4+
5+
import dev.langchain4j.service.MemoryId;
6+
import dev.langchain4j.service.SystemMessage;
7+
import dev.langchain4j.service.UserMessage;
8+
import io.quarkiverse.langchain4j.RegisterAiService;
9+
10+
@RegisterAiService
11+
@SystemMessage("""
12+
You are a police and helpful assistant.
13+
""")
14+
@ApplicationScoped // For demo purpose.
15+
public interface Assistant {
16+
17+
String answer(@MemoryId String id, @UserMessage String question);
18+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package io.quarkiverse.langchain4j.samples.compression;
2+
3+
import jakarta.inject.Inject;
4+
import jakarta.ws.rs.POST;
5+
import jakarta.ws.rs.Path;
6+
import jakarta.ws.rs.Produces;
7+
import jakarta.ws.rs.core.MediaType;
8+
9+
@Path("/chat")
10+
public class ChatResource {
11+
12+
@Inject
13+
Assistant assistant;
14+
15+
@POST
16+
@Produces(MediaType.TEXT_PLAIN)
17+
public String chat(String question) {
18+
// Use the same memory ID for all questions in this demo.
19+
// This is just to trigger the compression logic.
20+
return assistant.answer("abc", question);
21+
}
22+
}
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
package io.quarkiverse.langchain4j.samples.compression;
2+
3+
import java.util.ArrayList;
4+
import java.util.List;
5+
6+
import jakarta.enterprise.context.ApplicationScoped;
7+
8+
import org.eclipse.microprofile.config.inject.ConfigProperty;
9+
10+
import dev.langchain4j.data.message.AiMessage;
11+
import dev.langchain4j.data.message.ChatMessage;
12+
import dev.langchain4j.data.message.ChatMessageType;
13+
import dev.langchain4j.data.message.SystemMessage;
14+
import dev.langchain4j.data.message.UserMessage;
15+
import dev.langchain4j.model.chat.ChatModel;
16+
import dev.langchain4j.store.memory.chat.ChatMemoryStore;
17+
import dev.langchain4j.store.memory.chat.InMemoryChatMemoryStore;
18+
import io.quarkus.logging.Log;
19+
20+
@ApplicationScoped
21+
public class CompressingChatMemoryStore implements ChatMemoryStore {
22+
23+
/**
24+
* The delegate store that will hold the chat messages.
25+
* This could be a database-backed store, but for simplicity,
26+
* we are using an in-memory store here.
27+
*/
28+
private final ChatMemoryStore delegate;
29+
30+
/**
31+
* The chat model used for summarization.
32+
*/
33+
private final ChatModel chatModel;
34+
35+
/**
36+
* The threshold for the number of messages before compression is triggered.
37+
*/
38+
private final int threshold;
39+
40+
/**
41+
* The prefix used to identify the summary in the system message.
42+
* This is used to ensure that we can extract and update the summary correctly.
43+
*/
44+
private static final String SUMMARY_PREFIX = "Context: The following is a summary of the previous conversation:";
45+
46+
public CompressingChatMemoryStore(ChatModel model, // We use the default chat model, but you can select any chat model
47+
@ConfigProperty(name = "semantic-compression-threshold", defaultValue = "5") int threshold) {
48+
this.delegate = new InMemoryChatMemoryStore();
49+
this.chatModel = model;
50+
this.threshold = threshold;
51+
}
52+
53+
@Override
54+
public void updateMessages(Object memoryId, List<ChatMessage> messages) {
55+
// Extract the last message if any, as we do not want to compress during function calls
56+
if (messages.isEmpty()) {
57+
Log.warnf("No messages to compress for memory ID: %s", memoryId);
58+
return;
59+
}
60+
ChatMessage lastMessage = messages.get(messages.size() - 1);
61+
if (lastMessage.type() == ChatMessageType.AI && ((AiMessage) lastMessage).hasToolExecutionRequests()) {
62+
Log.infof("Skipping compression for memory ID: %s due to function call in the last message", memoryId);
63+
delegate.updateMessages(memoryId, messages);
64+
return;
65+
}
66+
// Also skip compression if the last message is a system message or a function call response
67+
if (lastMessage.type() == ChatMessageType.SYSTEM || lastMessage.type() == ChatMessageType.TOOL_EXECUTION_RESULT) {
68+
Log.infof(
69+
"Skipping compression for memory ID: %s due to system message or function call response in the last message",
70+
memoryId);
71+
delegate.updateMessages(memoryId, messages);
72+
return;
73+
}
74+
75+
// If the number of messages exceeds the threshold, compress them
76+
if (messages.size() > threshold) {
77+
Log.infof("Triggering semantic compression for memory ID: %s with %d messages", memoryId, messages.size());
78+
List<ChatMessage> compressed = new ArrayList<>();
79+
80+
// Retain the first system message if present
81+
SystemMessage systemMsg = (SystemMessage) messages.stream()
82+
.filter(m -> m.type() == ChatMessageType.SYSTEM)
83+
.findFirst().orElse(null);
84+
85+
// Collect messages since last compression and extract any existing summary
86+
for (ChatMessage msg : messages) {
87+
if (msg.type() == ChatMessageType.SYSTEM) {
88+
// We found a system message, we need to check if it contains a previous summary
89+
extractSummaryFromSystemMessageIfAny((SystemMessage) msg, compressed);
90+
} else {
91+
compressed.add(msg);
92+
}
93+
}
94+
// compressed now contains a "fake" system message with the previous summary if it existed, and all other messages.
95+
96+
// Build compression prompt
97+
StringBuilder sb = new StringBuilder(
98+
"Summarize the following dialogue into a brief summary, preserving context and tone:\n\n");
99+
for (ChatMessage msg : messages) {
100+
switch (msg.type()) {
101+
case SYSTEM ->
102+
// This is the previous summary
103+
sb.append("Context: ").append(((SystemMessage) msg).text()).append("\n");
104+
case USER -> sb.append("User: ").append(((UserMessage) msg).singleText()).append("\n");
105+
case AI -> sb.append("Assistant: ").append(((AiMessage) msg).text()).append("\n");
106+
default -> {
107+
// Ignore other message types for compression
108+
}
109+
}
110+
}
111+
String summary = chatModel.chat(sb.toString());
112+
systemMsg = appendSummaryToSystemMessage(systemMsg, summary);
113+
Log.infof("Generated system message with summary: %s", systemMsg.text());
114+
delegate.updateMessages(memoryId, List.of(systemMsg));
115+
} else {
116+
delegate.updateMessages(memoryId, messages);
117+
}
118+
}
119+
120+
private SystemMessage appendSummaryToSystemMessage(SystemMessage systemMsg, String summary) {
121+
if (systemMsg == null) {
122+
// If no system message exists, create a new one with the summary
123+
return SystemMessage.systemMessage(SUMMARY_PREFIX + "\n" + summary);
124+
}
125+
// Check if the system message already contains a summary
126+
String content = systemMsg.text();
127+
if (content.contains(SUMMARY_PREFIX)) {
128+
// Replace the existing summary with the new one
129+
int startIndex = content.indexOf(SUMMARY_PREFIX) + SUMMARY_PREFIX.length();
130+
String newContent = content.substring(0, startIndex) + "\n\n";
131+
newContent = newContent + "\n\n" + SUMMARY_PREFIX + "\n" + summary;
132+
return SystemMessage.systemMessage(newContent);
133+
} else {
134+
// If no summary exists, append the new summary
135+
String newContent = content + "\n\n" + SUMMARY_PREFIX + "\n" + summary;
136+
return SystemMessage.systemMessage(newContent);
137+
}
138+
}
139+
140+
private void extractSummaryFromSystemMessageIfAny(SystemMessage systemMsg, List<ChatMessage> compressed) {
141+
String content = systemMsg.text();
142+
if (content.contains(SUMMARY_PREFIX)) {
143+
// Extract the summary part
144+
int startIndex = content.indexOf(SUMMARY_PREFIX) + SUMMARY_PREFIX.length();
145+
String summary = content.substring(startIndex).trim();
146+
// Add the sanitized summary to the compressed messages
147+
compressed.add(SystemMessage.systemMessage(sanitize(summary)));
148+
}
149+
// Otherwise, do nothing, as we don't want to include the system message in the compressed messages.
150+
}
151+
152+
private String sanitize(String text) {
153+
// Remove the previous summary if it exists
154+
int index = text.indexOf("Context: The following is a summary of the previous conversation:");
155+
if (index != -1) {
156+
return text.substring(0, index).trim();
157+
}
158+
return text.trim();
159+
}
160+
161+
@Override
162+
public List<ChatMessage> getMessages(Object memoryId) {
163+
return delegate.getMessages(memoryId);
164+
}
165+
166+
@Override
167+
public void deleteMessages(Object memoryId) {
168+
delegate.deleteMessages(memoryId);
169+
}
170+
}

docs/modules/ROOT/nav.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
* xref:guide-web-search.adoc[Using Tavily Web Search]
2020
* xref:guide-passing-image.adoc[Passing Images to Models]
2121
* xref:guide-generating-image.adoc[Generating Images]
22+
* xref:guide-semantic-compression.adoc[Compressing Chat History]
2223
// * xref:guide-agentic-patterns.adoc[Implementing Agentic patterns]
2324
// * xref:guide-structured-output.adoc[Returning structured data from a model]
2425
// * xref:guide-streamed-responses.adoc[Using function calling]

0 commit comments

Comments
 (0)