Commit 095e869

Author: James Liounis (committed)
deploy chat_summary_buffer example
1 parent d73790d commit 095e869

File tree

3 files changed: +89 -0 lines changed

chat_memory_buffer.py

Lines changed: 60 additions & 0 deletions
from llama_index.core.memory import ChatSummaryMemoryBuffer
from llama_index.core.llms import ChatMessage
from llama_index.llms.openai import OpenAI as LlamaOpenAI
from openai import OpenAI as PerplexityClient
import os

# Configure the LLM used for memory summarization (summarization runs against
# the OpenAI API, so it needs an OpenAI key and the bare /v1 base URL)
llm = LlamaOpenAI(
    model="gpt-4o-2024-08-06",
    api_key=os.environ["OPENAI_API_KEY"],
    base_url="https://api.openai.com/v1",
)

# Initialize memory with token-aware summarization
memory = ChatSummaryMemoryBuffer.from_defaults(
    token_limit=3000,
    llm=llm,
)

# Add the system prompt using ChatMessage
memory.put(ChatMessage(
    role="system",
    content="You're an AI assistant providing detailed, accurate answers",
))

# Create the Perplexity (Sonar) API client
sonar_client = PerplexityClient(
    api_key=os.environ["PERPLEXITY_API_KEY"],
    base_url="https://api.perplexity.ai",
)


def chat_with_memory(user_query: str):
    # Store the user message as a ChatMessage
    memory.put(ChatMessage(role="user", content=user_query))

    # Get the token-limited (summarized) message history
    messages = memory.get()

    # Convert to the dict format the Perplexity chat completions API expects
    messages_dict = [
        {"role": m.role.value, "content": m.content}
        for m in messages
    ]

    # Execute the API call
    response = sonar_client.chat.completions.create(
        model="sonar-pro",
        messages=messages_dict,
        temperature=0.3,
    )

    # Store the assistant response back into memory
    assistant_response = response.choices[0].message.content
    memory.put(ChatMessage(
        role="assistant",
        content=assistant_response,
    ))

    return assistant_response
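For context on what the buffer above does as the conversation grows: once the stored history exceeds token_limit, ChatSummaryMemoryBuffer condenses the oldest turns with the configured llm, and get() returns that summary plus the most recent messages. The following is a minimal, self-contained sketch of that behavior (not part of this commit); the demo_ names, the small token_limit of 200, and the OPENAI_API_KEY variable are this example's own assumptions, and the exact summary text depends on the summarization model.

# summary_demo.py -- illustration only, not part of this commit
import os

from llama_index.core.llms import ChatMessage
from llama_index.core.memory import ChatSummaryMemoryBuffer
from llama_index.llms.openai import OpenAI as LlamaOpenAI

# Small token budget so the summarization path triggers after a few turns
demo_llm = LlamaOpenAI(model="gpt-4o-2024-08-06", api_key=os.environ["OPENAI_API_KEY"])
demo_memory = ChatSummaryMemoryBuffer.from_defaults(token_limit=200, llm=demo_llm)

for i in range(5):
    demo_memory.put(ChatMessage(role="user", content=f"Question {i}: explain topic {i} in detail."))
    demo_memory.put(ChatMessage(role="assistant", content=f"Answer {i}: " + "details " * 80))

# get() returns a history that fits the budget: older turns are folded into an
# LLM-written summary message and only the most recent turns stay verbatim
for m in demo_memory.get():
    print(m.role, "-", (m.content or "")[:80])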
example_usage.py

Lines changed: 29 additions & 0 deletions
# example_usage.py
from chat_memory_buffer import chat_with_memory


def demonstrate_conversation():
    # First interaction
    print("User: What is the latest news about the US Stock Market?")
    response = chat_with_memory("What is the latest news about the US Stock Market?")
    print(f"Assistant: {response}\n")

    # Follow-up question answered from the buffered context
    print("User: How does this compare to its performance last week?")
    response = chat_with_memory("How does this compare to its performance last week?")
    print(f"Assistant: {response}\n")

    # Ask the assistant to recap; the buffer lives for the whole process,
    # so the "new session" below, run in the same process, still sees the context
    print("User: Save this conversation about the US stock market.")
    chat_with_memory("Save this conversation about the US stock market.")

    print("\n--- New Session ---")
    print("User: What were we discussing earlier?")
    response = chat_with_memory("What were we discussing earlier?")
    print(f"Assistant: {response}")


if __name__ == "__main__":
    demonstrate_conversation()
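The "--- New Session ---" step above only works because demonstrate_conversation() runs in a single process, so the module-level buffer is still in memory. To carry the conversation across separate runs, the buffer's contents would need to be written to disk and reloaded. Below is a minimal sketch of one way to do that (not part of this commit); it assumes the module-level memory and llm objects from chat_memory_buffer.py, a hypothetical chat_history.json path, the buffer's get_all() method, and the chat_history argument of ChatSummaryMemoryBuffer.from_defaults.

# persistence_sketch.py -- illustrative sketch, not part of this commit
import json
import os

from llama_index.core.llms import ChatMessage
from llama_index.core.memory import ChatSummaryMemoryBuffer

# assumes the module-level objects defined in chat_memory_buffer.py
from chat_memory_buffer import llm, memory

HISTORY_PATH = "chat_history.json"  # hypothetical location


def save_memory() -> None:
    # get_all() returns every stored message, not just the token-limited view
    records = [{"role": m.role.value, "content": m.content} for m in memory.get_all()]
    with open(HISTORY_PATH, "w") as f:
        json.dump(records, f)


def load_memory() -> ChatSummaryMemoryBuffer:
    if not os.path.exists(HISTORY_PATH):
        return ChatSummaryMemoryBuffer.from_defaults(token_limit=3000, llm=llm)
    with open(HISTORY_PATH) as f:
        records = json.load(f)
    history = [ChatMessage(role=r["role"], content=r["content"]) for r in records]
    # Rebuild the buffer pre-seeded with the saved conversation
    return ChatSummaryMemoryBuffer.from_defaults(chat_history=history, token_limit=3000, llm=llm)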
