1- import requests
1+ # file: extract_actions_memories.py
2+
23import os
3- from dotenv import load_dotenv
44import json
5- from typing import Dict , List , Optional , Type , Any
5+ from typing import List , Dict
6+ from dotenv import load_dotenv
7+ import requests
68
7- # Load environment variables at the start
89load_dotenv ()
910
10- # --- Base Runnable Class ---
11- class BaseRunnable :
12- def __init__ (
13- self ,
14- model_url : Optional [str ], # URL might be different for different providers
15- model_name : str ,
16- system_prompt_template : str ,
17- user_prompt_template : str ,
18- input_variables : List [str ],
19- required_format : Optional [Dict ],
20- response_type : str = "chat" ,
21- stream : bool = False ,
22- stateful : bool = False ,
23- max_tokens : Optional [int ] = None , # Added max_tokens
24- temperature : float = 0.7 # Added temperature
25- ):
26- self .model_url = model_url # Kept for Ollama, not strictly needed for OpenRouter (hardcoded URL)
27- self .model_name = model_name
28- self .system_prompt_template = system_prompt_template # Use template for formatting later
29- self .user_prompt_template = user_prompt_template # Use template for formatting later
30- self .input_variables = input_variables
31- self .required_format = required_format
32- self .response_type = response_type
33- self .stream = stream
34- self .stateful = stateful
35- self .history : List [Dict [str , str ]] = [] # Type hint for history
36- self .max_tokens = max_tokens
37- self .temperature = temperature
38-
39- def add_to_history (self , chat_history : List [Dict [str , str ]]) -> None :
40- self .history .extend (chat_history )
41-
42- def invoke (self , inputs : Dict [str , str ]) -> Dict [str , Any ]: # Return type hint
43- raise NotImplementedError ("Subclasses must implement invoke method" )
44-
45- # --- Ollama Runnable (Kept for comparison/alternative) ---
46- class OllamaRunnable (BaseRunnable ):
47- def invoke (self , inputs : Dict [str , str ]) -> Dict [str , Any ]:
48- # Format prompts with input variables
49- system_prompt = self .system_prompt_template .format (** inputs )
50- user_prompt = self .user_prompt_template .format (** inputs )
51-
52- # Ollama typically uses a single prompt string or messages list depending on endpoint/version
53- # This example uses a single prompt string for simplicity with generate endpoint
54- full_prompt = f"{ system_prompt } \n \n { user_prompt } "
55-
56- # Prepare payload for Ollama API
57- payload : Dict [str , Any ] = {
58- "model" : self .model_name ,
59- "prompt" : full_prompt ,
60- "stream" : self .stream ,
61- "options" : { # Ollama options object for temperature, max_tokens etc.
62- "temperature" : self .temperature ,
63- "num_predict" : self .max_tokens , # Ollama uses num_predict for max_tokens
64- }
65- }
11+ # -------- Step 1: Load Raw Data from Text File -------- #
def load_raw_data() -> List[str]:
    """Return the raw user-data entries exposed by the ``user_data`` module.

    The module is imported inside the function, so importing this file does
    not require ``user_data`` to be importable until the data is requested.

    Returns:
        The ``raw_data_list`` attribute of ``user_data``.
    """
    import user_data as data_module

    entries: List[str] = data_module.raw_data_list
    return entries
6615
67- # Add format if requested (Ollama requires "json" string, not required_format dict)
68- if self . response_type == "json" :
69- payload [ "format" ] = "json"
16+ # -------- Step 2: Define Prompt Templates -------- #
# System prompt sent verbatim as the "system" message. It is NOT passed through
# str.format, so the literal braces in the JSON examples need no escaping.
# The few-shot examples must obey the prompt's own rules (no invented facts):
# Example 1 records the invoice as *dated* October 26, 2023, because the input
# never states the day it was received.
system_prompt_template = """
You are an expert assistant specialized in extracting structured information from various types of user communication data such as emails, messages, queries, and notifications.

Your goal is to analyze the provided raw user data carefully and extract two specific categories of information:

1. Action Items:
   - Clear, actionable tasks or requests the user needs to complete.
   - Often involve interactions with tools like calendars, emails, reminders, documents, or appointments.
   - Provide concise, unambiguous task descriptions.

2. Memory Items:
   - Personal notes, facts, or context about the user's life: health, family, career, events, preferences.
   - Informational and non-actionable, useful for future reference.

Important Instructions:
- Only extract information explicitly or strongly implied by the data.
- Do NOT add assumptions or unrelated details.
- Format the output strictly as a JSON object matching the required schema.
- If no relevant items exist in a category, return an empty list for that category.
- The input data may be formal or informal, questions, notifications, or any mix, so interpret accordingly.

Examples:

Example 1:
Input:
\"\"\"
Subject: Invoice #INV-2023-01 Received
To whom it may concern,

We have received your invoice #INV-2023-01 dated October 26, 2023. Payment processing is underway and is expected within 10 business days.

Regards,
Accounts Payable Department
\"\"\"

Output:
{
  "action_items": [
    "Track payment processing for invoice #INV-2023-01"
  ],
  "memory_items": [
    "Invoice #INV-2023-01 is dated October 26, 2023",
    "Payment expected within 10 business days"
  ]
}

Example 2:
Input:
\"\"\"
Hi Sarah,

Are you doing anything fun this weekend? Thought we might grab pizza if you're free.

Best,
Mike
\"\"\"

Output:
{
  "action_items": [
    "Ask Sarah if she is free to grab pizza this weekend"
  ],
  "memory_items": [
    "User wants to make weekend plans with Sarah"
  ]
}

Your structured output must contain exactly two fields:
- "action_items": an array of actionable task strings.
- "memory_items": an array of personal or contextual information strings.

Focus on precision, clarity, and relevance in your extraction.
"""
90+
# User-message template. It is rendered with str.format(raw_data=...), so the
# literal braces of the JSON skeleton are doubled ("{{" / "}}") to survive
# formatting; only {raw_data} is a placeholder.
user_prompt_template = """
Given the following raw user data:
{raw_data}

Analyze it thoroughly and extract all relevant action items and memory items as per the instructions.

Return the output ONLY as a JSON object following this structure:
{{
  "action_items": [ ... ],
  "memory_items": [ ... ]
}}
"""


# JSON Schema (as text) describing the expected model output. It is appended
# to the user message as-is and never passed through str.format, so its braces
# are single. The text must itself be valid JSON: every opened object is
# closed, and both top-level fields are listed as required.
required_format = """
{
  "type": "object",
  "properties": {
    "action_items": {
      "type": "array",
      "items": {
        "type": "string",
        "description": "A task or action the user needs to complete"
      },
      "description": "List of tasks or actions the user needs to take"
    },
    "memory_items": {
      "type": "array",
      "items": {
        "type": "string",
        "description": "A personal note or memory"
      },
      "description": "List of personal notes or memories"
    }
  },
  "required": ["action_items", "memory_items"]
}
"""
126+
127+ # -------- Step 3: Create a Runnable Class for OpenRouter -------- #
class OpenRouterRunnable:
    """Small client that sends the extraction prompts to a chat-completions URL.

    The API key is read from the ``OPENROUTER_API_KEY`` environment variable
    when the instance is created and sent as a bearer token.

    Args:
        model_url: Full URL of the chat-completions endpoint to POST to.
        model_name: Model identifier placed in the request payload.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises a timeout error. Without a timeout a stalled connection
            would block the caller forever.
    """

    # Keys every successful or fallback result carries.
    _RESULT_KEYS = ("action_items", "memory_items")

    def __init__(self, model_url: str, model_name: str, timeout: float = 60.0):
        self.api_url = model_url
        self.model = model_name
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Bearer {os.getenv('OPENROUTER_API_KEY')}",
            "Content-Type": "application/json",
        }

    @staticmethod
    def _parse_content(content: str) -> Dict[str, List[str]]:
        """Turn the model's text reply into the two-list result dict.

        The reply is parsed as JSON directly; if that fails, the text between
        the first ``{`` and the last ``}`` is parsed instead, which tolerates
        Markdown code fences or prose around the JSON object.

        Args:
            content: The text of the assistant message.

        Returns:
            A dict with exactly the keys ``action_items`` and ``memory_items``.
            A missing key becomes an empty list; non-string entries are
            converted with ``str``.

        Raises:
            ValueError: If ``content`` is not a string, contains no parseable
                JSON object, or a field is not a list
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        if not isinstance(content, str):
            raise ValueError(f"expected text content, got {type(content).__name__}")

        text = content.strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            start, end = text.find("{"), text.rfind("}")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(text[start:end + 1])

        if not isinstance(parsed, dict):
            raise ValueError("model response is not a JSON object")

        result: Dict[str, List[str]] = {}
        for key in OpenRouterRunnable._RESULT_KEYS:
            items = parsed.get(key, [])
            if not isinstance(items, list):
                raise ValueError(f"'{key}' is not a list")
            result[key] = [str(item) for item in items]
        return result

    def invoke(self, inputs: Dict[str, str]) -> Dict[str, List[str]]:
        """Send one piece of raw data to the model and return the extraction.

        Args:
            inputs: Must contain ``"raw_data"``. May contain
                ``"required_format"`` (schema text appended to the user
                message); the module-level ``required_format`` is used when
                it is absent.

        Returns:
            The parsed result dict, or ``{"action_items": [], "memory_items": []}``
            when the response body cannot be interpreted.

        Raises:
            KeyError: If ``inputs`` has no ``"raw_data"`` entry.
            requests.RequestException: On connection errors, timeouts, or a
                non-2xx status (via ``raise_for_status``).
        """
        schema_text = inputs.get("required_format", required_format)
        user_prompt = user_prompt_template.format(raw_data=inputs["raw_data"])
        messages = [
            {"role": "system", "content": system_prompt_template},
            {"role": "user", "content": user_prompt + "\n" + schema_text},
        ]

        payload = {
            "model": self.model,
            "messages": messages,
            "stream": False,
        }

        response = requests.post(
            self.api_url,
            headers=self.headers,
            json=payload,
            timeout=self.timeout,
        )
        # HTTP-level failures propagate to the caller; only an uninterpretable
        # body is downgraded to an empty result below.
        response.raise_for_status()

        try:
            content = response.json()["choices"][0]["message"]["content"]
            return self._parse_content(content)
        except (KeyError, IndexError, TypeError, ValueError) as e:
            print("Failed to parse model response:", e)
            print("Raw output:", response.text)
            return {"action_items": [], "memory_items": []}
159+
160+ # -------- Step 4: Main Processing Logic -------- #
def extract_action_and_memory_items(
    index: int = 10,
    model_url: str = "https://openrouter.ai/api/v1/chat/completions",
    model_name: str = "qwen/qwen3-8b",
) -> Dict[str, List[str]]:
    """Extract action and memory items from one entry of the raw data list.

    Args:
        index: Position of the entry to process in the list returned by
            ``load_raw_data``. Defaults to 10, the entry this function has
            always processed. Negative values index from the end.
        model_url: Chat-completions endpoint handed to ``OpenRouterRunnable``.
        model_name: Model identifier handed to ``OpenRouterRunnable``.

    Returns:
        The dict returned by ``OpenRouterRunnable.invoke``, or
        ``{"action_items": [], "memory_items": []}`` when there is no raw data.

    Raises:
        IndexError: If ``index`` is outside the bounds of the raw data list.
    """
    raw_data_list = load_raw_data()
    if not raw_data_list:
        return {"action_items": [], "memory_items": []}

    try:
        raw_data_str = raw_data_list[index]
    except IndexError:
        # Same exception type as plain indexing, but it says what went wrong.
        raise IndexError(
            f"raw data index {index} is out of range for "
            f"{len(raw_data_list)} entries"
        ) from None

    runnable = OpenRouterRunnable(model_url=model_url, model_name=model_name)

    return runnable.invoke({"raw_data": raw_data_str, "required_format": required_format})
175+
176+
177+ # -------- Optional Entry Point -------- #
# Script entry point: run the extraction once and pretty-print the result.
if __name__ == "__main__":
    extraction = extract_action_and_memory_items()
    rendered = json.dumps(extraction, indent=2)
    print(rendered)
0 commit comments