1+ import os
2+ import sys
3+ import uuid
4+
5+
6+ sys .path .insert (
7+ 0 , os .path .dirname (os .path .dirname (os .path .dirname (os .path .dirname (os .path .abspath (__file__ )))))
8+ )
9+ sys .path .insert (
10+ 0 ,
11+ os .path .join (
12+ os .path .dirname (
13+ os .path .dirname (os .path .dirname (os .path .dirname (os .path .abspath (__file__ ))))
14+ ),
15+ "evaluation" ,
16+ "scripts" ,
17+ ),
18+ )
19+
120import argparse
221import concurrent .futures
322import json
4- import os
23+ import threading
524import time
625
726from datetime import datetime , timezone
1029
1130from dotenv import load_dotenv
1231from mem0 import MemoryClient
32+ from memobase import ChatBlob
1333from tqdm import tqdm
34+ from utils .client import memobase_client , memos_client
1435from zep_cloud .client import Zep
1536
1637from memos .configs .mem_cube import GeneralMemCubeConfig
@@ -93,7 +114,34 @@ def get_client(frame: str, user_id: str | None = None, version: str = "default")
93114 return mos
94115
95116
96- def ingest_session (client , session , frame , metadata , revised_client = None ):
117+ def string_to_uuid (s : str , salt = "memobase_client" ) -> str :
118+ return str (uuid .uuid5 (uuid .NAMESPACE_DNS , s + salt ))
119+
120+
121+ def memobase_add_memory (user , message , retries = 3 ):
122+ for attempt in range (retries ):
123+ try :
124+ _ = user .insert (ChatBlob (messages = message ), sync = True )
125+ return
126+ except Exception as e :
127+ if attempt < retries - 1 :
128+ time .sleep (1 )
129+ continue
130+ else :
131+ raise e
132+
133+
134+ def memobase_add_memories_for_speaker (client , speaker , messages ):
135+ real_uid = string_to_uuid (speaker )
136+ u = client .get_or_create_user (real_uid )
137+ for i in range (0 , len (messages ), 2 ):
138+ batch_messages = messages [i : i + 2 ]
139+ memobase_add_memory (u , batch_messages )
140+ print (f"[{ i + 1 } /{ len (messages )} ] Added messages for { speaker } successfully." )
141+ u .flush (sync = True )
142+
143+
144+ def ingest_session (client , session , frame , version , metadata , revised_client = None ):
97145 session_date = metadata ["session_date" ]
98146 date_format = "%I:%M %p on %d %B, %Y UTC"
99147 date_string = datetime .strptime (session_date , date_format ).replace (tzinfo = timezone .utc )
@@ -125,7 +173,7 @@ def ingest_session(client, session, frame, metadata, revised_client=None):
125173 group_id = conv_id ,
126174 )
127175
128- elif frame == "memos" :
176+ elif frame == "memos" or frame == "memos-api" :
129177 messages = []
130178 messages_reverse = []
131179
@@ -149,16 +197,22 @@ def ingest_session(client, session, frame, metadata, revised_client=None):
149197
150198 speaker_a_user_id = conv_id + "_speaker_a"
151199 speaker_b_user_id = conv_id + "_speaker_b"
200+ if frame == "memos-api" :
201+ client .add (messages = messages , user_id = f"{ speaker_a_user_id .replace ('_' , '' )} { version } " )
152202
153- client .add (
154- messages = messages ,
155- user_id = speaker_a_user_id ,
156- )
203+ revised_client .add (
204+ messages = messages_reverse , user_id = f"{ speaker_b_user_id .replace ('_' , '' )} { version } "
205+ )
206+ elif frame == "memos" :
207+ client .add (
208+ messages = messages ,
209+ user_id = speaker_a_user_id ,
210+ )
157211
158- revised_client .add (
159- messages = messages_reverse ,
160- user_id = speaker_b_user_id ,
161- )
212+ revised_client .add (
213+ messages = messages_reverse ,
214+ user_id = speaker_b_user_id ,
215+ )
162216 print (f"Added messages for { speaker_a_user_id } and { speaker_b_user_id } successfully." )
163217
164218 elif frame == "mem0" or frame == "mem0_graph" :
@@ -217,6 +271,77 @@ def ingest_session(client, session, frame, metadata, revised_client=None):
217271 version = "v2" ,
218272 enable_graph = True ,
219273 )
274+ elif frame == "memobase" :
275+ print (f"Processing abc for { metadata ['session_key' ]} " )
276+ messages = []
277+ messages_reverse = []
278+
279+ for chat in tqdm (session , desc = f"{ metadata ['session_key' ]} " ):
280+ data = chat .get ("speaker" ) + ": " + chat .get ("text" )
281+
282+ if chat .get ("speaker" ) == metadata ["speaker_a" ]:
283+ messages .append (
284+ {
285+ "role" : "user" ,
286+ "content" : chat .get ("text" ),
287+ "alias" : metadata ["speaker_a" ],
288+ "created_at" : iso_date ,
289+ }
290+ )
291+ messages_reverse .append (
292+ {
293+ "role" : "assistant" ,
294+ "content" : chat .get ("text" ),
295+ "alias" : metadata ["speaker_b" ],
296+ "created_at" : iso_date ,
297+ }
298+ )
299+ elif chat .get ("speaker" ) == metadata ["speaker_b" ]:
300+ messages .append (
301+ {
302+ "role" : "assistant" ,
303+ "content" : chat .get ("text" ),
304+ "alias" : metadata ["speaker_b" ],
305+ "created_at" : iso_date ,
306+ }
307+ )
308+ messages_reverse .append (
309+ {
310+ "role" : "user" ,
311+ "content" : chat .get ("text" ),
312+ "alias" : metadata ["speaker_a" ],
313+ "created_at" : iso_date ,
314+ }
315+ )
316+ else :
317+ raise ValueError (
318+ f"Unknown speaker { chat .get ('speaker' )} in session { metadata ['session_key' ]} "
319+ )
320+
321+ print ({"context" : data , "conv_id" : conv_id , "created_at" : iso_date })
322+
323+ thread_a = threading .Thread (
324+ target = memobase_add_memories_for_speaker ,
325+ args = (
326+ client ,
327+ metadata ["speaker_a_user_id" ],
328+ messages ,
329+ ),
330+ )
331+
332+ thread_b = threading .Thread (
333+ target = memobase_add_memories_for_speaker ,
334+ args = (
335+ client ,
336+ metadata ["speaker_b_user_id" ],
337+ messages_reverse ,
338+ ),
339+ )
340+
341+ thread_a .start ()
342+ thread_b .start ()
343+ thread_a .join ()
344+ thread_b .join ()
220345
221346 end_time = time .time ()
222347 elapsed_time = round (end_time - start_time , 2 )
@@ -246,7 +371,19 @@ def process_user(conv_idx, frame, locomo_df, version, num_workers=1):
246371 speaker_b_user_id = conv_id + "_speaker_b"
247372 client = get_client ("memos" , speaker_a_user_id , version )
248373 revised_client = get_client ("memos" , speaker_b_user_id , version )
249-
374+ elif frame == "memos-api" :
375+ conv_id = "locomo_exp_user_" + str (conv_idx )
376+ speaker_a_user_id = conv_id + "_speaker_a"
377+ speaker_b_user_id = conv_id + "_speaker_b"
378+ client = memos_client (mode = "api" )
379+ revised_client = memos_client (mode = "api" )
380+ elif frame == "memobase" :
381+ client = memobase_client ()
382+ conv_id = "locomo_exp_user_" + str (conv_idx )
383+ speaker_a_user_id = conv_id + "_speaker_a"
384+ speaker_b_user_id = conv_id + "_speaker_b"
385+ client .delete_user (string_to_uuid (speaker_a_user_id ))
386+ client .delete_user (string_to_uuid (speaker_b_user_id ))
250387 sessions_to_process = []
251388 for session_idx in range (max_session_count ):
252389 session_key = f"session_{ session_idx } "
@@ -272,7 +409,7 @@ def process_user(conv_idx, frame, locomo_df, version, num_workers=1):
272409 with concurrent .futures .ThreadPoolExecutor (max_workers = num_workers ) as executor :
273410 futures = {
274411 executor .submit (
275- ingest_session , client , session , frame , metadata , revised_client
412+ ingest_session , client , session , frame , version , metadata , revised_client
276413 ): metadata ["session_key" ]
277414 for session , metadata in sessions_to_process
278415 }
@@ -340,8 +477,7 @@ def main(frame, version="default", num_workers=4):
340477 parser .add_argument (
341478 "--lib" ,
342479 type = str ,
343- choices = ["zep" , "memos" , "mem0" , "mem0_graph" ],
344- help = "Specify the memory framework (zep or memos or mem0 or mem0_graph)" ,
480+ choices = ["zep" , "memos" , "mem0" , "mem0_graph" , "memos-api" , "memobase" ],
345481 )
346482 parser .add_argument (
347483 "--version" ,
0 commit comments