# ... (top of file elided; asyncio, json, argparse and the typing names
# Dict/Any are used below, so they are presumably imported here) ...

import os
import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()

# ... (elided: further setup, including the OUTPUT_EXCEL_FILE constant used below) ...


async def call_gpt4o_mini_async(client: OpenAI, prompt: str) -> str:
    messages = [{"role": "user", "content": prompt}]

    try:
        # Run the synchronous OpenAI client call on a worker thread so it
        # does not block the event loop.
        response = await asyncio.to_thread(
            client.chat.completions.create,
            model="gpt-4o-mini",
            messages=messages,
            temperature=0,
            max_tokens=500,
            timeout=30.0,
        )
        return response.choices[0].message.content
    except Exception as e:
        # ... (error handling elided) ...
# ... (remainder of the error handling elided) ...


def parse_xml_response(response: str, tag: str) -> str:
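    # The original body is elided in this excerpt. A minimal sketch of what it
    # presumably does -- pull the text between <tag> and </tag> out of the
    # model reply (assumes `re` is imported in the elided header; not the
    # verbatim original):
    match = re.search(rf"<{tag}>(.*?)</{tag}>", response, re.DOTALL)
    return match.group(1).strip() if match else ""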


async def evaluate_violate_preference_async(
    client: OpenAI, preference: str, question: str, response: str
) -> Dict[str, str]:
    prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant's response violates the user's stated preference.
Evaluate the response based on these criteria:
... (criteria and the conversation transcript elided) ...
<explanation>[1 very short sentence explanation]</explanation>
<answer>[Yes/No]</answer>"""

    api_response = await call_gpt4o_mini_async(client, prompt)
    return {
        "explanation": parse_xml_response(api_response, "explanation"),
        "answer": parse_xml_response(api_response, "answer"),
    }
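
# The evaluators that follow repeat this exact shape: build a prompt that
# demands a fixed XML answer, call gpt-4o-mini, then parse the tagged fields.
# A generic helper could factor that out -- a sketch, not part of the original:
#
#     async def evaluate(client: OpenAI, prompt: str, tags: list[str]) -> Dict[str, str]:
#         api_response = await call_gpt4o_mini_async(client, prompt)
#         return {tag: parse_xml_response(api_response, tag) for tag in tags}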


async def evaluate_acknowledge_preference_async(
    client: OpenAI, question: str, response: str
) -> Dict[str, str]:
    prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant acknowledges any user preference in answering the user's query.

... (evaluation criteria and the conversation transcript elided) ...
Examine the response meticulously and answer. Please answer in this exact XML format without any additional text:
<preference>[quote of the sentence that acknowledges/mentions what the preference is; leave it blank if there is none]</preference>
<answer>[Yes/No]</answer>"""
    api_response = await call_gpt4o_mini_async(client, prompt)
    return {
        "preference_mention": parse_xml_response(api_response, "preference"),
        "answer": parse_xml_response(api_response, "answer"),
    }


async def evaluate_hallucinate_preference_async(
    client: OpenAI, preference: str, restatement: str
) -> Dict[str, str]:
    if not restatement.strip():
        return {"explanation": "No restatement provided by assistant", "answer": "No"}

    prompt = f"""... (instructions comparing the stated preference with the assistant's restatement elided) ...
<explanation>[1 short sentence explanation]</explanation>
<answer>[Yes/No]</answer>"""

    api_response = await call_gpt4o_mini_async(client, prompt)
    return {
        "explanation": parse_xml_response(api_response, "explanation"),
        "answer": parse_xml_response(api_response, "answer"),
    }


async def evaluate_helpful_response_async(
    client: OpenAI, question: str, response: str
) -> Dict[str, str]:
    prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant provides any substantive response to the user's query.
Evaluate the response based on these stringent criteria:
... (criteria and the conversation transcript elided) ...
<explanation>[1 very short sentence explanation]</explanation>
<answer>[Yes/No]</answer>"""

    api_response = await call_gpt4o_mini_async(client, prompt)
    return {
        "explanation": parse_xml_response(api_response, "explanation"),
        "answer": parse_xml_response(api_response, "answer"),
    }


def classify_error_type(evaluation_results: Dict[str, Any]) -> str:
    # ... (the preceding error checks are elided; this is the fall-through case) ...
    return "Personalized Response"


async def process_line(line: str, client: OpenAI, semaphore: asyncio.Semaphore) -> Dict[str, Any]:
    async with semaphore:
        data = json.loads(line.strip())
        preference = data["preference"]
        # ... (evaluation calls and result assembly elided) ...
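
# How process_line is presumably driven: a bounded fan-out where the semaphore
# caps in-flight requests. The actual read-and-gather loop is elided from this
# excerpt; this sketch assumes a JSONL input handle named `f`:
#
#     semaphore = asyncio.Semaphore(concurrency_limit)
#     tasks = [process_line(line, client, semaphore) for line in f]
#     results = await asyncio.gather(*tasks)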


# ... (elided) ...


def generate_excel_summary(
    # ... (leading parameters elided) ...
    avg_search_time: float,
    avg_context_tokens: float,
    avg_add_time: float,
    model_name: str = "gpt-4o-mini",
):
    print(f"Generating Excel summary at {OUTPUT_EXCEL_FILE}...")

    # ... (def get_pct(key) and the opening of the summary `data` dict elided) ...
        # Chinese column labels kept verbatim; glosses: 个性化回答 = "personalized
        # response", 添加 = "add", 搜索 = "search".
        "Personalized Response\n个性化回答": [personalized_pct / 100],
        "context token": [avg_context_tokens],
        "Time添加": [f"{avg_add_time:.2f}s"],
        "Time搜索": [f"{avg_search_time:.2f}s"],
    }

    df = pd.DataFrame(data)
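
    # The rest of generate_excel_summary is elided; presumably the one-row
    # frame is then written out, e.g. (assumption, not the verbatim original):
    #
    #     df.to_excel(OUTPUT_EXCEL_FILE, index=False)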


# ... (elided) ...


async def main(concurrency_limit: int):
    # ... (setup and the per-result metrics loop elided; search_time is read
    # from the same metrics dict as the values below) ...
        context_tokens = metrics.get("memory_tokens_used")
        add_time = metrics.get("add_memories_duration_seconds")

        all_metrics_valid = (
            search_time is not None and add_time is not None and context_tokens is not None
        )

        if all_metrics_valid:
            total_search_time += float(search_time)
            # ... (remaining accumulation elided) ...

    avg_search_time = (total_search_time / valid_metric_samples) if valid_metric_samples > 0 else 0
    avg_add_time = (total_add_time / valid_metric_samples) if valid_metric_samples > 0 else 0
    avg_context_tokens = (
        (total_context_tokens / valid_metric_samples) if valid_metric_samples > 0 else 0
    )

    try:
        generate_excel_summary(
            # ... (arguments to generate_excel_summary and error handling elided) ...

# ... (elided: argparse setup; the option that feeds args.concurrency_limit is
# added in a parser.add_argument call that closes here) ...
)
args = parser.parse_args()

asyncio.run(main(concurrency_limit=args.concurrency_limit))
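
# Example invocation -- the flag name is inferred from args.concurrency_limit
# and the script name is invented; the real add_argument call is elided:
#
#     python evaluate.py --concurrency_limit 8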