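Modify the initial run file (examples/offline_inference_rerope.py): replace hard-coded paths and GPU pinning with environment variables and interactive prompts, and retune the default run parameters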

xinSky00 · xinSky00 · commit c6aa1656ea2e · 2025-12-24T06:13:47.000-08:00
diff --git a/examples/offline_inference_rerope.py b/examples/offline_inference_rerope.py
@@ -23,14 +23,17 @@
 def setup_environment_variables():
     os.environ["VLLM_USE_V1"] = "1"
     os.environ["PYTHONHASHSEED"] = "123456"
-    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,6,7"
+    
     os.environ["VLLM_ATTENTION_BACKEND"] = "TRITON_ATTN_VLLM_V1"
     os.environ["REROPE_WINDOW"] = "32768"
     os.environ["TRAINING_LENGTH"] = "32768"
 
-    global data_dir
-    data_dir = os.getenv("DATA_DIR", "/home/externals/wangwenxin21/wx_data")
 
+    global data_dir
+    data_dir = os.getenv("DATA_DIR", "/home/data/kv_cache")
+    data_dir = input(
+        "Enter the directory for UCMStore to save kv cache, e.g. /home/data/kv_cache: "
+    )
     if not os.path.isdir(data_dir):
         create = input(f"Directory {data_dir} dose not exist. Create it? (Y/n): ")
         if create.lower() == "y":
@@ -63,13 +66,13 @@ def build_llm_with_uc(module_path: str, name: str, model: str):
         model=model,
         kv_transfer_config=ktc,
         hf_overrides={
-            "max_position_embeddings": 430080,
+            "max_position_embeddings": 327680,
         },
-        gpu_memory_utilization=0.8,
+        gpu_memory_utilization=0.9,
         max_num_batched_tokens=8192,
         block_size=16,
         enforce_eager=True,
-        tensor_parallel_size=4,
+        tensor_parallel_size=2,
     )
 
     llm = LLM(**asdict(llm_args))
@@ -98,24 +101,39 @@ def print_output(
 def main():
     module_path = "ucm.integration.vllm.ucm_connector"
     name = "UCMConnector"
-    model = os.getenv("MODEL_PATH", "/home/wx/models/Qwen2.5-14B-Instruct")
+    model = os.getenv("MODEL_PATH", "/home/models/Qwen2.5-14B-Instruct")
+    if not os.path.isdir(model):
+        model = input("Enter path to model, e.g. /home/models/Qwen2.5-14B-Instruct: ")
+        if not os.path.isdir(model):
+            print("Exiting. Incorrect model_path")
+            sys.exit(1)
 
     tokenizer = AutoTokenizer.from_pretrained(model, use_chat_template=True)
     setup_environment_variables()
 
     with build_llm_with_uc(module_path, name, model) as llm:
 
         data_all = []
+        path_to_dataset = os.getenv(
+            "DATASET_PATH", "/home/data/Longbench/data/multifieldqa_zh.jsonl"
+        )
+        if not os.path.isfile(path_to_dataset):
+            path_to_dataset = input(
+                "Enter path to one of the longbench dataset, e.g. /home/data/Longbench/data/multifieldqa_zh.jsonl: "
+            )
+            if not os.path.isfile(path_to_dataset):
+                print("Exiting. Incorrect dataset path")
+                sys.exit(1)
         with open(
-            "/home/wx/va_clean/data/multifieldqa_zh.jsonl", "r", encoding="utf-8"
+            path_to_dataset, "r", encoding="utf-8"
         ) as f:
             for line in f:
                 data_all.append(json.loads(line))
 
         materials = []
         questions = []
         references = []
-        batch_size = 75
+        batch_size = 30
         num_batch = 2
         for idx in range(num_batch):
             data = data_all[idx * batch_size : (idx + 1) * batch_size]
@@ -151,7 +169,7 @@ def main():
                 "【文本内容开始】\n"
                 f"{material}\n"
                 "【文本内容结束】\n\n"
-                "请回答以下问题：\n"
+                "请直接回答以下问题：\n"
                 f"{question}"
             )