forked from EternityJune25/ComoRAG
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain_vllm.py
More file actions
95 lines (72 loc) · 3.16 KB
/
main_vllm.py
File metadata and controls
95 lines (72 loc) · 3.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import json
import copy
from src.comorag.ComoRAG import ComoRAG
from src.comorag.utils.config_utils import BaseConfig
from src.comorag.utils.misc_utils import get_gold_answers
def process_dataset(dataset_path, config):
    """Index one dataset's corpus, answer its questions, and dump results.

    Args:
        dataset_path: Directory containing ``corpus.jsonl`` (one JSON object
            per line with a ``contents`` field) and ``qas.jsonl`` (one JSON
            object per line with a ``question`` field).
        config: A ``BaseConfig``; its ``corpus_len`` is set here and a
            ``results.json`` is written under ``config.output_dir``.
    """
    corpus_path = os.path.join(dataset_path, "corpus.jsonl")
    qas_path = os.path.join(dataset_path, "qas.jsonl")

    with open(corpus_path, 'r', encoding='utf-8') as f:
        corpus = [json.loads(line) for line in f if line.strip()]
    docs = [doc['contents'] for doc in corpus]

    with open(qas_path, 'r', encoding='utf-8') as f:
        samples = [json.loads(line) for line in f if line.strip()]
    all_queries = [s['question'] for s in samples]

    config.corpus_len = len(corpus)
    comorag = ComoRAG(global_config=config)
    comorag.index(docs)
    solutions = comorag.try_answer(all_queries)

    # Attach the gold answers to each solution before serialization.
    gold_answers = get_gold_answers(samples)
    for idx, solution in enumerate(solutions):
        solution.gold_answers = list(gold_answers[idx])

    result_list = [
        {
            "idx": idx,
            "question": question,
            "golden_answers": solution.gold_answers,
            "output": solution.answer,
        }
        for idx, (question, solution) in enumerate(zip(all_queries, solutions))
    ]

    # Fix: the original called os.path.join(config.output_dir) with a single
    # argument, which is a no-op; use the directory directly. (The unused
    # dataset_name local was also dropped.)
    os.makedirs(config.output_dir, exist_ok=True)
    results_path = os.path.join(config.output_dir, "results.json")
    with open(results_path, "w", encoding="utf-8") as f:
        json.dump(result_list, f, ensure_ascii=False, indent=2)
def main():
    """Run ComoRAG over every dataset directory under ./dataset/cinderella.

    Prerequisites:
      1) Start a vLLM OpenAI-compatible server separately, e.g.:
         vllm serve /path/to/your/model --tensor-parallel-size 1 \
             --max-model-len 8192 --gpu-memory-utilization 0.95
         (note: a shell will not evaluate expressions like 4096*2 — pass the
         literal value)
      2) Run this script; it reaches the server via ``base_url``.
    """
    # Fix: don't clobber a real key the user already exported — only fall
    # back to the placeholder when nothing is set (mirrors the CUDA line).
    os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")
    # Optional: select visible GPUs for the client side.
    os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")

    base_path = './dataset/cinderella'
    # Process sub-directories in a deterministic (sorted) order.
    dataset_dirs = sorted(
        d for d in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, d))
    )
    dataset_paths = [os.path.join(base_path, d) for d in dataset_dirs]

    vllm_base_url = 'http://localhost:8000/v1'
    # vLLM serves the model under the path it was launched with.
    served_model_name = '/path/to/your/model'

    config = BaseConfig(
        llm_base_url=vllm_base_url,
        llm_name=served_model_name,
        llm_api_key=os.environ["OPENAI_API_KEY"],
        dataset='cinderella',
        embedding_model_name='/path/to/your/embedding/model',
        embedding_batch_size=4,
        need_cluster=True,
        output_dir='result/cinderella_vllm',
        save_dir='outputs/cinderella_vllm',
        max_meta_loop_max_iterations=5,
        is_mc=False,
        max_tokens_ver=2000,
        max_tokens_sem=2000,
        max_tokens_epi=2000
    )

    # Each dataset gets its own output/save sub-directory; deep-copy so the
    # per-dataset path suffixes never accumulate on the shared config.
    for dataset_path in dataset_paths:
        run_config = copy.deepcopy(config)
        dataset_name = os.path.basename(dataset_path)
        run_config.output_dir += f'/{dataset_name}'
        run_config.save_dir += f'/{dataset_name}'
        process_dataset(dataset_path, run_config)
# Standard script entry guard: only run when executed directly, not on import.
if __name__ == "__main__":
    main()