@@ -16,7 +16,7 @@ def __init__(
         model: str,
         tensor_parallel_size: int = 1,
         gpu_memory_utilization: float = 0.9,
-        temperature: float = 0.0,
+        temperature: float = 0.6,
         top_p: float = 1.0,
         topk: int = 5,
         **kwargs: Any,
@@ -66,7 +66,7 @@ async def generate_answer(
         sp = self.SamplingParams(
             temperature=self.temperature if self.temperature > 0 else 1.0,
             top_p=self.top_p if self.temperature > 0 else 1.0,
-            max_tokens=extra.get("max_new_tokens", 512),
+            max_tokens=extra.get("max_new_tokens", 2048),
         )

         result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)
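Two behavioural knobs move in this hunk: a non-positive temperature now resets both `temperature` and `top_p` to 1.0 before `SamplingParams` is built, and the default generation budget rises from 512 to 2048 tokens. A minimal standalone sketch of that resolution logic, assuming nothing beyond the stdlib (the helper name `resolve_sampling_params` is hypothetical, not part of this codebase):

```python
def resolve_sampling_params(
    temperature: float, top_p: float, max_new_tokens: int = 2048
) -> dict:
    """Mirror the fallback above: a non-positive temperature resets both
    temperature and top_p to 1.0 before SamplingParams is constructed."""
    return {
        "temperature": temperature if temperature > 0 else 1.0,
        "top_p": top_p if temperature > 0 else 1.0,
        "max_tokens": max_new_tokens,
    }

# A temperature of 0.0 falls back to the neutral sampling values:
assert resolve_sampling_params(0.0, 0.5) == {
    "temperature": 1.0,
    "top_p": 1.0,
    "max_tokens": 2048,
}
# The new default (temperature=0.6) passes through unchanged:
assert resolve_sampling_params(0.6, 0.9)["temperature"] == 0.6
```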
@@ -82,7 +82,7 @@ async def generate_answer(

     async def generate_topk_per_token(
         self, text: str, history: Optional[List[str]] = None, **extra: Any
-    ) -> List[Token]:
+    ) -> List[Token]:
         full_prompt = self._build_inputs(text, history)
         request_id = f"graphgen_topk_{uuid.uuid4()}"

@@ -110,7 +110,9 @@ async def generate_topk_per_token(

             candidate_tokens = []
             for _, logprob_obj in top_logprobs.items():
-                tok_str = logprob_obj.decoded_token.strip() if logprob_obj.decoded_token else ""
+                tok_str = (
+                    logprob_obj.decoded_token.strip() if logprob_obj.decoded_token else ""
+                )
                 prob = float(math.exp(logprob_obj.logprob))
                 candidate_tokens.append(Token(tok_str, prob))

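For reference, the loop above turns vLLM's log-probabilities back into plain probabilities via `math.exp`. A self-contained sketch of that conversion, with a simplified stand-in for the project's `Token` type (the dataclass below is an assumption, not the real definition):

```python
import math
from dataclasses import dataclass, field
from typing import List

@dataclass
class Token:
    # Simplified stand-in for the project's Token type (an assumption):
    text: str
    prob: float
    top_candidates: List["Token"] = field(default_factory=list)

def logprobs_to_tokens(top_logprobs: dict) -> List[Token]:
    # vLLM reports log-probabilities; math.exp() maps them back into [0, 1].
    return [
        Token(text=tok.strip() if tok else "", prob=float(math.exp(lp)))
        for tok, lp in top_logprobs.items()
    ]

print(logprobs_to_tokens({" yes": -0.105, " no": -2.303}))
# -> probabilities of roughly 0.90 and 0.10
```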
@@ -120,7 +122,7 @@ async def generate_topk_per_token(
             main_token = Token(
                 text=candidate_tokens[0].text,
                 prob=candidate_tokens[0].prob,
-                top_candidates=candidate_tokens
+                top_candidates=candidate_tokens,
             )
             return [main_token]
         return []
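A hypothetical call site for the method as it stands after this change; the wrapper class name and model identifier below are assumptions for illustration, not taken from the repository:

```python
import asyncio

async def main() -> None:
    # Class and model names here are placeholders, not the project's own:
    llm = VLLMWrapper(model="Qwen/Qwen2.5-7B-Instruct", topk=5)
    tokens = await llm.generate_topk_per_token("The capital of France is")
    if tokens:  # an empty list means the engine returned no logprobs
        for cand in tokens[0].top_candidates:
            print(f"{cand.text!r}: {cand.prob:.3f}")

asyncio.run(main())
```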