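Update: pass the configured base_url to the OpenAI client when set, lower the router plugin's default effort from 0.8 to 0.7, and read usage.completion_tokens as an attribute instead of calling it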

codelion · codelion · commit 9afdadb9e8bf · 2024-10-14T15:55:09.000+08:00
diff --git a/optillm.py b/optillm.py
@@ -49,7 +49,11 @@ def get_config():
     # OpenAI, Azure, or LiteLLM API configuration
     if os.environ.get("OPENAI_API_KEY"):
         API_KEY = os.environ.get("OPENAI_API_KEY")
-        default_client = OpenAI(api_key=API_KEY)
+        base_url = server_config['base_url']
+        if base_url != "":
+            default_client = OpenAI(api_key=API_KEY, base_url=base_url)
+        else:
+            default_client = OpenAI(api_key=API_KEY)
     elif os.environ.get("AZURE_OPENAI_API_KEY"):
         API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
         API_VERSION = os.environ.get("AZURE_API_VERSION")
diff --git a/optillm/plugins/router_plugin.py b/optillm/plugins/router_plugin.py
@@ -74,7 +74,7 @@ def preprocess_input(tokenizer, system_prompt, initial_query):
     )
     return encoding['input_ids'], encoding['attention_mask']
 
-def predict_approach(model, input_ids, attention_mask, device, effort=0.8):
+def predict_approach(model, input_ids, attention_mask, device, effort=0.7):
     model.eval()
     with torch.no_grad():
         input_ids = input_ids.to(device)
@@ -151,4 +151,4 @@ def run(system_prompt, initial_query, client, model, **kwargs):
                 {"role": "user", "content": initial_query}
             ]
         )
-        return response.choices[0].message.content, response.usage.completion_tokens()
+        return response.choices[0].message.content, response.usage.completion_tokens

Original file line number	Diff line number	Diff line change
`@@ -74,7 +74,7 @@ def preprocess_input(tokenizer, system_prompt, initial_query):`
`74`	`74`	`)`
`75`	`75`	`return encoding['input_ids'], encoding['attention_mask']`
`76`	`76`
`77`		`-def predict_approach(model, input_ids, attention_mask, device, effort=0.8):`
	`77`	`+def predict_approach(model, input_ids, attention_mask, device, effort=0.7):`
`78`	`78`	`model.eval()`
`79`	`79`	`with torch.no_grad():`
`80`	`80`	`input_ids = input_ids.to(device)`
`@@ -151,4 +151,4 @@ def run(system_prompt, initial_query, client, model, **kwargs):`
`151`	`151`	`{"role": "user", "content": initial_query}`
`152`	`152`	`]`
`153`	`153`	`)`
`154`		`- return response.choices[0].message.content, response.usage.completion_tokens()`
	`154`	`+ return response.choices[0].message.content, response.usage.completion_tokens`