
Commit 12ac786

Merge pull request #234 from codelion/codelion-patch-1
Add conversation logging
2 parents 4e414c4 + 8f340a7 commit 12ac786

21 files changed: +2240 additions, −329 deletions

README.md

Lines changed: 2 additions & 2 deletions
@@ -287,7 +287,7 @@ and then use the same in your OpenAI client. You can pass any HuggingFace model
 with your HuggingFace key. We also support adding any number of LoRAs on top of the model by using the `+` separator.

 E.g. The following code loads the base model `meta-llama/Llama-3.2-1B-Instruct` and then adds two LoRAs on top - `patched-codes/Llama-3.2-1B-FixVulns` and `patched-codes/Llama-3.2-1B-FastApply`.
-You can specify which LoRA to use using the `active_adapter` param in `extra_args` field of OpenAI SDK client. By default we will load the last specified adapter.
+You can specify which LoRA to use using the `active_adapter` param in `extra_body` field of OpenAI SDK client. By default we will load the last specified adapter.

 ```python
 OPENAI_BASE_URL = "http://localhost:8000/v1"
@@ -748,4 +748,4 @@ If you use this library in your research, please cite:

 <p align="center">
 ⭐ <a href="https://github.com/codelion/optillm">Star us on GitHub</a> if you find OptiLLM useful!
-</p>
+</p>
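
For illustration, a minimal sketch of selecting an adapter through the corrected `extra_body` field with the OpenAI Python SDK, assuming the optillm proxy is running at the base URL shown above; the API key value and the prompt are placeholders, not part of this commit:

```python
# Minimal sketch (assumptions: optillm proxy running locally, OpenAI Python SDK installed;
# api_key value and prompt are placeholders).
from openai import OpenAI

OPENAI_BASE_URL = "http://localhost:8000/v1"
client = OpenAI(base_url=OPENAI_BASE_URL, api_key="placeholder")

response = client.chat.completions.create(
    # Base model plus two LoRAs, joined with the `+` separator described in the README
    model="meta-llama/Llama-3.2-1B-Instruct+patched-codes/Llama-3.2-1B-FixVulns+patched-codes/Llama-3.2-1B-FastApply",
    messages=[{"role": "user", "content": "Fix the vulnerability in this function."}],
    # Select which loaded LoRA is active for this request; if omitted,
    # the last specified adapter is used (per the README).
    extra_body={"active_adapter": "patched-codes/Llama-3.2-1B-FixVulns"},
)
print(response.choices[0].message.content)
```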

optillm.py

Lines changed: 116 additions & 34 deletions
Large diffs are not rendered by default.

optillm/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 import os

 # Version information
-__version__ = "0.1.28"
+__version__ = "0.2.0"

 # Get the path to the root optillm.py
 spec = util.spec_from_file_location(

optillm/bon.py

Lines changed: 44 additions & 21 deletions
@@ -1,8 +1,10 @@
 import logging
+import optillm
+from optillm import conversation_logger

 logger = logging.getLogger(__name__)

-def best_of_n_sampling(system_prompt: str, initial_query: str, client, model: str, n: int = 3) -> str:
+def best_of_n_sampling(system_prompt: str, initial_query: str, client, model: str, n: int = 3, request_id: str = None) -> str:
     bon_completion_tokens = 0

     messages = [{"role": "system", "content": system_prompt},
@@ -12,13 +14,20 @@ def best_of_n_sampling(system_prompt: str, initial_query: str, client, model: st

     try:
         # Try to generate n completions in a single API call using n parameter
-        response = client.chat.completions.create(
-            model=model,
-            messages=messages,
-            max_tokens=4096,
-            n=n,
-            temperature=1
-        )
+        provider_request = {
+            "model": model,
+            "messages": messages,
+            "max_tokens": 4096,
+            "n": n,
+            "temperature": 1
+        }
+        response = client.chat.completions.create(**provider_request)
+
+        # Log provider call
+        if request_id:
+            response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
+            conversation_logger.log_provider_call(request_id, provider_request, response_dict)
+
         completions = [choice.message.content for choice in response.choices]
         logger.info(f"Generated {len(completions)} initial completions using n parameter. Tokens used: {response.usage.completion_tokens}")
         bon_completion_tokens += response.usage.completion_tokens
@@ -30,12 +39,19 @@ def best_of_n_sampling(system_prompt: str, initial_query: str, client, model: st
         # Fallback: Generate completions one by one in a loop
         for i in range(n):
             try:
-                response = client.chat.completions.create(
-                    model=model,
-                    messages=messages,
-                    max_tokens=4096,
-                    temperature=1
-                )
+                provider_request = {
+                    "model": model,
+                    "messages": messages,
+                    "max_tokens": 4096,
+                    "temperature": 1
+                }
+                response = client.chat.completions.create(**provider_request)
+
+                # Log provider call
+                if request_id:
+                    response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
+                    conversation_logger.log_provider_call(request_id, provider_request, response_dict)
+
                 completions.append(response.choices[0].message.content)
                 bon_completion_tokens += response.usage.completion_tokens
                 logger.debug(f"Generated completion {i+1}/{n}")
@@ -59,13 +75,20 @@ def best_of_n_sampling(system_prompt: str, initial_query: str, client, model: st
         rating_messages.append({"role": "assistant", "content": completion})
         rating_messages.append({"role": "user", "content": "Rate the above response:"})

-        rating_response = client.chat.completions.create(
-            model=model,
-            messages=rating_messages,
-            max_tokens=256,
-            n=1,
-            temperature=0.1
-        )
+        provider_request = {
+            "model": model,
+            "messages": rating_messages,
+            "max_tokens": 256,
+            "n": 1,
+            "temperature": 0.1
+        }
+        rating_response = client.chat.completions.create(**provider_request)
+
+        # Log provider call
+        if request_id:
+            response_dict = rating_response.model_dump() if hasattr(rating_response, 'model_dump') else rating_response
+            conversation_logger.log_provider_call(request_id, provider_request, response_dict)
+
         bon_completion_tokens += rating_response.usage.completion_tokens
         try:
             rating = float(rating_response.choices[0].message.content.strip())
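
The call sites above all follow one pattern: the request payload is built as a `provider_request` dict, sent via `client.chat.completions.create(**provider_request)`, and, when a `request_id` is present, the request and the serialized response are passed to `conversation_logger.log_provider_call`. The logger itself lives elsewhere in this commit and is not shown in this diff; the sketch below only illustrates the interface implied by these call sites and is not the shipped implementation (the `dump` helper is hypothetical).

```python
# Illustrative sketch only -- not the conversation_logger added by this commit.
# It mirrors the interface used in bon.py:
#   conversation_logger.log_provider_call(request_id, provider_request, response_dict)
import json
import threading
from collections import defaultdict

_lock = threading.Lock()
_calls = defaultdict(list)  # request_id -> list of {"request": ..., "response": ...}

def log_provider_call(request_id, provider_request, response_dict):
    """Record one provider request/response pair under the given request id."""
    with _lock:
        _calls[request_id].append({"request": provider_request, "response": response_dict})

def dump(request_id):
    """Hypothetical helper: return the logged calls for a request id as a JSON string."""
    with _lock:
        return json.dumps(_calls.get(request_id, []), default=str, indent=2)
```

Materializing the payload as a `provider_request` dict before the SDK call is what makes the exact request available for logging without repeating the argument list at each call site.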
