Skip to content

Commit 32f6370

Merge pull request #763 from parea-ai/PAI-449-improve-sdk-trace-log-system-architecture-pys
Send trace logs when complete
2 parents 9ed9c19 + 477f3ec commit 32f6370

File tree: 10 files changed (+80, -51 lines)

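
Taken together, the diffs below move the SDK from "log early, then patch via update_log" to "accumulate everything on the in-memory trace entry and send one complete log". A minimal sketch of that pattern, assuming the helpers named in the diffs (trace_data, parea_logger.default_log); the send_complete_trace helper itself is hypothetical and only illustrates the flow:

from parea.parea_logger import parea_logger
from parea.utils.trace_utils import trace_data


def send_complete_trace(trace_id: str, scores) -> None:
    # Hypothetical helper: attach results to the in-memory TraceLog entry...
    trace_data.get()[trace_id].scores = scores
    data_with_scores = trace_data.get()[trace_id]
    # ...then send it once, instead of an early default_log followed by update_log patches.
    parea_logger.default_log(data=data_with_scores)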

parea/client.py

Lines changed: 0 additions & 1 deletion
@@ -347,7 +347,6 @@ def _update_data_and_trace(self, data: Completion) -> Completion:
             if parent_trace_id:
                 trace_data.get()[parent_trace_id].children.append(inference_id)
                 trace_data.get()[parent_trace_id].experiment_uuid = experiment_uuid
-                logger_record_log(parent_trace_id)
         except Exception as e:
             logger.debug(f"Error updating trace ids for completion. Trace log will be absent: {e}")
 

parea/cookbook/simple_experiment_with_openai.py

Lines changed: 17 additions & 15 deletions
@@ -22,25 +22,27 @@ def eval_func(log: Log) -> float:
 
 
 @trace(eval_funcs=[eval_func])
-def func(lang: str, framework: str) -> str:
-    return (
-        client.chat.completions.create(
-            model="gpt-4-turbo",
-            messages=[
-                {
-                    "role": "user",
-                    "content": f"Write a hello world program in {lang} using {framework}",
-                }
-            ],
+def func(topic: str) -> dict[str, str | None]:
+    return {
+        "data": (
+            client.chat.completions.create(
+                model="gpt-4-turbo",
+                messages=[
+                    {
+                        "role": "user",
+                        "content": f"Write a short haiku about {topic}",
+                    }
+                ],
+            )
+            .choices[0]
+            .message.content
         )
-        .choices[0]
-        .message.content
-    )
+    }
 
 
 if __name__ == "__main__":
     p.experiment(
-        name="hello-world-example",
-        data=[{"lang": "Python", "framework": "Flask"}],
+        name="hello-world-example-ch",
+        data=[{"topic": "Fish"}, {"topic": "Python"}],
         func=func,
     ).run()
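
For context, a minimal sketch of the scaffolding this cookbook file assumes around the hunk above (imports, client wiring, and an example eval_func); none of these lines come from the diff, and the eval logic shown is purely illustrative:

import os

from openai import OpenAI

from parea import Parea, trace
from parea.schemas import Log

client = OpenAI()
p = Parea(api_key=os.getenv("PAREA_API_KEY"))
p.wrap_openai_client(client)  # so the create() call inside func() is traced


def eval_func(log: Log) -> float:
    # Illustrative only: reward short outputs; the cookbook's real eval may differ.
    output = log.output or ""
    return 1.0 if len(output) < 280 else 0.0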

parea/cookbook/tracing_and_evaluating_openai_endpoint.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 load_dotenv()
 
 openai.api_key = os.getenv("OPENAI_API_KEY")
-
+openai.api_type = "openai"
 
 use_cache = False  # by using the in memory cache, you don't need a Parea API key
 cache = InMemoryCache() if use_cache else None

parea/cookbook/tracing_without_deployed_prompt.py

Lines changed: 6 additions & 11 deletions
@@ -13,18 +13,15 @@
 p = Parea(api_key=os.getenv("PAREA_API_KEY"))
 
 
-@trace  # <--- If you want to log the inputs to the LLM call you can optionally add a trace decorator here
 def call_llm(
     data: List[dict],
     model: str = "gpt-3.5-turbo-1106",
-    provider: str = "openai",
     temperature: float = 0.0,
 ) -> CompletionResponse:
     return p.completion(
         data=Completion(
             llm_configuration=LLMInputs(
                 model=model,
-                provider=provider,
                 model_params=ModelParams(temp=temperature),
                 messages=[Message(**d) for d in data],
             )
@@ -49,7 +46,7 @@ def critic(argument: str) -> str:
         [
             {
                 "role": "system",
-                "content": f"You are a critic."
+                "content": "You are a critic."
                 "\nWhat unresolved questions or criticism do you have after reading the following argument?"
                 "Provide a concise summary of your feedback.",
             },
@@ -106,8 +103,7 @@ def refiner2(query: str, additional_description: str, current_arg: str, criticis
                 "content": "Please generate a new argument that incorporates the feedback from the user.",
             },
         ],
-        model="claude-2",
-        provider="anthropic",
+        model="claude-3-haiku-20240307",
     )
 
 
@@ -128,7 +124,6 @@ def json_call():
         data=Completion(
             llm_configuration=LLMInputs(
                 model="gpt-3.5-turbo-1106",
-                provider="openai",
                 model_params=ModelParams(temp=0.0, response_format={"type": "json_object"}),
                 messages=[Message(**d) for d in json_messages],
             )
@@ -147,12 +142,12 @@ def json_call():
         "Whether wine is good for you.",
         additional_description="Provide a concise, few sentence argument on why wine is good for you.",
     )
-    print(result2)
+    print(trace_id2, result2)
     p.record_feedback(
         FeedbackRequest(
             trace_id=trace_id2,
-            score=0.0,  # 0.0 (bad) to 1.0 (good)
-            target="Moonshine is wonderful.",
+            score=0.7,  # 0.0 (bad) to 1.0 (good)
+            target="Wine is wonderful.",
         )
     )
 
@@ -164,7 +159,7 @@ def json_call():
     p.record_feedback(
         FeedbackRequest(
             trace_id=result3.inference_id,
-            score=0.7,  # 0.0 (bad) to 1.0 (good)
+            score=0.5,  # 0.0 (bad) to 1.0 (good)
             target="Moonshine is wonderful. End of story.",
         )
    )
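
With the explicit provider argument removed, this cookbook appears to rely on the SDK resolving the provider from the model name. A hypothetical call of the call_llm helper defined in this file (the prompt string and the printed attribute are illustrative assumptions, not taken from the diff):

response = call_llm(
    [{"role": "user", "content": "Make a one-sentence argument that coffee is good for you."}],
    model="gpt-3.5-turbo-1106",
)
print(response.content)  # assumes CompletionResponse exposes the generated text as .content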

parea/evals/utils.py

Lines changed: 6 additions & 1 deletion
@@ -13,6 +13,7 @@
 from parea.schemas import EvaluationResult, Log
 from parea.schemas.log import Log
 from parea.schemas.models import UpdateLog
+from parea.utils.trace_utils import thread_ids_running_evals, trace_data
 
 seg = pysbd.Segmenter(language="en", clean=False)
 
@@ -142,7 +143,11 @@ def _make_evaluations(trace_id: str, log: Log, eval_funcs: List[EvalFuncTuple],
         elif result is not None:
             scores.append(EvaluationResult(name=eval.name, score=result))
 
-    parea_logger.update_log(data=UpdateLog(trace_id=trace_id, field_name_to_value_map={"scores": scores, "target": log.target}))
+    trace_data.get()[trace_id].scores = scores
+    trace_data.get()[trace_id].target = log.target
+    data_with_scores = trace_data.get()[trace_id]
+    thread_ids_running_evals.get().remove(trace_id)
+    parea_logger.default_log(data=data_with_scores)
     if verbose:
         print("###Eval Results###")
         for score in scores:

parea/experiment/experiment.py

Lines changed: 22 additions & 2 deletions
@@ -41,7 +41,7 @@ def calculate_avg_std_for_experiment(experiment_stats: ExperimentStatsSchema) ->
             score_accumulators[score.name] += score.score
             score_counts[score.name] += 1
 
-    averages = {attr: "N/A" if counts[attr] == 0 else f"{accumulators[attr] / counts[attr]:.2f}" for attr in accumulators}
+    averages = {attr: "N/A" if counts[attr] == 0 else f"{accumulators[attr] / counts[attr]:.{5 if attr == 'cost' else 2}f}" for attr in accumulators}
 
     score_averages = {f"{name}": "N/A" if score_counts[name] == 0 else f"{score_accumulators[name] / score_counts[name]:.2f}" for name in score_accumulators}
 
@@ -157,7 +157,7 @@ def limit_concurrency_sync(sample):
     experiment_stats: ExperimentStatsSchema = p.finish_experiment(experiment_uuid, FinishExperimentRequestSchema(dataset_level_stats=dataset_level_eval_results))
     stat_name_to_avg_std = calculate_avg_std_for_experiment(experiment_stats)
     if dataset_level_eval_results:
-        stat_name_to_avg_std.update({eval_result.name: eval_result.score for eval_result in dataset_level_eval_results})
+        stat_name_to_avg_std.update(**{eval_result.name: eval_result.score for eval_result in dataset_level_eval_results})
     print(f"Experiment {experiment_name} Run {run_name} stats:\n{json_dumps(stat_name_to_avg_std, indent=2)}\n\n")
     print(f"View experiment & traces at: https://app.parea.ai/experiments/{experiment_name}/{experiment_uuid}\n")
     save_results_to_dvc_if_init(run_name, stat_name_to_avg_std)
@@ -223,6 +223,26 @@ def run(self, run_name: Optional[str] = None) -> None:
             traceback.print_exc()
             print(f"Error running experiment: {e}")
 
+    async def arun(self, run_name: Optional[str] = None) -> None:
+        """Run the experiment and save the results to DVC.
+        param run_name: The run name of the experiment. This name must be unique across experiment runs.
+            If no run name is provided a memorable name will be generated automatically.
+        """
+        if TURN_OFF_PAREA_LOGGING:
+            print("Parea logging is turned off. Experiment can't be run without logging. Set env var TURN_OFF_PAREA_LOGGING to False to enable.")
+            return
+
+        try:
+            self._gen_run_name_if_none(run_name)
+            self.experiment_stats = await experiment(
+                self.experiment_name, self.run_name, self.data, self.func, self.p, self.n_trials, self.metadata, self.dataset_level_evals, self.n_workers
+            )
+        except Exception as e:
+            import traceback
+
+            traceback.print_exc()
+            print(f"Error running experiment: {e}")
+
     @property
     def avg_scores(self) -> Dict[str, float]:
         """Returns the average score across all evals."""

parea/schemas/models.py

Lines changed: 1 addition & 1 deletion
@@ -77,7 +77,6 @@ class UseDeployedPromptResponse:
 class FeedbackRequest:
     score: float = field(validator=[validators.ge(0), validators.le(1)])
     trace_id: Optional[str] = None
-    inference_id: Optional[str] = None
     name: Optional[str] = None
     target: Optional[str] = None
 
@@ -219,6 +218,7 @@ class UpdateTraceScenario(str, Enum):
     ERROR: str = "error"
     CHAIN: str = "chain"
     USAGE: str = "usage"
+    OPENAICONFIG: str = "openaiconfig"
 
 
 @define

parea/utils/trace_utils.py

Lines changed: 13 additions & 3 deletions
@@ -15,7 +15,7 @@
 from parea.helpers import gen_trace_id, timezone_aware_now
 from parea.parea_logger import parea_logger
 from parea.schemas import EvaluationResult
-from parea.schemas.models import TraceLog, UpdateLog, UpdateTraceScenario
+from parea.schemas.models import TraceLog, UpdateTraceScenario
 from parea.utils.universal_encoder import json_dumps
 
 logger = logging.getLogger()
@@ -117,6 +117,13 @@ def fill_trace_data(trace_id: str, data: Dict[str, Any], scenario: UpdateTraceSc
         elif scenario == UpdateTraceScenario.CHAIN:
             trace_data.get()[trace_id].parent_trace_id = data["parent_trace_id"]
             trace_data.get()[data["parent_trace_id"]].children.append(trace_id)
+        elif scenario == UpdateTraceScenario.OPENAICONFIG:
+            trace_data.get()[trace_id].configuration = data["configuration"]
+            trace_data.get()[trace_id].output = data["output"]
+            trace_data.get()[trace_id].input_tokens = data["input_tokens"]
+            trace_data.get()[trace_id].output_tokens = data["output_tokens"]
+            trace_data.get()[trace_id].total_tokens = data["total_tokens"]
+            trace_data.get()[trace_id].cost = data["cost"]
         else:
             logger.debug(f"Error occurred filling trace data. Scenario not valid: {scenario}")
     except Exception as e:
@@ -265,7 +272,7 @@ def wrapper(*args, **kwargs):
 
 def call_eval_funcs_then_log(trace_id: str, eval_funcs: List[Callable] = None):
     data = trace_data.get()[trace_id]
-    parea_logger.default_log(data=data)
+    # parea_logger.default_log(data=data)
 
     if eval_funcs and data.status == "success" and random() <= data.apply_eval_frac:
         thread_ids_running_evals.get().append(trace_id)
@@ -283,10 +290,13 @@ def call_eval_funcs_then_log(trace_id: str, eval_funcs: List[Callable] = None):
                 scores.append(EvaluationResult(name=func.__name__, score=score))
             except Exception as e:
                 logger.exception(f"Error occurred calling evaluation function '{func.__name__}', {e}", exc_info=e)
-        parea_logger.update_log(data=UpdateLog(trace_id=trace_id, field_name_to_value_map={"scores": scores}))
+        # parea_logger.update_log(data=UpdateLog(trace_id=trace_id, field_name_to_value_map={"scores": scores}))
         trace_data.get()[trace_id].scores = scores
        thread_ids_running_evals.get().remove(trace_id)
 
+    data_with_scores = trace_data.get()[trace_id]
+    parea_logger.default_log(data=data_with_scores)
+
 
 def logger_record_log(trace_id: str):
     log_in_thread(parea_logger.record_log, {"data": trace_data.get()[trace_id]})
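
Net effect in this module: a trace is no longer sent eagerly at the top of call_eval_funcs_then_log and then patched with update_log; evals (gated by status and apply_eval_frac) run first, their scores are attached to the in-memory TraceLog, and a single complete log is sent at the end. A simplified, illustrative sketch of that ordering (run_evals is a hypothetical stand-in for the eval loop, not an SDK function):

def call_eval_funcs_then_log_flow(trace_id, eval_funcs=None):
    data = trace_data.get()[trace_id]  # no eager default_log here anymore
    if eval_funcs and data.status == "success" and random() <= data.apply_eval_frac:
        trace_data.get()[trace_id].scores = run_evals(data, eval_funcs)  # hypothetical helper
    parea_logger.default_log(data=trace_data.get()[trace_id])  # one complete send, no update_log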

parea/wrapper/utils.py

Lines changed: 13 additions & 15 deletions
@@ -11,8 +11,8 @@
 from parea.constants import ALL_NON_AZURE_MODELS_INFO, AZURE_MODEL_INFO
 from parea.parea_logger import parea_logger
 from parea.schemas.log import LLMInputs, Message, ModelParams, Role
-from parea.schemas.models import UpdateLog
-from parea.utils.trace_utils import get_current_trace_id, log_in_thread, trace_insert
+from parea.schemas.models import UpdateLog, UpdateTraceScenario
+from parea.utils.trace_utils import fill_trace_data, get_current_trace_id, log_in_thread, trace_data, trace_insert
 from parea.utils.universal_encoder import json_dumps
 
 is_openai_1 = openai_version.startswith("1.")
@@ -318,19 +318,17 @@ def _process_stream_response(content: list, tools: dict, data: dict, trace_id: s
         data.get("model"),
     )
     completion_tokens = _num_tokens_from_string(final_content if final_content else json_dumps(tool_calls), model)
-    parea_logger.update_log(
-        UpdateLog(
-            trace_id=trace_id,
-            field_name_to_value_map={
-                "configuration": _kwargs_to_llm_configuration(data, model),
-                "output": completion,
-                "input_tokens": prompt_tokens,
-                "output_tokens": completion_tokens,
-                "total_tokens": prompt_tokens + completion_tokens,
-                "cost": _compute_cost(prompt_tokens, completion_tokens, model),
-            },
-        )
-    )
+    data = {
+        "configuration": _kwargs_to_llm_configuration(data, model),
+        "output": completion,
+        "input_tokens": prompt_tokens,
+        "output_tokens": completion_tokens,
+        "total_tokens": prompt_tokens + completion_tokens,
+        "cost": _compute_cost(prompt_tokens, completion_tokens, model),
+    }
+    fill_trace_data(trace_id, data, UpdateTraceScenario.OPENAICONFIG)
+    data_with_config = trace_data.get()[trace_id]
+    parea_logger.default_log(data=data_with_config)
 
 
 def convert_openai_raw_stream_to_log(content: list, tools: dict, data: dict, trace_id: str):
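
Seen from user code, the streaming path now behaves like the non-streaming one: once the stream is consumed, the wrapper fills the trace entry (configuration, output, token counts, cost) and sends one complete log instead of patching an earlier one. A hypothetical end-to-end example, assuming a wrapped OpenAI client as in the cookbooks above:

from openai import OpenAI

from parea import Parea

p = Parea(api_key="...")
client = OpenAI()
p.wrap_openai_client(client)

stream = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[{"role": "user", "content": "Count to three."}],
    stream=True,
)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")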

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "parea-ai"
 packages = [{ include = "parea" }]
-version = "0.2.139"
+version = "0.2.140"
 description = "Parea python sdk"
 readme = "README.md"
 authors = ["joel-parea-ai <[email protected]>"]
