Skip to content

Commit a7868e9

Browse files
authored
A way to output simulated qa in a format compatible with eval (#34479)
* Changed the parameter from max_count to limit
* Added a method to output simulator results in QA format
* simulation_result_limit was missing from main
1 parent 6452024 commit a7868e9

File tree

2 files changed

+28
-2
lines changed

2 files changed

+28
-2
lines changed

sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,19 @@ def to_json_lines(self):
1010
for item in self:
1111
json_lines += json.dumps(item) + "\n"
1212
return json_lines
13+
14+
def to_eval_qa_json_lines(self):
15+
json_lines = ""
16+
for item in self:
17+
user_message = None
18+
assistant_message = None
19+
for message in item['messages']:
20+
if message['role'] == 'user':
21+
user_message = message['content']
22+
elif message['role'] == 'assistant':
23+
assistant_message = message['content']
24+
if user_message and assistant_message:
25+
json_lines += json.dumps({'question': user_message, 'answer': assistant_message}) + "\n"
26+
return json_lines
27+
28+

sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,8 @@ async def simulate_async(
219219
api_call_retry_limit: int = 3,
220220
api_call_retry_sleep_sec: int = 1,
221221
api_call_delay_sec: float = 0,
222-
concurrent_async_task: int = 3
222+
concurrent_async_task: int = 3,
223+
simulation_result_limit: int = 3,
223224
):
224225
"""Asynchronously simulate conversations using the provided template and parameters
225226
@@ -241,6 +242,8 @@ async def simulate_async(
241242
:paramtype api_call_delay_sec: float, optional
242243
:keyword concurrent_async_task: The maximum number of asynchronous tasks to run concurrently. Defaults to 3.
243244
:paramtype concurrent_async_task: int, optional
245+
:keyword simulation_result_limit: The maximum number of simulation results to return. Defaults to 3.
246+
:paramtype simulation_result_limit: int, optional
244247
245248
:return: A list of dictionaries containing the simulation results.
246249
:rtype: List[Dict]
@@ -271,7 +274,6 @@ async def simulate_async(
271274
semaphore = asyncio.Semaphore(concurrent_async_task)
272275
sim_results = []
273276
tasks = []
274-
275277
for t in templates:
276278
for p in t.template_parameters:
277279
if jailbreak:
@@ -294,6 +296,12 @@ async def simulate_async(
294296
)
295297
)
296298

299+
if len(tasks) >= simulation_result_limit:
300+
break
301+
302+
if len(tasks) >= simulation_result_limit:
303+
break
304+
297305
sim_results = await asyncio.gather(*tasks)
298306

299307
return JsonLineList(sim_results)
@@ -324,6 +332,8 @@ async def _simulate_async(
324332
api_call_delay_sec (float, optional): The time in seconds to wait between API calls. Defaults to 0.
325333
concurrent_async_task (int, optional): The maximum number of asynchronous tasks to run concurrently.
326334
Defaults to 3.
335+
simulation_result_limit (int, optional): The maximum number of simulation results to return. Defaults to 3.
336+
327337
Returns:
328338
List[Dict]: A list of dictionaries containing the simulation results.
329339

0 commit comments

Comments
 (0)