@@ -1482,91 +1482,14 @@ async def mock_async_task(inputs: TaskInput) -> TaskOutput:
1482
1482
'evaluate {name}' ,
1483
1483
{
1484
1484
'name' : 'mock_async_task' ,
1485
+ 'n_cases' : 2 ,
1486
+ 'assertion_pass_rate' : 1.0 ,
1485
1487
'logfire.msg_template' : 'evaluate {name}' ,
1486
1488
'logfire.msg' : 'evaluate mock_async_task' ,
1487
1489
'logfire.span_type' : 'span' ,
1488
- 'cases' : [
1489
- {
1490
- 'name' : 'case1' ,
1491
- 'inputs' : {'query' : 'What is 2+2?' },
1492
- 'metadata' : {'difficulty' : 'easy' , 'category' : 'general' },
1493
- 'expected_output' : {'answer' : '4' , 'confidence' : 1.0 },
1494
- 'output' : {'answer' : '4' , 'confidence' : 1.0 },
1495
- 'metrics' : {},
1496
- 'attributes' : {},
1497
- 'scores' : {
1498
- 'confidence' : {
1499
- 'name' : 'confidence' ,
1500
- 'value' : 1.0 ,
1501
- 'reason' : None ,
1502
- 'source' : {'name' : 'SimpleEvaluator' , 'arguments' : None },
1503
- }
1504
- },
1505
- 'labels' : {},
1506
- 'assertions' : {
1507
- 'correct' : {
1508
- 'name' : 'correct' ,
1509
- 'value' : True ,
1510
- 'reason' : None ,
1511
- 'source' : {'name' : 'SimpleEvaluator' , 'arguments' : None },
1512
- }
1513
- },
1514
- 'task_duration' : 1.0 ,
1515
- 'total_duration' : 10.0 ,
1516
- 'trace_id' : '00000000000000000000000000000001' ,
1517
- 'span_id' : '0000000000000003' ,
1518
- 'evaluator_failures' : [],
1519
- },
1520
- {
1521
- 'name' : 'case2' ,
1522
- 'inputs' : {'query' : 'What is the capital of France?' },
1523
- 'metadata' : {'difficulty' : 'medium' , 'category' : 'geography' },
1524
- 'expected_output' : {'answer' : 'Paris' , 'confidence' : 1.0 },
1525
- 'output' : {'answer' : 'Paris' , 'confidence' : 1.0 },
1526
- 'metrics' : {},
1527
- 'attributes' : {},
1528
- 'scores' : {
1529
- 'confidence' : {
1530
- 'name' : 'confidence' ,
1531
- 'value' : 1.0 ,
1532
- 'reason' : None ,
1533
- 'source' : {'name' : 'SimpleEvaluator' , 'arguments' : None },
1534
- }
1535
- },
1536
- 'labels' : {},
1537
- 'assertions' : {
1538
- 'correct' : {
1539
- 'name' : 'correct' ,
1540
- 'value' : True ,
1541
- 'reason' : None ,
1542
- 'source' : {'name' : 'SimpleEvaluator' , 'arguments' : None },
1543
- }
1544
- },
1545
- 'task_duration' : 1.0 ,
1546
- 'total_duration' : 8.0 ,
1547
- 'trace_id' : '00000000000000000000000000000001' ,
1548
- 'span_id' : '0000000000000007' ,
1549
- 'evaluator_failures' : [],
1550
- },
1551
- ],
1552
- 'failures' : [],
1553
- 'averages' : {
1554
- 'name' : 'Averages' ,
1555
- 'scores' : {'confidence' : 1.0 },
1556
- 'labels' : {},
1557
- 'metrics' : {},
1558
- 'assertions' : 1.0 ,
1559
- 'task_duration' : 1.0 ,
1560
- 'total_duration' : 9.0 ,
1561
- },
1562
1490
'logfire.json_schema' : {
1563
1491
'type' : 'object' ,
1564
- 'properties' : {
1565
- 'name' : {},
1566
- 'cases' : {'type' : 'array' },
1567
- 'failures' : {'type' : 'array' },
1568
- 'averages' : {'type' : 'object' },
1569
- },
1492
+ 'properties' : {'name' : {}, 'n_cases' : {}, 'assertion_pass_rate' : {}},
1570
1493
},
1571
1494
},
1572
1495
),
0 commit comments