|
1 | | -// Eval Response |
2 | | - |
| 1 | +// Eval Create - Response |
3 | 2 | { |
4 | 3 | "object": "eval", |
5 | 4 | "created_at": 1762479886, |
|
282 | 281 | ] |
283 | 282 | } |
284 | 283 |
|
285 | | - |
286 | 284 | // Evaluation Results (Used for Mainline UI) |
287 | | - |
288 | 285 | { |
289 | 286 | "type": "evaluationresults", |
290 | 287 | "name": "eval-result-evalrun_4130272e0793469faa407eee7a8c2a28-CAKR", |
|
358 | 355 | } |
359 | 356 | } |
360 | 357 |
|
361 | | - |
362 | | -// Eval Run |
363 | | - |
364 | | -// End of eval results |
365 | | - |
| 358 | +// Eval Run - Response |
366 | 359 | { |
367 | 360 | "first_id": "evalrun_4130272e0793469faa407eee7a8c2a28", |
368 | 361 | "last_id": "evalrun_4130272e0793469faa407eee7a8c2a28", |
|
477 | 470 | "has_more": false |
478 | 471 | } |
479 | 472 |
|
480 | | -// eval results |
481 | | - |
482 | | -{ |
483 | | - "first_id": "evalrun_4130272e0793469faa407eee7a8c2a28", |
484 | | - "last_id": "evalrun_4130272e0793469faa407eee7a8c2a28", |
485 | | - "object": "list", |
486 | | - "data": [ |
487 | | - { |
488 | | - "object": "eval.run", |
489 | | - "created_at": 1762479888, |
490 | | - "modified_at": 1762479904, |
491 | | - "created_by": "Waqas Javed", |
492 | | - "id": "evalrun_4130272e0793469faa407eee7a8c2a28", |
493 | | - "eval_id": "eval_c536320b386d46e1a8858936c1173390", |
494 | | - "status": "completed", |
495 | | - "data_source": { |
496 | | - "type": "jsonl", |
497 | | - "source": { |
498 | | - "type": "file_id", |
499 | | - "id": "azureai://accounts/aprilk-westus2/projects/aprilk-westus2-proj/data/q_res_gt_context/versions/1" |
500 | | - } |
501 | | - }, |
502 | | - "metadata": {}, |
503 | | - "properties": { |
504 | | - "eval_id": "eval_c536320b386d46e1a8858936c1173390", |
505 | | - "runType": "eval_run", |
506 | | - "_azureml.evaluation_run": "evaluation.service", |
507 | | - "_azureml.evaluate_artifacts": "[{\"path\": \"instance_results.jsonl\", \"type\": \"table\"}]", |
508 | | - "job_logs": "azureai://accounts/aprilk-westus2/projects/aprilk-westus2-proj/evaluationresults/eval-result-evalrun_4130272e0793469faa407eee7a8c2a28-CAKR/versions/1" |
509 | | - }, |
510 | | - "name": "eval-5aaqoeyv-all-quality", |
511 | | - "report_url": "https://ai.azure.com/nextgen/r/sWFUWMHqSbyFJsr8lI08JQ,rg-nextgen,,aprilk-westus2,aprilk-westus2-proj/build/evaluations/eval_c536320b386d46e1a8858936c1173390/run/evalrun_4130272e0793469faa407eee7a8c2a28", |
512 | | - "result_counts": { |
513 | | - "total": 1, |
514 | | - "passed": 0, |
515 | | - "failed": 0, |
516 | | - "errored": 1 |
517 | | - }, |
518 | | - "per_model_usage": [ |
519 | | - { |
520 | | - "model_name": "gpt-4o-mini-2024-07-18", |
521 | | - "invocation_count": 7, |
522 | | - "total_tokens": 11768, |
523 | | - "prompt_tokens": 10840, |
524 | | - "completion_tokens": 928, |
525 | | - "cached_tokens": 0 |
526 | | - } |
527 | | - ], |
528 | | - "per_testing_criteria_results": [ |
529 | | - { |
530 | | - "testing_criteria": "Retrieval", |
531 | | - "passed": 0, |
532 | | - "failed": 1 |
533 | | - }, |
534 | | - { |
535 | | - "testing_criteria": "ResponseCompleteness", |
536 | | - "passed": 0, |
537 | | - "failed": 1 |
538 | | - }, |
539 | | - { |
540 | | - "testing_criteria": "Relevance", |
541 | | - "passed": 1, |
542 | | - "failed": 0 |
543 | | - }, |
544 | | - { |
545 | | - "testing_criteria": "Groundedness", |
546 | | - "passed": 1, |
547 | | - "failed": 0 |
548 | | - }, |
549 | | - { |
550 | | - "testing_criteria": "Fluency", |
551 | | - "passed": 0, |
552 | | - "failed": 1 |
553 | | - }, |
554 | | - { |
555 | | - "testing_criteria": "Similarity", |
556 | | - "passed": 1, |
557 | | - "failed": 0 |
558 | | - }, |
559 | | - { |
560 | | - "testing_criteria": "ROUGEScore", |
561 | | - "passed": 3, |
562 | | - "failed": 0 |
563 | | - }, |
564 | | - { |
565 | | - "testing_criteria": "METEORScore", |
566 | | - "passed": 0, |
567 | | - "failed": 1 |
568 | | - }, |
569 | | - { |
570 | | - "testing_criteria": "GLEUScore", |
571 | | - "passed": 0, |
572 | | - "failed": 1 |
573 | | - }, |
574 | | - { |
575 | | - "testing_criteria": "F1Score", |
576 | | - "passed": 1, |
577 | | - "failed": 0 |
578 | | - }, |
579 | | - { |
580 | | - "testing_criteria": "Coherence", |
581 | | - "passed": 1, |
582 | | - "failed": 0 |
583 | | - }, |
584 | | - { |
585 | | - "testing_criteria": "BLEUScore", |
586 | | - "passed": 0, |
587 | | - "failed": 1 |
588 | | - } |
589 | | - ], |
590 | | - "error": null |
591 | | - } |
592 | | - ], |
593 | | - "has_more": false |
594 | | -} |
595 | 473 |
|
596 | 474 | // Eval Run Output Items |
597 | | - |
598 | 475 | { |
599 | 476 | "data": [ |
600 | 477 | { |
|
0 commit comments