File tree Expand file tree Collapse file tree 7 files changed +24
-22
lines changed
Microsoft.Extensions.AI.Evaluation.NLP
Microsoft.Extensions.AI.Evaluation.Quality Expand file tree Collapse file tree 7 files changed +24
-22
lines changed Original file line number Diff line number Diff line change @@ -20,11 +20,11 @@ namespace Microsoft.Extensions.AI.Evaluation.NLP;
2020/// </summary>
2121/// <remarks>
2222/// <para>
23- /// The <see cref="BLEUEvaluator"/> computes the BLEU score of a response ("hypothesis") compared to a reference
24- /// supplied via <see cref="BLEUEvaluatorContext.References"/>. The score is returned in a <see cref="NumericMetric"/>
25- /// with a value between 0.0 and 1.0 where 0.0 represents no match at all and 1.0 indicates a perfect match.
26- /// By default, the score is interpreted with a pass/fail cutoff of 0.5. So a score of 0.5 or higher is
27- /// passing and a score below 0.5 is failing.
23+ /// The <see cref="BLEUEvaluator"/> computes the BLEU score of a response ("hypothesis") compared to one or more
24+ /// reference responses supplied via <see cref="BLEUEvaluatorContext.References"/>. The score is returned in a
25+ /// <see cref="NumericMetric"/> with a value between 0.0 and 1.0 where 0.0 represents no match at all and 1.0 indicates
26+ /// a perfect match. By default, the score is interpreted with a pass/fail cutoff of 0.5. So a score of 0.5 or higher
27+ /// is passing and a score below 0.5 is failing.
2828/// </para>
2929/// </remarks>
3030public sealed class BLEUEvaluator : IEvaluator
Original file line number Diff line number Diff line change @@ -15,8 +15,9 @@ namespace Microsoft.Extensions.AI.Evaluation.NLP;
1515/// Contextual information that the <see cref="BLEUEvaluator"/> uses to compute the BLEU score for a response.
1616/// </summary>
1717/// <remarks>
18- /// <see cref="BLEUEvaluator"/> measures the BLEU score of a response compared to a reference. BLEU (Bilingual Evaluation Understudy)
19- /// is a metric used to evaluate the quality of machine-generated text.
18+ /// <see cref="BLEUEvaluator"/> measures the BLEU score of a response compared to one or more reference responses
19+ /// supplied via <see cref="References"/>. BLEU (Bilingual Evaluation Understudy) is a metric used to evaluate the
20+ /// quality of machine-generated text.
2021/// </remarks>
2122public sealed class BLEUEvaluatorContext : EvaluationContext
2223{
@@ -31,7 +32,7 @@ public sealed class BLEUEvaluatorContext : EvaluationContext
3132 /// </summary>
3233 /// <remarks>
3334 /// The <see cref="BLEUEvaluator"/> measures the degree to which the response being evaluated is similar to
34- /// the response supplied via <see cref="References"/>. The metric will be reported as a BLEU score.
35+ /// the responses supplied via <see cref="References"/>. The metric will be reported as a BLEU score.
3536 /// </remarks>
3637 public IReadOnlyList<string> References { get; }
3738
Original file line number Diff line number Diff line change @@ -20,11 +20,11 @@ namespace Microsoft.Extensions.AI.Evaluation.NLP;
2020/// </summary>
2121/// <remarks>
2222/// <para>
23- /// The <see cref="GLEUEvaluator"/> computes the GLEU score of a response ("hypothesis") compared to a reference
24- /// supplied via <see cref="GLEUEvaluatorContext.References"/>. The score is returned in a <see cref="NumericMetric"/>
25- /// with a value between 0.0 and 1.0 where 0.0 represents no match at all and 1.0 indicates a perfect match.
26- /// By default, the score is interpreted with a pass/fail cutoff of 0.5. So a score of 0.5 or higher is
27- /// passing and a score below 0.5 is failing.
23+ /// The <see cref="GLEUEvaluator"/> computes the GLEU score of a response ("hypothesis") compared to one or more
24+ /// reference responses supplied via <see cref="GLEUEvaluatorContext.References"/>. The score is returned in a
25+ /// <see cref="NumericMetric"/> with a value between 0.0 and 1.0 where 0.0 represents no match at all and 1.0 indicates
26+ /// a perfect match. By default, the score is interpreted with a pass/fail cutoff of 0.5. So a score of 0.5 or higher
27+ /// is passing and a score below 0.5 is failing.
2828/// </para>
2929/// </remarks>
3030public sealed class GLEUEvaluator : IEvaluator
Original file line number Diff line number Diff line change @@ -16,7 +16,8 @@ namespace Microsoft.Extensions.AI.Evaluation.NLP;
1616/// </summary>
1717/// <remarks>
1818/// <see cref="GLEUEvaluator"/> measures the GLEU score of a response compared to one or more reference responses
19- /// supplied via <see cref="References"/>. GLEU (Google-BLEU) is a metric used to evaluate the quality of machine-generated text.
19+ /// supplied via <see cref="References"/>. GLEU (Google-BLEU) is a metric used to evaluate the quality of
20+ /// machine-generated text.
2021/// </remarks>
2122public sealed class GLEUEvaluatorContext : EvaluationContext
2223{
@@ -27,11 +28,11 @@ public sealed class GLEUEvaluatorContext : EvaluationContext
2728 public static string ReferencesContextName => "References (GLEU)" ;
2829
2930 /// <summary>
30- /// Gets the reference against which the provided response will be scored.
31+ /// Gets the references against which the provided response will be scored.
3132 /// </summary>
3233 /// <remarks>
3334 /// The <see cref="GLEUEvaluator"/> measures the degree to which the response being evaluated is similar to
34- /// the response supplied via <see cref="References"/>. The metric will be reported as a GLEU score.
35+ /// the responses supplied via <see cref="References"/>. The metric will be reported as a GLEU score.
3536 /// </remarks>
3637 public IReadOnlyList<string> References { get; }
3738
Original file line number Diff line number Diff line change @@ -40,7 +40,7 @@ public sealed class IntentResolutionEvaluatorContext : EvaluationContext
4040 /// </para>
4141 /// </param>
4242 public IntentResolutionEvaluatorContext ( params AITool [ ] toolDefinitions )
43- : base ( name : IntentResolutionContextName , contents : [ new TextContent ( toolDefinitions . RenderAsJson ( ) ) ] )
43+ : base ( name : ToolDefinitionsContextName , contents : [ new TextContent ( toolDefinitions . RenderAsJson ( ) ) ] )
4444 {
4545 ToolDefinitions = [ .. toolDefinitions ] ;
4646 }
@@ -67,7 +67,7 @@ public IntentResolutionEvaluatorContext(IEnumerable<AITool> toolDefinitions)
6767 /// Gets the unique <see cref="EvaluationContext.Name"/> that is used for
6868 /// <see cref="IntentResolutionEvaluatorContext"/>.
6969 /// </summary>
70- public static string IntentResolutionContextName => "Tool Definitions (Intent Resolution)" ;
70+ public static string ToolDefinitionsContextName => "Tool Definitions (Intent Resolution)" ;
7171
7272 /// <summary>
7373 /// Gets the set of tool definitions (see <see cref="ChatOptions.Tools"/>) that were used when generating the model
Original file line number Diff line number Diff line change @@ -41,7 +41,7 @@ public sealed class TaskAdherenceEvaluatorContext : EvaluationContext
4141 /// </para>
4242 /// </param>
4343 public TaskAdherenceEvaluatorContext ( params AITool [ ] toolDefinitions )
44- : base ( name : TaskAdherenceContextName , contents : [ new TextContent ( toolDefinitions . RenderAsJson ( ) ) ] )
44+ : base ( name : ToolDefinitionsContextName , contents : [ new TextContent ( toolDefinitions . RenderAsJson ( ) ) ] )
4545 {
4646 ToolDefinitions = [ .. toolDefinitions ] ;
4747 }
@@ -68,7 +68,7 @@ public TaskAdherenceEvaluatorContext(IEnumerable<AITool> toolDefinitions)
6868 /// Gets the unique <see cref="EvaluationContext.Name"/> that is used for
6969 /// <see cref="TaskAdherenceEvaluatorContext"/>.
7070 /// </summary>
71- public static string TaskAdherenceContextName => "Tool Definitions (Task Adherence)" ;
71+ public static string ToolDefinitionsContextName => "Tool Definitions (Task Adherence)" ;
7272
7373 /// <summary>
7474 /// Gets the set of tool definitions (see <see cref="ChatOptions.Tools"/>) that were used when generating the model
Original file line number Diff line number Diff line change @@ -42,7 +42,7 @@ public sealed class ToolCallAccuracyEvaluatorContext : EvaluationContext
4242 /// </para>
4343 /// </param>
4444 public ToolCallAccuracyEvaluatorContext ( params AITool [ ] toolDefinitions )
45- : base ( name : ToolCallAccuracyContextName , contents : [ new TextContent ( toolDefinitions . RenderAsJson ( ) ) ] )
45+ : base ( name : ToolDefinitionsContextName , contents : [ new TextContent ( toolDefinitions . RenderAsJson ( ) ) ] )
4646 {
4747 ToolDefinitions = [ .. toolDefinitions ] ;
4848 }
@@ -69,7 +69,7 @@ public ToolCallAccuracyEvaluatorContext(IEnumerable<AITool> toolDefinitions)
6969 /// Gets the unique <see cref="EvaluationContext.Name"/> that is used for
7070 /// <see cref="ToolCallAccuracyEvaluatorContext"/>.
7171 /// </summary>
72- public static string ToolCallAccuracyContextName => "Tool Definitions (Tool Call Accuracy)" ;
72+ public static string ToolDefinitionsContextName => "Tool Definitions (Tool Call Accuracy)" ;
7373
7474 /// <summary>
7575 /// Gets the set of tool definitions (see <see cref="ChatOptions.Tools"/>) that were used when generating the model
You can’t perform that action at this time.
0 commit comments