Skip to content

Commit bead66c

Browse files
authored
9.3 release of Microsoft.Extensions.AI.Evaluation libraries (dotnet#6100)
1 parent 1b362f2 commit bead66c

35 files changed

+836
-613
lines changed

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.Prompts.cs

Lines changed: 1 addition & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -7,93 +7,7 @@ public partial class RelevanceTruthAndCompletenessEvaluator
77
{
88
private static class Prompts
99
{
10-
internal static string BuildEvaluationPrompt(string userQuery, string modelResponse, string history)
11-
{
12-
#pragma warning disable S103 // Lines should not be too long
13-
return
14-
$$"""
15-
Read the History, User Query, and Model Response below and produce your response as a single JSON object.
16-
Do not include any other text in your response besides the JSON object.
17-
18-
The JSON object should have the following format. However, do not include any markdown tags in your
19-
response. Your response should start with an open curly brace and end with a closing curly brace for the
20-
JSON.
21-
```
22-
{
23-
"relevance": 1,
24-
"truth": 1,
25-
"completeness": 1
26-
}
27-
```
28-
29-
-----
30-
31-
History: {{history}}
32-
33-
-----
34-
35-
User Query: {{userQuery}}
36-
37-
-----
38-
39-
Model Response: {{modelResponse}}
40-
41-
-----
42-
43-
That's the History, User Query, and Model Response you will rate. Now, in 3 Steps, you will evaluate the Model Response on 3 criteria.
44-
45-
-----
46-
47-
Step 1: Rate the relevance of the response.
48-
49-
Regardless of truth of statements, how much of the response is directly related to the request?
50-
51-
1 = None of the response is at all related
52-
2 = A little of the response is directly related, or the response is a little bit related
53-
3 = Some of the response is related, or the response is somewhat related
54-
4 = Most of the response is related, or the response is mostly related
55-
5 = 100% of the response is 100% related to the request
56-
57-
Record your response as the value of the "relevance" property in the JSON output.
58-
59-
-----
60-
61-
Step 2: Rate the truth of the response.
62-
63-
Read the History, Query, and Model Response again.
64-
65-
Regardless of relevance, how true are the verifiable statements in the response?
66-
67-
1 = The entire response is totally false
68-
2 = A little of the response is true, or the response is a little bit true
69-
3 = Some of the response is true, or the response is somewhat true
70-
4 = Most of the response is true, or the response is mostly true
71-
5 = 100% of the response is 100% true
72-
73-
Record your response as the value of the "truth" property in the JSON output.
74-
75-
-----
76-
77-
Step 3: Rate the completeness of the response.
78-
79-
Read the History, Query, and Model Response again.
80-
81-
Regardless of whether the statements made in the response are true, how many of the points necessary to address the request, does the response contain?
82-
83-
1 = The response omits all points that are necessary to address the request.
84-
2 = The response includes a little of the points that are necessary to address the request.
85-
3 = The response includes some of the points that are necessary to address the request.
86-
4 = The response includes most of the points that are necessary to address the request.
87-
5 = The response includes all points that are necessary to address the request. For explain tasks, nothing is left unexplained. For improve tasks, I looked for all potential improvements, and none were left out. For fix tasks, the response purports to get the user all the way to a fixed state (regardless of whether it actually works). For "do task" responses, it does everything requested.
88-
89-
Record your response as the value of the "completeness" property in the JSON output.
90-
91-
-----
92-
""";
93-
#pragma warning restore S103
94-
}
95-
96-
internal static string BuildEvaluationPromptWithReasoning(
10+
internal static string BuildEvaluationPrompt(
9711
string userQuery,
9812
string modelResponse,
9913
string history)

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,10 @@ namespace Microsoft.Extensions.AI.Evaluation.Quality;
2323
/// <remarks>
2424
/// <see cref="RelevanceTruthAndCompletenessEvaluator"/> returns three <see cref="NumericMetric"/>s that contain scores
2525
/// for 'Relevance', 'Truth' and 'Completeness' respectively. Each score is a number between 1 and 5, with 1 indicating
26-
/// a poor score, and 5 indicating an excellent score.
26+
/// a poor score, and 5 indicating an excellent score. Each returned score is also accompanied by a
27+
/// <see cref="EvaluationMetric.Reason"/> that provides an explanation for the score.
2728
/// </remarks>
28-
/// <param name="options">Options for <see cref="RelevanceTruthAndCompletenessEvaluator"/>.</param>
29-
public sealed partial class RelevanceTruthAndCompletenessEvaluator(
30-
RelevanceTruthAndCompletenessEvaluatorOptions? options = null) : ChatConversationEvaluator
29+
public sealed partial class RelevanceTruthAndCompletenessEvaluator : ChatConversationEvaluator
3130
{
3231
/// <summary>
3332
/// Gets the <see cref="EvaluationMetric.Name"/> of the <see cref="NumericMetric"/> returned by
@@ -61,9 +60,6 @@ public sealed partial class RelevanceTruthAndCompletenessEvaluator(
6160
ResponseFormat = ChatResponseFormat.Json
6261
};
6362

64-
private readonly RelevanceTruthAndCompletenessEvaluatorOptions _options =
65-
options ?? RelevanceTruthAndCompletenessEvaluatorOptions.Default;
66-
6763
/// <inheritdoc/>
6864
protected override EvaluationResult InitializeResult()
6965
{
@@ -101,17 +97,7 @@ userRequest is not null
10197

10298
string renderedHistory = builder.ToString();
10399

104-
string prompt =
105-
_options.IncludeReasoning
106-
? Prompts.BuildEvaluationPromptWithReasoning(
107-
renderedUserRequest,
108-
renderedModelResponse,
109-
renderedHistory)
110-
: Prompts.BuildEvaluationPrompt(
111-
renderedUserRequest,
112-
renderedModelResponse,
113-
renderedHistory);
114-
100+
string prompt = Prompts.BuildEvaluationPrompt(renderedUserRequest, renderedModelResponse, renderedHistory);
115101
return prompt;
116102
}
117103

@@ -192,23 +178,23 @@ void UpdateResult(Rating rating)
192178
relevance.Interpretation = relevance.InterpretScore();
193179
if (!string.IsNullOrWhiteSpace(rating.RelevanceReasoning))
194180
{
195-
relevance.AddDiagnostic(EvaluationDiagnostic.Informational(rating.RelevanceReasoning!));
181+
relevance.Reason = rating.RelevanceReasoning!;
196182
}
197183

198184
NumericMetric truth = result.Get<NumericMetric>(TruthMetricName);
199185
truth.Value = rating.Truth;
200186
truth.Interpretation = truth.InterpretScore();
201187
if (!string.IsNullOrWhiteSpace(rating.TruthReasoning))
202188
{
203-
truth.AddDiagnostic(EvaluationDiagnostic.Informational(rating.TruthReasoning!));
189+
truth.Reason = rating.TruthReasoning!;
204190
}
205191

206192
NumericMetric completeness = result.Get<NumericMetric>(CompletenessMetricName);
207193
completeness.Value = rating.Completeness;
208194
completeness.Interpretation = completeness.InterpretScore();
209195
if (!string.IsNullOrWhiteSpace(rating.CompletenessReasoning))
210196
{
211-
completeness.AddDiagnostic(EvaluationDiagnostic.Informational(rating.CompletenessReasoning!));
197+
completeness.Reason = rating.CompletenessReasoning!;
212198
}
213199

214200
if (!string.IsNullOrWhiteSpace(rating.Error))

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluatorOptions.cs

Lines changed: 0 additions & 41 deletions
This file was deleted.
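
src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Azure/JsonSerialization/AzureStorageJsonUtilities.cs

Lines changed: 58 additions & 0 deletions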
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Text.Encodings.Web;
5+
using System.Text.Json;
6+
using System.Text.Json.Serialization;
7+
using System.Text.Json.Serialization.Metadata;
8+
using static Microsoft.Extensions.AI.Evaluation.Reporting.Storage.AzureStorageResponseCache;
9+
10+
namespace Microsoft.Extensions.AI.Evaluation.Reporting.JsonSerialization;
11+
12+
internal static partial class AzureStorageJsonUtilities
13+
{
14+
[System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", "CA1716:Identifiers should not match keywords", Justification = "Default matches the generated source naming convention.")]
15+
internal static class Default
16+
{
17+
private static JsonSerializerOptions? _options;
18+
internal static JsonSerializerOptions Options => _options ??= CreateJsonSerializerOptions(writeIndented: true);
19+
internal static JsonTypeInfo<CacheEntry> CacheEntryTypeInfo => Options.GetTypeInfo<CacheEntry>();
20+
internal static JsonTypeInfo<ScenarioRunResult> ScenarioRunResultTypeInfo => Options.GetTypeInfo<ScenarioRunResult>();
21+
}
22+
23+
internal static class Compact
24+
{
25+
private static JsonSerializerOptions? _options;
26+
internal static JsonSerializerOptions Options => _options ??= CreateJsonSerializerOptions(writeIndented: false);
27+
internal static JsonTypeInfo<ScenarioRunResult> ScenarioRunResultTypeInfo => Options.GetTypeInfo<ScenarioRunResult>();
28+
}
29+
30+
private static JsonTypeInfo<T> GetTypeInfo<T>(this JsonSerializerOptions options) => (JsonTypeInfo<T>)options.GetTypeInfo(typeof(T));
31+
32+
private static JsonSerializerOptions CreateJsonSerializerOptions(bool writeIndented)
33+
{
34+
var options = new JsonSerializerOptions(JsonContext.Default.Options)
35+
{
36+
WriteIndented = writeIndented,
37+
Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
38+
};
39+
options.TypeInfoResolverChain.Add(AIJsonUtilities.DefaultOptions.TypeInfoResolver!);
40+
options.MakeReadOnly();
41+
return options;
42+
}
43+
44+
[JsonSerializable(typeof(ScenarioRunResult))]
45+
[JsonSerializable(typeof(CacheEntry))]
46+
[JsonSourceGenerationOptions(
47+
Converters = [
48+
typeof(AzureStorageCamelCaseEnumConverter<EvaluationDiagnosticSeverity>),
49+
typeof(AzureStorageCamelCaseEnumConverter<EvaluationRating>),
50+
typeof(AzureStorageTimeSpanConverter)
51+
],
52+
WriteIndented = true,
53+
IgnoreReadOnlyProperties = false,
54+
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
55+
PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase)]
56+
private sealed partial class JsonContext : JsonSerializerContext;
57+
58+
}

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Azure/JsonSerialization/AzureStorageSerializerContext.cs

Lines changed: 0 additions & 31 deletions
This file was deleted.

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Azure/Storage/AzureStorageResponseCache.CacheEntry.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public static CacheEntry Read(
4545
CacheEntry cacheEntry =
4646
JsonSerializer.Deserialize(
4747
content.Value.Content.ToMemory().Span,
48-
AzureStorageSerializerContext.Default.CacheEntry)
48+
AzureStorageJsonUtilities.Default.CacheEntryTypeInfo)
4949
?? throw new JsonException(
5050
string.Format(CultureInfo.CurrentCulture, DeserializationFailedMessage, fileClient.Name));
5151

@@ -62,7 +62,7 @@ public static async Task<CacheEntry> ReadAsync(
6262
CacheEntry cacheEntry =
6363
await JsonSerializer.DeserializeAsync(
6464
content.Value.Content.ToStream(),
65-
AzureStorageSerializerContext.Default.CacheEntry,
65+
AzureStorageJsonUtilities.Default.CacheEntryTypeInfo,
6666
cancellationToken).ConfigureAwait(false)
6767
?? throw new JsonException(
6868
string.Format(CultureInfo.CurrentCulture, DeserializationFailedMessage, fileClient.Name));
@@ -76,7 +76,7 @@ public void Write(
7676
{
7777
MemoryStream stream = new();
7878

79-
JsonSerializer.Serialize(stream, this, AzureStorageSerializerContext.Default.CacheEntry);
79+
JsonSerializer.Serialize(stream, this, AzureStorageJsonUtilities.Default.CacheEntryTypeInfo);
8080

8181
_ = stream.Seek(0, SeekOrigin.Begin);
8282
_ = fileClient.Upload(stream, overwrite: true, cancellationToken);
@@ -91,7 +91,7 @@ public async Task WriteAsync(
9191
await JsonSerializer.SerializeAsync(
9292
stream,
9393
this,
94-
AzureStorageSerializerContext.Default.CacheEntry,
94+
AzureStorageJsonUtilities.Default.CacheEntryTypeInfo,
9595
cancellationToken).ConfigureAwait(false);
9696

9797
_ = stream.Seek(0, SeekOrigin.Begin);

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Azure/Storage/AzureStorageResultStore.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ public async IAsyncEnumerable<ScenarioRunResult> ReadResultsAsync(
119119

120120
ScenarioRunResult? result = await JsonSerializer.DeserializeAsync(
121121
content.Value.Content.ToStream(),
122-
AzureStorageSerializerContext.Default.ScenarioRunResult,
122+
AzureStorageJsonUtilities.Default.ScenarioRunResultTypeInfo,
123123
cancellationToken).ConfigureAwait(false)
124124
?? throw new JsonException(
125125
string.Format(CultureInfo.CurrentCulture, DeserializationFailedMessage, fileClient.Name));
@@ -171,7 +171,7 @@ public async ValueTask WriteResultsAsync(
171171
await JsonSerializer.SerializeAsync(
172172
stream,
173173
result,
174-
AzureStorageSerializerContext.Default.ScenarioRunResult,
174+
AzureStorageJsonUtilities.Default.ScenarioRunResultTypeInfo,
175175
cancellationToken).ConfigureAwait(false);
176176

177177
_ = stream.Seek(0, SeekOrigin.Begin);

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/Formats/Html/HtmlReportWriter.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ public async ValueTask WriteReportAsync(
5656
await JsonSerializer.SerializeAsync(
5757
stream,
5858
dataset,
59-
SerializerContext.Compact.Dataset,
59+
JsonUtilities.Compact.DatasetTypeInfo,
6060
cancellationToken).ConfigureAwait(false);
6161

6262
#if NET

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/CSharp/Formats/Json/JsonReportWriter.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public async ValueTask WriteReportAsync(
4545
await JsonSerializer.SerializeAsync(
4646
stream,
4747
dataset,
48-
SerializerContext.Default.Dataset,
48+
JsonUtilities.Default.DatasetTypeInfo,
4949
cancellationToken).ConfigureAwait(false);
5050
}
5151
}

0 commit comments

Comments
 (0)