Skip to content

Commit 4ee13d7

Browse files
authored
[Text Analytics] Add support for script detection (Azure#32197)
- Added `Script` property to `DetectedLanguage`. - Added `ScriptKind` enum.
1 parent 442c3ea commit 4ee13d7

12 files changed

+258
-5
lines changed

sdk/textanalytics/Azure.AI.TextAnalytics/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
- Added `WellKnownFhirVersion` and `HealthcareDocumentType` enums.
1010
- Added `TextAnalyticsActions.ExtractSummaryActions` to perform extractive summarization in a batch of actions.
1111
- Added `TextAnalyticsClient.StartExtractSummary` and `StartExtractSummaryAsync` to perform extractive summarization on a collection of documents.
12+
- Added `Script` property to `DetectedLanguage`.
13+
- Added `ScriptKind` enum.
1214

1315
### Breaking Changes
1416

sdk/textanalytics/Azure.AI.TextAnalytics/api/Azure.AI.TextAnalytics.netstandard2.0.cs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ public readonly partial struct DetectedLanguage
224224
public double ConfidenceScore { get { throw null; } }
225225
public string Iso6391Name { get { throw null; } }
226226
public string Name { get { throw null; } }
227+
public Azure.AI.TextAnalytics.ScriptKind? Script { get { throw null; } }
227228
public System.Collections.Generic.IReadOnlyCollection<Azure.AI.TextAnalytics.TextAnalyticsWarning> Warnings { get { throw null; } }
228229
}
229230
public partial class DetectLanguageInput : Azure.AI.TextAnalytics.TextAnalyticsInput
@@ -925,6 +926,23 @@ internal RecognizePiiEntitiesResultCollection() : base (default(System.Collectio
925926
public Azure.AI.TextAnalytics.TextDocumentBatchStatistics Statistics { get { throw null; } }
926927
}
927928
[System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]
929+
public readonly partial struct ScriptKind : System.IEquatable<Azure.AI.TextAnalytics.ScriptKind>
930+
{
931+
private readonly object _dummy;
932+
private readonly int _dummyPrimitive;
933+
public ScriptKind(string value) { throw null; }
934+
public static Azure.AI.TextAnalytics.ScriptKind Latin { get { throw null; } }
935+
public bool Equals(Azure.AI.TextAnalytics.ScriptKind other) { throw null; }
936+
[System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
937+
public override bool Equals(object obj) { throw null; }
938+
[System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
939+
public override int GetHashCode() { throw null; }
940+
public static bool operator ==(Azure.AI.TextAnalytics.ScriptKind left, Azure.AI.TextAnalytics.ScriptKind right) { throw null; }
941+
public static implicit operator Azure.AI.TextAnalytics.ScriptKind (string value) { throw null; }
942+
public static bool operator !=(Azure.AI.TextAnalytics.ScriptKind left, Azure.AI.TextAnalytics.ScriptKind right) { throw null; }
943+
public override string ToString() { throw null; }
944+
}
945+
[System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]
928946
public readonly partial struct SentenceOpinion
929947
{
930948
private readonly object _dummy;
@@ -1237,6 +1255,8 @@ public static partial class TextAnalyticsModelFactory
12371255
public static Azure.AI.TextAnalytics.ClassifyDocumentResult ClassifyDocumentResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; }
12381256
public static Azure.AI.TextAnalytics.ClassifyDocumentResult ClassifyDocumentResult(string id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.ClassificationCategoryCollection documentClassificationCollection, System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.TextAnalyticsWarning> warnings = null) { throw null; }
12391257
public static Azure.AI.TextAnalytics.ClassifyDocumentResultCollection ClassifyDocumentResultCollection(System.Collections.Generic.IEnumerable<Azure.AI.TextAnalytics.ClassifyDocumentResult> classificationResultList, Azure.AI.TextAnalytics.TextDocumentBatchStatistics statistics, string projectName, string deploymentName) { throw null; }
1258+
public static Azure.AI.TextAnalytics.DetectedLanguage DetectedLanguage(string name, string iso6391Name, double confidenceScore, Azure.AI.TextAnalytics.ScriptKind script, System.Collections.Generic.IList<Azure.AI.TextAnalytics.TextAnalyticsWarning> warnings = null) { throw null; }
1259+
[System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
12401260
public static Azure.AI.TextAnalytics.DetectedLanguage DetectedLanguage(string name, string iso6391Name, double confidenceScore, System.Collections.Generic.IList<Azure.AI.TextAnalytics.TextAnalyticsWarning> warnings = null) { throw null; }
12411261
public static Azure.AI.TextAnalytics.DetectLanguageResult DetectLanguageResult(string id, Azure.AI.TextAnalytics.TextAnalyticsError error) { throw null; }
12421262
public static Azure.AI.TextAnalytics.DetectLanguageResult DetectLanguageResult(string id, Azure.AI.TextAnalytics.TextDocumentStatistics statistics, Azure.AI.TextAnalytics.DetectedLanguage detectedLanguage) { throw null; }

sdk/textanalytics/Azure.AI.TextAnalytics/src/DetectedLanguage.cs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,16 @@ namespace Azure.AI.TextAnalytics
1313
public readonly struct DetectedLanguage
1414
{
1515
internal DetectedLanguage(DetectedLanguageInternal language, IList<TextAnalyticsWarning> warnings)
16-
: this(language.Name, language.Iso6391Name, language.ConfidenceScore, warnings)
16+
: this(language.Name, language.Iso6391Name, language.ConfidenceScore, language.Script, warnings)
1717
{
1818
}
1919

20-
internal DetectedLanguage(string name, string iso6391Name, double confidenceScore, IList<TextAnalyticsWarning> warnings)
20+
internal DetectedLanguage(string name, string iso6391Name, double confidenceScore, ScriptKind? script, IList<TextAnalyticsWarning> warnings)
2121
{
2222
Name = name;
2323
Iso6391Name = iso6391Name;
2424
ConfidenceScore = confidenceScore;
25+
Script = script;
2526
Warnings = new ReadOnlyCollection<TextAnalyticsWarning>(warnings);
2627
}
2728

@@ -43,6 +44,12 @@ internal DetectedLanguage(string name, string iso6391Name, double confidenceScor
4344
/// </summary>
4445
public double ConfidenceScore { get; }
4546

47+
/// <summary>
48+
/// Gets the non-native script of the detected language, if applicable
49+
/// (for example, "Latin" in the case of romanized Hindi).
50+
/// </summary>
51+
public ScriptKind? Script { get; }
52+
4653
/// <summary>
4754
/// Gets the warnings encountered while processing the document.
4855
/// </summary>

sdk/textanalytics/Azure.AI.TextAnalytics/src/Generated/Models/DetectedLanguageInternal.Serialization.cs

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sdk/textanalytics/Azure.AI.TextAnalytics/src/Generated/Models/DetectedLanguageInternal.cs

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sdk/textanalytics/Azure.AI.TextAnalytics/src/Generated/Models/ScriptKind.cs

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
using Azure.Core;
5+
6+
namespace Azure.AI.TextAnalytics
7+
{
// Hand-written part of the partial ScriptKind struct. It declares no members of its own;
// the [CodeGenModel] attribute associates this declaration with the "ScriptKind" model.
// NOTE(review): the members (e.g. Latin, equality operators) are presumably supplied by the
// generated partial definition under src/Generated/Models/ScriptKind.cs - confirm.
8+
[CodeGenModel("ScriptKind")]
9+
public readonly partial struct ScriptKind
10+
{
11+
}
12+
}

sdk/textanalytics/Azure.AI.TextAnalytics/src/TextAnalyticsModelFactory.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,12 +216,28 @@ public static AnalyzeSentimentResultCollection AnalyzeSentimentResultCollection(
216216
/// <param name="confidenceScore">Sets the <see cref="DetectedLanguage.ConfidenceScore"/> property.</param>
217217
/// <param name="warnings">Sets the <see cref="DetectedLanguage.Warnings"/> property.</param>
218218
/// <returns>A new instance of <see cref="TextAnalytics.DetectedLanguage"/> for mocking purposes.</returns>
219+
[EditorBrowsable(EditorBrowsableState.Never)]
219220
public static DetectedLanguage DetectedLanguage(string name, string iso6391Name, double confidenceScore, IList<TextAnalyticsWarning> warnings = default)
220221
{
// This overload has no script parameter and is marked EditorBrowsableState.Never; the overload
// that also accepts a ScriptKind is the one left visible to editors.
221222
// Substitute an empty list when the caller omits warnings so a non-null list is passed on.
warnings ??= new List<TextAnalyticsWarning>();
222223
// `default` fills the script position of DetectedLanguageInternal, i.e. no script is supplied.
return new DetectedLanguage(new DetectedLanguageInternal(name, iso6391Name, confidenceScore, default), warnings);
223224
}
224225

226+
/// <summary>
227+
/// Initializes a new instance of <see cref="TextAnalytics.DetectedLanguage"/> for mocking purposes.
228+
/// </summary>
229+
/// <param name="name">Sets the <see cref="DetectedLanguage.Name"/> property.</param>
230+
/// <param name="iso6391Name">Sets the <see cref="DetectedLanguage.Iso6391Name"/> property.</param>
231+
/// <param name="confidenceScore">Sets the <see cref="DetectedLanguage.ConfidenceScore"/> property.</param>
232+
/// <param name="script">Sets the <see cref="DetectedLanguage.Script"/> property.</param>
233+
/// <param name="warnings">Sets the <see cref="DetectedLanguage.Warnings"/> property. When omitted or <c>null</c>, an empty list is used.</param>
234+
/// <returns>A new instance of <see cref="TextAnalytics.DetectedLanguage"/> for mocking purposes.</returns>
235+
public static DetectedLanguage DetectedLanguage(string name, string iso6391Name, double confidenceScore, ScriptKind script, IList<TextAnalyticsWarning> warnings = default)
236+
{
237+
// The DetectedLanguage constructor wraps the list in a ReadOnlyCollection, which does not
// accept null, so fall back to an empty list when no warnings are supplied.
warnings ??= new List<TextAnalyticsWarning>();
238+
// Same construction path as the overload without a script, except that the caller's
// script value is forwarded instead of `default`.
return new DetectedLanguage(new DetectedLanguageInternal(name, iso6391Name, confidenceScore, script), warnings);
239+
}
240+
225241
/// <summary>
226242
/// Initializes a new instance of <see cref="TextAnalytics.DetectLanguageResult"/> for mocking purposes.
227243
/// </summary>

sdk/textanalytics/Azure.AI.TextAnalytics/src/Transforms.Legacy.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ internal static TextAnalyticsOperationStatus ConvertToTextAnalyticsOperationStat
130130
internal static DetectedLanguage ConvertToDetectedLanguage(Legacy.DocumentLanguage documentLanguage)
131131
{
132132
var detected = documentLanguage.DetectedLanguage;
133-
return new DetectedLanguage(detected.Name, detected.Iso6391Name, detected.ConfidenceScore, ConvertToWarnings(documentLanguage.Warnings));
133+
return new DetectedLanguage(detected.Name, detected.Iso6391Name, detected.ConfidenceScore, default, ConvertToWarnings(documentLanguage.Warnings));
134134
}
135135

136136
internal static DetectLanguageResultCollection ConvertToDetectLanguageResultCollection(Legacy.LanguageResult results, IDictionary<string, int> idToIndexMap)

sdk/textanalytics/Azure.AI.TextAnalytics/tests/DetectLanguageTests.cs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
using System;
55
using System.Collections.Generic;
6+
using System.Linq;
67
using System.Threading.Tasks;
78
using Azure.Core.TestFramework;
89
using NUnit.Framework;
@@ -266,11 +267,39 @@ public void DetectLanguageBatchDisableServiceLogsThrows()
266267
Assert.AreEqual("TextAnalyticsRequestOptions.DisableServiceLogs is not available in API version v3.0. Use service API version v3.1 or newer.", ex.Message);
267268
}
268269

270+
// Verifies that DetectedLanguage.Script is populated by a batch language-detection call:
// it is expected to be null for the first document and ScriptKind.Latin for the second.
[RecordedTest]
271+
[ServiceVersion(Min = TextAnalyticsClientOptions.ServiceVersion.V2022_10_01_Preview)]
272+
public async Task DetectLanguageBatchWithScriptTest()
273+
{
274+
TextAnalyticsClient client = GetClient();
275+
276+
// BUGBUG: The only model version that currently supports script detection is 2022-04-10-preview, which is even older
277+
// than the latest GA API version (i.e., 2022-05-01). Ideally, we shouldn't have to pin to such an old version.
278+
// See https://github.com/Azure/azure-sdk-for-net/issues/32234.
279+
TextAnalyticsRequestOptions options = new TextAnalyticsRequestOptions() { ModelVersion = "2022-04-10-preview" };
280+
281+
// Two documents with explicit ids so each result can be looked up by id below.
DetectLanguageResultCollection results = await client.DetectLanguageBatchAsync(new List<DetectLanguageInput>() {
282+
new("1", "What is your name?"),
283+
new("2", "Tumhara naam kya hai?")
284+
}, options: options);
285+
286+
// Document "1": no script is expected to be reported for this input.
DetectLanguageResult result1 = results.Where(result => result.Id == "1").FirstOrDefault();
287+
Assert.IsNotNull(result1);
288+
Assert.IsNull(result1.PrimaryLanguage.Script);
289+
290+
// Document "2": presumably romanized Hindi (the example given in the Script property docs),
// so the Latin script is expected.
DetectLanguageResult result2 = results.Where(result => result.Id == "2").FirstOrDefault();
291+
Assert.IsNotNull(result2);
292+
Assert.AreEqual(ScriptKind.Latin, result2.PrimaryLanguage.Script);
293+
294+
// Run the shared batch validation on the whole result collection as well.
ValidateBatchDocumentsResult(results);
295+
}
296+
269297
// Sanity-checks a single DetectedLanguage: both names must be non-empty, the confidence
// score must lie within [0.0, 1.0], and the warnings collection must not be null.
private void ValidateInDocumenResult(DetectedLanguage language)
270298
{
271299
Assert.That(language.Name, Is.Not.Null.And.Not.Empty);
272300
Assert.That(language.Iso6391Name, Is.Not.Null.And.Not.Empty);
273301
// Lower bound of the confidence score.
Assert.GreaterOrEqual(language.ConfidenceScore, 0.0);
302+
// Upper bound of the confidence score.
Assert.LessOrEqual(language.ConfidenceScore, 1.0);
274303
Assert.IsNotNull(language.Warnings);
275304
}
276305

0 commit comments

Comments
 (0)