Skip to content

Commit 8a759a3

Browse files
authored
Add support for hi-IN Orthography Information (#2945)
* Add support for hi-IN OrthographyInformation * Default to English parsing * Move MAUI to .NET 9 to resolve build error * Remove BOM * Add trace warning macro when not using SDK macros
1 parent 388938d commit 8a759a3

File tree

7 files changed

+126
-5
lines changed

7 files changed

+126
-5
lines changed

samples/cpp/intent-recognition/samples/intent_recognizer/integer_entity.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ void CSpxIntegerEntity::Init(const std::string& name, const OrthographyInformati
8686
else
8787
{
8888
// fall back to default and use English
89-
SPX_TRACE_ERROR(
90-
"No explicit integer parser for '%s' language. Disabling integer parsing",
89+
SPX_TRACE_WARNING(
90+
"No explicit integer parser for '%s' language. Fallig back to English",
9191
orthography.Name.c_str());
92-
m_integerParser = std::static_pointer_cast<ISpxIntegerParser>(std::make_shared<NoOpIntegerParser>());
92+
m_integerParser = std::static_pointer_cast<ISpxIntegerParser>(std::make_shared<CSpxENIntegerParser>());
9393
}
9494
}
9595

samples/cpp/intent-recognition/samples/intent_recognizer/locale_information.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ namespace Intent {
1717
namespace Impl {
1818
namespace Locales {
1919

20-
static const std::array<const OrthographyInformation, 6> ORTHOGRAPHY_INFORMATION =
20+
static const std::array<const OrthographyInformation, 7> ORTHOGRAPHY_INFORMATION =
2121
{
2222
// English (the default one to use) should be first
2323
OrthographyInformation{
@@ -73,6 +73,15 @@ namespace Locales {
7373
"!。??", // SentenceEndCharacters
7474
{ }, // WordBoundary
7575
false // RightToLeft
76+
},
77+
OrthographyInformation{
78+
"hi",
79+
"\t\r\n ", // Whitespace
80+
"[]{}()!?।॥.,:;'\"@#$₹%&*+-=<>/\\^_=`~‑—‒⁻₋−➖﹣-…''""§@*/\\&#†‡′″£₤£﹩$€؉‰٪﹪%ʼ՚᾽᾿''∶︓﹕:․।︒﹒.。¥¥₩₩₨،٫⹁、︐︑﹐﹑،、⁺₊➕﬩﹢+", // InputPunctuation
81+
"!?।॥.,:;'\"@#$₹%&*+-=<>/\\^_=`~‑—‒⁻₋−➖﹣-…''""§@*/\\&#†‡′″£₤£﹩$€؉‰٪﹪%ʼ՚᾽᾿''∶︓﹕:․।︒﹒.。¥¥₩₩₨،٫⹁、︐︑﹐﹑،、⁺₊➕﬩﹢+", // PatternPunctuation
82+
"।!.?", // SentenceEndCharacters
83+
{ ' ' }, // WordBoundary
84+
false // RightToLeft
7685
}
7786
};
7887

samples/cpp/intent-recognition/samples/intent_recognizer/stdafx.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@
3232
#endif
3333
#define SPX_TRACE_ERROR(...)
3434

35+
#ifdef SPX_TRACE_WARNING
36+
#undef SPX_TRACE_WARNING
37+
#endif
38+
#define SPX_TRACE_WARNING(...)
39+
3540
#ifdef UNUSED
3641
#undef UNUSED
3742
#endif

samples/cpp/intent-recognition/samples/samples.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Copyright (c) Microsoft. All rights reserved.
33
// Licensed under the MIT license. See https://aka.ms/csspeech/license for the full license information.
44
//
5+
#include "intent_recognizer/stdafx.h"
56

67
#include <speechapi_cxx.h> // from Speech SDK
78
#include <intentapi_cxx.h> // from this project
@@ -5603,3 +5604,23 @@ TEST_CASE("IntentRecognizer::PatternMatching::ZH Prebuilt integer entities", "[z
56035604
intentResult = intentRecognizer->RecognizeOnceAsync(" ").get();
56045605
RequireIntentId(intentResult, "");
56055606
}
5607+
5608+
5609+
// Pattern-matching smoke test for Hindi: a recognizer created for "hi-IN" is given a
// single intent pattern and must resolve a spoken phrase to that intent's id.
TEST_CASE("IntentRecognizer::PatternMatching::IntentList for hi", "[hi]")
{
    SECTION("Basic Hindi intents")
    {
        auto recognizer = IntentRecognizer::FromLanguage("hi-IN");

        // The registered pattern has no sentence terminator...
        std::string pattern("पिछले सात कैरेक्टर को बोल्ड करें");
        recognizer->AddIntent(pattern, "intent");

        // ...while the utterance ends with a danda ("।"); the two are still expected to match.
        std::string utterance("पिछले सात कैरेक्टर को बोल्ड करें।");
        auto recognition = recognizer->RecognizeOnceAsync(utterance).get();

        // Trace what came back before asserting, to help diagnose a mismatch.
        SPX_TRACE_INFO("Intent Id: %s", recognition->IntentId.c_str());
        SPX_TRACE_INFO("Recognized Text: %s", recognition->GetDetailedResult().c_str());

        RequireIntentId(recognition, "intent");
    }
}

samples/cpp/intent-recognition/samples/test_utils.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
#include <ajv.h>
77
#include <intentapi_cxx.h>
8+
#include <string>
9+
#include <sstream>
10+
#include <iomanip>
811

912
#include "catch2/catch_amalgamated.hpp"
1013
#include "test_utils.h"
@@ -79,3 +82,84 @@ void RequireAlternateCount(std::shared_ptr<IntentRecognitionResult> result, int
7982
std::cout << "*** Intent Count: " << actualCount << "\n";
8083
#endif
8184
}
85+
86+
87+
// Formats the raw bytes of `str` for debugging: every byte becomes two lowercase
// hexadecimal digits followed by one space (so the result ends with a trailing
// space whenever `str` is non-empty). An empty input yields an empty string.
std::string stringToHex(const std::string& str) {
    static const char kHexDigits[] = "0123456789abcdef";

    std::string rendered;
    rendered.reserve(str.size() * 3);  // two digits + one separator per byte

    for (const char ch : str) {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended.
        const unsigned char byte = static_cast<unsigned char>(ch);
        rendered.push_back(kHexDigits[byte >> 4]);
        rendered.push_back(kHexDigits[byte & 0x0F]);
        rendered.push_back(' ');
    }

    return rendered;
}
97+
98+
// Renders a UTF-8 encoded byte string as an ASCII-only, escaped form for debugging.
//
// The input is walked one UTF-8 sequence at a time:
//   * printable ASCII is copied through unchanged, unless `escapeAscii` is true;
//   * every other decoded code point is written as \uHHHH when it is <= 0xFFFF,
//     or as \UHHHHHHHH otherwise (lowercase hex digits);
//   * a byte that cannot start a sequence, or a lead byte whose continuation bytes
//     are missing (string ends early) or malformed, is written as \xHH and only that
//     single byte is consumed, so the bytes that follow are still decoded on their own.
//
// @param str          bytes to dump; treated as UTF-8.
// @param escapeAscii  when true, printable ASCII is escaped as \uHHHH as well.
// @return the escaped representation; empty when `str` is empty.
std::string dumpStringToUTF8(const std::string& str, bool escapeAscii) {
    std::ostringstream oss;
    // Both flags are sticky; only integers are padded because every escape sets its own width.
    oss << std::hex << std::setfill('0');

    size_t i = 0;
    while (i < str.size()) {
        const unsigned char c = static_cast<unsigned char>(str[i]);
        uint32_t codepoint = 0;
        size_t numBytes = 0;  // stays 0 when `c` cannot be the first byte of a sequence

        // Classify the lead byte and keep its payload bits.
        if ((c & 0x80) == 0x00) {
            codepoint = c;          // 1-byte sequence (ASCII)
            numBytes = 1;
        }
        else if ((c & 0xE0) == 0xC0) {
            codepoint = c & 0x1F;   // 2-byte sequence
            numBytes = 2;
        }
        else if ((c & 0xF0) == 0xE0) {
            codepoint = c & 0x0F;   // 3-byte sequence
            numBytes = 3;
        }
        else if ((c & 0xF8) == 0xF0) {
            codepoint = c & 0x07;   // 4-byte sequence
            numBytes = 4;
        }

        // The sequence is usable only if all of its continuation bytes are present
        // and each one has the 10xxxxxx form.
        bool wellFormed = (numBytes != 0) && (numBytes <= str.size() - i);
        for (size_t j = 1; wellFormed && j < numBytes; ++j) {
            const unsigned char cont = static_cast<unsigned char>(str[i + j]);
            if ((cont & 0xC0) != 0x80) {
                wellFormed = false;
            }
            else {
                codepoint = (codepoint << 6) | (cont & 0x3F);
            }
        }

        if (!wellFormed) {
            // Report just the offending byte and resynchronize on the next one instead
            // of emitting a half-decoded code point and skipping over unrelated bytes.
            oss << "\\x" << std::setw(2) << static_cast<int>(c);
            ++i;
            continue;
        }

        if (numBytes == 1 && !escapeAscii && std::isprint(c)) {
            // Keep printable ASCII as-is
            oss << static_cast<char>(c);
        }
        else if (codepoint <= 0xFFFF) {
            // \uHHHH for code points that fit in 16 bits
            oss << "\\u" << std::setw(4) << codepoint;
        }
        else {
            // \UHHHHHHHH for everything above
            oss << "\\U" << std::setw(8) << codepoint;
        }

        i += numBytes;
    }

    return oss.str();
}

samples/cpp/intent-recognition/samples/test_utils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ void RequireEntity(std::shared_ptr<IntentRecognitionResult> result, std::string
1313
void RequireNoEntity(std::shared_ptr<IntentRecognitionResult> result, std::string expectedEntityId);
1414
void RequireAlternateIntentId(std::shared_ptr<IntentRecognitionResult> result, std::string expectedIntentId);
1515
void RequireAlternateCount(std::shared_ptr<IntentRecognitionResult> result, int expectedCount);
16+
std::string stringToHex(const std::string& str);
17+
std::string dumpStringToUTF8(const std::string& str, bool escapeAscii = false);
1618

1719
inline bool exists(const std::string& name) {
1820
return std::ifstream(name.c_str()).good();

samples/csharp/maui/speech-to-text/speech-to-text/speech-to-text.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
<ApplicationDisplayVersion>1.0</ApplicationDisplayVersion>
2424
<ApplicationVersion>1</ApplicationVersion>
2525

26-
<SupportedOSPlatformVersion Condition="$([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) == 'ios'">11.0</SupportedOSPlatformVersion>
26+
<SupportedOSPlatformVersion Condition="$([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) == 'ios'">15.0</SupportedOSPlatformVersion>
2727
<SupportedOSPlatformVersion Condition="$([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) == 'maccatalyst'">13.1</SupportedOSPlatformVersion>
2828
<SupportedOSPlatformVersion Condition="$([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) == 'android'">21.0</SupportedOSPlatformVersion>
2929
<SupportedOSPlatformVersion Condition="$([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) == 'windows'">10.0.19041.0</SupportedOSPlatformVersion>

0 commit comments

Comments
 (0)