Skip to content
This repository was archived by the owner on Jan 15, 2025. It is now read-only.

Commit b300d92

Browse files
authored
Add language support for QnA Maker build-related API (#1083)
* Support output to file for the kb:export command
* Add language support for the QnA builder
* Add more locales to the language mappings
* Add more test cases and validation for locale or language support
* Fix typo
1 parent 17c7565 commit b300d92

File tree

9 files changed

+397
-22
lines changed

9 files changed

+397
-22
lines changed

packages/lu/src/parser/cross-train/crossTrainer.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ const qnaCrossTrain = function (qnaFileIdToResourceMap, luFileIdToResourceMap, i
355355
try {
356356
for (const qnaObjectId of Array.from(qnaFileIdToResourceMap.keys())) {
357357
let fileName = path.basename(qnaObjectId, path.extname(qnaObjectId))
358-
const culture = fileHelper.getCultureFromPath(qnaObjectId)
358+
const culture = fileHelper.getQnACultureFromPath(qnaObjectId)
359359
fileName = culture ? fileName.substring(0, fileName.length - culture.length - 1) : fileName
360360

361361
const luObjectId = Array.from(luFileIdToResourceMap.keys()).find(x => x.toLowerCase() === qnaObjectId.toLowerCase())

packages/lu/src/parser/lubuild/builder.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ export class Builder {
4343
let fileCulture: string
4444
let fileName: string
4545

46-
let cultureFromPath = fileHelper.getCultureFromPath(file)
46+
let cultureFromPath = fileHelper.getLuisCultureFromPath(file)
4747
if (cultureFromPath) {
4848
fileCulture = cultureFromPath
4949
let fileNameWithCulture = path.basename(file, path.extname(file))

packages/lu/src/parser/qnabuild/builder.ts

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ const Content = require('./../lu/qna')
2020
const KB = require('./../qna/qnamaker/kb')
2121
const recognizerType = require('./../utils/enums/recognizertypes')
2222
const qnaOptions = require('./../lu/qnaOptions')
23+
const localeToQnALanguageMap = require('./../utils/enums/localeToQnALanguageMap')
2324

2425
export class Builder {
2526
private readonly handler: (input: string) => any
@@ -38,7 +39,7 @@ export class Builder {
3839
for (const file of files) {
3940
let fileCulture: string
4041
let fileName: string
41-
let cultureFromPath = fileHelper.getCultureFromPath(file)
42+
let cultureFromPath = fileHelper.getQnACultureFromPath(file)
4243
if (cultureFromPath) {
4344
fileCulture = cultureFromPath
4445
let fileNameWithCulture = path.basename(file, path.extname(file))
@@ -48,6 +49,10 @@ export class Builder {
4849
fileName = path.basename(file, path.extname(file))
4950
}
5051

52+
if (!fileCulture) {
53+
throw (new exception(retCode.errorCode.INVALID_INPUT_FILE, 'Culture is not set or unsupported by qnamaker service.'))
54+
}
55+
5156
let fileContent = ''
5257

5358
let qnaFiles = await fileHelper.getLuObjects(undefined, file, true, fileExtEnum.QnAFile)
@@ -209,7 +214,15 @@ export class Builder {
209214
// set kb name
210215
if (!currentQna.kb.name) currentQna.kb.name = `${botName}(${suffix}).${qnamakerContent.language}.qna`
211216

217+
// set kb locale and map it to language that qna service can recognize
218+
let locale = qnamakerContent.language
219+
let language = localeToQnALanguageMap[locale]
220+
if (!language) {
221+
throw new Error(`${locale} is not supported in current qnamaker service.`)
222+
}
223+
212224
let currentKB = currentQna.kb
225+
currentKB.language = language
213226
let currentAlt = currentQna.alterations
214227
let hostName = ''
215228
let kbId = ''
@@ -446,6 +459,7 @@ export class Builder {
446459
await delay(delayDuration)
447460
const emptyKBJson = {
448461
name: currentKB.name,
462+
language: currentKB.language,
449463
qnaList: [],
450464
urls: [],
451465
files: []
packages/lu/src/parser/utils/enums/localeToQnALanguageMap.js

Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
/**
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License.
4+
*/
5+
module.exports = {
6+
'ar': 'Arabic',
7+
'ar-dz': 'Arabic',
8+
'ar-bh': 'Arabic',
9+
'ar-eg': 'Arabic',
10+
'ar-iq': 'Arabic',
11+
'ar-jo': 'Arabic',
12+
'ar-kw': 'Arabic',
13+
'ar-lb': 'Arabic',
14+
'ar-ly': 'Arabic',
15+
'ar-ma': 'Arabic',
16+
'ar-om': 'Arabic',
17+
'ar-qa': 'Arabic',
18+
'ar-sa': 'Arabic',
19+
'ar-sy': 'Arabic',
20+
'ar-tn': 'Arabic',
21+
'ar-ae': 'Arabic',
22+
'ar-ye': 'Arabic',
23+
'hy': 'Armenian',
24+
'hy-am': 'Armenian',
25+
'bn': 'Bangla',
26+
'bn-bd': 'Bangla',
27+
'bn-in': 'Bangla',
28+
'eu': 'Basque',
29+
'eu-es': 'Basque',
30+
'bg': 'Bulgarian',
31+
'bg-bg': 'Bulgarian',
32+
'ca': 'Catalan',
33+
'ca-es': 'Catalan',
34+
'zh': 'Chinese_Simplified',
35+
'zh-hans': 'Chinese_Simplified',
36+
'zh-cn': 'Chinese_Simplified',
37+
'zh-sg': 'Chinese_Simplified',
38+
'zh-hant': 'Chinese_Traditional',
39+
'zh-hk': 'Chinese_Traditional',
40+
'zh-mo': 'Chinese_Traditional',
41+
'zh-tw': 'Chinese_Traditional',
42+
'hr': 'Croatian',
43+
'hr-ba': 'Croatian',
44+
'hr-hr': 'Croatian',
45+
'cs': 'Czech',
46+
'cs-cz': 'Czech',
47+
'da': 'Danish',
48+
'da-dk': 'Danish',
49+
'nl': 'Dutch',
50+
'nl-be': 'Dutch',
51+
'nl-nl': 'Dutch',
52+
'en': 'English',
53+
'en-as': 'English',
54+
'en-ai': 'English',
55+
'en-ag': 'English',
56+
'en-au': 'English',
57+
'en-at': 'English',
58+
'en-bs': 'English',
59+
'en-bb': 'English',
60+
'en-be': 'English',
61+
'en-bz': 'English',
62+
'en-bm': 'English',
63+
'en-bw': 'English',
64+
'en-io': 'English',
65+
'en-vg': 'English',
66+
'en-bi': 'English',
67+
'en-cm': 'English',
68+
'en-ca': 'English',
69+
'en-029': 'English',
70+
'en-ky': 'English',
71+
'en-cx': 'English',
72+
'en-cc': 'English',
73+
'en-ck': 'English',
74+
'en-cy': 'English',
75+
'en-dk': 'English',
76+
'en-dm': 'English',
77+
'en-er': 'English',
78+
'en-150': 'English',
79+
'en-fk': 'English',
80+
'en-fj': 'English',
81+
'en-fi': 'English',
82+
'en-gm': 'English',
83+
'en-de': 'English',
84+
'en-gh': 'English',
85+
'en-gi': 'English',
86+
'en-gd': 'English',
87+
'en-gu': 'English',
88+
'en-gg': 'English',
89+
'en-gy': 'English',
90+
'en-hk': 'English',
91+
'en-in': 'English',
92+
'en-id': 'English',
93+
'en-ie': 'English',
94+
'en-im': 'English',
95+
'en-il': 'English',
96+
'en-jm': 'English',
97+
'en-je': 'English',
98+
'en-ke': 'English',
99+
'en-ki': 'English',
100+
'en-ls': 'English',
101+
'en-lr': 'English',
102+
'en-mo': 'English',
103+
'en-mg': 'English',
104+
'en-mw': 'English',
105+
'en-my': 'English',
106+
'en-mt': 'English',
107+
'en-mh': 'English',
108+
'en-mu': 'English',
109+
'en-fm': 'English',
110+
'en-ms': 'English',
111+
'en-na': 'English',
112+
'en-nr': 'English',
113+
'en-nl': 'English',
114+
'en-nz': 'English',
115+
'en-ng': 'English',
116+
'en-nu': 'English',
117+
'en-nf': 'English',
118+
'en-mp': 'English',
119+
'en-pk': 'English',
120+
'en-pw': 'English',
121+
'en-pg': 'English',
122+
'en-ph': 'English',
123+
'en-pn': 'English',
124+
'en-pr': 'English',
125+
'en-rw': 'English',
126+
'en-kn': 'English',
127+
'en-lc': 'English',
128+
'en-vc': 'English',
129+
'en-ws': 'English',
130+
'en-sc': 'English',
131+
'en-sl': 'English',
132+
'en-sg': 'English',
133+
'en-sx': 'English',
134+
'en-si': 'English',
135+
'en-sb': 'English',
136+
'en-za': 'English',
137+
'en-ss': 'English',
138+
'en-sh': 'English',
139+
'en-sd': 'English',
140+
'en-sz': 'English',
141+
'en-se': 'English',
142+
'en-ch': 'English',
143+
'en-tz': 'English',
144+
'en-tk': 'English',
145+
'en-to': 'English',
146+
'en-tt': 'English',
147+
'en-tc': 'English',
148+
'en-tv': 'English',
149+
'en-um': 'English',
150+
'en-vi': 'English',
151+
'en-ug': 'English',
152+
'en-gb': 'English',
153+
'en-us': 'English',
154+
'en-vu': 'English',
155+
'en-001': 'English',
156+
'en-zm': 'English',
157+
'en-zw': 'English',
158+
'et': 'Estonian',
159+
'et-ee': 'Estonian',
160+
'fi': 'Finnish',
161+
'fi-fi': 'Finnish',
162+
'fr': 'French',
163+
'fr-be': 'French',
164+
'fr-cm': 'French',
165+
'fr-ca': 'French',
166+
'fr-029': 'French',
167+
'fr-ci': 'French',
168+
'fr-fr': 'French',
169+
'fr-ht': 'French',
170+
'fr-lu': 'French',
171+
'fr-ml': 'French',
172+
'fr-mc': 'French',
173+
'fr-ma': 'French',
174+
'fr-re': 'French',
175+
'fr-sn': 'French',
176+
'fr-ch': 'French',
177+
'fr-cd': 'French',
178+
'gl': 'Galician',
179+
'gl-es': 'Galician',
180+
'de': 'German',
181+
'de-at': 'German',
182+
'de-de': 'German',
183+
'de-li': 'German',
184+
'de-lu': 'German',
185+
'de-ch': 'German',
186+
'el': 'Greek',
187+
'el-gr': 'Greek',
188+
'gu': 'Gujarati',
189+
'gu-in': 'Gujarati',
190+
'he': 'Hebrew',
191+
'he-il': 'Hebrew',
192+
'hi': 'Hindi',
193+
'hi-in': 'Hindi',
194+
'hu': 'Hungarian',
195+
'hu-hu': 'Hungarian',
196+
'is': 'Icelandic',
197+
'is-is': 'Icelandic',
198+
'id': 'Indonesian',
199+
'id-id': 'Indonesian',
200+
'ga': 'Irish',
201+
'ga-ie': 'Irish',
202+
'it': 'Italian',
203+
'it-it': 'Italian',
204+
'it-ch': 'Italian',
205+
'ja': 'Japanese',
206+
'ja-jp': 'Japanese',
207+
'kn': 'Kannada',
208+
'kn-in': 'Kannada',
209+
'ko': 'Korean',
210+
'ko-kr': 'Korean',
211+
'lv': 'Latvian',
212+
'lv-lv': 'Latvian',
213+
'lt': 'Lithuanian',
214+
'lt-lt': 'Lithuanian',
215+
'ml': 'Malayalam',
216+
'ml-in': 'Malayalam',
217+
'ms': 'Malay',
218+
'ms-bn': 'Malay',
219+
'ms-my': 'Malay',
220+
'no': 'Norwegian',
221+
'nb': 'Norwegian',
222+
'nb-no': 'Norwegian',
223+
'nn': 'Norwegian',
224+
'nn-no': 'Norwegian',
225+
'pl': 'Polish',
226+
'pl-pl': 'Polish',
227+
'pt': 'Portuguese',
228+
'pt-br': 'Portuguese',
229+
'pt-pt': 'Portuguese',
230+
'pa': 'Punjabi',
231+
'pa-arab': 'Punjabi',
232+
'pa-in': 'Punjabi',
233+
'pa-arab-pk': 'Punjabi',
234+
'ro': 'Romanian',
235+
'ro-md': 'Romanian',
236+
'ro-ro': 'Romanian',
237+
'ru': 'Russian',
238+
'ru-md': 'Russian',
239+
'ru-ru': 'Russian',
240+
'sr': 'Serbian_Cyrillic',
241+
'sr-cyrl': 'Serbian_Cyrillic',
242+
'sr-cyrl-ba': 'Serbian_Cyrillic',
243+
'sr-cyrl-me': 'Serbian_Cyrillic',
244+
'sr-cyrl-rs': 'Serbian_Cyrillic',
245+
'sr-latn': 'Serbian_Latin',
246+
'sr-latn-ba': 'Serbian_Latin',
247+
'sr-latn-me': 'Serbian_Latin',
248+
'sr-latn-rs': 'Serbian_Latin',
249+
'sk': 'Slovak',
250+
'sk-sk': 'Slovak',
251+
'sl': 'Slovenian',
252+
'sl-si': 'Slovenian',
253+
'es': 'Spanish',
254+
'es-ar': 'Spanish',
255+
'es-bo': 'Spanish',
256+
'es-cl': 'Spanish',
257+
'es-co': 'Spanish',
258+
'es-cr': 'Spanish',
259+
'es-cu': 'Spanish',
260+
'es-do': 'Spanish',
261+
'es-ec': 'Spanish',
262+
'es-sv': 'Spanish',
263+
'es-gt': 'Spanish',
264+
'es-hn': 'Spanish',
265+
'es-419': 'Spanish',
266+
'es-mx': 'Spanish',
267+
'es-ni': 'Spanish',
268+
'es-pa': 'Spanish',
269+
'es-py': 'Spanish',
270+
'es-pe': 'Spanish',
271+
'es-pr': 'Spanish',
272+
'es-es': 'Spanish',
273+
'es-us': 'Spanish',
274+
'es-uy': 'Spanish',
275+
'es-ve': 'Spanish',
276+
'sv': 'Swedish',
277+
'sv-fi': 'Swedish',
278+
'sv-se': 'Swedish',
279+
'ta': 'Tamil',
280+
'ta-in': 'Tamil',
281+
'ta-lk': 'Tamil',
282+
'te': 'Telugu',
283+
'te-in': 'Telugu',
284+
'th': 'Thai',
285+
'th-th': 'Thai',
286+
'tr': 'Turkish',
287+
'tr-tr': 'Turkish',
288+
'uk': 'Ukrainian',
289+
'uk-ua': 'Ukrainian',
290+
'ur': 'Urdu',
291+
'ur-in': 'Urdu',
292+
'ur-pk': 'Urdu',
293+
'vi': 'Vietnamese',
294+
'vi-vn': 'Vietnamese'
295+
};

0 commit comments

Comments
 (0)