Skip to content
This repository was archived by the owner on Jan 15, 2025. It is now read-only.

Commit 8518fb1

Browse files
feich-mslei9444vishwacsenaVishwac Sena Kannan
authored
cross train CLI (#690)
* add qna cross training * Revert "remove cross-train cli related changes and only keep the api" This reverts commit ab1fb84. * call qnaCrossTrain * add qna cross traini * expose function * optimize and add more corner test cases * rename crossTrainer class * remove cli code pieces and optimize tests * move cross-train.ts to lu folder * optimize function headers and naming * typo: dialogname to dialogName * add more test cases * remove unused ' * add cross-train.ts cli file for test convenience and will remove it once review done * fix bugs * trigger ci * update qna cross train based on feedbacks * add config path parameter to specify the config file * fix posttest failure * remove dup utterances in interuption intents * remove dup 1 * optimize all de-dup logic * resolve all feedbacks * add test cases to test dedup functions * fix tslint errors * add more friendly description for config CLI parameter * fix config full path issue and remove uncessary loop validation * move cross train CLI to a new package named cross-train * fix build error in CI validation * add missing package in package.json * remove unused file * fix typo * optimize dedep logic and resolve feedbacks * adjust test cases * support qna source, id, prompts reconstruction in cross training * typo: interuption to interruption * move CLI test cases from lu to cross-train * adjust test cases of cross-train CLI to make it more readable * update some function to support load file content * support auto detect config based on root dialog and file system * update some code style * update some ref * optimize config parameter * add fileHelper.ts * fix typo * remove semicolon * add the object * change the value * make all the path the same parttern * optimize * adjust test cases to use file name as id * fix typo * update fileHelper.ts * fix config id issue * update pnpm lock * update pnpm * fix test cases * fix tslint * optimzie cross train to care only trigger intents and allow empty trigger intent or dialog 
* merge PR 706 and adjust test cases to cover more corner cases * support multi trigger intents point to same lu file * optimize config structure * optimize config object parser * fix bug * adjust pnpm * fix minor typo * docs. * remove cross-train from top level command and add it to luis command * fix test * add cross train in qna maker cli * support to write corsstrained recognizer * support crosstrained recognizer in qnamaker * refine the write dialog logic to support crosstrained dialog * update docs * fix crosstrained recognizer configuration issue * remove patterns from lu in crosstrained qna queations * fix test case * fix tslint * fix comments of reviewer Co-authored-by: leilzh <[email protected]> Co-authored-by: Vishwac Sena Kannan <[email protected]> Co-authored-by: Vishwac Sena Kannan <[email protected]>
1 parent 9ed91f7 commit 8518fb1

File tree

143 files changed

+3272
-821
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

143 files changed

+3272
-821
lines changed

common/config/rush/pnpm-lock.yaml

Lines changed: 1004 additions & 745 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/*!
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License.
4+
*/
5+
6+
const fs = require('fs-extra')
7+
const path = require('path')
8+
const exception = require('../utils/exception')
9+
const retCode = require('../utils/enums/CLI-errors')
10+
const fileHelper = require('../../utils/filehelper')
11+
12+
const dialogExt = '.dialog'
13+
const luExt = '.lu'
14+
15+
module.exports = {
16+
generateConfig: async function (inputFolder, rootDialogFile) {
17+
let dialogFiles = []
18+
await getDialogFiles(inputFolder, dialogFiles)
19+
20+
let rootDialogObject = JSON.parse(await getInputFromFile(rootDialogFile))
21+
rootDialogObject.path = rootDialogFile
22+
rootDialogObject.isRoot = true
23+
24+
let dialogObjects = []
25+
for (const dialogFile of dialogFiles) {
26+
let dialogObject = JSON.parse(await getInputFromFile(dialogFile))
27+
dialogObject.path = dialogFile
28+
dialogObjects.push(dialogObject)
29+
}
30+
31+
const configObject = createConfig(rootDialogObject, dialogObjects, inputFolder)
32+
33+
return JSON.stringify(configObject)
34+
}
35+
}
36+
37+
/**
 * Recursively collect the absolute paths of all files ending in `.dialog`.
 *
 * Matches are appended to `results` in directory-listing order, descending
 * into each sub-folder as it is encountered.
 *
 * A plain `for...of` loop is used instead of `forEach(async ...)` so that the
 * recursive calls are actually awaited and any failure while scanning a
 * sub-folder rejects the returned promise rather than surfacing as an
 * unhandled rejection.
 *
 * @param {string} inputFolder folder to scan.
 * @param {string[]} results accumulator that receives the matching paths.
 * @returns {Promise<void>} resolves once the whole tree has been scanned.
 */
const getDialogFiles = async function (inputFolder, results) {
  for (const entryName of fs.readdirSync(inputFolder)) {
    const entryPath = path.resolve(inputFolder, entryName)
    // Stat once per entry; the result answers both the directory and the file question.
    const stats = fs.statSync(entryPath)

    if (stats.isDirectory()) {
      await getDialogFiles(entryPath, results)
    } else if (stats.isFile() && entryPath.endsWith(dialogExt)) {
      results.push(entryPath)
    }
  }
}
51+
52+
/**
 * Read the content of a file through the shared file helper.
 *
 * @param {string} path path of the file to read; a falsy value yields an empty string.
 * @returns {Promise<string>} the file content, or '' when no path is given.
 * @throws {exception} INVALID_INPUT when the underlying read fails.
 */
const getInputFromFile = async function (path) {
  // Nothing to read: keep the empty-string contract for callers.
  if (!path) return ''

  try {
    return await fileHelper.getContentFromFile(path)
  } catch (error) {
    throw (new exception(retCode.errorCode.INVALID_INPUT, `Failed to read file: ${error}`))
  }
}
62+
63+
/**
 * Derive the cross-train mapping config by walking dialog references.
 *
 * For every `Microsoft.OnIntent` trigger of `rootDialog`, each
 * `Microsoft.BeginDialog` action whose `dialog` name matches the base name of
 * a known dialog file adds an entry mapping the trigger intent to the target's
 * lu path (relative to `configPath`). An intent that reaches several targets
 * is stored as an array. The walk then continues into each target dialog.
 *
 * Result shape:
 *   { [luPath]: { rootDialog?: true, triggers: { [intent]: luPath | luPath[] } } }
 *
 * @param {object} rootDialog parsed dialog with `path` (and optionally `isRoot`, `triggers`).
 * @param {object[]} dialogs all parsed dialogs, each carrying its `path`.
 * @param {string} configPath folder the emitted lu paths are made relative to.
 * @param {Set<string>} [visited] dialog paths already expanded; used internally to stop cyclic walks.
 * @returns {object} the mapping config for `rootDialog` and everything reachable from it.
 * @throws {exception} INVALID_INPUT when the lu file next to a visited dialog does not exist.
 */
const createConfig = function (rootDialog, dialogs, configPath, visited = new Set()) {
  let result = {}

  // Mark before descending so dialogs that reference each other (or themselves) terminate.
  visited.add(rootDialog.path)

  const key = createPath(rootDialog.path, configPath)

  // Swap only the trailing extension; a bare replace() would rewrite the first
  // '.dialog' anywhere in the path (e.g. inside a folder name). Paths without
  // the suffix keep the previous behavior.
  const rootLuPath = rootDialog.path.endsWith(dialogExt)
    ? rootDialog.path.slice(0, -dialogExt.length) + luExt
    : rootDialog.path.replace(dialogExt, luExt)

  if (!fs.existsSync(rootLuPath)) {
    throw (new exception(retCode.errorCode.INVALID_INPUT, `Failed to parse mapping rules config from file system: ${rootLuPath} does not exist. Please provide config file by --config`))
  }

  // A dialog without triggers simply contributes no mappings.
  for (const trigger of rootDialog.triggers || []) {
    if (trigger.$type !== 'Microsoft.OnIntent') continue

    for (const action of trigger.actions || []) {
      if (action.$type !== 'Microsoft.BeginDialog') continue

      const dialogName = action.dialog
      const target = dialogs.find(dialog => path.basename(dialog.path, dialogExt) === dialogName)
      if (!target) continue

      const relativePath = createPath(target.path, configPath)
      if (!result[key]) result[key] = { triggers: {} }

      const existing = result[key].triggers[trigger.intent]
      if (!existing) {
        result[key].triggers[trigger.intent] = relativePath
      } else if (typeof existing === 'string') {
        // Second target for the same intent: promote the single value to a list.
        result[key].triggers[trigger.intent] = [existing, relativePath]
      } else {
        existing.push(relativePath)
      }

      // Expand each dialog once; its entries are already merged if it was seen before.
      if (!visited.has(target.path)) {
        result = { ...result, ...createConfig(target, dialogs, configPath, visited) }
      }
    }
  }

  if (rootDialog.isRoot && result[key]) result[key].rootDialog = true

  return result
}
103+
104+
/**
 * Map a dialog file path to the path of its lu file, relative to the config folder.
 *
 * Only the trailing `.dialog` extension is swapped for `.lu`; a bare
 * `replace()` would rewrite the first `.dialog` anywhere in the path, such as
 * inside a folder name. Paths that do not end in `.dialog` keep the previous
 * first-occurrence replacement.
 *
 * @param {string} dialogPath path of the dialog file.
 * @param {string} configPath folder the result is made relative to.
 * @returns {string} relative path of the corresponding lu file.
 */
const createPath = function (dialogPath, configPath) {
  const dialogSuffix = '.dialog'
  const luFilePath = dialogPath.endsWith(dialogSuffix)
    ? dialogPath.slice(0, -dialogSuffix.length) + '.lu'
    : dialogPath.replace(dialogSuffix, '.lu')

  return path.relative(configPath, luFilePath)
}

packages/lu/src/parser/cross-train/cross-train.js

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,24 @@ const fileExtEnum = require('../utils/helpers').FileExtTypeEnum
1010
const exception = require('../utils/exception')
1111
const retCode = require('../utils/enums/CLI-errors')
1212
const crossTrainer = require('./crossTrainer')
13+
const confighelper = require('./confighelper')
1314

1415
module.exports = {
16+
/**
17+
* Generate cross train config based on input folder and root dialog file.
18+
* @param {string} inputFolder full path of input lu and qna files folder.
19+
* @param {string} rootDialogFile full path of root dialog file.
20+
* @returns {string} config object json string.
21+
*/
22+
generateConfig: async function (inputFolder, rootDialogFile) {
23+
const configStr = await confighelper.generateConfig(inputFolder, rootDialogFile)
24+
25+
return configStr
26+
},
27+
1528
/**
1629
* Cross train lu and qna files.
17-
* @param {string} input input lu and qna files folder.
30+
* @param {string} input full path of input lu and qna files folder.
1831
* @param {string} intentName interruption intent name. Default value is _Interruption.
1932
* @param {string} config path to config of mapping rules or mapping rules json content itself. If undefined, it will read config.json from input folder.
2033
* @returns {luResult: any, qnaResult: any} trainedResult of luResult and qnaResult or undefined if no results.

packages/lu/src/parser/cross-train/crossTrainer.js

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -300,20 +300,21 @@ const removeDupUtterances = function (resource) {
300300

301301
const extractIntentUtterances = function(resource, intentName) {
302302
const intentSections = resource.Sections.filter(s => s.SectionType === LUSectionTypes.SIMPLEINTENTSECTION || s.SectionType === LUSectionTypes.NESTEDINTENTSECTION)
303+
const curlyRe = /.*\{.*\}.*/
303304

304305
let intentUtterances = []
305306
if (intentName && intentName !== '') {
306307
const specificSections = intentSections.filter(s => s.Name === intentName)
307308
if (specificSections.length > 0) {
308-
intentUtterances = intentUtterances.concat(specificSections[0].UtteranceAndEntitiesMap.map(u => u.utterance))
309+
intentUtterances = intentUtterances.concat(specificSections[0].UtteranceAndEntitiesMap.map(u => u.utterance).filter(i => curlyRe.exec(i) === null))
309310
}
310311
} else {
311312
intentSections.forEach(s => {
312313
if (s.SectionType === LUSectionTypes.SIMPLEINTENTSECTION) {
313-
intentUtterances = intentUtterances.concat(s.UtteranceAndEntitiesMap.map(u => u.utterance))
314+
intentUtterances = intentUtterances.concat(s.UtteranceAndEntitiesMap.map(u => u.utterance).filter(i => curlyRe.exec(i) === null))
314315
} else {
315316
s.SimpleIntentSections.forEach(section => {
316-
intentUtterances = intentUtterances.concat(section.UtteranceAndEntitiesMap.map(u => u.utterance))
317+
intentUtterances = intentUtterances.concat(section.UtteranceAndEntitiesMap.map(u => u.utterance).filter(i => curlyRe.exec(i) === null))
317318
})
318319
}
319320
})}
@@ -430,11 +431,13 @@ const qnaCrossTrainCore = function (luResource, qnaResource, fileName, interrupt
430431
qnaSectionContents.push(qnaSectionContent)
431432
}
432433

433-
const qnaContents = qnaSectionContents.join(NEWLINE + NEWLINE)
434+
let qnaContents = qnaSectionContents.join(NEWLINE + NEWLINE)
434435
if (qnaContents && qnaContents !== '') {
435436
const modelInfoSections = qnaResource.Sections.filter(s => s.SectionType === LUSectionTypes.MODELINFOSECTION)
436437
const modelInforContent = modelInfoSections.map(m => m.ModelInfo).join(NEWLINE)
437-
trainedQnaResource = new SectionOperator(new LUResource([], modelInforContent, [])).addSection(NEWLINE + qnaContents)
438+
if (modelInforContent && modelInforContent !== '') qnaContents = NEWLINE + qnaContents
439+
440+
trainedQnaResource = new SectionOperator(new LUResource([], modelInforContent, [])).addSection(qnaContents)
438441
}
439442

440443
// remove utterances which are duplicated with local qna questions

packages/lu/src/parser/lubuild/builder.ts

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {LuBuildCore} from './core'
77
import {Settings} from './settings'
88
import {MultiLanguageRecognizer} from './multi-language-recognizer'
99
import {Recognizer} from './recognizer'
10+
import {CrossTrainedRecognizer} from './cross-trained-recognizer'
1011
const path = require('path')
1112
const fs = require('fs-extra')
1213
const delay = require('delay')
@@ -18,6 +19,7 @@ const LuisBuilderVerbose = require('./../luis/luisCollate')
1819
const LuisBuilder = require('./../luis/luisBuilder')
1920
const LUOptions = require('./../lu/luOptions')
2021
const Content = require('./../lu/lu')
22+
const recognizerType = require('./../utils/enums/recognizertypes')
2123

2224
export class Builder {
2325
private readonly handler: (input: string) => any
@@ -215,7 +217,7 @@ export class Builder {
215217
return dialogContents
216218
}
217219

218-
async writeDialogAssets(contents: any[], force: boolean, out: string, luconfig: string) {
220+
async writeDialogAssets(contents: any[], force: boolean, out: string, dialogType: string, luconfig: string) {
219221
let writeDone = false
220222

221223
let writeContents = contents.filter(c => c.id.endsWith('.dialog'))
@@ -242,7 +244,7 @@ export class Builder {
242244
}
243245

244246
this.handler(`Writing to ${outFilePath}\n`)
245-
await fs.writeFile(outFilePath, content.content, 'utf-8')
247+
await this.writeDialog(content.content, outFilePath, dialogType)
246248
writeDone = true
247249
}
248250
}
@@ -254,7 +256,7 @@ export class Builder {
254256
}
255257

256258
this.handler(`Writing to ${content.path}\n`)
257-
await fs.writeFile(content.path, content.content, 'utf-8')
259+
await this.writeDialog(content.content, content.path, dialogType)
258260
writeDone = true
259261
}
260262
}
@@ -404,4 +406,27 @@ export class Builder {
404406
app.intents = filteredIntents
405407
}
406408
}
409+
410+
/**
 * Write a dialog file and, for cross-trained builds, keep the matching
 * `<name>.lu.qna.dialog` recognizer set in sync.
 *
 * The content is always written to `filePath`. When `dialogType` is the
 * cross-trained recognizer type and the content is a
 * `Microsoft.MultiLanguageRecognizer`, `<name>.lu` is registered in the
 * cross-trained recognizer file next to it: appended if that file exists and
 * does not list it yet, otherwise a new file is created.
 *
 * @param content JSON text of the dialog to write.
 * @param filePath destination of the dialog file.
 * @param dialogType recognizer type requested for this build.
 */
async writeDialog(content: string, filePath: string, dialogType: string) {
  await fs.writeFile(filePath, content, 'utf-8')

  const contentObj = JSON.parse(content)
  const isCrossTrainedTarget =
    dialogType === recognizerType.CROSSTRAINED && contentObj.$kind === 'Microsoft.MultiLanguageRecognizer'
  if (!isCrossTrainedTarget) return

  const fileName = path.basename(filePath, '.lu.dialog')
  const recognizerId = fileName + '.lu'
  const crossTrainedFilePath = path.join(path.dirname(filePath), fileName + '.lu.qna.dialog')

  let crossTrainedContent: string
  if (fs.existsSync(crossTrainedFilePath)) {
    // Extend the existing recognizer set instead of overwriting it.
    const existingCRDialog = JSON.parse(await fileHelper.getContentFromFile(crossTrainedFilePath))
    if (!existingCRDialog.recognizers.includes(recognizerId)) {
      existingCRDialog.recognizers.push(recognizerId)
    }

    crossTrainedContent = JSON.stringify(existingCRDialog, null, 4)
  } else {
    crossTrainedContent = new CrossTrainedRecognizer(crossTrainedFilePath, [recognizerId]).save()
  }

  await fs.writeFile(crossTrainedFilePath, crossTrainedContent, 'utf-8')
}
407432
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*!
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License.
4+
*/
5+
6+
/**
 * In-memory model of a `Microsoft.CrossTrainedRecognizerSet` dialog file:
 * the list of recognizers it references plus the path it belongs to.
 */
export class CrossTrainedRecognizer {
  /**
   * @param dialogPath path of the dialog file this recognizer set is associated with.
   * @param recognizers recognizer references emitted under `recognizers`.
   */
  constructor(private readonly dialogPath: string, public recognizers: any) {}

  /**
   * Serialize the recognizer set.
   *
   * @returns the dialog JSON, pretty-printed with a 4-space indent.
   */
  save(): string {
    return JSON.stringify(
      {
        $kind: 'Microsoft.CrossTrainedRecognizerSet',
        recognizers: this.recognizers
      },
      null,
      4
    )
  }

  /** @returns the dialog path given at construction time. */
  getDialogPath(): string {
    return this.dialogPath
  }
}

packages/lu/src/parser/qnabuild/builder.ts

Lines changed: 49 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {QnaBuildCore} from './core'
77
import {Settings} from './settings'
88
import {MultiLanguageRecognizer} from './multi-language-recognizer'
99
import {Recognizer} from './recognizer'
10+
import {CrossTrainedRecognizer} from './cross-trained-recognizer'
1011
const path = require('path')
1112
const fs = require('fs-extra')
1213
const delay = require('delay')
@@ -20,6 +21,7 @@ const qnaOptions = require('./../lu/qnaOptions')
2021
const Content = require('./../lu/qna')
2122
const KB = require('./../qna/qnamaker/kb')
2223
const NEWLINE = require('os').EOL
24+
const recognizerType = require('./../utils/enums/recognizertypes')
2325

2426
export class Builder {
2527
private readonly handler: (input: string) => any
@@ -285,25 +287,21 @@ export class Builder {
285287
return kbToLuContent
286288
}
287289

288-
async writeDialogAssets(contents: any[], force: boolean, out: string) {
290+
async writeDialogAssets(contents: any[], force: boolean, out: string, dialogType: string, files: string[]) {
289291
let writeDone = false
290292

291-
if (out) {
292-
for (const content of contents) {
293-
const outFilePath = path.join(path.resolve(out), path.basename(content.path))
294-
if (force || !fs.existsSync(outFilePath)) {
295-
this.handler(`Writing to ${outFilePath}\n`)
296-
await fs.writeFile(outFilePath, content.content, 'utf-8')
297-
writeDone = true
298-
}
293+
for (const content of contents) {
294+
let outFilePath
295+
if (out) {
296+
outFilePath = path.join(path.resolve(out), path.basename(content.path))
297+
} else {
298+
outFilePath = content.path
299299
}
300-
} else {
301-
for (const content of contents) {
302-
if (force || !fs.existsSync(content.path)) {
303-
this.handler(`Writing to ${content.path}\n`)
304-
await fs.writeFile(content.path, content.content, 'utf-8')
305-
writeDone = true
306-
}
300+
301+
if (force || !fs.existsSync(outFilePath)) {
302+
this.handler(`Writing to ${outFilePath}\n`)
303+
await this.writeDialog(content.content, outFilePath, dialogType, files)
304+
writeDone = true
307305
}
308306
}
309307

@@ -467,4 +465,39 @@ export class Builder {
467465
await qnaBuildCore.publishKB(recognizer.getKBId())
468466
this.handler(`Publishing finished for kb ${kbName}\n`)
469467
}
468+
469+
/**
 * Write a dialog file and, for cross-trained builds, register it in the
 * `<name>.lu.qna.dialog` recognizer set of every source file.
 *
 * The content is always written to `filePath`. When `dialogType` is the
 * cross-trained recognizer type and the content is a
 * `Microsoft.MultiLanguageRecognizer`, then for each entry of `files` the
 * recognizer-set file named after that entry (with any culture suffix removed)
 * is updated or created in the same folder as `filePath`.
 *
 * @param content JSON text of the dialog to write.
 * @param filePath destination of the dialog file.
 * @param dialogType recognizer type requested for this build.
 * @param files source file paths whose recognizer sets should reference this dialog.
 */
async writeDialog(content: string, filePath: string, dialogType: string, files: string[]) {
  await fs.writeFile(filePath, content, 'utf-8')

  const contentObj = JSON.parse(content)
  if (dialogType !== recognizerType.CROSSTRAINED || contentObj.$kind !== 'Microsoft.MultiLanguageRecognizer') {
    return
  }

  const recognizerId = path.basename(filePath, '.dialog')

  for (const file of files) {
    const culture = fileHelper.getCultureFromPath(file)
    const baseName = path.basename(file, path.extname(file))
    // Strip the trailing "<separator><culture>" part when the path carries a culture.
    const qnafileName = culture
      ? baseName.substring(0, baseName.length - culture.length - 1)
      : baseName

    const crossTrainedFilePath = path.join(path.dirname(filePath), `${qnafileName}.lu.qna.dialog`)

    let crossTrainedContent: string
    if (fs.existsSync(crossTrainedFilePath)) {
      // Extend the existing recognizer set instead of overwriting it.
      const existingCRDialog = JSON.parse(await fileHelper.getContentFromFile(crossTrainedFilePath))
      if (!existingCRDialog.recognizers.includes(recognizerId)) {
        existingCRDialog.recognizers.push(recognizerId)
      }

      crossTrainedContent = JSON.stringify(existingCRDialog, null, 4)
    } else {
      crossTrainedContent = new CrossTrainedRecognizer(crossTrainedFilePath, [recognizerId]).save()
    }

    await fs.writeFile(crossTrainedFilePath, crossTrainedContent, 'utf-8')
  }
}
470503
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*!
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License.
4+
*/
5+
6+
/**
 * In-memory model of a `Microsoft.CrossTrainedRecognizerSet` dialog file:
 * the list of recognizers it references plus the path it belongs to.
 */
export class CrossTrainedRecognizer {
  /**
   * @param dialogPath path of the dialog file this recognizer set is associated with.
   * @param recognizers recognizer references emitted under `recognizers`.
   */
  constructor(private readonly dialogPath: string, public recognizers: any) {}

  /**
   * Serialize the recognizer set.
   *
   * @returns the dialog JSON, pretty-printed with a 4-space indent.
   */
  save(): string {
    const serializable = {
      $kind: 'Microsoft.CrossTrainedRecognizerSet',
      recognizers: this.recognizers
    }

    return JSON.stringify(serializable, null, 4)
  }

  /** @returns the dialog path given at construction time. */
  getDialogPath(): string {
    return this.dialogPath
  }
}

0 commit comments

Comments
 (0)