Skip to content
This repository was archived by the owner on Jan 15, 2025. It is now read-only.

Commit 4c45f4a

Browse files
authored
cross train and lu/qna build optimization for composer (#889)
* adjust luis build to handle empty lu file
* support empty crosstrained recognizer in qnamaker:build
* fix empty .lu.qna.dialog
* add filter meta data for qna only(no lu files) scenario
* handle corner case when all lu or qna files are empty
* adjust build to fix dialog generation logic for qna
* fix build
* optimize
* add crosstrained tests
* fix crosstrain test
* fix tests
* adjust test case
* fix tslint
* fix merging master issue
* fix import qna from url and file api
* fix tsc failure for ts syntax
* fix unit tests
1 parent 88a2a42 commit 4c45f4a

File tree

35 files changed

+896
-460
lines changed

35 files changed

+896
-460
lines changed

packages/lu/src/parser/cross-train/crossTrainer.js

Lines changed: 70 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -36,39 +36,34 @@ module.exports = {
3636
let {luObjectArray, qnaObjectArray} = pretreatment(luContents, qnaContents)
3737
const {rootIds, triggerRules, intentName, verbose} = crossTrainConfig
3838

39-
let triggerFileIds = Object.keys(triggerRules).map(x => x.toLowerCase())
40-
let destFileIds = Object.values(triggerRules).flatMap(x => Object.values(x)).flatMap(y => y).map(x => x.toLowerCase())
41-
42-
luObjectArray = luObjectArray.filter(x => triggerFileIds.includes(x.id.toLowerCase()) || destFileIds.includes(x.id.toLowerCase()))
43-
qnaObjectArray = qnaObjectArray.filter(x => {
44-
const luFileId = x.id.toLowerCase().replace(new RegExp(helpers.FileExtTypeEnum.QnAFile + '$'), helpers.FileExtTypeEnum.LUFile)
45-
return triggerFileIds.includes(luFileId) || destFileIds.includes(luFileId)
46-
})
47-
4839
// parse lu content to LUResource object
49-
let luFileIdToResourceMap = await parseAndValidateContent(luObjectArray, verbose)
40+
let {fileIdToResourceMap: luFileIdToResourceMap, allEmpty: allLuEmpty} = await parseAndValidateContent(luObjectArray, verbose)
5041

5142
// parse qna content to LUResource object
52-
let qnaFileIdToResourceMap = await parseAndValidateContent(qnaObjectArray, verbose)
53-
54-
// construct resource tree to build the father-children relationship among lu files
55-
let resources = constructResoureTree(luFileIdToResourceMap, triggerRules)
56-
57-
// do lu cross training from roots. One root one core training
58-
for (const rootObjectId of rootIds) {
59-
if (resources.some(r => r.id.toLowerCase() === rootObjectId.toLowerCase())) {
60-
// do cross training for each root at top level
61-
const result = luCrossTrain(rootObjectId, resources, qnaFileIdToResourceMap, intentName)
62-
for (const res of result) {
63-
luFileIdToResourceMap.set(res.id, res.content)
43+
let {fileIdToResourceMap: qnaFileIdToResourceMap, allEmpty: allQnAEmpty} = await parseAndValidateContent(qnaObjectArray, verbose)
44+
45+
if (!allLuEmpty) {
46+
// construct resource tree to build the father-children relationship among lu files
47+
let resources = constructResoureTree(luFileIdToResourceMap, triggerRules)
48+
49+
// do lu cross training from roots. One root one core training
50+
for (const rootObjectId of rootIds) {
51+
if (resources.some(r => r.id.toLowerCase() === rootObjectId.toLowerCase())) {
52+
// do cross training for each root at top level
53+
const result = luCrossTrain(rootObjectId, resources, qnaFileIdToResourceMap, intentName)
54+
for (const res of result) {
55+
luFileIdToResourceMap.set(res.id, res.content)
56+
}
57+
} else {
58+
throw (new exception(retCode.errorCode.INVALID_INPUT, `Sorry, root lu file '${rootObjectId}' does not exist`))
6459
}
65-
} else {
66-
throw (new exception(retCode.errorCode.INVALID_INPUT, `Sorry, root lu file '${rootObjectId}' does not exist`))
6760
}
6861
}
6962

70-
// do qna cross training with lu files
71-
qnaCrossTrain(qnaFileIdToResourceMap, luFileIdToResourceMap, intentName)
63+
if (!allQnAEmpty) {
64+
// do qna cross training with lu files
65+
qnaCrossTrain(qnaFileIdToResourceMap, luFileIdToResourceMap, intentName, allLuEmpty)
66+
}
7267

7368
return { luResult: luFileIdToResourceMap, qnaResult: qnaFileIdToResourceMap }
7469
} catch (err) {
@@ -346,21 +341,25 @@ const extractIntentUtterances = function(resource, intentName) {
346341
* @param {Map<string, LUResource>} qnaFileIdToResourceMap map of qna file id and resource
347342
* @param {Map<string, LUResource>} luFileIdToResourceMap map of lu file id and resource
348343
* @param {string} interruptionIntentName interruption intent name
344+
* @param {boolean} allLuEmpty indicates whether every lu file contains no sections other than model info sections
349345
* @throws {exception} throws errors
350346
*/
351-
const qnaCrossTrain = function (qnaFileIdToResourceMap, luFileIdToResourceMap, interruptionIntentName) {
347+
const qnaCrossTrain = function (qnaFileIdToResourceMap, luFileIdToResourceMap, interruptionIntentName, allLuEmpty) {
352348
try {
353-
for (const luObjectId of Array.from(luFileIdToResourceMap.keys())) {
354-
let qnaObjectId = luObjectId.toLowerCase().replace(new RegExp(helpers.FileExtTypeEnum.LUFile + '$'), helpers.FileExtTypeEnum.QnAFile)
355-
let fileName = path.basename(luObjectId, path.extname(luObjectId))
356-
const culture = fileHelper.getCultureFromPath(luObjectId)
349+
for (const qnaObjectId of Array.from(qnaFileIdToResourceMap.keys())) {
350+
let luObjectId = qnaObjectId.toLowerCase().replace(new RegExp(helpers.FileExtTypeEnum.QnAFile + '$'), helpers.FileExtTypeEnum.LUFile)
351+
let fileName = path.basename(qnaObjectId, path.extname(qnaObjectId))
352+
const culture = fileHelper.getCultureFromPath(qnaObjectId)
357353
fileName = culture ? fileName.substring(0, fileName.length - culture.length - 1) : fileName
358354

359-
qnaObjectId = Array.from(qnaFileIdToResourceMap.keys()).find(x => x.toLowerCase() === qnaObjectId)
360-
if (qnaObjectId) {
361-
const { luResource, qnaResource } = qnaCrossTrainCore(luFileIdToResourceMap.get(luObjectId), qnaFileIdToResourceMap.get(qnaObjectId), fileName, interruptionIntentName)
355+
luObjectId = Array.from(luFileIdToResourceMap.keys()).find(x => x.toLowerCase() === luObjectId)
356+
if (luObjectId) {
357+
const { luResource, qnaResource } = qnaCrossTrainCore(luFileIdToResourceMap.get(luObjectId), qnaFileIdToResourceMap.get(qnaObjectId), fileName, interruptionIntentName, allLuEmpty)
362358
luFileIdToResourceMap.set(luObjectId, luResource)
363359
qnaFileIdToResourceMap.set(qnaObjectId, qnaResource)
360+
} else {
361+
let qnaResource = qnaAddMetaData(qnaFileIdToResourceMap.get(qnaObjectId), fileName)
362+
qnaFileIdToResourceMap.set(qnaObjectId, qnaResource)
364363
}
365364
}
366365
} catch (err) {
@@ -374,9 +373,10 @@ const qnaCrossTrain = function (qnaFileIdToResourceMap, luFileIdToResourceMap, i
374373
* @param {LUResource} qnaResource the qna resource
375374
* @param {string} fileName file name
376375
* @param {string} interruptionIntentName interruption intent name
376+
* @param {boolean} allLuEmpty indicates whether every lu file contains no sections other than model info sections
377377
* @returns {luResource: LUResource, qnaResource: LUResource} cross trained lu resource and qna resource
378378
*/
379-
const qnaCrossTrainCore = function (luResource, qnaResource, fileName, interruptionIntentName) {
379+
const qnaCrossTrainCore = function (luResource, qnaResource, fileName, interruptionIntentName, allLuEmpty) {
380380
let trainedLuResource = luResource
381381
let trainedQnaResource = qnaResource
382382

@@ -425,11 +425,39 @@ const qnaCrossTrainCore = function (luResource, qnaResource, fileName, interrupt
425425
const crossTrainingComments = '> Source: cross training. Please do not edit these directly!'
426426

427427
// add questions from qna file to corresponding lu file with intent named DeferToRecognizer_QnA_${fileName}
428-
if (questionsContent && questionsContent !== '') {
428+
if (!allLuEmpty && questionsContent && questionsContent !== '') {
429429
const questionsToUtterances = `${NEWLINE}${crossTrainingComments}${NEWLINE}# DeferToRecognizer_QnA_${fileName}${NEWLINE}${questionsContent}`
430430
trainedLuResource = new SectionOperator(trainedLuResource).addSection(questionsToUtterances)
431431
}
432432

433+
// update qna filters
434+
trainedQnaResource = qnaAddMetaData(qnaResource, fileName)
435+
436+
// remove utterances with curly brackets
437+
const utterancesWithoutPatterns = utterances.filter(i => /{([^}]+)}/g.exec(i) === null)
438+
439+
// remove utterances which are duplicated with local qna questions
440+
let questionsOfLowerCase = questions.map(q => q.toLowerCase())
441+
let dedupedUtterances = utterancesWithoutPatterns.filter(u => !questionsOfLowerCase.includes(u.toLowerCase()))
442+
443+
// add utterances from lu file to corresponding qna file with question set to all utterances
444+
// split large QA pair to multiple smaller ones to overcome the limit that the maximum number of questions per answer is 300
445+
while (dedupedUtterances.length > 0) {
446+
let subDedupedUtterances = dedupedUtterances.splice(0, MAX_QUESTIONS_PER_ANSWER)
447+
// construct new question content for qna resource
448+
let utterancesContent = subDedupedUtterances.join(NEWLINE + '- ')
449+
let utterancesToQuestion = `${NEWLINE}${crossTrainingComments}${NEWLINE}> !# @qna.pair.source = crosstrained${NEWLINE}${NEWLINE}# ? ${utterancesContent}${NEWLINE}${NEWLINE}**Filters:**${NEWLINE}- dialogName=${fileName}${NEWLINE}${NEWLINE}\`\`\`${NEWLINE}intent=DeferToRecognizer_LUIS_${fileName}${NEWLINE}\`\`\``
450+
trainedQnaResource = new SectionOperator(trainedQnaResource).addSection(utterancesToQuestion)
451+
}
452+
453+
return { luResource: trainedLuResource, qnaResource: trainedQnaResource }
454+
}
455+
456+
const qnaAddMetaData = function (qnaResource, fileName) {
457+
let resultQnaResource = qnaResource
458+
// extract qna sections
459+
const qnaSections = qnaResource.Sections.filter(s => s.SectionType === LUSectionTypes.QNASECTION)
460+
433461
// update qna filters
434462
let qnaSectionContents = []
435463
for (const qnaSection of qnaSections) {
@@ -458,27 +486,10 @@ const qnaCrossTrainCore = function (luResource, qnaResource, fileName, interrupt
458486
const modelInforContent = modelInfoSections.map(m => m.ModelInfo).join(NEWLINE)
459487
if (modelInforContent && modelInforContent !== '') qnaContents = NEWLINE + qnaContents
460488

461-
trainedQnaResource = new SectionOperator(new LUResource([], modelInforContent, [])).addSection(qnaContents)
462-
}
463-
464-
// remove utterances with curly brackets
465-
const utterancesWithoutPatterns = utterances.filter(i => /{([^}]+)}/g.exec(i) === null)
466-
467-
// remove utterances which are duplicated with local qna questions
468-
let questionsOfLowerCase = questions.map(q => q.toLowerCase())
469-
let dedupedUtterances = utterancesWithoutPatterns.filter(u => !questionsOfLowerCase.includes(u.toLowerCase()))
470-
471-
// add utterances from lu file to corresponding qna file with question set to all utterances
472-
// split large QA pair to multiple smaller ones to overcome the limit that the maximum number of questions per answer is 300
473-
while (dedupedUtterances.length > 0) {
474-
let subDedupedUtterances = dedupedUtterances.splice(0, MAX_QUESTIONS_PER_ANSWER)
475-
// construct new question content for qna resource
476-
let utterancesContent = subDedupedUtterances.join(NEWLINE + '- ')
477-
let utterancesToQuestion = `${NEWLINE}${crossTrainingComments}${NEWLINE}> !# @qna.pair.source = crosstrained${NEWLINE}${NEWLINE}# ? ${utterancesContent}${NEWLINE}${NEWLINE}**Filters:**${NEWLINE}- dialogName=${fileName}${NEWLINE}${NEWLINE}\`\`\`${NEWLINE}intent=DeferToRecognizer_LUIS_${fileName}${NEWLINE}\`\`\``
478-
trainedQnaResource = new SectionOperator(trainedQnaResource).addSection(utterancesToQuestion)
489+
resultQnaResource = new SectionOperator(new LUResource([], modelInforContent, [])).addSection(qnaContents)
479490
}
480491

481-
return { luResource: trainedLuResource, qnaResource: trainedQnaResource }
492+
return resultQnaResource
482493
}
483494

484495
/**
@@ -490,6 +501,7 @@ const qnaCrossTrainCore = function (luResource, qnaResource, fileName, interrupt
490501
*/
491502
const parseAndValidateContent = async function (objectArray, verbose) {
492503
let fileIdToResourceMap = new Map()
504+
let allEmpty = true
493505
for (const object of objectArray) {
494506
let fileContent = object.content
495507
if (object.content && object.content !== '') {
@@ -505,6 +517,8 @@ const parseAndValidateContent = async function (objectArray, verbose) {
505517

506518
let resource = luParser.parse(fileContent)
507519

520+
if (resource.Sections.filter(s => s.SectionType !== LUSectionTypes.MODELINFOSECTION).length > 0) allEmpty = false
521+
508522
if (resource.Errors && resource.Errors.length > 0) {
509523
if (verbose) {
510524
var warns = resource.Errors.filter(error => (error && error.Severity && error.Severity === DiagnosticSeverity.WARN))
@@ -522,7 +536,7 @@ const parseAndValidateContent = async function (objectArray, verbose) {
522536
fileIdToResourceMap.set(object.id, resource)
523537
}
524538

525-
return fileIdToResourceMap
539+
return {fileIdToResourceMap, allEmpty}
526540
}
527541

528542
const pretreatment = function (luContents, qnaContents) {

packages/lu/src/parser/lu/luMerger.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ const parseLuFile = async function(luOb, log, luis_culture) {
512512
let parsedContent = ''
513513
if (!luOb.content) {
514514
let error = BuildDiagnostic({ message: `Cannot parse empty ${luOb.id}. Please add content to the file or remove it.` })
515-
throw(new exception(retCode.errorCode.INVALID_INPUT_FILE, error.toString()));
515+
throw(new exception(retCode.errorCode.EMPTY_CONTENT, error.toString()));
516516
}
517517
try {
518518
parsedContent = await parseFileContents.parseFile(luOb.content, log, luis_culture);

0 commit comments

Comments
 (0)