@@ -36,39 +36,34 @@ module.exports = {
3636 let { luObjectArray, qnaObjectArray} = pretreatment ( luContents , qnaContents )
3737 const { rootIds, triggerRules, intentName, verbose} = crossTrainConfig
3838
39- let triggerFileIds = Object . keys ( triggerRules ) . map ( x => x . toLowerCase ( ) )
40- let destFileIds = Object . values ( triggerRules ) . flatMap ( x => Object . values ( x ) ) . flatMap ( y => y ) . map ( x => x . toLowerCase ( ) )
41-
42- luObjectArray = luObjectArray . filter ( x => triggerFileIds . includes ( x . id . toLowerCase ( ) ) || destFileIds . includes ( x . id . toLowerCase ( ) ) )
43- qnaObjectArray = qnaObjectArray . filter ( x => {
44- const luFileId = x . id . toLowerCase ( ) . replace ( new RegExp ( helpers . FileExtTypeEnum . QnAFile + '$' ) , helpers . FileExtTypeEnum . LUFile )
45- return triggerFileIds . includes ( luFileId ) || destFileIds . includes ( luFileId )
46- } )
47-
4839 // parse lu content to LUResource object
49- let luFileIdToResourceMap = await parseAndValidateContent ( luObjectArray , verbose )
40+ let { fileIdToResourceMap : luFileIdToResourceMap , allEmpty : allLuEmpty } = await parseAndValidateContent ( luObjectArray , verbose )
5041
5142 // parse qna content to LUResource object
52- let qnaFileIdToResourceMap = await parseAndValidateContent ( qnaObjectArray , verbose )
53-
54- // construct resource tree to build the father-children relationship among lu files
55- let resources = constructResoureTree ( luFileIdToResourceMap , triggerRules )
56-
57- // do lu cross training from roots. One root one core training
58- for ( const rootObjectId of rootIds ) {
59- if ( resources . some ( r => r . id . toLowerCase ( ) === rootObjectId . toLowerCase ( ) ) ) {
60- // do cross training for each root at top level
61- const result = luCrossTrain ( rootObjectId , resources , qnaFileIdToResourceMap , intentName )
62- for ( const res of result ) {
63- luFileIdToResourceMap . set ( res . id , res . content )
43+ let { fileIdToResourceMap : qnaFileIdToResourceMap , allEmpty : allQnAEmpty } = await parseAndValidateContent ( qnaObjectArray , verbose )
44+
45+ if ( ! allLuEmpty ) {
46+ // construct resource tree to build the father-children relationship among lu files
47+ let resources = constructResoureTree ( luFileIdToResourceMap , triggerRules )
48+
49+ // do lu cross training from roots. One root one core training
50+ for ( const rootObjectId of rootIds ) {
51+ if ( resources . some ( r => r . id . toLowerCase ( ) === rootObjectId . toLowerCase ( ) ) ) {
52+ // do cross training for each root at top level
53+ const result = luCrossTrain ( rootObjectId , resources , qnaFileIdToResourceMap , intentName )
54+ for ( const res of result ) {
55+ luFileIdToResourceMap . set ( res . id , res . content )
56+ }
57+ } else {
58+ throw ( new exception ( retCode . errorCode . INVALID_INPUT , `Sorry, root lu file '${ rootObjectId } ' does not exist` ) )
6459 }
65- } else {
66- throw ( new exception ( retCode . errorCode . INVALID_INPUT , `Sorry, root lu file '${ rootObjectId } ' does not exist` ) )
6760 }
6861 }
6962
70- // do qna cross training with lu files
71- qnaCrossTrain ( qnaFileIdToResourceMap , luFileIdToResourceMap , intentName )
63+ if ( ! allQnAEmpty ) {
64+ // do qna cross training with lu files
65+ qnaCrossTrain ( qnaFileIdToResourceMap , luFileIdToResourceMap , intentName , allLuEmpty )
66+ }
7267
7368 return { luResult : luFileIdToResourceMap , qnaResult : qnaFileIdToResourceMap }
7469 } catch ( err ) {
@@ -346,21 +341,25 @@ const extractIntentUtterances = function(resource, intentName) {
346341 * @param {Map<string, LUResource> } qnaFileIdToResourceMap map of qna file id and resource
347342 * @param {Map<string, LUResource> } luFileIdToResourceMap map of lu file id and resource
348343 * @param {string } interruptionIntentName interruption intent name
344+ * @param {boolean } allLuEmpty indicates whether none of the lu files contains any section other than model info
349345 * @throws {exception } throws errors
350346 */
351- const qnaCrossTrain = function ( qnaFileIdToResourceMap , luFileIdToResourceMap , interruptionIntentName ) {
347+ const qnaCrossTrain = function ( qnaFileIdToResourceMap , luFileIdToResourceMap , interruptionIntentName , allLuEmpty ) {
352348 try {
353- for ( const luObjectId of Array . from ( luFileIdToResourceMap . keys ( ) ) ) {
354- let qnaObjectId = luObjectId . toLowerCase ( ) . replace ( new RegExp ( helpers . FileExtTypeEnum . LUFile + '$' ) , helpers . FileExtTypeEnum . QnAFile )
355- let fileName = path . basename ( luObjectId , path . extname ( luObjectId ) )
356- const culture = fileHelper . getCultureFromPath ( luObjectId )
349+ for ( const qnaObjectId of Array . from ( qnaFileIdToResourceMap . keys ( ) ) ) {
350+ let luObjectId = qnaObjectId . toLowerCase ( ) . replace ( new RegExp ( helpers . FileExtTypeEnum . QnAFile + '$' ) , helpers . FileExtTypeEnum . LUFile )
351+ let fileName = path . basename ( qnaObjectId , path . extname ( qnaObjectId ) )
352+ const culture = fileHelper . getCultureFromPath ( qnaObjectId )
357353 fileName = culture ? fileName . substring ( 0 , fileName . length - culture . length - 1 ) : fileName
358354
359- qnaObjectId = Array . from ( qnaFileIdToResourceMap . keys ( ) ) . find ( x => x . toLowerCase ( ) === qnaObjectId )
360- if ( qnaObjectId ) {
361- const { luResource, qnaResource } = qnaCrossTrainCore ( luFileIdToResourceMap . get ( luObjectId ) , qnaFileIdToResourceMap . get ( qnaObjectId ) , fileName , interruptionIntentName )
355+ luObjectId = Array . from ( luFileIdToResourceMap . keys ( ) ) . find ( x => x . toLowerCase ( ) === luObjectId )
356+ if ( luObjectId ) {
357+ const { luResource, qnaResource } = qnaCrossTrainCore ( luFileIdToResourceMap . get ( luObjectId ) , qnaFileIdToResourceMap . get ( qnaObjectId ) , fileName , interruptionIntentName , allLuEmpty )
362358 luFileIdToResourceMap . set ( luObjectId , luResource )
363359 qnaFileIdToResourceMap . set ( qnaObjectId , qnaResource )
360+ } else {
361+ let qnaResource = qnaAddMetaData ( qnaFileIdToResourceMap . get ( qnaObjectId ) , fileName )
362+ qnaFileIdToResourceMap . set ( qnaObjectId , qnaResource )
364363 }
365364 }
366365 } catch ( err ) {
@@ -374,9 +373,10 @@ const qnaCrossTrain = function (qnaFileIdToResourceMap, luFileIdToResourceMap, i
374373 * @param {LUResource } qnaResource the qna resource
375374 * @param {string } fileName file name
376375 * @param {string } interruptionIntentName interruption intent name
376+ * @param {boolean } allLuEmpty indicates whether none of the lu files contains any section other than model info
377377 * @returns {luResource: LUResource, qnaResource: LUResource } cross trained lu resource and qna resource
378378 */
379- const qnaCrossTrainCore = function ( luResource , qnaResource , fileName , interruptionIntentName ) {
379+ const qnaCrossTrainCore = function ( luResource , qnaResource , fileName , interruptionIntentName , allLuEmpty ) {
380380 let trainedLuResource = luResource
381381 let trainedQnaResource = qnaResource
382382
@@ -425,11 +425,39 @@ const qnaCrossTrainCore = function (luResource, qnaResource, fileName, interrupt
425425 const crossTrainingComments = '> Source: cross training. Please do not edit these directly!'
426426
427427 // add questions from qna file to corresponding lu file with intent named DeferToRecognizer_QnA_${fileName}
428- if ( questionsContent && questionsContent !== '' ) {
428+ if ( ! allLuEmpty && questionsContent && questionsContent !== '' ) {
429429 const questionsToUtterances = `${ NEWLINE } ${ crossTrainingComments } ${ NEWLINE } # DeferToRecognizer_QnA_${ fileName } ${ NEWLINE } ${ questionsContent } `
430430 trainedLuResource = new SectionOperator ( trainedLuResource ) . addSection ( questionsToUtterances )
431431 }
432432
433+ // update qna filters
434+ trainedQnaResource = qnaAddMetaData ( qnaResource , fileName )
435+
436+ // remove utterances with curly brackets
437+ const utterancesWithoutPatterns = utterances . filter ( i => / { ( [ ^ } ] + ) } / g. exec ( i ) === null )
438+
439+ // remove utterances which are duplicated with local qna questions
440+ let questionsOfLowerCase = questions . map ( q => q . toLowerCase ( ) )
441+ let dedupedUtterances = utterancesWithoutPatterns . filter ( u => ! questionsOfLowerCase . includes ( u . toLowerCase ( ) ) )
442+
443+ // add utterances from lu file to corresponding qna file with question set to all utterances
444+ // split large QA pair to multiple smaller ones to overcome the limit that the maximum number of questions per answer is 300
445+ while ( dedupedUtterances . length > 0 ) {
446+ let subDedupedUtterances = dedupedUtterances . splice ( 0 , MAX_QUESTIONS_PER_ANSWER )
447+ // construct new question content for qna resource
448+ let utterancesContent = subDedupedUtterances . join ( NEWLINE + '- ' )
449+ let utterancesToQuestion = `${ NEWLINE } ${ crossTrainingComments } ${ NEWLINE } > !# @qna.pair.source = crosstrained${ NEWLINE } ${ NEWLINE } # ? ${ utterancesContent } ${ NEWLINE } ${ NEWLINE } **Filters:**${ NEWLINE } - dialogName=${ fileName } ${ NEWLINE } ${ NEWLINE } \`\`\`${ NEWLINE } intent=DeferToRecognizer_LUIS_${ fileName } ${ NEWLINE } \`\`\``
450+ trainedQnaResource = new SectionOperator ( trainedQnaResource ) . addSection ( utterancesToQuestion )
451+ }
452+
453+ return { luResource : trainedLuResource , qnaResource : trainedQnaResource }
454+ }
455+
456+ const qnaAddMetaData = function ( qnaResource , fileName ) {
457+ let resultQnaResource = qnaResource
458+ // extract qna sections
459+ const qnaSections = qnaResource . Sections . filter ( s => s . SectionType === LUSectionTypes . QNASECTION )
460+
433461 // update qna filters
434462 let qnaSectionContents = [ ]
435463 for ( const qnaSection of qnaSections ) {
@@ -458,27 +486,10 @@ const qnaCrossTrainCore = function (luResource, qnaResource, fileName, interrupt
458486 const modelInforContent = modelInfoSections . map ( m => m . ModelInfo ) . join ( NEWLINE )
459487 if ( modelInforContent && modelInforContent !== '' ) qnaContents = NEWLINE + qnaContents
460488
461- trainedQnaResource = new SectionOperator ( new LUResource ( [ ] , modelInforContent , [ ] ) ) . addSection ( qnaContents )
462- }
463-
464- // remove utterances with curly brackets
465- const utterancesWithoutPatterns = utterances . filter ( i => / { ( [ ^ } ] + ) } / g. exec ( i ) === null )
466-
467- // remove utterances which are duplicated with local qna questions
468- let questionsOfLowerCase = questions . map ( q => q . toLowerCase ( ) )
469- let dedupedUtterances = utterancesWithoutPatterns . filter ( u => ! questionsOfLowerCase . includes ( u . toLowerCase ( ) ) )
470-
471- // add utterances from lu file to corresponding qna file with question set to all utterances
472- // split large QA pair to multiple smaller ones to overcome the limit that the maximum number of questions per answer is 300
473- while ( dedupedUtterances . length > 0 ) {
474- let subDedupedUtterances = dedupedUtterances . splice ( 0 , MAX_QUESTIONS_PER_ANSWER )
475- // construct new question content for qna resource
476- let utterancesContent = subDedupedUtterances . join ( NEWLINE + '- ' )
477- let utterancesToQuestion = `${ NEWLINE } ${ crossTrainingComments } ${ NEWLINE } > !# @qna.pair.source = crosstrained${ NEWLINE } ${ NEWLINE } # ? ${ utterancesContent } ${ NEWLINE } ${ NEWLINE } **Filters:**${ NEWLINE } - dialogName=${ fileName } ${ NEWLINE } ${ NEWLINE } \`\`\`${ NEWLINE } intent=DeferToRecognizer_LUIS_${ fileName } ${ NEWLINE } \`\`\``
478- trainedQnaResource = new SectionOperator ( trainedQnaResource ) . addSection ( utterancesToQuestion )
489+ resultQnaResource = new SectionOperator ( new LUResource ( [ ] , modelInforContent , [ ] ) ) . addSection ( qnaContents )
479490 }
480491
481- return { luResource : trainedLuResource , qnaResource : trainedQnaResource }
492+ return resultQnaResource
482493}
483494
484495/**
@@ -490,6 +501,7 @@ const qnaCrossTrainCore = function (luResource, qnaResource, fileName, interrupt
490501 */
491502const parseAndValidateContent = async function ( objectArray , verbose ) {
492503 let fileIdToResourceMap = new Map ( )
504+ let allEmpty = true
493505 for ( const object of objectArray ) {
494506 let fileContent = object . content
495507 if ( object . content && object . content !== '' ) {
@@ -505,6 +517,8 @@ const parseAndValidateContent = async function (objectArray, verbose) {
505517
506518 let resource = luParser . parse ( fileContent )
507519
520+ if ( resource . Sections . filter ( s => s . SectionType !== LUSectionTypes . MODELINFOSECTION ) . length > 0 ) allEmpty = false
521+
508522 if ( resource . Errors && resource . Errors . length > 0 ) {
509523 if ( verbose ) {
510524 var warns = resource . Errors . filter ( error => ( error && error . Severity && error . Severity === DiagnosticSeverity . WARN ) )
@@ -522,7 +536,7 @@ const parseAndValidateContent = async function (objectArray, verbose) {
522536 fileIdToResourceMap . set ( object . id , resource )
523537 }
524538
525- return fileIdToResourceMap
539+ return { fileIdToResourceMap, allEmpty }
526540}
527541
528542const pretreatment = function ( luContents , qnaContents ) {
0 commit comments