Skip to content
This repository was archived by the owner on Jan 15, 2025. It is now read-only.

Commit 13f2fce

Browse files
authored
support section and cross-train in bf lu (#354)
* add section support * fix bug * fix unit tests * add tests for section enabled and mergeIntents disabled * add CRUD for section and intent * fix typo * refactor lu parser component to sections * optimize * optimize g4 parser * fix some bugs * fix nestintents bug * init section CRUD * add section check step * fix bugs based on failed tests * fix some tests and typo bugs * fix tests * fix luis_sorted.lu * revert unnecessary changes in test files * optimize and add tests * revert section api in this pr * remove uuid from section * optimize logic and add tests * revert test file changes * add more tests * add test for enableSections set to false * add more test * export parse content of section name line when it is treated as intent * refine section crud * Update sectionOperator.js fix function error * extract isSectionEnabled in parser * add more tests * fix test failures * expose parseLuList function * add interuption intent converter function * commit covert.ts changes * Revert "commit covert.ts changes" This reverts commit 8bd5abe. * add crossTrain command * add crossTrain function * optimize crossTrain * optimize name * support to convert recruse sub folder files * add entity support * support entity * optimize tests * optimize cross train * optimize tests * fix test cases * add locale support * support local intent * support empty lu file content * try to merge master 2 * move newly added tests in luis folder to lu fodler * fix conflicts * fix tests * fix style * fix style issues * add docs * optimize style * remove cross-train cli * add test for getConfigFile function in fileHelper * optimize code * optimize test * fix style warining * revert remove cli commit * fix getLuFiles function to accept multiple files * exclude generated files in test coverage * ignore generated files in test coverage * remove dup config in lu's package.json * refine code * refine code * fix style validation error * support nestedIntentSection * support nestedIntentSection * remove cross train doc * remove more cross train doc * optimize tests * optimize luCrossTrainer structure * optimize tests * fix bug * remove entities from interuption and labels from utterances and pull patterns * make config format from .config to .json and refine crossTrain function interface * optimize config structure and add multiple dialog invocations support in same trigger * optimize code structure. One root one core training * remove only in test file * fix style error * expose dignostics class * throw diagnostic in luis validation * optimize code based on comments * remove cross-train cli related changes and only keep the api
1 parent cda90ea commit 13f2fce

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+4499
-1658
lines changed

packages/lu/src/parser/index.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
const modules = {
66
parser: {
77
parseFile: require('./lufile/parseFileContents').parseFile,
8-
validateLUISBlob: require('./luis/luisValidator').validateLUIS
8+
validateLUISBlob: require('./luis/luisValidator')
99
},
1010
refresh: {
1111
constructMdFromLUIS: require('./luis/luConverter'),
Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
const retCode = require('./../utils/enums/CLI-errors')
2+
const helpers = require('./../utils/helpers')
3+
const exception = require('./../utils/exception')
4+
const luParser = require('./../lufile/luParser');
5+
const SectionOperator = require('./../lufile/sectionOperator');
6+
const LUSectionTypes = require('./../utils/enums/lusectiontypes');
7+
const DiagnosticSeverity = require('./../lufile/diagnostic').DiagnosticSeverity;
8+
const NEWLINE = require('os').EOL;
9+
10+
module.exports = {
11+
/**
12+
* Do cross training among lu files
13+
* @param {luObject[]} luObjectArray the luObject list to be parsed
14+
* @param {any} crossTrainConfig cross train json config
15+
* @returns {Map<string, LUResource>} Map of file id and luResource
16+
* @throws {exception} Throws on errors. exception object includes errCode and text
17+
*/
18+
luCrossTrain: async function (luObjectArray, crossTrainConfig) {
19+
try {
20+
const crossTrainConfigObj = JSON.parse(crossTrainConfig);
21+
const rootObjectIds = crossTrainConfigObj.rootIds;
22+
const triggerRules = crossTrainConfigObj.triggerRules;
23+
const intentName = crossTrainConfigObj.intentName;
24+
const verbose = crossTrainConfigObj.verbose;
25+
26+
// parse lu content to LUResource object
27+
let fileIdToLuResourceMap = this.parseAndValidateLuContent(luObjectArray, verbose);
28+
29+
// contruct resource tree to build the father-children relationship among lu files
30+
let resources = this.constructResoureTree(fileIdToLuResourceMap, triggerRules);
31+
32+
// do cross training from roots. One root one core training
33+
for (const rootObjectId of rootObjectIds) {
34+
if (resources.some(r => r.id === rootObjectId)) {
35+
// do cross training for each root at top level
36+
const result = this.crossTrain(rootObjectId, resources, intentName);
37+
for (const res of result) {
38+
fileIdToLuResourceMap.set(res.id, res.content);
39+
}
40+
} else {
41+
throw (new exception(retCode.errorCode.INVALID_INPUT, `Sorry, root lu file '${rootObjectId}' does not exist`));
42+
}
43+
}
44+
45+
return fileIdToLuResourceMap;
46+
} catch (err) {
47+
throw (err)
48+
}
49+
},
50+
51+
/**
52+
* Parse and validate luObject array to convert to LUResource object dict
53+
* @param {luObject[]} luObjectArray the luObject list to be parsed
54+
* @param {boolean} verbose indicate to enable log messages or not
55+
* @returns {Map<string, LUResource>} Map of file id and luResource
56+
* @throws {exception} Throws on errors. exception object includes errCode and text
57+
*/
58+
parseAndValidateLuContent: function (luObjectArray, verbose) {
59+
let fileIdToLuResourceMap = new Map();
60+
for (const luObject of luObjectArray) {
61+
let luContent = luObject.content;
62+
luContent = helpers.sanitizeNewLines(luContent);
63+
if (luContent === undefined || luContent === '') continue;
64+
65+
let luResource = luParser.parse(luContent);
66+
if (luResource.Errors && luResource.Errors.length > 0) {
67+
if (verbose) {
68+
var warns = luResource.Errors.filter(error => (error && error.Severity && error.Severity === DiagnosticSeverity.WARN));
69+
if (warns.length > 0) {
70+
process.stdout.write(warns.map(warn => warn.toString()).join(NEWLINE).concat(NEWLINE));
71+
}
72+
}
73+
74+
var errors = luResource.Errors.filter(error => (error && error.Severity && error.Severity === DiagnosticSeverity.ERROR));
75+
if (errors.length > 0) {
76+
throw (new exception(retCode.errorCode.INVALID_LINE, errors.map(error => error.toString()).join(NEWLINE)));
77+
}
78+
}
79+
80+
fileIdToLuResourceMap.set(luObject.id, luResource);
81+
}
82+
83+
return fileIdToLuResourceMap;
84+
},
85+
86+
/**
87+
* Contruct resource tree to build the father-children relationship among lu files
88+
* @param {Map<string, LUResource>} fileIdToLuResourceMap Map of file id and luResource
89+
* @param {any} triggerRules trigger rules object that indicate the triggering rules from root to dest lu files
90+
* @returns {any[]} Object array of LUResource with id and children properties
91+
* @throws {exception} Throws on errors. exception object includes errCode and text
92+
*/
93+
constructResoureTree(fileIdToLuResourceMap, triggerRules) {
94+
let visitedChildren = new Set();
95+
let resources = [];
96+
let fileIdsFromInput = Array.from(fileIdToLuResourceMap.keys());
97+
for (const fileId of fileIdsFromInput) {
98+
let luResource = fileIdToLuResourceMap.get(fileId);
99+
let resource = {
100+
id: fileId,
101+
content: luResource,
102+
children: []
103+
};
104+
105+
if (!(fileId in triggerRules)) {
106+
resources.push(resource);
107+
continue;
108+
}
109+
110+
let intents = [];
111+
for (const section of luResource.Sections) {
112+
if (section.SectionType === LUSectionTypes.SIMPLEINTENTSECTION
113+
|| section.SectionType === LUSectionTypes.NESTEDINTENTSECTION) {
114+
intents.push(section);
115+
}
116+
}
117+
118+
const destLuFileToIntent = triggerRules[fileId];
119+
for (const destLuFile of Object.keys(destLuFileToIntent)) {
120+
if (!fileIdsFromInput.includes(destLuFile)) continue;
121+
122+
if (visitedChildren.has(destLuFile)) {
123+
// validate loop in a tree or forest
124+
throw (new exception(retCode.errorCode.INVALID_INPUT, `Sorry, dialog call loop detected for lu file ${destLuFile} when doing cross training`));
125+
}
126+
127+
const triggerIntentName = destLuFileToIntent[destLuFile];
128+
if (!intents.some(i => i.Name === triggerIntentName)) {
129+
throw (new exception(retCode.errorCode.INVALID_INPUT, `Sorry, trigger intent '${triggerIntentName}' is not found in lu file: ${fileId}`));
130+
}
131+
132+
resource.children.push({
133+
target: destLuFile,
134+
intent: triggerIntentName
135+
});
136+
137+
visitedChildren.add(destLuFile);
138+
}
139+
140+
resources.push(resource);
141+
}
142+
143+
return resources;
144+
},
145+
146+
/**
147+
* Cross training core function. Do cross training from a root to its children once.
148+
* @param {string} rootResourceId the root resource object id
149+
* @param {any[]} resources all resource object list
150+
* @param {string} intentName interuption intent name
151+
* @returns {any[]} updated resource objects
152+
* @throws {exception} Throws on errors. exception object includes errCode and text
153+
*/
154+
crossTrain: function (rootResourceId, resources, intentName) {
155+
const idToResourceMap = new Map();
156+
for (const resource of resources) {
157+
idToResourceMap.set(resource.id, resource);
158+
}
159+
160+
// Parse resources
161+
let rootResource = resources.filter(r => r.id === rootResourceId)[0];
162+
rootResource.visited = true;
163+
this.mergeRootInteruptionToLeaves(rootResource, idToResourceMap, intentName);
164+
165+
return Array.from(idToResourceMap.values());
166+
},
167+
168+
mergeRootInteruptionToLeaves: function (rootResource, result, intentName) {
169+
if (rootResource.children === undefined || rootResource.length <= 0) return;
170+
171+
this.mergeBrothersInteruption(rootResource, result, intentName)
172+
for (const child of rootResource.children) {
173+
let childResource = result.get(child.target);
174+
if (childResource.visited === undefined) {
175+
const newChildResource = this.mergeFatherInteruptionToChild(rootResource, childResource, intentName);
176+
result.set(child.target, newChildResource);
177+
newChildResource.visited = true;
178+
this.mergeRootInteruptionToLeaves(newChildResource, result, intentName);
179+
}
180+
}
181+
},
182+
183+
mergeBrothersInteruption: function (resource, result, intentName) {
184+
let children = resource.children;
185+
for (const child of children) {
186+
let triggerIntent = child.intent;
187+
const brotherSections = resource.content.Sections.filter(s => s.Name !== triggerIntent
188+
&& s.Name !== intentName
189+
&& (s.SectionType === LUSectionTypes.SIMPLEINTENTSECTION || s.SectionType === LUSectionTypes.NESTEDINTENTSECTION));
190+
191+
let brotherUtterances = [];
192+
brotherSections.forEach(s => {
193+
if (s.SectionType === LUSectionTypes.SIMPLEINTENTSECTION) {
194+
brotherUtterances = brotherUtterances.concat(s.UtteranceAndEntitiesMap.map(u => u.utterance));
195+
} else {
196+
s.SimpleIntentSections.forEach(section => {
197+
brotherUtterances = brotherUtterances.concat(section.UtteranceAndEntitiesMap.map(u => u.utterance));
198+
})
199+
}
200+
});
201+
202+
let targetResource = result.get(child.target);
203+
204+
// Merge direct brother's utterances
205+
targetResource = this.mergeInteruptionIntent(brotherUtterances, targetResource, intentName);
206+
result.set(targetResource.id, targetResource);
207+
}
208+
},
209+
210+
mergeFatherInteruptionToChild: function (fatherResource, childResource, intentName) {
211+
const fatherInteruptions = fatherResource.content.Sections.filter(s => s.Name === intentName);
212+
if (fatherInteruptions && fatherInteruptions.length > 0) {
213+
const fatherInteruption = fatherInteruptions[0];
214+
const fatherUtterances = fatherInteruption.UtteranceAndEntitiesMap.map(u => u.utterance);
215+
childResource = this.mergeInteruptionIntent(fatherUtterances, childResource, intentName);
216+
}
217+
218+
return childResource;
219+
},
220+
221+
mergeInteruptionIntent: function (fromUtterances, toResource, intentName) {
222+
const toInteruptions = toResource.content.Sections.filter(section => section.Name === intentName);
223+
if (toInteruptions && toInteruptions.length > 0) {
224+
const toInteruption = toInteruptions[0];
225+
const existingUtterances = toInteruption.UtteranceAndEntitiesMap.map(u => u.utterance);
226+
// construct new content here
227+
let newFileContent = '';
228+
fromUtterances.forEach(utterance => {
229+
if (!existingUtterances.includes(utterance)) {
230+
newFileContent += '- ' + utterance + NEWLINE;
231+
}
232+
});
233+
234+
if (newFileContent === '') return toResource;
235+
236+
newFileContent = toInteruption.ParseTree.intentDefinition().getText().trim() + NEWLINE + newFileContent;
237+
let lines = newFileContent.split(/\r?\n/);
238+
let newLines = [];
239+
lines.forEach(line => {
240+
if (line.trim().startsWith('-')) {
241+
newLines.push('- ' + line.trim().slice(1).trim());
242+
} else if (line.trim().startsWith('##')) {
243+
newLines.push('## ' + line.trim().slice(2).trim());
244+
} else if (line.trim().startsWith('#')) {
245+
newLines.push('# ' + line.trim().slice(1).trim());
246+
}
247+
})
248+
249+
newFileContent = newLines.join(NEWLINE);
250+
251+
// update section here
252+
toResource.content = new SectionOperator(toResource.content).updateSection(toInteruption.Id, newFileContent);
253+
} else {
254+
// construct new content here
255+
if (fromUtterances && fromUtterances.length > 0) {
256+
let newFileContent = NEWLINE + `# ${intentName}` + NEWLINE;
257+
fromUtterances.forEach(utterance => newFileContent += '- ' + utterance + NEWLINE);
258+
259+
// add section here
260+
toResource.content = new SectionOperator(toResource.content).addSection(newFileContent);
261+
}
262+
}
263+
264+
return toResource;
265+
}
266+
}

packages/lu/src/parser/lufile/LUFileLexer.g4

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
lexer grammar LUFileLexer;
22

33
@lexer::members {
4-
this.ignoreWS = true; // usually we ignore whitespace, but inside template, whitespace is significant
4+
this.ignoreWS = true; // usually we ignore whitespace, but inside utterance, whitespace is significant
55
}
66

77
fragment LETTER: 'a'..'z' | 'A'..'Z';
@@ -34,7 +34,7 @@ QNA
3434
;
3535

3636
HASH
37-
: '#'+ {this.ignoreWS = true;} -> pushMode(INTENT_NAME_MODE)
37+
: '#' {this.ignoreWS = true;} -> pushMode(INTENT_NAME_MODE)
3838
;
3939

4040
DASH
@@ -133,6 +133,10 @@ WS_IN_NAME
133133
: WHITESPACE+ -> type(WS)
134134
;
135135

136+
HASH_IN_NAME
137+
: '#' -> type(HASH)
138+
;
139+
136140
NEWLINE_IN_NAME
137141
: '\r'? '\n' -> type(NEWLINE), popMode
138142
;

0 commit comments

Comments
 (0)