Skip to content
This repository was archived by the owner on Jan 15, 2025. It is now read-only.

Commit 3e0497b

Browse files
authored
New entity definition support in LU file format. (#168)
* Updated for new entity * refactor g4 files * remove unused code * fix typos * optimize parser * WIP * Simple and regex entity definition - new format * fix multiple bugs of antlr * Support for prebuilt, phraselist in new definition * composite entity * WIP.closedlist * Finalizing all entity type support in new format * Finalizing updates for JSON -> LU conversion * Translation in * Adding negative tests * Add support for entity reference in utterances. * Finalizing luis to lu for @ notation in utterances
1 parent b950a04 commit 3e0497b

38 files changed

+5623
-1196
lines changed

packages/luis/src/commands/luis/convert.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ export default class LuisConvert extends Command {
5252
result.desc = flags.desc || result.desc || ''
5353
result.culture = flags.culture || result.culture || 'en-us'
5454
result.culture = result.culture.toLowerCase()
55+
if (result.flatListOfEntityAndRoles) delete result.flatListOfEntityAndRoles
5556
result = JSON.stringify(result, null, 2)
5657
}
5758

packages/luis/src/commands/luis/translate.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ export default class LuisTranslate extends Command {
6666
for (let file in translatedObject) {
6767
for (let lng in translatedObject[file]) {
6868
filePath = await fileHelper.generateNewTranslatedFilePath(file, lng, out)
69-
await fs.writeFile(filePath, translatedObject[path.basename(file)][lng], 'utf-8')
69+
await fs.writeFile(filePath, translatedObject[path.basename(file)][lng][0], 'utf-8')
7070
}
7171
}
7272
} catch (err) {

packages/luis/src/parser/converters/luistoluconverter.js

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,18 @@ module.exports = {
5959
let nonCompositesInUtterance = sortedEntitiesList.filter(entity => LUISJSON.composites.find(composite => composite.name == entity.entity) == undefined);
6060
nonCompositesInUtterance.forEach(entity => {
6161
if (entity.role !== undefined) {
62-
tokenizedText[parseInt(entity.startPos)] = `{${entity.entity}:${entity.role}=${tokenizedText[parseInt(entity.startPos)]}`;
62+
tokenizedText[parseInt(entity.startPos)] = `{@${entity.role}=${tokenizedText[parseInt(entity.startPos)]}`;
6363
} else {
64-
tokenizedText[parseInt(entity.startPos)] = `{${entity.entity}=${tokenizedText[parseInt(entity.startPos)]}`;
64+
tokenizedText[parseInt(entity.startPos)] = `{@${entity.entity}=${tokenizedText[parseInt(entity.startPos)]}`;
6565
}
6666
tokenizedText[parseInt(entity.endPos)] += `}`;
6767
})
6868
let compositeEntitiesInUtterance = sortedEntitiesList.filter(entity => LUISJSON.composites.find(composite => composite.name == entity.entity) != undefined);
6969
compositeEntitiesInUtterance.forEach(entity => {
7070
if (entity.role !== undefined) {
71-
tokenizedText[parseInt(entity.startPos)] = `{${entity.entity}:${entity.role}=${tokenizedText[parseInt(entity.startPos)]}`;
71+
tokenizedText[parseInt(entity.startPos)] = `{@${entity.role}=${tokenizedText[parseInt(entity.startPos)]}`;
7272
} else {
73-
tokenizedText[parseInt(entity.startPos)] = `{${entity.entity}=${tokenizedText[parseInt(entity.startPos)]}`;
73+
tokenizedText[parseInt(entity.startPos)] = `{@${entity.entity}=${tokenizedText[parseInt(entity.startPos)]}`;
7474
}
7575
tokenizedText[parseInt(entity.endPos)] += `}`;
7676
})
@@ -97,9 +97,9 @@ module.exports = {
9797
}
9898
fileContent += NEWLINE + NEWLINE;
9999
}
100-
fileContent += '$' + entity.name + ':simple';
100+
fileContent += `@ simple ${entity.name}`;
101101
if (entity.roles.length > 0) {
102-
fileContent += ` Roles=${entity.roles.join(', ')}`;
102+
fileContent += ` ${entity.roles.length > 1 ? `hasRoles` : `hasRole`} ${entity.roles.join(',')}`
103103
}
104104
fileContent += NEWLINE + NEWLINE;
105105
});
@@ -109,9 +109,9 @@ module.exports = {
109109
if(LUISJSON.prebuiltEntities && LUISJSON.prebuiltEntities.length >= 0){
110110
fileContent += '> # PREBUILT Entity definitions' + NEWLINE + NEWLINE;
111111
LUISJSON.prebuiltEntities.forEach(function(entity) {
112-
fileContent += '$PREBUILT:' + entity.name;
112+
fileContent += `@ prebuilt ${entity.name}`;
113113
if (entity.roles.length > 0) {
114-
fileContent += ` Roles=${entity.roles.join(', ')}`;
114+
fileContent += ` ${entity.roles.length > 1 ? `hasRoles` : `hasRole`} ${entity.roles.join(',')}`;
115115
}
116116
fileContent += NEWLINE + NEWLINE;
117117
});
@@ -121,24 +121,31 @@ module.exports = {
121121
if(LUISJSON.model_features && LUISJSON.model_features.length >= 0) {
122122
fileContent += '> # Phrase list definitions' + NEWLINE + NEWLINE;
123123
LUISJSON.model_features.forEach(function(entity) {
124-
fileContent += '$' + entity.name + ':phraseList' + (entity.mode ? ' interchangeable' : '') + NEWLINE;
125-
fileContent += '- ' + entity.words + NEWLINE;
124+
fileContent += `@ phraselist ${entity.name}${(entity.mode ? `(interchangeable)` : ``)}`;
125+
if (entity.words !== '') {
126+
fileContent += ` = ${NEWLINE}\t- ${entity.words}`;
127+
}
128+
fileContent += NEWLINE + NEWLINE;
126129
});
127130
fileContent += NEWLINE;
128131
}
129132
if(LUISJSON.closedLists && LUISJSON.closedLists.length >= 0){
130133
fileContent += '> # List entities' + NEWLINE + NEWLINE;
131134
LUISJSON.closedLists.forEach(function(ListItem) {
135+
fileContent += `@ list ${ListItem.name}`;
136+
if (ListItem.roles.length > 0) {
137+
fileContent += ` ${ListItem.roles.length > 1 ? `hasRoles` : `hasRole`} ${ListItem.roles.join(',')}`;
138+
}
139+
if (ListItem.subLists.length !== 0) {
140+
fileContent += ` = `;
141+
fileContent += NEWLINE;
142+
}
132143
ListItem.subLists.forEach(function(list) {
133-
fileContent += '$' + ListItem.name + ':' + list.canonicalForm + '=';
134-
if (ListItem.roles.length > 0) {
135-
fileContent += ` Roles=${ListItem.roles.join(', ')}`;
136-
}
144+
fileContent += `\t- ${list.canonicalForm} :`;
137145
fileContent += NEWLINE;
138146
list.list.forEach(function(listItem) {
139-
fileContent += '- ' + listItem + NEWLINE;
147+
fileContent += '\t\t- ' + listItem + NEWLINE;
140148
});
141-
fileContent += NEWLINE;
142149
});
143150
fileContent += NEWLINE + NEWLINE;
144151
});
@@ -147,9 +154,12 @@ module.exports = {
147154
if(LUISJSON.regex_entities && LUISJSON.regex_entities.length >= 0) {
148155
fileContent += '> # RegEx entities' + NEWLINE + NEWLINE;
149156
LUISJSON.regex_entities.forEach(function(regExEntity) {
150-
fileContent += '$' + regExEntity.name + ':/' + regExEntity.regexPattern + '/';
157+
fileContent += `@ regex ${regExEntity.name}`;
151158
if (regExEntity.roles.length > 0) {
152-
fileContent += ` Roles=${regExEntity.roles.join(', ')}`;
159+
fileContent += ` ${regExEntity.roles.length > 1 ? `hasRoles` : `hasRole`} ${regExEntity.roles.join(',')}`;
160+
}
161+
if (regExEntity.regexPattern !== '') {
162+
fileContent += ` = /${regExEntity.regexPattern}/`;
153163
}
154164
fileContent += NEWLINE;
155165
});
@@ -160,9 +170,12 @@ module.exports = {
160170
if(LUISJSON.composites && LUISJSON.composites.length > 0) {
161171
fileContent += '> # Composite entities' + NEWLINE + NEWLINE;
162172
LUISJSON.composites.forEach(composite => {
163-
fileContent += '$' + composite.name + ':[' + composite.children.join(', ') + ']';
173+
fileContent += `@ composite ${composite.name}`;
164174
if (composite.roles.length > 0) {
165-
fileContent += ` Roles=${composite.roles.join(', ')}`;
175+
fileContent += ` ${composite.roles.length > 1 ? `hasRoles` : `hasRole`} ${composite.roles.join(',')}`;
176+
}
177+
if (composite.children.length > 0) {
178+
fileContent += ` = [${composite.children.join(', ')}]`;
166179
}
167180
fileContent += NEWLINE;
168181
})
@@ -231,7 +244,7 @@ const addUtteranceToCollection = function (attribute, srcItem, matchInTarget) {
231244
if(attribute === 'text') {
232245
matchInTarget.utterances.push(srcItem);
233246
} else {
234-
matchInTarget.utterances.push(new helperClasses.uttereances(srcItem.pattern,srcItem.intent,[]));
247+
matchInTarget.utterances.push(new helperClasses.uttereances(srcItem.pattern.replace('{', '{@'),srcItem.intent,[]));
235248
}
236249
}
237250

packages/luis/src/parser/lufile/LUFileLexer.g4

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,17 @@ HASH
3838
;
3939

4040
DASH
41-
: UTTERANCE_MARK {this.ignoreWS = true;} -> pushMode(INTENT_BODY_MODE)
41+
: UTTERANCE_MARK {this.ignoreWS = true;} -> pushMode(LIST_BODY_MODE)
4242
;
4343

4444
DOLLAR
4545
: '$' {this.ignoreWS = true;} -> pushMode(ENTITY_MODE)
4646
;
4747

48+
AT
49+
: '@' {this.ignoreWS = true;} -> pushMode(NEW_ENTITY_MODE)
50+
;
51+
4852
IMPORT_DESC
4953
: '[' .*? ']'
5054
;
@@ -65,6 +69,60 @@ INVALID_TOKEN_DEFAULT_MODE
6569
: .
6670
;
6771

72+
mode NEW_ENTITY_MODE;
73+
74+
WS_IN_NEW_ENTITY_IGNORED
75+
: WHITESPACE+ {this.ignoreWS}? -> skip
76+
;
77+
78+
WS_IN_NEW_ENTITY
79+
: WHITESPACE+ -> type(WS)
80+
;
81+
82+
NEWLINE_IN_NEW_ENTITY
83+
: '\r'? '\n' {this.ignoreWS = true;} -> type(NEWLINE), popMode
84+
;
85+
86+
COMMA
87+
: ','
88+
;
89+
90+
NEW_EQUAL
91+
: '='
92+
;
93+
94+
HAS_ROLES_LABEL
95+
: 'hasRole' 's'?
96+
;
97+
98+
HAS_FEATURES_LABEL
99+
: 'usesFeature' 's'?
100+
;
101+
102+
NEW_ENTITY_TYPE_IDENTIFIER
103+
: 'simple'|'list'|'regex'|'prebuilt'|'composite'|'machine-learned'|'patternany'|'phraselist'
104+
;
105+
106+
NEW_ENTITY_IDENTIFIER
107+
: (LETTER | NUMBER | '_' | '-' | '|' | '.' | '(' | ')')+
108+
;
109+
110+
NEW_ENTITY_IDENTIFIER_WITH_WS
111+
: ('\'' | '"') (LETTER | NUMBER | '_' | '-' | '|' | '.' | WS)+ ('\'' | '"')
112+
;
113+
114+
NEW_COMPOSITE_ENTITY
115+
: '[' (~[\r\n{}[()])* ']'
116+
;
117+
118+
NEW_REGEX_ENTITY
119+
: '/' (~[\r\n])* '/'
120+
;
121+
122+
NEW_TEXT
123+
: ~[ \t\r\n.,;]+
124+
;
125+
68126
mode INTENT_NAME_MODE;
69127

70128
WS_IN_NAME_IGNORED
@@ -87,18 +145,18 @@ DOT
87145
: '.'
88146
;
89147

90-
mode INTENT_BODY_MODE;
148+
mode LIST_BODY_MODE;
91149

92150
// a little tedious on the rules, a big improvement on portability
93-
WS_IN_BODY_IGNORED
151+
WS_IN_LIST_BODY_IGNORED
94152
: WHITESPACE+ {this.ignoreWS}? -> skip
95153
;
96154

97-
WS_IN_BODY
155+
WS_IN_LIST_BODY
98156
: WHITESPACE+ -> type(WS)
99157
;
100158

101-
NEWLINE_IN_BODY
159+
NEWLINE_IN_LIST_BODY
102160
: '\r'? '\n' {this.ignoreWS = true;} -> type(NEWLINE), popMode
103161
;
104162

packages/luis/src/parser/lufile/LUFileParser.g4

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ file
99
paragraph
1010
: newline
1111
| intentDefinition
12+
| newEntityDefinition
1213
| entityDefinition
1314
| importDefinition
1415
| qnaDefinition
@@ -51,6 +52,54 @@ normalIntentString
5152
: DASH (WS|TEXT|EXPRESSION|ESCAPE_CHARACTER)*
5253
;
5354

55+
newEntityDefinition
56+
: newEntityLine newline newEntityListbody?
57+
;
58+
59+
newEntityListbody
60+
: (normalItemString newline)+
61+
;
62+
63+
newEntityLine
64+
: AT newEntityType? (newEntityName|newEntityNameWithWS) newEntityRoles? newEntityUsesFeatures? NEW_EQUAL? (newCompositeDefinition|newRegexDefinition)?
65+
;
66+
67+
newCompositeDefinition
68+
: NEW_COMPOSITE_ENTITY
69+
;
70+
71+
newRegexDefinition
72+
: NEW_REGEX_ENTITY
73+
;
74+
75+
newEntityType
76+
: NEW_ENTITY_TYPE_IDENTIFIER
77+
;
78+
79+
newEntityRoles
80+
: HAS_ROLES_LABEL? newEntityRoleOrFeatures
81+
;
82+
83+
newEntityUsesFeatures
84+
: HAS_FEATURES_LABEL newEntityRoleOrFeatures
85+
;
86+
87+
newEntityRoleOrFeatures
88+
: text (COMMA text)*
89+
;
90+
91+
text
92+
: NEW_TEXT | NEW_ENTITY_IDENTIFIER
93+
;
94+
95+
newEntityName
96+
: NEW_ENTITY_TYPE_IDENTIFIER | NEW_ENTITY_IDENTIFIER
97+
;
98+
99+
newEntityNameWithWS
100+
: NEW_ENTITY_IDENTIFIER_WITH_WS
101+
;
102+
54103
entityDefinition
55104
: entityLine newline entityListBody?
56105
;
@@ -84,7 +133,7 @@ entityListBody
84133
;
85134

86135
normalItemString
87-
: DASH (WS|TEXT)*
136+
: DASH (WS|TEXT|EXPRESSION)*
88137
;
89138

90139
importDefinition

packages/luis/src/parser/lufile/classes/hclasses.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,28 @@ const readerObj = {
108108
this.role = role ? role : '';
109109
this.parent = parent ? parent : undefined;
110110
}
111+
},
112+
patternAnyEntity: class{
113+
constructor(name, explicitList, roles) {
114+
this.name = name ? name : '';
115+
this.explicitList = explicitList ? explicitList : [];
116+
this.roles = roles ? roles : [];
117+
}
118+
},
119+
entityAndRoles: class {
120+
constructor(name, type, roles) {
121+
this.name = name ? name : '';
122+
this.type = type ? type : '';
123+
this.roles = roles ? roles : [];
124+
}
125+
addRoles(roles) {
126+
(roles || []).forEach(role => {
127+
if (!this.roles.includes(role)) this.roles.push(role)
128+
})
129+
}
130+
hasRole(value) {
131+
return this.roles.includes(value);
132+
}
111133
}
112134
};
113135

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/**
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License.
4+
*/
5+
module.exports = {
6+
SIMPLE: 'simple',
7+
LIST: 'list',
8+
REGEX: 'regex',
9+
PREBUILT: 'prebuilt',
10+
COMPOSITE: 'composite',
11+
ML: 'machine-learned',
12+
PATTERNANY:'patternany',
13+
PHRASELIST: 'phraselist'
14+
};

packages/luis/src/parser/lufile/enums/parserconsts.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,6 @@ module.exports = {
1414
ANSWER: "```",
1515
FILTER: "**",
1616
QNAALTERATIONS: "qna-alterations",
17-
MODELINFO: "!#"
17+
MODELINFO: "!#",
18+
NEWENTITY: "@"
1819
};

0 commit comments

Comments
 (0)