Skip to content
This repository was archived by the owner on Jan 15, 2025. It is now read-only.

Commit 52a7916

Browse files
authored
Enable escaping square brackets and parentheses so they are not recognized as a pattern (#1142)
* support escaping square brackets and parentheses * fix logic for deduplication of utterances
1 parent 5c86839 commit 52a7916

File tree

7 files changed

+121
-8
lines changed

7 files changed

+121
-8
lines changed

packages/lu/src/parser/lufile/parseFileContents.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1257,7 +1257,7 @@ const parseAndHandleSimpleIntentSection = function (parsedContent, luResource, c
12571257
}
12581258
} else {
12591259
if(!hashTable[uttHash]) {
1260-
let utteranceObject = new helperClass.utterances(utterance, intentName, []);
1260+
let utteranceObject = new helperClass.utterances(utterance.replace(/\\[\[\]\(\)]/gi, match => match.slice(1)), intentName, []);
12611261
parsedContent.LUISJsonStructure.utterances.push(utteranceObject);
12621262
hashTable[uttHash] = utteranceObject;
12631263
}

packages/lu/src/parser/luis/luConverter.js

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,11 @@ const parseUtterancesToLu = function(utterances, luisJSON){
111111
} else {
112112
// will not add escape char for pattern utterances since brackets are strictly used in pattern
113113
// so there are no exceptions that need to be handled in pattern
114-
if (helpers.isUtterancePattern(utterance)) {
114+
if (utterance.isPattern) {
115115
updatedText = utterance.text;
116116
} else {
117-
let tokenizedText = utterance.text.split('');
117+
updatedText = utterance.text.replace(/[\[\]\(\)]/gi, match => `\\${match}`);
118+
let tokenizedText = updatedText.split('');
118119
tokenizedText.forEach(function (token, index) {
119120
tokenizedText[index] = EscapeCharsInUtterance.includes(token) ? `\\${token}` : token;
120121
});
@@ -511,9 +512,9 @@ const updateUtterancesList = function (srcCollection, tgtCollection, attribute)
511512
addUtteranceToCollection(attribute, srcItem, matchInTarget);
512513
return;
513514
}
514-
if(!matchInTarget.utterances.find(item => item.text == srcItem[attribute])) {
515+
if(!matchInTarget.utterances.find(item => item.text == srcItem[attribute] && ((item.isPattern && attribute !== 'text') || (!item.isPattern && attribute === 'text')))) {
515516
addUtteranceToCollection(attribute, srcItem, matchInTarget);
516-
return;
517+
return;
517518
}
518519
});
519520
}
@@ -528,7 +529,9 @@ const addUtteranceToCollection = function (attribute, srcItem, matchInTarget) {
528529
if(attribute === 'text') {
529530
matchInTarget.utterances.push(srcItem);
530531
} else {
531-
matchInTarget.utterances.push(new helperClasses.utterances(srcItem.pattern.replace('{', '{@'),srcItem.intent,[]));
532+
let utteranceFromPattern = new helperClasses.utterances(srcItem.pattern.replace('{', '{@'),srcItem.intent,[]);
533+
utteranceFromPattern.isPattern = true;
534+
matchInTarget.utterances.push(utteranceFromPattern);
532535
}
533536
}
534537

packages/lu/src/parser/utils/helpers.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ const helpers = {
187187
if (this.isUtteranceLinkRef(utterance)) return false;
188188

189189
// patterns must have at least one [optional] and or one (group | text)
190-
let detectPatternRegex = /(\[.*?\])|(\(.*?(\|.*?)+\))/gi;
190+
let detectPatternRegex = /(\[.*(?<!\\)\])|(\(.*?(\|.*?)+(?<!\\)\))/gi;
191191
return detectPatternRegex.test(utterance);
192192
},
193193
hashCode : function(s) {

packages/lu/test/commands/luis/convert.test.js

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,14 @@ describe('luis:convert', () => {
109109
await assertToJSON('./../../fixtures/examples/newEntityIncludes.lu', './../../fixtures/verified/newEntityIncludes.json')
110110
})
111111

112+
it('luis:convert utterance escaping square brackets and parenthesis correctly to json', async () => {
113+
await assertToJSON('./../../fixtures/verified/escapeSquareBrackets.lu', './../../fixtures/verified/escapeSquareBrackets.json')
114+
})
115+
116+
it('luis:convert utterance escaping square brackets and parenthesis correctly to json to lu', async () => {
117+
await assertToLu('./../../fixtures/verified/escapeSquareBrackets.json', './../../fixtures/verified/escapeSquareBrackets.lu')
118+
})
119+
112120
it('Parse to LU instance', async () => {
113121
let luFile = `
114122
@ ml test
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
{
2+
"intents": [
3+
{
4+
"name": "None"
5+
},
6+
{
7+
"name": "test"
8+
}
9+
],
10+
"entities": [],
11+
"composites": [],
12+
"closedLists": [],
13+
"regex_entities": [],
14+
"regex_features": [],
15+
"utterances": [
16+
{
17+
"text": "who (was|is|will be) {employeelistentity}['s] manager [([in]|[on]){datetimev2}?]",
18+
"intent": "test",
19+
"entities": []
20+
},
21+
{
22+
"text": "Text spacing issues [mas 1.14.12] (wcag 2.1) 10",
23+
"intent": "test",
24+
"entities": []
25+
},
26+
{
27+
"text": "who will be the champion of this {eventName}?",
28+
"intent": "test",
29+
"entities": []
30+
}
31+
],
32+
"patterns": [
33+
{
34+
"pattern": "who (was|is|will be) the champion of that game?",
35+
"intent": "test"
36+
},
37+
{
38+
"pattern": "who \\(is\\) the champion of this {eventName}?",
39+
"intent": "test"
40+
},
41+
{
42+
"pattern": "who is the best one [in] this game",
43+
"intent": "test"
44+
}
45+
],
46+
"patternAnyEntities": [
47+
{
48+
"name": "eventName",
49+
"explicitList": [],
50+
"roles": []
51+
}
52+
],
53+
"prebuiltEntities": [],
54+
"luis_schema_version": "7.0.0",
55+
"versionId": "0.1",
56+
"name": "abcdefg",
57+
"desc": "",
58+
"culture": "en-us",
59+
"tokenizerVersion": "1.0.0",
60+
"phraselists": []
61+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
2+
> LUIS application information
3+
> !# @app.name = abcdefg
4+
> !# @app.versionId = 0.1
5+
> !# @app.culture = en-us
6+
> !# @app.luis_schema_version = 7.0.0
7+
> !# @app.tokenizerVersion = 1.0.0
8+
9+
10+
> # Intent definitions
11+
12+
# None
13+
14+
15+
# test
16+
- who \(was|is|will be\) \{employeelistentity\}\['s\] manager \[\(\[in\]|\[on\]\)\{datetimev2\}?\]
17+
- Text spacing issues \[mas 1.14.12\] \(wcag 2.1\) 10
18+
- who will be the champion of this \{eventName\}?
19+
- who (was|is|will be) the champion of that game?
20+
- who \(is\) the champion of this {@eventName}?
21+
- who is the best one [in] this game
22+
23+
24+
> # Entity definitions
25+
26+
27+
> # PREBUILT Entity definitions
28+
29+
30+
> # Phrase list definitions
31+
32+
33+
> # List entities
34+
35+
> # RegEx entities
36+
37+
38+
> # Pattern.Any entities
39+
40+
@ patternany eventName

packages/luis/test/fixtures/verified/luis_sorted.lu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@
3535

3636

3737
# DeleteAlarm
38-
- delete the {alarmTime} alarm
38+
- delete the \{alarmTime\} alarm
39+
- delete the {@alarmTime} alarm
3940
- remove the {@alarmTime} alarm
4041

4142

0 commit comments

Comments
 (0)