Skip to content
This repository was archived by the owner on Jan 15, 2025. It is now read-only.

Commit dbba6c6

Browse files
authored
Fix luis converter to enable escaping specific chars (#1111)
* fix luis converter * add test cases
1 parent b300d92 commit dbba6c6

File tree

6 files changed

+559
-6
lines changed

6 files changed

+559
-6
lines changed

packages/lu/src/parser/lufile/visitor.js

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ class Visitor {
1212
let utterance = '';
1313
let entities = [];
1414
let errorMsgs = [];
15-
for (const node of ctx.children) {
15+
for (const [index, node] of ctx.children.entries()) {
1616
const innerNode = node;
1717
switch (innerNode.symbol.type) {
1818
case lp.DASH: break;
@@ -23,7 +23,7 @@ class Visitor {
2323
}
2424
case lp.ESCAPE_CHARACTER: {
2525
let escapeCharacters = innerNode.getText();
26-
let escapedUtterace = escapeCharacters.length > 1 && EscapeCharsInUtterance.includes(escapeCharacters[1]) ? escapeCharacters.slice(1) : escapeCharacters;
26+
let escapedUtterace = escapeCharacters.length > 1 && (EscapeCharsInUtterance.includes(escapeCharacters[1]) || (escapeCharacters[1] === '\\' && index + 1 < ctx.children.length && ctx.children[index + 1].symbol.type === lp.EXPRESSION)) ? escapeCharacters.slice(1) : escapeCharacters;
2727
utterance = utterance.concat(escapedUtterace);
2828
break;
2929
}
@@ -98,7 +98,8 @@ class Visitor {
9898
let expChars = exp.split('');
9999
let escapeChar = false;
100100
expChars.forEach(function (char, index) {
101-
if (char === '\\' && expChars.length > index + 1 && EscapeCharsInUtterance.includes(expChars[index + 1])) {
101+
if (char === '\\' && !escapeChar && expChars.length > index + 1
102+
&& (EscapeCharsInUtterance.includes(expChars[index + 1]) || expChars[index + 1] === '\\')) {
102103
escapeChar = true;
103104
} else if (char === '{' && !escapeChar) {
104105
let newEntity = {entityName : '', role : '', entityValue : undefined, parent : curEntity};

packages/lu/src/parser/luis/luConverter.js

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
const NEWLINE = require('os').EOL;
22
const helperClasses = require('./../lufile/classes/hclasses')
33
const EntityTypeEnum = require('./../utils/enums/luisEntityTypes');
4+
const EscapeCharsInUtterance = require('./../utils/enums/escapechars').EscapeCharsInUtterance;
5+
const helpers = require('./../utils/helpers');
46

57
/**
68
* Parses a Luis object into Lu Content
@@ -89,7 +91,7 @@ const parseUtterancesToLu = function(utterances, luisJSON){
8991
if(luisJSON.test === true && utterance.predictedResult !== undefined){
9092
fileContent += parsePredictedResultToLu(utterance, luisJSON)
9193
}
92-
if(utterance.entities.length >= 0) {
94+
if(utterance.entities.length > 0) {
9395
// update utterance for each entity
9496
let text = utterance.text;
9597
// flatten entities
@@ -99,10 +101,25 @@ const parseUtterancesToLu = function(utterances, luisJSON){
99101
// remove all children
100102
sortedEntitiesList.forEach(entity => delete entity.children);
101103
let tokenizedText = text.split('');
104+
tokenizedText.forEach(function (token, index) {
105+
tokenizedText[index] = EscapeCharsInUtterance.includes(token) ? `\\${token}` : token;
106+
});
102107
// handle cases where we have both child as well as cases where more than one entity can have the same start position
103108
// if there are multiple entities in the same start position, then order them by composite, nDepth, regular entity
104109
getEntitiesByPositionList(sortedEntitiesList, tokenizedText);
105110
updatedText = tokenizedText.join('');
111+
} else {
112+
// will not add escape char for pattern utterances since brackets are strictly used in pattern
113+
// so there are no exceptions that need to be handled in pattern
114+
if (helpers.isUtterancePattern(utterance)) {
115+
updatedText = utterance.text;
116+
} else {
117+
let tokenizedText = utterance.text.split('');
118+
tokenizedText.forEach(function (token, index) {
119+
tokenizedText[index] = EscapeCharsInUtterance.includes(token) ? `\\${token}` : token;
120+
});
121+
updatedText = tokenizedText.join('');
122+
}
106123
}
107124

108125
// remove duplicated whitespaces between words inside utterance to make sure they are aligned with the luis portal
@@ -138,7 +155,16 @@ const updateTokenizedTextByEntity = function(tokenizedText, entity) {
138155
} else {
139156
tokenizedText[parseInt(entity.startPos)] = `{@${entity.entity}=${tokenizedText[parseInt(entity.startPos)]}`;
140157
}
141-
tokenizedText[parseInt(entity.endPos)] = tokenizedText[parseInt(entity.endPos)] + '}';
158+
159+
// check for a backslash before the entity definition
160+
// a backslash before { or } will be recognized as escaping that { or }
161+
// to avoid such an escape, add another backslash before the backslash
162+
if (parseInt(entity.startPos) > 0 && tokenizedText[parseInt(entity.startPos) - 1] === '\\') {
163+
tokenizedText[parseInt(entity.startPos) - 1] += '\\'
164+
}
165+
166+
tokenizedText[parseInt(entity.endPos)] = tokenizedText[parseInt(entity.endPos)] === '\\' ?
167+
tokenizedText[parseInt(entity.endPos)] + '\\}' : tokenizedText[parseInt(entity.endPos)] + '}';
142168
}
143169

144170
const parsePredictedResultToLu = function(utterance, luisJSON){

packages/lu/src/parser/utils/enums/escapechars.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,5 @@
44
*/
55
// Escape chars in utterance
66
module.exports = {
7-
EscapeCharsInUtterance: ['{', '}', '\\']
7+
EscapeCharsInUtterance: ['{', '}']
88
};

packages/lu/test/commands/luis/convert.test.js

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ describe('luis:convert', () => {
6262
await assertToLu('./../../fixtures/verified/nDepthEntityInUtterance.json', './../../fixtures/verified/nDepthEntityInUtterance.lu')
6363
})
6464

65+
it('luis:convert successfully reconstructs a markdown file from a LUIS input file (with escape characters in utterances)', async () => {
66+
await assertToLu('./../../fixtures/verified/escapeCharactersInUtterances.json', './../../fixtures/verified/escapeCharactersInUtterances.lu')
67+
})
68+
69+
it('luis:convert Utterances with escape characters correctly', async () => {
70+
await assertToJSON('./../../fixtures/verified/escapeCharactersInUtterances.lu', './../../fixtures/verified/escapeCharactersInUtterances.json')
71+
})
72+
6573
it('luis:convert Simple intent and utterances are parsed correctly', async () => {
6674
await assertToJSON('./../../fixtures/examples/1.lu', './../../fixtures/verified/1.json', '1')
6775
})

0 commit comments

Comments
 (0)