Skip to content
This repository was archived by the owner on Jan 15, 2025. It is now read-only.

Commit 930f45e

Browse files
authored
optimize lu parser to improve parsing performance and robustness (#910)
* fix missing entity type issue in simpleIntentSection * adjust test case * optimize lu parser to improve parsing efficiency * add more unit tests to test section range
1 parent f10d79c commit 930f45e

File tree

11 files changed

+1012
-962
lines changed

11 files changed

+1012
-962
lines changed

packages/lu/src/parser/lufile/LUFileParser.g4

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ paragraph
1010
: newline
1111
| nestedIntentSection
1212
| simpleIntentSection
13+
| entitySection
14+
| newEntitySection
1315
| importSection
1416
| qnaSection
1517
| modelInfoSection
@@ -51,7 +53,7 @@ subIntentDefinition
5153
;
5254

5355
simpleIntentSection
54-
: (intentDefinition? (entitySection | newEntitySection)+) | intentDefinition
56+
: intentDefinition
5557
;
5658

5759
intentDefinition
@@ -135,15 +137,15 @@ entityDefinition
135137
;
136138

137139
entityLine
138-
: WS* DOLLAR (entityName COLON_MARK entityType)?
140+
: WS* DOLLAR entityName? COLON_MARK? entityType?
139141
;
140142

141143
entityName
142-
: (ENTITY_TEXT|WS)*
144+
: (ENTITY_TEXT|WS)+
143145
;
144146

145147
entityType
146-
: (compositeEntityIdentifier|regexEntityIdentifier|ENTITY_TEXT|COLON_MARK|WS)*
148+
: (compositeEntityIdentifier|regexEntityIdentifier|ENTITY_TEXT|COLON_MARK|WS)+
147149
;
148150

149151
compositeEntityIdentifier

packages/lu/src/parser/lufile/generated/LUFileLexer.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Generated from ../LUFileLexer.g4 by ANTLR 4.7.2
1+
// Generated from ../LUFileLexer.g4 by ANTLR 4.8
22
// jshint ignore: start
33
var antlr4 = require('antlr4/index');
44

@@ -555,6 +555,5 @@ LUFileLexer.prototype.ruleNames = [ "A", "B", "C", "D", "E", "F", "G", "H",
555555
LUFileLexer.prototype.grammarFileName = "LUFileLexer.g4";
556556

557557

558-
559558
exports.LUFileLexer = LUFileLexer;
560559

packages/lu/src/parser/lufile/generated/LUFileParser.interp

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

packages/lu/src/parser/lufile/generated/LUFileParser.js

Lines changed: 829 additions & 863 deletions
Large diffs are not rendered by default.

packages/lu/src/parser/lufile/generated/LUFileParserListener.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Generated from ../LUFileParser.g4 by ANTLR 4.7.2
1+
// Generated from ../LUFileParser.g4 by ANTLR 4.8
22
// jshint ignore: start
33
var antlr4 = require('antlr4/index');
44

packages/lu/src/parser/lufile/generated/LUFileParserVisitor.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Generated from ../LUFileParser.g4 by ANTLR 4.7.2
1+
// Generated from ../LUFileParser.g4 by ANTLR 4.8
22
// jshint ignore: start
33
var antlr4 = require('antlr4/index');
44

packages/lu/src/parser/lufile/luParser.js

Lines changed: 68 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,10 @@ class LUParser {
7474
emptyIntentSection.Id = `${emptyIntentSection.SectionType}_${emptyIntentSection.Name}`
7575

7676
// get the end character index
77-
const firstLine = content.split(/\r?\n/)[0];
77+
// this is default value
78+
// it will be reset in function extractSectionBody()
7879
let endCharacter = section.Name.length + 2;
79-
if (firstLine.includes(section.Name)) {
80-
endCharacter = firstLine.length;
81-
}
80+
8281
const range = new Range(section.Range.Start, new Position(section.Range.Start.Line, endCharacter))
8382
emptyIntentSection.Range = range;
8483
let errorMsg = `no utterances found for intent definition: "# ${emptyIntentSection.Name}"`
@@ -153,6 +152,8 @@ class LUParser {
153152
}))
154153
}
155154

155+
sections = this.reconstractIntentSections(sections)
156+
156157
this.extractSectionBody(sections, content)
157158

158159
return new LUResource(sections, content, errors);
@@ -231,17 +232,10 @@ class LUParser {
231232
}
232233

233234
let entitySections = fileContext.paragraph()
234-
.map(x => x.simpleIntentSection())
235-
.filter(x => x && !x.intentDefinition());
235+
.map(x => x.entitySection())
236+
.filter(x => x && x.entityDefinition());
236237

237-
let entitySectionList = [];
238-
entitySections.forEach(x => {
239-
if (x.entitySection) {
240-
for (const entitySection of x.entitySection()) {
241-
entitySectionList.push(new EntitySection(entitySection));
242-
}
243-
}
244-
})
238+
let entitySectionList = entitySections.map(x => new EntitySection(x));
245239

246240
return entitySectionList;
247241
}
@@ -256,17 +250,10 @@ class LUParser {
256250
}
257251

258252
let newEntitySections = fileContext.paragraph()
259-
.map(x => x.simpleIntentSection())
260-
.filter(x => x && !x.intentDefinition());
253+
.map(x => x.newEntitySection())
254+
.filter(x => x && x.newEntityDefinition());
261255

262-
let newEntitySectionList = [];
263-
newEntitySections.forEach(x => {
264-
if (x.newEntitySection) {
265-
for (const newEntitySection of x.newEntitySection()) {
266-
newEntitySectionList.push(new NewEntitySection(newEntitySection));
267-
}
268-
}
269-
})
256+
let newEntitySectionList = newEntitySections.map(x => new NewEntitySection(x));
270257

271258
return newEntitySectionList;
272259
}
@@ -325,12 +312,67 @@ class LUParser {
325312
return modelInfoSectionList;
326313
}
327314

315+
/**
316+
* @param {any[]} sections
317+
*/
318+
static reconstractIntentSections(sections) {
319+
let newSections = []
320+
sections.sort((a, b) => a.Range.Start.Line - b.Range.Start.Line)
321+
let index
322+
for (index = 0; index < sections.length; index++) {
323+
let section = sections[index]
324+
if (index + 1 === sections.length) {
325+
newSections.push(section)
326+
break
327+
}
328+
329+
if (section.SectionType === SectionType.NESTEDINTENTSECTION) {
330+
if (sections[index + 1].SectionType === SectionType.ENTITYSECTION
331+
|| sections[index + 1].SectionType === SectionType.NEWENTITYSECTION) {
332+
let simpleIntentSections = section.SimpleIntentSections
333+
simpleIntentSections[simpleIntentSections.length - 1].Entities.push(sections[index + 1])
334+
simpleIntentSections[simpleIntentSections.length - 1].Errors.push(...sections[index + 1].Errors)
335+
index++
336+
337+
while (index + 1 < sections.length
338+
&& (sections[index + 1].SectionType === SectionType.ENTITYSECTION
339+
|| sections[index + 1].SectionType === SectionType.NEWENTITYSECTION
340+
|| (sections[index + 1].SectionType === SectionType.SIMPLEINTENTSECTION && sections[index + 1].IntentNameLine.includes('##')))) {
341+
if (sections[index + 1].SectionType === SectionType.ENTITYSECTION
342+
|| sections[index + 1].SectionType === SectionType.NEWENTITYSECTION) {
343+
simpleIntentSections[simpleIntentSections.length - 1].Entities.push(sections[index + 1])
344+
simpleIntentSections[simpleIntentSections.length - 1].Errors.push(...sections[index + 1].Errors)
345+
} else {
346+
simpleIntentSections.push(sections[index + 1])
347+
}
348+
349+
index++
350+
}
351+
352+
simpleIntentSections.forEach(s => section.Errors.push(...s.Errors))
353+
354+
section.SimpleIntentSection = simpleIntentSections
355+
}
356+
} else if (section.SectionType === SectionType.SIMPLEINTENTSECTION) {
357+
while (index + 1 < sections.length && (sections[index + 1].SectionType === SectionType.ENTITYSECTION
358+
|| sections[index + 1].SectionType === SectionType.NEWENTITYSECTION)) {
359+
section.Entities.push(sections[index + 1])
360+
section.Errors.push(...sections[index + 1].Errors)
361+
index++
362+
}
363+
}
364+
365+
newSections.push(section)
366+
}
367+
368+
return newSections
369+
}
370+
328371
/**
329372
* @param {any[]} sections
330373
* @param {string} content
331374
*/
332375
static extractSectionBody(sections, content) {
333-
sections.sort((a, b) => a.Range.Start.Line - b.Range.Start.Line)
334376
const originList = content.split(/\r?\n/)
335377
let qnaSectionIndex = 0
336378
sections.forEach(function (section, index) {
@@ -348,6 +390,7 @@ class LUParser {
348390
stopLine = originList.length
349391
}
350392
section.Range.End.Line = stopLine;
393+
section.Range.End.Character = originList[stopLine - 1].length
351394

352395
let destList
353396
if (section.SectionType === SectionType.QNASECTION) {

packages/lu/src/parser/lufile/nestedIntentSection.js

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ class NestedIntentSection extends BaseSection {
1515
super();
1616
this.SectionType = LUSectionTypes.NESTEDINTENTSECTION;
1717
this.Name = this.ExtractName(parseTree);
18-
this.Body = this.ExtractBody(parseTree, content);
18+
this.Body = '';
1919
this.SimpleIntentSections = this.ExtractSimpleIntentSections(parseTree, content);
2020
this.Errors = [];
2121
if (this.SimpleIntentSections && this.SimpleIntentSections.length > 0) {
@@ -34,27 +34,11 @@ class NestedIntentSection extends BaseSection {
3434
return parseTree.nestedIntentNameLine().nestedIntentName().getText().trim();
3535
}
3636

37-
ExtractBody(parseTree, content) {
38-
const startLine = parseTree.start.line - 1;
39-
const stopLine = parseTree.stop.line - 1;
40-
const originList = content.split(/\r?\n/)
41-
if (isNaN(startLine) || isNaN(stopLine) || startLine < 0 || startLine > stopLine || originList.Length <= stopLine) {
42-
throw new Error("index out of range.")
43-
}
44-
45-
if (startLine < stopLine) {
46-
const destList = originList.slice(startLine + 1, stopLine + 1)
47-
48-
return destList.join(NEWLINE)
49-
} else {
50-
return ''
51-
}
52-
}
53-
5437
ExtractSimpleIntentSections(parseTree, content) {
5538
let simpleIntentSections = [];
5639
for(const subIntentDefinition of parseTree.nestedIntentBodyDefinition().subIntentDefinition()) {
5740
let simpleIntentSection = new SimpleIntentSection(subIntentDefinition.simpleIntentSection(), content);
41+
simpleIntentSection.Range.Start.Character = 0
5842
simpleIntentSections.push(simpleIntentSection);
5943
}
6044

packages/lu/src/parser/lufile/simpleIntentSection.js

Lines changed: 5 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,11 @@ class SimpleIntentSection extends BaseSection {
2525

2626
if (parseTree) {
2727
this.Name = this.ExtractName(parseTree);
28+
this.IntentNameLine = this.ExtractIntentNameLine(parseTree);
2829
let result = this.ExtractUtteranceAndEntitiesMap(parseTree);
2930
this.UtteranceAndEntitiesMap = result.utteranceAndEntitiesMap;
3031
this.Errors = result.errors;
31-
result = this.ExtractEntities(parseTree);
32-
this.Entities = result.entitySections;
33-
this.Errors = this.Errors.concat(result.errors);
3432
this.Id = `${this.SectionType}_${this.Name}`;
35-
this.Body = this.ExtractBody(parseTree, content)
3633
const startPosition = new Position(parseTree.start.line, parseTree.start.column);
3734
const stopPosition = new Position(parseTree.stop.line, parseTree.stop.column + parseTree.stop.text.length);
3835
this.Range = new Range(startPosition, stopPosition);
@@ -43,6 +40,10 @@ class SimpleIntentSection extends BaseSection {
4340
return parseTree.intentDefinition().intentNameLine().intentName().getText().trim();
4441
}
4542

43+
ExtractIntentNameLine(parseTree) {
44+
return parseTree.intentDefinition().intentNameLine().getText().trim();
45+
}
46+
4647
ExtractUtteranceAndEntitiesMap(parseTree) {
4748
let utteranceAndEntitiesMap = [];
4849
let errors = [];
@@ -94,45 +95,6 @@ class SimpleIntentSection extends BaseSection {
9495

9596
return { utteranceAndEntitiesMap, errors };
9697
}
97-
98-
ExtractEntities(parseTree) {
99-
let entitySections = [];
100-
let errors = [];
101-
if (parseTree.entitySection) {
102-
for (const entitySection of parseTree.entitySection()) {
103-
const entitySectionObj = new EntitySection(entitySection);
104-
entitySections.push(entitySectionObj);
105-
errors = errors.concat(entitySectionObj.Errors);
106-
}
107-
}
108-
109-
if (parseTree.newEntitySection) {
110-
for (const newEntitySection of parseTree.newEntitySection()) {
111-
const newEntitiySectionObj = new NewEntitySection(newEntitySection);
112-
entitySections.push(newEntitiySectionObj);
113-
errors = errors.concat(newEntitiySectionObj.Errors);
114-
}
115-
}
116-
117-
return { entitySections, errors };
118-
}
119-
120-
ExtractBody(parseTree, content) {
121-
const startLine = parseTree.start.line - 1
122-
const stopLine = parseTree.stop.line - 1
123-
const originList = content.split(/\r?\n/)
124-
if (isNaN(startLine) || isNaN(stopLine) || startLine < 0 || startLine > stopLine || originList.Length <= stopLine) {
125-
throw new Error("index out of range.")
126-
}
127-
128-
if (startLine < stopLine) {
129-
const destList = originList.slice(startLine + 1, stopLine + 1)
130-
131-
return destList.join(NEWLINE)
132-
} else {
133-
return ''
134-
}
135-
}
13698
}
13799

138100
module.exports = SimpleIntentSection;

0 commit comments

Comments
 (0)