Skip to content
This repository was archived by the owner on Jan 15, 2025. It is now read-only.

Commit 789391b

Browse files
Add support for URI reference in LU (#923)
* Add support for URI reference in LU * fixing linting issues. * removing dead code. Co-authored-by: Emilio Munoz <[email protected]>
1 parent ec5bb0e commit 789391b

File tree

8 files changed

+397
-61
lines changed

8 files changed

+397
-61
lines changed

packages/lu/src/parser/lu/luMerger.js

Lines changed: 66 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ const parserObject = require('./../lufile/classes/parserObject');
1616
const txtfile = require('./../lufile/read-text-file');
1717
const BuildDiagnostic = require('./../lufile/diagnostic').BuildDiagnostic;
1818
const LUISObjNameEnum = require('./../utils/enums/luisobjenum');
19+
const fetch = require('node-fetch');
1920

2021
module.exports = {
2122
/**
@@ -361,7 +362,7 @@ const resolveRefByType = function(srcId, ref, refTree) {
361362
return filter(srcId, ref, refTree);
362363
}
363364

364-
const buildLuJsonObject = async function(luObjArray, log, luis_culture, luSearchFn = findLuFilesInDir){
365+
const buildLuJsonObject = async function(luObjArray, log, luis_culture, luSearchFn = resolveLuContent){
365366
let allParsedLUISContent = []
366367
let allParsedQnAContent = []
367368
let allParsedAlterationsContent = []
@@ -411,40 +412,74 @@ const buildLuJsonObject = async function(luObjArray, log, luis_culture, luSearch
411412
QnAAlterations: allParsedAlterationsContent
412413
}
413414
}
414-
415-
const findLuFilesInDir = async function(srcId, idsToFind){
416-
let luObjects = []
415+
const resolveLuContent = async function(srcId, idsToFind){
416+
let luObjects = [];
417+
for(let idx = 0; idx < idsToFind.length; idx++) {
418+
let toResolve = idsToFind[idx];
419+
if (isUrl(toResolve.filePath)) {
420+
await resolveLuUriContent(srcId, toResolve, luObjects);
421+
} else {
422+
resolveLuFileContent(toResolve, luObjects, srcId);
423+
}
424+
}
425+
return luObjects;
426+
}
427+
const resolveLuFileContent = function(file, luObjects, srcId) {
417428
let parentFilePath = srcId === 'stdin' ? process.cwd() : path.parse(path.resolve(srcId)).dir
418-
for(let idx = 0; idx < idsToFind.length; idx++ ) {
419-
// Support wild cards at the end of a relative .LU file path.
420-
// './bar/*' should look for all .lu files under the specified folder.
421-
// './bar/**' should recursively look for .lu files under sub-folders as well.
422-
let file = idsToFind[idx]
423-
if(file.filePath.endsWith('*')) {
424-
const isRecursive = file.filePath.endsWith('**')
425-
const rootFolder = file.filePath.replace(/\*/g, '')
426-
let rootPath = rootFolder;
427-
if(!path.isAbsolute(rootFolder)) {
428-
rootPath = path.resolve(parentFilePath, rootFolder);
429-
}
430-
// Get LU files in this location
431-
const luFilesToAdd = helpers.findLUFiles(rootPath, isRecursive);
432-
// add these to filesToParse
433-
for(let f = 0; f < luFilesToAdd.length; f++){
434-
const opts = new luOptions(luFilesToAdd[f], file.includeInCollate)
435-
luObjects.push(new luObject(readLuFile(luFilesToAdd[f]), opts))
436-
}
437-
continue
429+
// Support wild cards at the end of a relative .LU file path.
430+
// './bar/*' should look for all .lu files under the specified folder.
431+
// './bar/**' should recursively look for .lu files under sub-folders as well.
432+
if(file.filePath.endsWith('*')) {
433+
const isRecursive = file.filePath.endsWith('**')
434+
const rootFolder = file.filePath.replace(/\*/g, '')
435+
let rootPath = rootFolder;
436+
if(!path.isAbsolute(rootFolder)) {
437+
rootPath = path.resolve(parentFilePath, rootFolder);
438438
}
439-
440-
if(!path.isAbsolute(file.filePath)) {
441-
file.filePath = path.resolve(parentFilePath, file.filePath)
439+
// Get LU files in this location
440+
const luFilesToAdd = helpers.findLUFiles(rootPath, isRecursive);
441+
// add these to filesToParse
442+
for(let f = 0; f < luFilesToAdd.length; f++){
443+
const opts = new luOptions(luFilesToAdd[f], file.includeInCollate)
444+
luObjects.push(new luObject(readLuFile(luFilesToAdd[f]), opts))
442445
}
443-
// find matching parsed files and ensure includeInCollate is updated if needed.
444-
luObjects.push(new luObject(readLuFile(file.filePath), new luOptions(file.filePath, file.includeInCollate)))
445-
446+
return
447+
}
448+
449+
if(!path.isAbsolute(file.filePath)) {
450+
file.filePath = path.resolve(parentFilePath, file.filePath)
451+
}
452+
// find matching parsed files and ensure includeInCollate is updated if needed.
453+
luObjects.push(new luObject(readLuFile(file.filePath), new luOptions(file.filePath, file.includeInCollate)))
454+
}
455+
const resolveLuUriContent = async function(srcId, toResolve, luObjects) {
456+
let uri = toResolve.filePath || undefined;
457+
if (uri !== undefined) {
458+
let response;
459+
try {
460+
response = await fetch(uri, { method: 'GET' });
461+
} catch (err) {
462+
// throw, invalid URI
463+
let errorMsg = `URI: "${uri}" appears to be invalid. Please double check the URI or re-try this parse when you are connected to the internet.`;
464+
let error = BuildDiagnostic({
465+
message: errorMsg,
466+
range: luImport.Range
467+
})
468+
469+
throw (new exception(retCode.errorCode.INVALID_URI, error.toString(), [error]));
470+
}
471+
var res = await response.buffer();
472+
var encodedRes = helpers.fixBuffer(res);
473+
luObjects.push(new luObject(encodedRes, new luOptions(toResolve.filePath, toResolve.includeInCollate)));
474+
}
475+
}
476+
const isUrl = function(path) {
477+
try {
478+
new URL(path);
479+
return true;
480+
} catch (err) {
481+
return false;
446482
}
447-
return luObjects
448483
}
449484

450485
const updateParsedFiles = function(allParsedLUISContent, allParsedQnAContent, allParsedAlterationsContent, luobject) {

packages/lu/src/parser/lufile/parseFileContents.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,11 @@ const parseAndHandleImportSection = async function (parsedContent, luResource) {
659659

660660
let contentType = response.headers.get('content-type');
661661
if (!contentType.includes('text/html')) {
662-
parsedContent.qnaJsonStructure.files.push(new qnaFile(linkValue, linkValueText));
662+
if (parseUrl.pathname.toLowerCase().endsWith('.lu') || parseUrl.pathname.toLowerCase().endsWith('.qna')) {
663+
parsedContent.additionalFilesToParse.push(new fileToParse(linkValue));
664+
} else {
665+
parsedContent.qnaJsonStructure.files.push(new qnaFile(linkValue, linkValueText));
666+
}
663667
} else {
664668
parsedContent.qnaJsonStructure.urls.push(linkValue);
665669
}

packages/lu/src/parser/utils/helpers.js

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,6 @@ const helpers = {
7575
let linkValueList = utterance.trim().match(new RegExp(/\(.*?\)/g));
7676
let linkValue = linkValueList[0].replace('(', '').replace(')', '');
7777
if (linkValue === '') throw (new exception(retCode.errorCode.INVALID_LU_FILE_REF, `[ERROR]: Invalid LU File Ref: "${utterance}"`));
78-
let parseUrl = url.parse(linkValue);
79-
if (parseUrl.host || parseUrl.hostname) throw (new exception(retCode.errorCode.INVALID_LU_FILE_REF, `[ERROR]: Invalid LU File Ref: "${utterance}". \n Reference cannot be a URI`));
8078
// reference can either be #<Intent-Name> or #? or /*#? or /**#? or #*utterance* or #<Intent-Name>*patterns*
8179
let splitRegExp = new RegExp(/^(?<fileName>.*?)(?<segment>#|\*+)(?<path>.*?)$/gim);
8280
let splitReference = splitRegExp.exec(linkValue);
@@ -170,7 +168,36 @@ const helpers = {
170168
(finalLUISJSON.entities || []).forEach(e => {
171169
if (e.explicitlyAdded !== undefined) delete e.explicitlyAdded;
172170
})
173-
}
171+
},
172+
fixBuffer : function(fileBuffer) {
173+
if (fileBuffer) {
174+
// If the data starts with BOM, we know it is UTF
175+
if (fileBuffer[0] === 0xEF && fileBuffer[1] === 0xBB && fileBuffer[2] === 0xBF) {
176+
// EF BB BF UTF-8 with BOM
177+
fileBuffer = fileBuffer.slice(3)
178+
} else if (fileBuffer[0] === 0xFF && fileBuffer[1] === 0xFE && fileBuffer[2] === 0x00 && fileBuffer[3] === 0x00) {
179+
// FF FE 00 00 UTF-32, little-endian BOM
180+
fileBuffer = fileBuffer.slice(4)
181+
} else if (fileBuffer[0] === 0x00 && fileBuffer[1] === 0x00 && fileBuffer[2] === 0xFE && fileBuffer[3] === 0xFF) {
182+
// 00 00 FE FF UTF-32, big-endian BOM
183+
fileBuffer = fileBuffer.slice(4)
184+
} else if (fileBuffer[0] === 0xFE && fileBuffer[1] === 0xFF && fileBuffer[2] === 0x00 && fileBuffer[3] === 0x00) {
185+
// FE FF 00 00 UCS-4, unusual octet order BOM (3412)
186+
fileBuffer = fileBuffer.slice(4)
187+
} else if (fileBuffer[0] === 0x00 && fileBuffer[1] === 0x00 && fileBuffer[2] === 0xFF && fileBuffer[3] === 0xFE) {
188+
// 00 00 FF FE UCS-4, unusual octet order BOM (2143)
189+
fileBuffer = fileBuffer.slice(4)
190+
} else if (fileBuffer[0] === 0xFF && fileBuffer[1] === 0xFE) {
191+
// FF FE UTF-16, little endian BOM
192+
fileBuffer = fileBuffer.slice(2)
193+
} else if (fileBuffer[0] === 0xFE && fileBuffer[1] === 0xFF) {
194+
// FE FF UTF-16, big endian BOM
195+
fileBuffer = fileBuffer.slice(2)
196+
}
197+
}
198+
return fileBuffer.toString('utf8').replace(/\0/g, '');
199+
}
200+
174201
};
175202

176203
module.exports = helpers;

packages/lu/src/utils/textfilereader.ts

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
const fs = require('fs-extra')
77
const error = require('./../parser/utils/exception')
88
const retCode = require('./../parser/utils/enums/CLI-errors')
9+
const helpers = require('./../parser/utils/helpers')
910

1011
export async function readTextFile(file: any): Promise<string> {
1112
return new Promise(async (resolve, reject) => {
@@ -14,32 +15,7 @@ export async function readTextFile(file: any): Promise<string> {
1415
return reject('ENOENT: no such file or directory, ' + file)
1516
}
1617
let fileBuffer = await fs.readFile(file)
17-
if (fileBuffer) {
18-
// If the data starts with BOM, we know it is UTF
19-
if (fileBuffer[0] === 0xEF && fileBuffer[1] === 0xBB && fileBuffer[2] === 0xBF) {
20-
// EF BB BF UTF-8 with BOM
21-
fileBuffer = fileBuffer.slice(3)
22-
} else if (fileBuffer[0] === 0xFF && fileBuffer[1] === 0xFE && fileBuffer[2] === 0x00 && fileBuffer[3] === 0x00) {
23-
// FF FE 00 00 UTF-32, little-endian BOM
24-
fileBuffer = fileBuffer.slice(4)
25-
} else if (fileBuffer[0] === 0x00 && fileBuffer[1] === 0x00 && fileBuffer[2] === 0xFE && fileBuffer[3] === 0xFF) {
26-
// 00 00 FE FF UTF-32, big-endian BOM
27-
fileBuffer = fileBuffer.slice(4)
28-
} else if (fileBuffer[0] === 0xFE && fileBuffer[1] === 0xFF && fileBuffer[2] === 0x00 && fileBuffer[3] === 0x00) {
29-
// FE FF 00 00 UCS-4, unusual octet order BOM (3412)
30-
fileBuffer = fileBuffer.slice(4)
31-
} else if (fileBuffer[0] === 0x00 && fileBuffer[1] === 0x00 && fileBuffer[2] === 0xFF && fileBuffer[3] === 0xFE) {
32-
// 00 00 FF FE UCS-4, unusual octet order BOM (2143)
33-
fileBuffer = fileBuffer.slice(4)
34-
} else if (fileBuffer[0] === 0xFF && fileBuffer[1] === 0xFE) {
35-
// FF FE UTF-16, little endian BOM
36-
fileBuffer = fileBuffer.slice(2)
37-
} else if (fileBuffer[0] === 0xFE && fileBuffer[1] === 0xFF) {
38-
// FE FF UTF-16, big endian BOM
39-
fileBuffer = fileBuffer.slice(2)
40-
}
41-
}
42-
return resolve(fileBuffer.toString('utf8').replace(/\0/g, ''))
18+
return resolve(helpers.fixBuffer(fileBuffer))
4319
} catch (err) {
4420
if (err.message.match(/ENOENT: no such file or directory/)) {
4521
return reject(new error(retCode.errorCode.INVALID_INPUT_FILE, err.message))
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
{
2+
"intents": [
3+
{
4+
"name": "None"
5+
}
6+
],
7+
"entities": [
8+
{
9+
"name": "add",
10+
"roles": [],
11+
"children": [
12+
{
13+
"name": "count",
14+
"children": [],
15+
"features": [
16+
{
17+
"modelName": "globalCount",
18+
"isRequired": true
19+
}
20+
]
21+
}
22+
]
23+
},
24+
{
25+
"name": "globalCount",
26+
"roles": [],
27+
"children": [
28+
{
29+
"name": "countNumber",
30+
"children": [],
31+
"features": [
32+
{
33+
"modelName": "number",
34+
"isRequired": true
35+
}
36+
]
37+
}
38+
]
39+
}
40+
],
41+
"composites": [],
42+
"closedLists": [],
43+
"regex_entities": [],
44+
"regex_features": [],
45+
"utterances": [
46+
{
47+
"text": "add two apples",
48+
"intent": "None",
49+
"entities": [
50+
{
51+
"entity": "add",
52+
"startPos": 0,
53+
"endPos": 13,
54+
"children": [
55+
{
56+
"entity": "count",
57+
"startPos": 4,
58+
"endPos": 13
59+
}
60+
]
61+
},
62+
{
63+
"entity": "globalCount",
64+
"startPos": 4,
65+
"endPos": 13,
66+
"children": [
67+
{
68+
"entity": "countNumber",
69+
"startPos": 4,
70+
"endPos": 6
71+
}
72+
]
73+
}
74+
]
75+
}
76+
],
77+
"patterns": [],
78+
"patternAnyEntities": [],
79+
"prebuiltEntities": [
80+
{
81+
"name": "number",
82+
"roles": []
83+
}
84+
],
85+
"luis_schema_version": "7.0.0",
86+
"versionId": "0.1",
87+
"name": "",
88+
"desc": "",
89+
"culture": "en-us",
90+
"phraselists": []
91+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"intents": [
3+
{
4+
"name": "test"
5+
}
6+
],
7+
"entities": [],
8+
"composites": [],
9+
"closedLists": [],
10+
"regex_entities": [],
11+
"model_features": [],
12+
"regex_features": [],
13+
"utterances": [
14+
{
15+
"text": "add two apples",
16+
"intent": "test",
17+
"entities": []
18+
}
19+
],
20+
"patterns": [],
21+
"patternAnyEntities": [],
22+
"prebuiltEntities": [],
23+
"luis_schema_version": "3.2.0",
24+
"versionId": "0.1",
25+
"name": "",
26+
"desc": "",
27+
"culture": "en-us"
28+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{
2+
"intents": [
3+
{
4+
"name": "test"
5+
}
6+
],
7+
"entities": [],
8+
"composites": [],
9+
"closedLists": [],
10+
"regex_entities": [],
11+
"model_features": [],
12+
"regex_features": [],
13+
"utterances": [
14+
{
15+
"text": "add two apples",
16+
"intent": "test",
17+
"entities": []
18+
},
19+
{
20+
"text": "one",
21+
"intent": "test",
22+
"entities": []
23+
},
24+
{
25+
"text": "two",
26+
"intent": "test",
27+
"entities": []
28+
}
29+
],
30+
"patterns": [
31+
{
32+
"pattern": "another {entity}",
33+
"intent": "test"
34+
}
35+
],
36+
"patternAnyEntities": [
37+
{
38+
"name": "entity",
39+
"explicitList": [],
40+
"roles": []
41+
}
42+
],
43+
"prebuiltEntities": [],
44+
"luis_schema_version": "3.2.0",
45+
"versionId": "0.1",
46+
"name": "",
47+
"desc": "",
48+
"culture": "en-us"
49+
}

0 commit comments

Comments
 (0)