Skip to content
This repository was archived by the owner on Jan 15, 2025. It is now read-only.

Commit fcb2cdb

Browse files
authored
Fix the bug where two different strings can share the same hash code (#1298)
1 parent e4d38fc commit fcb2cdb

File tree

2 files changed

+13
-16
lines changed

2 files changed

+13
-16
lines changed

packages/lu/src/parser/luis/luisCollate.js

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -36,17 +36,17 @@ const build = async function (luArray, verbose, luis_culture, luSearchFn) {
3636
const collate = function (luisList) {
3737
if (luisList.length === 0) return
3838
let luisObject = new Luis(luisList[0])
39-
let hashTable = {};
40-
initializeHash(luisObject, hashTable)
39+
let blobItemJsonStringifiedToObjectMap = new Map();
40+
initializeMap(luisObject, blobItemJsonStringifiedToObjectMap)
4141
for (let i = 1; i < luisList.length; i++) {
4242
let blob = luisList[i]
4343
mergeResults(blob, luisObject, LUISObjNameEnum.INTENT);
4444
mergeResults(blob, luisObject, LUISObjNameEnum.ENTITIES);
4545
mergeNDepthEntities(blob.entities, luisObject.entities);
4646
mergeResults_closedlists(blob, luisObject, LUISObjNameEnum.CLOSEDLISTS);
4747
mergeResults(blob, luisObject, LUISObjNameEnum.PATTERNANYENTITY);
48-
mergeResultsWithHash(blob, luisObject, LUISObjNameEnum.UTTERANCE, hashTable);
49-
mergeResultsWithHash(blob, luisObject, LUISObjNameEnum.PATTERNS, hashTable);
48+
mergeResultsWithMap(blob, luisObject, LUISObjNameEnum.UTTERANCE, blobItemJsonStringifiedToObjectMap);
49+
mergeResultsWithMap(blob, luisObject, LUISObjNameEnum.PATTERNS, blobItemJsonStringifiedToObjectMap);
5050
buildRegex(blob, luisObject)
5151
buildPrebuiltEntities(blob, luisObject)
5252
buildModelFeatures(blob, luisObject)
@@ -78,18 +78,18 @@ const cleanupEntities = function (luisObject) {
7878
delete luisObject.onAmbiguousLabels;
7979
}
8080

81-
const mergeResultsWithHash = function (blob, finalCollection, type, hashTable) {
81+
const mergeResultsWithMap = function (blob, finalCollection, type, blobItemJsonStringifiedToObjectMap) {
8282
if (blob[type] === undefined || blob[type].length === 0) {
8383
return
8484
}
8585
blob[type].forEach(function (blobItem) {
86-
// add if this item if it does not already exist by hash look up.
87-
let hashCode = helpers.hashCode(JSON.stringify(blobItem));
88-
if (!hashTable[hashCode]) {
86+
// add this item if it does not already exist in the map.
87+
let blobItemJsonStringified = JSON.stringify(blobItem);
88+
if (!blobItemJsonStringifiedToObjectMap.has(blobItemJsonStringified)) {
8989
finalCollection[type].push(blobItem);
90-
hashTable[hashCode] = blobItem;
90+
blobItemJsonStringifiedToObjectMap.set(blobItemJsonStringified, blobItem);
9191
} else {
92-
let item = hashTable[hashCode];
92+
let item = blobItemJsonStringifiedToObjectMap.get(blobItemJsonStringified);
9393

9494
if (type !== LUISObjNameEnum.INTENT &&
9595
type !== LUISObjNameEnum.PATTERNS &&
@@ -427,10 +427,10 @@ const buildPatternAny = function (blob, FinalLUISJSON) {
427427
})
428428
}
429429

430-
const initializeHash = function (LuisJSON, hashTable = undefined) {
430+
const initializeMap = function (LuisJSON, blobItemJsonStringifiedToObjectMap = undefined) {
431431
for (let prop in LuisJSON) {
432-
if (hashTable !== undefined && (prop === LUISObjNameEnum.UTTERANCE || prop === LUISObjNameEnum.PATTERNS)) {
433-
(LuisJSON[prop] || []).forEach(item => hashTable[helpers.hashCode(JSON.stringify(item))] = item)
432+
if (blobItemJsonStringifiedToObjectMap !== undefined && (prop === LUISObjNameEnum.UTTERANCE || prop === LUISObjNameEnum.PATTERNS)) {
433+
(LuisJSON[prop] || []).forEach(item => blobItemJsonStringifiedToObjectMap.set(JSON.stringify(item), item))
434434
}
435435
}
436436
}

packages/lu/src/parser/utils/helpers.js

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -190,9 +190,6 @@ const helpers = {
190190
let detectPatternRegex = /(\[.*(?<!\\)\])|(\(.*?(\|.*?)+(?<!\\)\))/gi;
191191
return detectPatternRegex.test(utterance);
192192
},
193-
hashCode : function(s) {
194-
return s.split("").reduce(function(a,b){a=((a<<5)-a)+b.charCodeAt(0);return a&a},0);
195-
},
196193
/**
197194
* Helper to detect luis schema version based on content and update the final payload as needed.
198195
* @param {LUIS} finalLUISJSON

0 commit comments

Comments
 (0)