Skip to content

Commit 8ecba76

Browse files
authored
Updated NeptuneSchema.js to account for multi-label nodes to prevent duplicated nodes and edges (aws#125)
There was a bug where multi-label nodes produced duplicated edges and nodes in the generated schema. The function findFromAndToLabels(edgeStructure) looks at a specific edge type in the graph database to see what kinds of nodes it connects, creating a list of all the different from-to label pairs for that relationship. It used nested for loops to go through all combinations of the fromLabel and toLabel arrays. When nodes had multiple labels, this created duplicate edge directions, since each label combination generated a separate entry in edgeStructure.directions even if the actual relationship between the node types was the same. Also, the function getNodesNames() gets all the different node types in the graph database by querying for all nodes and their labels, then adds each label to the schema structure. It generated duplicated nodes in the schema because the original code processed node labels without checking for duplicates, causing the same node label to be added multiple times to schema.nodeStructures. The fix creates a new Set every time each function is called, seeded with the node labels or edge directions already recorded in the schema, and uses it to check that an edge direction or node label has not been previously processed before adding it.
1 parent 6408f4e commit 8ecba76

File tree

3 files changed

+195
-6
lines changed

3 files changed

+195
-6
lines changed

CHANGELOG.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,4 +116,6 @@ permissions and limitations under the License.
116116
option ([#117](https://github.com/aws/amazon-neptune-for-graphql/pull/117))
117117
* Fixed invalid schema generation when AWS AppSync scalar types are used in
118118
an input
119-
schema ([#118](https://github.com/aws/amazon-neptune-for-graphql/pull/118))
119+
schema ([#118](https://github.com/aws/amazon-neptune-for-graphql/pull/118))
120+
* Fixed duplicated nodes and edges from nodes with
121+
multi-labels ([#125](https://github.com/aws/amazon-neptune-for-graphql/pull/125))

src/NeptuneSchema.js

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,17 @@ async function getNodesNames() {
148148
loggerInfo('Getting nodes names');
149149

150150
try {
151+
const processedLabels = new Set(schema.nodeStructures.map(node => node.label));
152+
151153
response.results.forEach(result => {
152-
schema.nodeStructures.push({ label: result['labels(a)'][0], properties: []});
153-
loggerDebug('Found Node: ' + yellow(result['labels(a)'][0]), {toConsole: true});
154-
});
154+
result['labels(a)'].forEach(label => {
155+
if (!processedLabels.has(label)) {
156+
schema.nodeStructures.push({ label: label, properties: []});
157+
processedLabels.add(label);
158+
loggerDebug('Found Node: ' + yellow(label), {toConsole: true});
159+
}
160+
});
161+
});
155162
}
156163
catch (e) {
157164
loggerError('No nodes found', e);
@@ -181,11 +188,20 @@ async function findFromAndToLabels(edgeStructure) {
181188
const query = `MATCH (from)-[r:${sanitize(edgeStructure.label)}]->(to) WITH from, to LIMIT $sample RETURN DISTINCT labels(from) as fromLabel, labels(to) as toLabel`;
182189
loggerDebug(`Retrieving incoming and outgoing labels for edge ${edgeStructure.label} with limit ${SAMPLE}`, {toConsole: true});
183190
const response = await queryNeptune(query, {sample: SAMPLE});
191+
const existingDirections = new Set(
192+
edgeStructure.directions.map(dir => `${dir.from}|${dir.to}`)
193+
);
194+
184195
for (let result of response.results) {
185196
for (let fromLabel of result.fromLabel) {
186197
for (let toLabel of result.toLabel) {
187-
edgeStructure.directions.push({from:fromLabel, to:toLabel});
188-
loggerDebug('Found edge: ' + yellow(edgeStructure.label) + ' direction: ' + yellow(fromLabel) + ' -> ' + yellow(toLabel), {toConsole: true});
198+
const directionKey = `${fromLabel}|${toLabel}`;
199+
200+
if (!existingDirections.has(directionKey)) {
201+
edgeStructure.directions.push({from: fromLabel, to: toLabel});
202+
existingDirections.add(directionKey);
203+
loggerDebug('Found edge: ' + yellow(edgeStructure.label) + ' direction: ' + yellow(fromLabel) + ' -> ' + yellow(toLabel), {toConsole: true});
204+
}
189205
}
190206
}
191207
}

src/test/NeptuneSchema.test.js

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
import { jest } from '@jest/globals';
2+
import axios from 'axios';
3+
import { loggerInit } from '../logger.js';
4+
5+
/**
 * Schema discovery tests for graphs whose nodes carry more than one label.
 *
 * Each test stubs the axios calls, runs getNeptuneSchema() on a freshly
 * imported copy of NeptuneSchema.js, and checks that every node label and
 * every edge label appears exactly once in the resulting schema.
 *
 * NOTE(review): the mocked POST responses are handed out strictly in call
 * order, so each list below has to mirror the order in which NeptuneSchema.js
 * issues its queries - confirm against that module when changing either side.
 */
describe('Neptune Schema discovery tests', () => {
    beforeAll(() => {
        loggerInit('./src/test/output', true, 'fatal');
    });

    afterEach(() => {
        // Undo every jest.spyOn() so stubs do not leak into the next test.
        jest.restoreAllMocks();
    });

    test('should handle nodes with multiple labels when querying edge to and from labels', async () => {
        // Mock successful summary API response
        jest.spyOn(axios, "get").mockResolvedValue({
            data: {
                payload: {
                    graphSummary: {
                        nodeLabels: ['Person', 'Employee', 'Manager', 'Company', 'Developer'],
                        edgeLabels: ['WORKS_FOR', 'REPORTS_TO']
                    }
                }
            }
        });

        // Mock HTTP responses for neptune queries
        const mockResponses = [
            { data: { results: [{ properties: { name: 'John' } }] } },
            { data: { results: [{ properties: { employeeId: 'E123' } }] } },
            { data: { results: [{ properties: { level: 'Senior' } }] } },
            { data: { results: [{ properties: { companyName: 'TechCorp' } }] } },
            { data: { results: [{ properties: { programmingLanguages: 'JavaScript' } }] } },
            { data: { results: [{ properties: { startDate: '2020-01-01' } }] } },
            { data: { results: [{ properties: { since: '2022-01-01' } }] } },
            {
                // Multi-label "from" nodes: several rows repeat the same
                // label pair (e.g. Employee -> Company).
                data: {
                    results: [
                        { fromLabel: ['Employee', 'Person'], toLabel: ['Company'] },
                        { fromLabel: ['Manager', 'Employee'], toLabel: ['Company'] },
                        { fromLabel: ['Developer', 'Employee'], toLabel: ['Company'] }
                    ]
                }
            },
            {
                data: {
                    results: [
                        { fromLabel: ['Employee'], toLabel: ['Manager', 'Person'] },
                        { fromLabel: ['Developer'], toLabel: ['Manager'] }
                    ]
                }
            },
            { data: { results: [] } },
            { data: { results: [] } },
            { data: { results: [] } },
            { data: { results: [{ rels: 3 }] } }
        ];
        mockAxiosResponses(mockResponses);

        // Import the module fresh for each test to avoid state sharing
        const schema = await runGetNeptuneSchema();

        // Validate that no duplicate node or edge structures were created.
        // map() returns a new array, so sorting it in place leaves the schema untouched.
        expect(schema.nodeStructures).toHaveLength(5);
        const nodeLabels = schema.nodeStructures.map(node => node.label).sort();
        expect(nodeLabels).toEqual(['Company', 'Developer', 'Employee', 'Manager', 'Person']);

        expect(schema.edgeStructures).toHaveLength(2);
        const edgeLabels = schema.edgeStructures.map(edge => edge.label).sort();
        expect(edgeLabels).toEqual(['REPORTS_TO', 'WORKS_FOR']);
    });

    test('should handle multi-label nodes from getNodesNames query without creating duplicates', async () => {
        // Mock console.error to suppress expected error output
        jest.spyOn(console, 'error').mockImplementation(() => {});

        // Mock summary API to fail so we use direct getNodesNames queries
        jest.spyOn(axios, "get").mockRejectedValue(new Error('Summary API not available'));

        // Mock HTTP responses for the multi-label scenario using getNodesNames
        const mockResponses = [
            {
                // Labels such as Person, Employee and Manager occur in more
                // than one row.
                data: {
                    results: [
                        { 'labels(a)': ['Person', 'Employee'] },
                        { 'labels(a)': ['Person', 'Manager'] },
                        { 'labels(a)': ['Company'] },
                        { 'labels(a)': ['Employee', 'Developer'] },
                        { 'labels(a)': ['Person'] },
                        { 'labels(a)': ['Manager', 'TeamLead'] }
                    ]
                }
            },
            {
                data: {
                    results: [
                        { 'type(e)': 'WORKS_FOR' },
                        { 'type(e)': 'REPORTS_TO' }
                    ]
                }
            },
            { data: { results: [{ properties: { name: 'John', age: 30 } }] } },
            { data: { results: [{ properties: { employeeId: 'E123', department: 'IT' } }] } },
            { data: { results: [{ properties: { level: 'Senior', yearsExperience: 5 } }] } },
            { data: { results: [{ properties: { companyName: 'TechCorp', founded: 2010 } }] } },
            { data: { results: [{ properties: { programmingLanguages: 'JavaScript', seniority: 'Mid' } }] } },
            { data: { results: [{ properties: { teamSize: 8, responsibilities: 'Team coordination' } }] } },
            { data: { results: [{ properties: { startDate: '2020-01-01', position: 'Software Engineer' } }] } },
            { data: { results: [{ properties: { since: '2022-01-01', directReport: true } }] } },
            {
                data: {
                    results: [
                        { fromLabel: ['Employee', 'Person'], toLabel: ['Company'] }
                    ]
                }
            },
            {
                data: {
                    results: [
                        { fromLabel: ['Employee'], toLabel: ['Manager', 'TeamLead'] }
                    ]
                }
            },
            { data: { results: [] } },
            { data: { results: [] } },
            { data: { results: [] } },
            { data: { results: [{ rels: 3 }] } }
        ];
        mockAxiosResponses(mockResponses);

        // Import the module fresh for each test to avoid state sharing
        const schema = await runGetNeptuneSchema();

        // Validate that no duplicate node or edge structures were created
        expect(schema.nodeStructures).toHaveLength(6);
        const nodeLabels = schema.nodeStructures.map(node => node.label).sort();
        expect(nodeLabels).toEqual(['Company', 'Developer', 'Employee', 'Manager', 'Person', 'TeamLead']);

        expect(schema.edgeStructures).toHaveLength(2);
        const edgeLabels = schema.edgeStructures.map(edge => edge.label).sort();
        expect(edgeLabels).toEqual(['REPORTS_TO', 'WORKS_FOR']);
    });
});
144+
145+
/**
 * Loads NeptuneSchema.js through a timestamped import specifier, applies the
 * test connection parameters, runs schema discovery and returns the parsed
 * result.
 *
 * The `?t=<timestamp>` suffix is intended to give every call its own module
 * instance so that module-level state is not shared between tests.
 *
 * @returns {Promise<object>} the value of getNeptuneSchema() after JSON.parse.
 */
async function runGetNeptuneSchema() {
    const moduleSpecifier = `../NeptuneSchema.js?t=${Date.now()}`;
    const neptuneSchemaModule = await import(moduleSpecifier);
    const {getNeptuneSchema, setGetNeptuneSchemaParameters} = neptuneSchemaModule;

    setNeptuneSchemaParameters(setGetNeptuneSchemaParameters);

    const serializedSchema = await getNeptuneSchema();
    return JSON.parse(serializedSchema);
}
151+
152+
/**
 * Passes a fixed set of Neptune connection settings to the supplied setter.
 *
 * @param {function(object): void} setGetNeptuneSchemaParameters - setter that
 *   receives the settings object; it is called exactly once.
 */
function setNeptuneSchemaParameters(setGetNeptuneSchemaParameters) {
    const testConnectionSettings = {
        host: 'test-neptune.cluster-abc.us-west-2.neptune.amazonaws.com',
        port: '8182',
        region: 'us-west-2',
        neptuneType: 'neptune-db',
        graphName: 'test-graph',
        domain: 'neptune.amazonaws.com'
    };

    setGetNeptuneSchemaParameters(testConnectionSettings);
}
162+
163+
/**
 * Stubs axios.post so that successive calls resolve with the given responses
 * in order.
 *
 * Once the list is used up, every further call resolves with a newly created
 * empty result set ({data: {results: []}}).
 *
 * @param {object[]} [mockResponses=[]] - responses to hand out, one per call.
 */
function mockAxiosResponses(mockResponses = []) {
    let nextIndex = 0;

    jest.spyOn(axios, 'post').mockImplementation(() => {
        const isExhausted = nextIndex >= mockResponses.length;
        const response = isExhausted
            ? {data: {results: []}}
            : mockResponses[nextIndex++];
        return Promise.resolve(response);
    });
}

0 commit comments

Comments
 (0)