Updated NeptuneSchema.js to account for multi-label nodes to prevent duplicated nodes and edges (aws#125)

flora-jin · web-flow · commit 8ecba7634619 · 2025-08-13T12:51:03.000-07:00
There was a bug where multi-label nodes produced duplicate edge directions. The function findFromAndToLabels(edgeStructure)
(which samples a specific edge type in the graph database to see what kinds of nodes it connects, building a list of all the different from-to label pairs for that relationship) uses nested for loops to go through every combination of the fromLabel and toLabel arrays of each result row. When nodes had multiple labels, different result rows could yield the same from-to pair, and every occurrence was pushed as a separate entry in edgeStructure.directions, even though the actual relationship between the node types was the same.

Also, the function getNodesNames() (which gets all the different node types in the graph database by querying for all nodes and their labels, then adds labels to the schema structure) generated duplicated nodes in the generated schema: the original code pushed the first label of every result row without checking for duplicates, causing the same node label to be added multiple times to schema.nodeStructures.

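The fix builds a Set each time either function is called, seeded with the node labels (or "from|to" direction keys) that have already been recorded, and checks it before adding a node or edge direction so that nothing is added twice. getNodesNames() now also records every label of a multi-label node instead of only the first one.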
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -116,4 +116,6 @@ permissions and limitations under the License.
   option ([#117](https://github.com/aws/amazon-neptune-for-graphql/pull/117))
 * Fixed invalid schema generation when AWS AppSync scalar types are used in 
   an input 
-  schema ([#118](https://github.com/aws/amazon-neptune-for-graphql/pull/118))
+  schema ([#118](https://github.com/aws/amazon-neptune-for-graphql/pull/118))
+* Fixed duplicated nodes and edges from nodes with 
+  multi-labels ([#125](https://github.com/aws/amazon-neptune-for-graphql/pull/125))
diff --git a/src/NeptuneSchema.js b/src/NeptuneSchema.js
@@ -148,10 +148,17 @@ async function getNodesNames() {
     loggerInfo('Getting nodes names');
 
     try {
+        const processedLabels = new Set(schema.nodeStructures.map(node => node.label));
+
         response.results.forEach(result => {
-            schema.nodeStructures.push({ label: result['labels(a)'][0], properties: []});
-            loggerDebug('Found Node: ' + yellow(result['labels(a)'][0]), {toConsole: true});
-        });        
+            result['labels(a)'].forEach(label => {
+                if (!processedLabels.has(label)) {
+                    schema.nodeStructures.push({ label: label, properties: []});
+                    processedLabels.add(label);
+                    loggerDebug('Found Node: ' + yellow(label), {toConsole: true});
+                }
+            });
+        });
     }
     catch (e)  {
         loggerError('No nodes found', e);
@@ -181,11 +188,20 @@ async function findFromAndToLabels(edgeStructure) {
     const query = `MATCH (from)-[r:${sanitize(edgeStructure.label)}]->(to) WITH from, to LIMIT $sample RETURN DISTINCT labels(from) as fromLabel, labels(to) as toLabel`;
     loggerDebug(`Retrieving incoming and outgoing labels for edge ${edgeStructure.label} with limit ${SAMPLE}`, {toConsole: true});
     const response = await queryNeptune(query, {sample: SAMPLE});
+    const existingDirections = new Set(
+        edgeStructure.directions.map(dir => `${dir.from}|${dir.to}`)
+    );
+
     for (let result of response.results) {
         for (let fromLabel of result.fromLabel) {
             for (let toLabel of result.toLabel) {
-                edgeStructure.directions.push({from:fromLabel, to:toLabel});
-                loggerDebug('Found edge: ' + yellow(edgeStructure.label) + '  direction: ' + yellow(fromLabel) + ' -> ' + yellow(toLabel), {toConsole: true});
+                const directionKey = `${fromLabel}|${toLabel}`;
+
+                if (!existingDirections.has(directionKey)) {
+                    edgeStructure.directions.push({from: fromLabel, to: toLabel});
+                    existingDirections.add(directionKey);
+                    loggerDebug('Found edge: ' + yellow(edgeStructure.label) + '  direction: ' + yellow(fromLabel) + ' -> ' + yellow(toLabel), {toConsole: true});
+                }
             }
         }
     }
diff --git a/src/test/NeptuneSchema.test.js b/src/test/NeptuneSchema.test.js
@@ -0,0 +1,171 @@
+import { jest } from '@jest/globals';
+import axios from 'axios';
+import { loggerInit } from '../logger.js';
+
+describe('Neptune Schema discovery tests', () => {
+    beforeAll(() => {
+        loggerInit('./src/test/output', true, 'fatal');
+    });
+
+    afterEach(() => {
+        jest.restoreAllMocks();
+    });
+
+    test('should handle nodes with multiple labels when querying edge to and from labels', async () => {
+        // Mock successful summary API response
+        jest.spyOn(axios, "get").mockResolvedValue({
+            data: {
+                payload: {
+                    graphSummary: {
+                        nodeLabels: ['Person', 'Employee', 'Manager', 'Company', 'Developer'],
+                        edgeLabels: ['WORKS_FOR', 'REPORTS_TO']
+                    }
+                }
+            }
+        });
+
+        // Mock HTTP responses for neptune queries
+        const mockResponses = [
+            { data: { results: [{ properties: { name: 'John' } }] } },
+            { data: { results: [{ properties: { employeeId: 'E123' } }] } },
+            { data: { results: [{ properties: { level: 'Senior' } }] } },
+            { data: { results: [{ properties: { companyName: 'TechCorp' } }] } },
+            { data: { results: [{ properties: { programmingLanguages: 'JavaScript' } }] } },
+            { data: { results: [{ properties: { startDate: '2020-01-01' } }] } },
+            { data: { results: [{ properties: { since: '2022-01-01' } }] } },
+            {
+                data: {
+                    results: [
+                        { fromLabel: ['Employee', 'Person'], toLabel: ['Company'] },
+                        { fromLabel: ['Manager', 'Employee'], toLabel: ['Company'] },
+                        { fromLabel: ['Developer', 'Employee'], toLabel: ['Company'] }
+                    ]
+                }
+            },
+            {
+                data: {
+                    results: [
+                        { fromLabel: ['Employee'], toLabel: ['Manager', 'Person'] },
+                        { fromLabel: ['Developer'], toLabel: ['Manager'] }
+                    ]
+                }
+            },
+            { data: { results: [] } },
+            { data: { results: [] } },
+            { data: { results: [] } },
+            { data: { results: [{ rels: 3 }] } }
+        ];
+        mockAxiosResponses(mockResponses);
+
+        // Import the module fresh for each test to avoid state sharing
+        const schema = await runGetNeptuneSchema();
+
+        // Validate that no duplicate node or edge structures created
+        expect(schema.nodeStructures).toHaveLength(5);
+        const nodeLabels = schema.nodeStructures.map(node => node.label).toSorted();
+        expect(nodeLabels).toEqual(['Company', 'Developer', 'Employee', 'Manager', 'Person']);
+
+        expect(schema.edgeStructures).toHaveLength(2);
+        const edgeLabels = schema.edgeStructures.map(edge => edge.label).sort();
+        expect(edgeLabels).toEqual(['REPORTS_TO', 'WORKS_FOR']);
+    });
+
+    test('should handle multi-label nodes from getNodesNames query without creating duplicates', async () => {
+        // Mock console.error to suppress expected error output
+        const consoleSpy = jest.spyOn(console, 'error').mockImplementation(() => {});
+
+        // Mock summary API to fail so we use direct getNodesNames queries
+        jest.spyOn(axios, "get").mockRejectedValue(new Error('Summary API not available'));
+
+        // Mock HTTP responses for the multi-label scenario using getNodesNames
+        const mockResponses = [
+            {
+                data: {
+                    results: [
+                        { 'labels(a)': ['Person', 'Employee'] },
+                        { 'labels(a)': ['Person', 'Manager'] },
+                        { 'labels(a)': ['Company'] },
+                        { 'labels(a)': ['Employee', 'Developer'] },
+                        { 'labels(a)': ['Person'] },
+                        { 'labels(a)': ['Manager', 'TeamLead'] }
+                    ]
+                }
+            },
+            {
+                data: {
+                    results: [
+                        { 'type(e)': 'WORKS_FOR' },
+                        { 'type(e)': 'REPORTS_TO' }
+                    ]
+                }
+            },
+            { data: { results: [{ properties: { name: 'John', age: 30 } }] } },
+            { data: { results: [{ properties: { employeeId: 'E123', department: 'IT' } }] } },
+            { data: { results: [{ properties: { level: 'Senior', yearsExperience: 5 } }] } },
+            { data: { results: [{ properties: { companyName: 'TechCorp', founded: 2010 } }] } },
+            { data: { results: [{ properties: { programmingLanguages: 'JavaScript', seniority: 'Mid' } }] } },
+            { data: { results: [{ properties: { teamSize: 8, responsibilities: 'Team coordination' } }] } },
+            { data: { results: [{ properties: { startDate: '2020-01-01', position: 'Software Engineer' } }] } },
+            { data: { results: [{ properties: { since: '2022-01-01', directReport: true } }] } },
+            {
+                data: {
+                    results: [
+                        { fromLabel: ['Employee', 'Person'], toLabel: ['Company'] }
+                    ]
+                }
+            },
+            {
+                data: {
+                    results: [
+                        { fromLabel: ['Employee'], toLabel: ['Manager', 'TeamLead'] }
+                    ]
+                }
+            },
+            { data: { results: [] } },
+            { data: { results: [] } },
+            { data: { results: [] } },
+            { data: { results: [{ rels: 3 }] } }
+        ];
+        mockAxiosResponses(mockResponses);
+
+        // Import the module fresh for each test to avoid state sharing
+       const schema = await runGetNeptuneSchema();
+
+        // Validate that no duplicate node or edge structures created
+        expect(schema.nodeStructures).toHaveLength(6);
+        const nodeLabels = schema.nodeStructures.map(node => node.label).sort();
+        expect(nodeLabels).toEqual(['Company', 'Developer', 'Employee', 'Manager', 'Person', 'TeamLead']);
+
+        expect(schema.edgeStructures).toHaveLength(2);
+        const edgeLabels = schema.edgeStructures.map(edge => edge.label).sort();
+        expect(edgeLabels).toEqual(['REPORTS_TO', 'WORKS_FOR']);
+    });
+});
+
+async function runGetNeptuneSchema() {
+    const {getNeptuneSchema, setGetNeptuneSchemaParameters} = await import(`../NeptuneSchema.js?t=${Date.now()}`);
+    setNeptuneSchemaParameters(setGetNeptuneSchemaParameters);
+    const result = await getNeptuneSchema();
+    return JSON.parse(result);
+}
+
+function setNeptuneSchemaParameters(setGetNeptuneSchemaParameters) {
+    setGetNeptuneSchemaParameters({
+        host: 'test-neptune.cluster-abc.us-west-2.neptune.amazonaws.com',
+        port: '8182',
+        region: 'us-west-2',
+        neptuneType: 'neptune-db',
+        graphName: 'test-graph',
+        domain: 'neptune.amazonaws.com'
+    });
+}
+
+function mockAxiosResponses(mockResponses = []) {
+    let responseIndex = 0;
+    jest.spyOn(axios, 'post').mockImplementation(() => {
+        if (responseIndex < mockResponses.length) {
+            return Promise.resolve(mockResponses[responseIndex++]);
+        }
+        return Promise.resolve({data: {results: []}});
+    });
+}