|
| 1 | +/** |
| 2 | + * MERGE operation examples demonstrating Cypher-like upsert patterns |
| 3 | + * for job search tracking use cases. |
| 4 | + */ |
| 5 | + |
| 6 | +import { GraphDatabase } from '../src'; |
| 7 | + |
// Single in-memory database instance shared by every example in this script.
const db = new GraphDatabase(':memory:');

// ============================================================================
// Setup: Create indexes for efficient merge operations
// ============================================================================

console.log('Setting up indexes for merge operations...\n');

// One entry per property that the examples below use as merge match criteria.
// `unique: true` entries are created with the third argument set to `true`;
// the others are created with the two-argument form.
const indexSpecs = [
  { label: 'Job', property: 'url', unique: true }, // one job per URL
  { label: 'Company', property: 'name', unique: false }, // names may repeat
  { label: 'Skill', property: 'name', unique: true }, // one skill per name
];

for (const { label, property, unique } of indexSpecs) {
  if (unique) {
    db.createPropertyIndex(label, property, true);
  } else {
    db.createPropertyIndex(label, property);
  }
}

console.log('✓ Indexes created\n');
| 22 | + |
| 23 | +// ============================================================================ |
| 24 | +// Example 1: Simple Job Upsert |
| 25 | +// ============================================================================ |
| 26 | + |
console.log('Example 1: Simple Job Upsert');
console.log('================================\n');

// Both merges describe the same listing, so the URL is named once and the
// property bag is produced by a factory (a fresh object for each call).
const seniorJobUrl = 'https://example.com/job/123';
const buildSeniorJobProps = () => ({
  url: seniorJobUrl,
  title: 'Senior AI Engineer',
  company: 'TechCorp',
  status: 'active',
});

// First run: match on URL; nothing matches yet, so the job is expected to be created.
const firstMerge = db.mergeNode('Job', { url: seniorJobUrl }, buildSeniorJobProps());
console.log(`First merge: Created=${firstMerge.created}, ID=${firstMerge.node.id}`);

// Second run with identical arguments: expected to find the existing job
// instead of creating a duplicate (the merge is meant to be idempotent).
const secondMerge = db.mergeNode('Job', { url: seniorJobUrl }, buildSeniorJobProps());
console.log(`Second merge: Created=${secondMerge.created}, ID=${secondMerge.node.id}`);

const isSameNode = firstMerge.node.id === secondMerge.node.id;
console.log(`Same node: ${isSameNode}\n`);
| 54 | + |
| 55 | +// ============================================================================ |
| 56 | +// Example 2: ON CREATE / ON MATCH Tracking |
| 57 | +// ============================================================================ |
| 58 | + |
console.log('Example 2: ON CREATE / ON MATCH Tracking');
console.log('==========================================\n');

// URL used as the match key for both merges in this example.
const trackedJobUrl = 'https://example.com/job/456';

// Track when jobs are first discovered vs. last seen.
// `onCreate` carries properties for a newly created node, `onMatch` carries
// properties for an already existing one.
const job3 = db.mergeNode(
  'Job',
  { url: trackedJobUrl },
  {
    url: trackedJobUrl,
    title: 'ML Engineer',
    status: 'active',
  },
  {
    onCreate: {
      discoveredAt: new Date().toISOString(),
      applicationStatus: 'not_applied',
      viewCount: 0,
    },
    onMatch: {
      lastSeenAt: new Date().toISOString(),
      // viewCount would be incremented here in real app
    },
  }
);

console.log('First merge (CREATE):');
console.log(` Created: ${job3.created}`);
console.log(` discoveredAt: ${job3.node.properties.discoveredAt}`);
console.log(` applicationStatus: ${job3.node.properties.applicationStatus}`);
console.log(` lastSeenAt: ${job3.node.properties.lastSeenAt ?? 'undefined'}\n`);

// Merge the same job again, synchronously. Deferring this with setTimeout is
// unsafe in this script: everything else runs synchronously, so a timer
// callback would only fire after the later examples and the final db.close(),
// i.e. against a closed database.
const job4 = db.mergeNode(
  'Job',
  { url: trackedJobUrl },
  undefined, // no create properties: the node is expected to exist already
  {
    onMatch: {
      lastSeenAt: new Date().toISOString(),
      // `??` rather than `||` so only a missing counter falls back to 0.
      viewCount: (job3.node.properties.viewCount ?? 0) + 1,
    },
  }
);

console.log('Second merge (MATCH):');
console.log(` Created: ${job4.created}`);
console.log(` discoveredAt: ${job4.node.properties.discoveredAt} (preserved)`);
console.log(` lastSeenAt: ${job4.node.properties.lastSeenAt} (updated)`);
console.log(` viewCount: ${job4.node.properties.viewCount} (incremented)\n`);
| 110 | + |
| 111 | +// ============================================================================ |
| 112 | +// Example 3: Company Deduplication |
| 113 | +// ============================================================================ |
| 114 | + |
console.log('\nExample 3: Company Deduplication');
console.log('==================================\n');

// Company names as they would arrive from four separate job listings;
// repeated names should resolve to the same Company node.
const companies = ['TechCorp', 'TechCorp', 'StartupCo', 'TechCorp'];

for (let listingIndex = 0; listingIndex < companies.length; listingIndex += 1) {
  const companyName = companies[listingIndex];
  const mergeResult = db.mergeNode(
    'Company',
    { name: companyName },
    { name: companyName, industry: 'Software' }
  );
  const outcome = mergeResult.created ? 'CREATED' : 'MATCHED';
  console.log(
    `Job ${listingIndex + 1}: Company "${companyName}" → ${outcome} (ID: ${mergeResult.node.id})`
  );
}

// Count every Company node currently stored.
const companyCount = db.nodes('Company').exec().length;
console.log(`\nTotal companies created: ${companyCount} (should be 2, not 4)\n`);
| 133 | + |
| 134 | +// ============================================================================ |
| 135 | +// Example 4: Relationship Merge (Unique Edges) |
| 136 | +// ============================================================================ |
| 137 | + |
console.log('Example 4: Relationship Merge');
console.log('===============================\n');

// Endpoints of the relationship: a freshly created Job and a merged Company.
const engineerJob = db.createNode('Job', { title: 'Engineer', url: 'https://example.com/job/789' });
const techCorp = db.mergeNode('Company', { name: 'TechCorp' }, { name: 'TechCorp' });

const postedByFrom = engineerJob.id;
const postedByTo = techCorp.node.id;
const postedByType = 'POSTED_BY';

// First merge: no such edge exists yet, so it is expected to be created and
// to receive the onCreate properties.
const edge1 = db.mergeEdge(postedByFrom, postedByType, postedByTo, { source: 'web_scraper' }, {
  onCreate: { firstSeenAt: new Date().toISOString() },
  onMatch: { lastVerifiedAt: new Date().toISOString() },
});

const createdEdgeProps = edge1.edge.properties;
console.log(`First edge merge: Created=${edge1.created}`);
console.log(` firstSeenAt: ${createdEdgeProps?.firstSeenAt}`);
console.log(` lastVerifiedAt: ${createdEdgeProps?.lastVerifiedAt || 'undefined'}`);

// Second merge between the same endpoints: expected to find the existing
// edge and apply only the onMatch properties.
const edge2 = db.mergeEdge(postedByFrom, postedByType, postedByTo, undefined, {
  onMatch: { lastVerifiedAt: new Date().toISOString() },
});

const matchedEdgeProps = edge2.edge.properties;
console.log(`\nSecond edge merge: Created=${edge2.created}`);
console.log(` firstSeenAt: ${matchedEdgeProps?.firstSeenAt} (preserved)`);
console.log(` lastVerifiedAt: ${matchedEdgeProps?.lastVerifiedAt} (updated)\n`);
| 174 | + |
| 175 | +// ============================================================================ |
| 176 | +// Example 5: Bulk Import with Merge (Idempotent ETL) |
| 177 | +// ============================================================================ |
| 178 | + |
console.log('Example 5: Bulk Import with Merge');
console.log('===================================\n');

// Simulated daily job scraper data. The last row repeats the first one to
// mimic a listing that was already seen in a previous run.
const scrapedJobs = [
  { url: 'https://example.com/job/123', title: 'Senior AI Engineer', company: 'TechCorp' },
  { url: 'https://example.com/job/456', title: 'ML Engineer', company: 'StartupCo' },
  { url: 'https://example.com/job/789', title: 'Data Scientist', company: 'BigCorp' },
  { url: 'https://example.com/job/123', title: 'Senior AI Engineer', company: 'TechCorp' },
];

// Running totals of how many Job merges created a node vs. matched one.
const importTally = { created: 0, matched: 0 };

// The whole import runs inside a single db.transaction callback.
db.transaction(() => {
  for (const { url, title, company: companyName } of scrapedJobs) {
    // 1. Company node, matched by name.
    const companyResult = db.mergeNode('Company', { name: companyName }, { name: companyName });

    // 2. Job node, matched by URL, with discovery / last-seen tracking.
    const jobResult = db.mergeNode(
      'Job',
      { url },
      { url, title, status: 'active' },
      {
        onCreate: {
          discoveredAt: new Date().toISOString(),
          applicationStatus: 'not_applied',
        },
        onMatch: {
          lastSeenAt: new Date().toISOString(),
          status: 'active', // Reactivate if was closed
        },
      }
    );

    // 3. Relationship Job -> Company.
    db.mergeEdge(jobResult.node.id, 'POSTED_BY', companyResult.node.id);

    if (jobResult.created) {
      importTally.created += 1;
    } else {
      importTally.matched += 1;
    }
  }
});

const totalJobs = db.nodes('Job').exec().length;
console.log(`Processed ${scrapedJobs.length} scraped jobs:`);
console.log(` ${importTally.created} new jobs created`);
console.log(` ${importTally.matched} existing jobs updated`);
console.log(` Total jobs in DB: ${totalJobs}\n`);
| 236 | + |
| 237 | +// ============================================================================ |
| 238 | +// Example 6: Handling Merge Conflicts |
| 239 | +// ============================================================================ |
| 240 | + |
console.log('Example 6: Handling Merge Conflicts');
console.log('=====================================\n');

// Seed two companies that share the same industry, so that matching on
// industry alone is ambiguous.
const ambiguousCompanies = [
  { industry: 'SaaS', name: 'Corp A', location: 'NYC' },
  { industry: 'SaaS', name: 'Corp B', location: 'SF' },
];
for (const companyProps of ambiguousCompanies) {
  db.createNode('Company', companyProps);
}

try {
  // Expected to throw: more than one Company matches { industry: 'SaaS' }.
  const ambiguousMatch = { industry: 'SaaS' };
  db.mergeNode('Company', ambiguousMatch, { industry: 'SaaS', size: 'Large' });
} catch (error: any) {
  // Report whatever was thrown; `conflictingNodes` is read defensively
  // because the error shape is not defined in this file.
  console.log('❌ Merge conflict detected:');
  console.log(` ${error.message}`);
  console.log(` Conflicting nodes: ${error.conflictingNodes?.length}`);
  console.log('\n Solution: Add more specific match criteria (e.g., name + industry)\n');
}
| 257 | + |
| 258 | +// ============================================================================ |
| 259 | +// Example 7: Skills Graph with Merge |
| 260 | +// ============================================================================ |
| 261 | + |
console.log('Example 7: Skills Graph with Merge');
console.log('====================================\n');

// Skill names with deliberate repeats; each distinct name should end up as
// exactly one Skill node.
const skillNames = ['Python', 'Machine Learning', 'Python', 'TensorFlow', 'Machine Learning'];

for (const skillName of skillNames) {
  const skillResult = db.mergeNode(
    'Skill',
    { name: skillName },
    { name: skillName, category: 'Technical' }
  );
  const outcome = skillResult.created ? 'CREATED' : 'MATCHED';

  console.log(`Skill "${skillName}": ${outcome} (ID: ${skillResult.node.id})`);
}

const skillCount = db.nodes('Skill').exec().length;
console.log(`\nTotal unique skills: ${skillCount}\n`);
| 274 | + |
| 275 | +// ============================================================================ |
| 276 | +// Performance Comparison |
| 277 | +// ============================================================================ |
| 278 | + |
console.log('Performance: Merge vs Manual Pattern');
console.log('======================================\n');

// Number of upserts per pattern. Keys cycle through `i % 100`, so each of the
// 100 distinct keys is created once and then matched on later iterations.
const iterations = 1000;

// Manual pattern: query, then update or create, one transaction per upsert.
const manualStart = Date.now();
for (let i = 0; i < iterations; i++) {
  const key = `test-${i % 100}`;
  db.transaction(() => {
    const existing = db
      .nodes('TestNode')
      .where({ key })
      .limit(1)
      .exec()[0];

    if (existing) {
      db.updateNode(existing.id, { updated: true });
    } else {
      db.createNode('TestNode', { key, created: true });
    }
  });
}
const manualTime = Date.now() - manualStart;

// Clear for fair comparison: remove every TestNode the manual run created.
for (const node of db.nodes('TestNode').exec()) {
  db.deleteNode(node.id);
}

// Create index first so the merge lookups on `key` can use it.
// NOTE(review): the manual run above executed without this index, so the two
// timings are not measured under identical conditions — confirm whether that
// is intended before quoting the speedup.
db.createPropertyIndex('TestNode', 'key');

// Merge pattern: a single mergeNode call per upsert.
const mergeStart = Date.now();
for (let i = 0; i < iterations; i++) {
  const key = `test-${i % 100}`;
  db.mergeNode(
    'TestNode',
    { key },
    { key, created: true },
    { onMatch: { updated: true }, warnOnMissingIndex: false }
  );
}
const mergeTime = Date.now() - mergeStart;

// Date.now() has millisecond resolution, so a fast merge run can measure as
// 0ms; guard the division instead of printing "Infinityx" or "NaNx".
const speedup =
  mergeTime > 0
    ? `${(manualTime / mergeTime).toFixed(2)}x`
    : 'n/a (merge run finished in under 1ms)';

console.log(`Manual pattern: ${manualTime}ms`);
console.log(`Merge pattern: ${mergeTime}ms`);
console.log(`Speedup: ${speedup}\n`);

// ============================================================================
// Cleanup
// ============================================================================

// Close the database only after every example above has finished using it.
db.close();
console.log('✓ Examples complete');
0 commit comments