Skip to content

Commit 6497825

Browse files
Copilotmathiasrw
andauthored
Use subquery caching with correlation check to fix #2280 (#2281)
Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: mathiasrw <[email protected]> Co-authored-by: Mathias Wulff <[email protected]>
1 parent a99943d commit 6497825

File tree

4 files changed

+187
-3
lines changed

4 files changed

+187
-3
lines changed

src/38query.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ function queryfn(query, oldscope, cb, A, B) {
88
query.cb = cb;
99
query.oldscope = oldscope;
1010

11+
// Clear subquery cache from previous execution (used by IN/NOT IN optimization)
12+
query.subqueryCache = {};
13+
1114
// Run all subqueries before main statement
1215
if (query.queriesfn) {
1316
query.sourceslen += query.queriesfn.length;

src/40select.js

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -447,9 +447,49 @@ yy.Select = class Select {
447447

448448
compileQueries(query) {
449449
if (!this.queries) return;
450-
query.queriesfn = this.queries.map(function (q) {
450+
451+
// Helper function to detect if a subquery might be correlated
452+
// A subquery is correlated if it references tables not in its own FROM clause
453+
const isCorrelated = (subquery, outerQuery) => {
454+
if (!subquery.from) return false;
455+
456+
// Get table names from subquery's FROM clause
457+
const subqueryTables = new Set();
458+
subquery.from.forEach(f => {
459+
if (f.tableid) subqueryTables.add(f.tableid);
460+
if (f.as) subqueryTables.add(f.as);
461+
});
462+
463+
// Check if WHERE clause references tables not in subquery's FROM
464+
const referencesExternal = node => {
465+
if (!node) return false;
466+
467+
// Check Column nodes for tableid using instanceof
468+
if (node instanceof yy.Column) {
469+
if (node.tableid && !subqueryTables.has(node.tableid)) {
470+
return true;
471+
}
472+
}
473+
474+
// Recursively check own properties only (not inherited)
475+
for (let key of Object.keys(node)) {
476+
if (node[key] && typeof node[key] === 'object') {
477+
if (referencesExternal(node[key])) return true;
478+
}
479+
}
480+
return false;
481+
};
482+
483+
return referencesExternal(subquery.where) || referencesExternal(subquery.columns);
484+
};
485+
486+
query.queriesfn = this.queries.map(function (q, idx) {
451487
var nq = q.compile(query.database.databaseid);
452488
nq.query.modifier = 'RECORDSET';
489+
490+
// Mark as correlated if it references external tables
491+
nq.query.isCorrelated = isCorrelated(q, query);
492+
453493
// If the nested query has its own queries, ensure they're compiled too
454494
// This handles nested subqueries properly
455495
if (q.queries && q.queries.length > 0) {

src/50expression.js

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,13 @@
367367
s = `(${this.op === 'NOT BETWEEN' ? '!' : ''}((${ref(this.right1)} <= ${left}) && (${left} <= ${ref(this.right2)})))`;
368368
} else if (this.op === 'IN') {
369369
if (this.right instanceof yy.Select) {
370-
s = `alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).indexOf(alasql.utils.getValueOf(${leftJS()})) > -1`;
370+
// Check if this is a correlated subquery (references outer tables)
371+
// If correlated, we cannot cache the results as they depend on the current row
372+
const cacheKey = `in${this.queriesidx}`;
373+
const checkCorrelated = `(this.queriesfn[${this.queriesidx}].query && this.queriesfn[${this.queriesidx}].query.isCorrelated)`;
374+
const cachedLookup = `((this.subqueryCache = this.subqueryCache || {}, this.subqueryCache.${cacheKey} || (this.subqueryCache.${cacheKey} = new Set(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).map(alasql.utils.getValueOf)))).has(alasql.utils.getValueOf(${leftJS()})))`;
375+
const uncachedLookup = `(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).indexOf(alasql.utils.getValueOf(${leftJS()})) > -1)`;
376+
s = `(${checkCorrelated} ? ${uncachedLookup} : ${cachedLookup})`;
371377
} else if (Array.isArray(this.right)) {
372378
if (!alasql.options.cache || this.right.some(value => value instanceof yy.ParamValue)) {
373379
// Leverage JS Set for faster lookups than arrays
@@ -385,7 +391,13 @@
385391
}
386392
} else if (this.op === 'NOT IN') {
387393
if (this.right instanceof yy.Select) {
388-
s = `alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, p)).indexOf(alasql.utils.getValueOf(${leftJS()})) < 0`;
394+
// Check if this is a correlated subquery (references outer tables)
395+
// If correlated, we cannot cache the results as they depend on the current row
396+
const cacheKey = `notIn${this.queriesidx}`;
397+
const checkCorrelated = `(this.queriesfn[${this.queriesidx}].query && this.queriesfn[${this.queriesidx}].query.isCorrelated)`;
398+
const cachedLookup = `(!(this.subqueryCache = this.subqueryCache || {}, this.subqueryCache.${cacheKey} || (this.subqueryCache.${cacheKey} = new Set(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).map(alasql.utils.getValueOf)))).has(alasql.utils.getValueOf(${leftJS()})))`;
399+
const uncachedLookup = `(alasql.utils.flatArray(this.queriesfn[${this.queriesidx}](params, null, ${context})).indexOf(alasql.utils.getValueOf(${leftJS()})) < 0)`;
400+
s = `(${checkCorrelated} ? ${uncachedLookup} : ${cachedLookup})`;
389401
} else if (Array.isArray(this.right)) {
390402
if (!alasql.options.cache || this.right.some(value => value instanceof yy.ParamValue)) {
391403
// Leverage JS Set for faster lookups than arrays

test/test2280.js

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
if (typeof exports === 'object') {
2+
var assert = require('assert');
3+
var alasql = require('..');
4+
}
5+
6+
describe('Test 2280 - Subquery caching optimization', function () {
7+
const test = '2280';
8+
9+
before(function () {
10+
alasql('create database test' + test);
11+
alasql('use test' + test);
12+
});
13+
14+
after(function () {
15+
alasql('drop database test' + test);
16+
});
17+
18+
it('A) IN subquery with larger dataset works correctly', function () {
19+
alasql('CREATE TABLE users (id INT, name STRING)');
20+
alasql('CREATE TABLE orders (id INT, userId INT, amount NUMBER)');
21+
22+
// Insert test data
23+
for (let i = 1; i <= 100; i++) {
24+
alasql('INSERT INTO users VALUES (?, ?)', [i, 'User' + i]);
25+
}
26+
for (let i = 1; i <= 300; i++) {
27+
const userId = ((i - 1) % 100) + 1;
28+
const amount = 100 + ((i - 1) % 100) * 10;
29+
alasql('INSERT INTO orders VALUES (?, ?, ?)', [i, userId, amount]);
30+
}
31+
32+
// Users whose id appears in orders with amount > 800
33+
// amount > 800 means userId in {72, 73, ..., 100} (29 users, amounts 810-1090)
34+
const result = alasql(
35+
'SELECT * FROM users WHERE id IN (SELECT userId FROM orders WHERE amount > 800)'
36+
);
37+
38+
assert.strictEqual(result.length, 29);
39+
assert.ok(result.every(u => u.id >= 72));
40+
});
41+
42+
it('B) NOT IN subquery works correctly', function () {
43+
// Users whose id does NOT appear in orders with amount > 800
44+
const result = alasql(
45+
'SELECT * FROM users WHERE id NOT IN (SELECT userId FROM orders WHERE amount > 800)'
46+
);
47+
48+
assert.strictEqual(result.length, 71);
49+
assert.ok(result.every(u => u.id < 72));
50+
});
51+
52+
it('C) IN subquery with empty result works', function () {
53+
const result = alasql(
54+
'SELECT * FROM users WHERE id IN (SELECT userId FROM orders WHERE amount > 9999)'
55+
);
56+
assert.strictEqual(result.length, 0);
57+
});
58+
59+
it('D) NOT IN subquery with empty result returns all rows', function () {
60+
const result = alasql(
61+
'SELECT * FROM users WHERE id NOT IN (SELECT userId FROM orders WHERE amount > 9999)'
62+
);
63+
assert.strictEqual(result.length, 100);
64+
});
65+
66+
it('E) Subquery cache does not persist between separate query executions', function () {
67+
// Execute query once
68+
const result1 = alasql(
69+
'SELECT COUNT(*) as cnt FROM users WHERE id IN (SELECT userId FROM orders WHERE amount > 500)'
70+
);
71+
const count1 = result1[0].cnt;
72+
73+
// Add new data that would change the subquery result
74+
alasql('INSERT INTO orders VALUES (999, 1, 999)');
75+
76+
// Execute again - should reflect new data (cache should not persist)
77+
const result2 = alasql(
78+
'SELECT COUNT(*) as cnt FROM users WHERE id IN (SELECT userId FROM orders WHERE amount > 500)'
79+
);
80+
const count2 = result2[0].cnt;
81+
82+
// User 1 should now be in the result since we added an order with amount 999
83+
// Result should be one more than before
84+
assert.strictEqual(count2, count1 + 1);
85+
});
86+
87+
it('F) Subquery performance is optimized', function () {
88+
// This test verifies that the subquery is not re-executed for each row
89+
// by checking that the query completes in a reasonable time
90+
const start = Date.now();
91+
const iterations = 10;
92+
93+
for (let i = 0; i < iterations; i++) {
94+
alasql('SELECT * FROM users WHERE id IN (SELECT userId FROM orders WHERE amount > 500)');
95+
}
96+
97+
const elapsed = Date.now() - start;
98+
const avgTime = elapsed / iterations;
99+
100+
// With caching, each query should take less than 50ms on average
101+
// Without caching (re-executing subquery per row), it would take 100+ ms
102+
assert.ok(avgTime < 50, 'Query should complete quickly with caching (avg: ' + avgTime + 'ms)');
103+
});
104+
105+
it('G) Correlated subqueries are detected and not cached', function () {
106+
// Create test tables
107+
alasql('CREATE TABLE outer_table (id INT, val INT)');
108+
alasql('CREATE TABLE inner_table (outer_id INT, data INT)');
109+
110+
alasql('INSERT INTO outer_table VALUES (1, 100), (2, 200), (3, 300)');
111+
alasql('INSERT INTO inner_table VALUES (1, 10), (2, 20), (3, 30)');
112+
113+
// This subquery references outer_table.val, making it correlated
114+
const result = alasql(
115+
'SELECT * FROM outer_table WHERE id IN (SELECT outer_id FROM inner_table WHERE outer_table.val > 150)'
116+
);
117+
118+
// Should return rows with id 2 and 3 (val > 150)
119+
assert.strictEqual(result.length, 2);
120+
assert.ok(result.some(r => r.id === 2 && r.val === 200));
121+
assert.ok(result.some(r => r.id === 3 && r.val === 300));
122+
123+
// Verify the query was marked as correlated
124+
const compiled = alasql.compile(
125+
'SELECT * FROM outer_table WHERE id IN (SELECT outer_id FROM inner_table WHERE outer_table.val > 150)'
126+
);
127+
assert.strictEqual(compiled.query.queriesfn[0].query.isCorrelated, true);
128+
});
129+
});

0 commit comments

Comments
 (0)