Skip to content

Commit 5af89fb

Browse files
Copilotmathiasrw
andauthored
Fix aggregate functions (COUNT/MAX/MIN) for PARTITION BY to close #2408 (#2411)
Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: mathiasrw <[email protected]> Co-authored-by: Mathias Wulff <[email protected]>
1 parent c8e599a commit 5af89fb

File tree

4 files changed

+285
-41
lines changed

4 files changed

+285
-41
lines changed

src/40select.js

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ yy.Select = class Select {
186186
// For ROWNUM()
187187
query.rownums = [];
188188
query.grouprownums = [];
189+
query.windowaggrs = []; // For window aggregate functions (COUNT/MAX/MIN/SUM/AVG with OVER)
189190

190191
// Check if INTO OBJECT() is used - this affects how arrow expressions are compiled
191192
if (this.into instanceof yy.FuncValue && this.into.funcid.toUpperCase() === 'OBJECT') {
@@ -431,6 +432,76 @@ yy.Select = class Select {
431432
}
432433
}
433434

435+
// Handle window aggregate functions - COUNT/MAX/MIN/SUM/AVG with OVER (PARTITION BY ...)
436+
if (query.windowaggrs && query.windowaggrs.length > 0) {
437+
for (var j = 0, jlen = query.windowaggrs.length; j < jlen; j++) {
438+
var config = query.windowaggrs[j];
439+
var partitions = {};
440+
441+
// Group rows by partition
442+
for (var i = 0, ilen = res.length; i < ilen; i++) {
443+
var partitionKey =
444+
config.partitionColumns && config.partitionColumns.length > 0
445+
? config.partitionColumns
446+
.map(function (col) {
447+
return res[i][col];
448+
})
449+
.join('|')
450+
: '__all__';
451+
452+
if (!partitions[partitionKey]) partitions[partitionKey] = [];
453+
partitions[partitionKey].push(i);
454+
}
455+
456+
// Calculate and assign aggregate for each partition
457+
for (var partitionKey in partitions) {
458+
var rowIndices = partitions[partitionKey];
459+
var values = [];
460+
var colId = config.expression && config.expression.columnid;
461+
462+
// Collect values from partition rows
463+
if (config.aggregatorid !== 'COUNT' || (colId && colId !== '*')) {
464+
for (var k = 0; k < rowIndices.length; k++) {
465+
var val = res[rowIndices[k]][colId];
466+
if (val != null) values.push(val);
467+
}
468+
}
469+
470+
// Calculate aggregate
471+
var aggregateValue;
472+
switch (config.aggregatorid) {
473+
case 'COUNT':
474+
aggregateValue = colId && colId !== '*' ? values.length : rowIndices.length;
475+
break;
476+
case 'SUM':
477+
aggregateValue = values.reduce(function (sum, v) {
478+
return sum + v;
479+
}, 0);
480+
break;
481+
case 'AVG':
482+
aggregateValue =
483+
values.length > 0
484+
? values.reduce(function (sum, v) {
485+
return sum + v;
486+
}, 0) / values.length
487+
: null;
488+
break;
489+
case 'MAX':
490+
aggregateValue = values.length > 0 ? Math.max.apply(null, values) : null;
491+
break;
492+
case 'MIN':
493+
aggregateValue = values.length > 0 ? Math.min.apply(null, values) : null;
494+
break;
495+
}
496+
497+
// Assign aggregate value to all rows in partition
498+
for (var k = 0; k < rowIndices.length; k++) {
499+
res[rowIndices[k]][config.as] = aggregateValue;
500+
}
501+
}
502+
}
503+
}
504+
434505
var res2 = modify(query, res);
435506

436507
if (cb) {

src/424select.js

Lines changed: 33 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
// return sources;
1111
// };
1212

13+
// Regular expression to match aggregate functions that require expression compilation
14+
var re_aggrWithExpression = /^(SUM|MAX|MIN|FIRST|LAST|AVG|ARRAY|REDUCE|TOTAL)$/;
15+
1316
function compileSelectStar(query, aliases, joinstar) {
1417
var sp = '',
1518
ss = [],
@@ -335,55 +338,44 @@ yy.Select.prototype.compileSelect1 = function (query, params) {
335338
}
336339
}
337340
} else if (col instanceof yy.AggrValue) {
338-
if (!self.group) {
339-
// self.group=[new yy.Column({columnid:'q',as:'q' })];
340-
self.group = [''];
341-
}
342-
if (!col.as) {
343-
col.as = escapeq(col.toString());
344-
}
341+
// Set alias if not provided
342+
if (!col.as) col.as = escapeq(col.toString());
343+
344+
// Check if this aggregate has an OVER clause (window function)
345+
if (col.over) {
346+
// Track window aggregate for post-processing
347+
query.windowaggrs.push({
348+
as: col.as,
349+
aggregatorid: col.aggregatorid,
350+
expression: col.expression,
351+
partitionColumns: col.over.partition
352+
? col.over.partition.map(function (p) {
353+
return p.columnid || p.toString();
354+
})
355+
: [],
356+
});
357+
} else {
358+
// Regular aggregate - trigger GROUP BY
359+
if (!self.group) self.group = [''];
345360

346-
if (
347-
col.aggregatorid === 'SUM' ||
348-
col.aggregatorid === 'MAX' ||
349-
col.aggregatorid === 'MIN' ||
350-
col.aggregatorid === 'FIRST' ||
351-
col.aggregatorid === 'LAST' ||
352-
col.aggregatorid === 'AVG' ||
353-
col.aggregatorid === 'ARRAY' ||
354-
col.aggregatorid === 'REDUCE' ||
355-
col.aggregatorid === 'TOTAL'
356-
) {
357-
ss.push(
358-
"'" +
359-
escapeq(col.as) +
360-
"':" +
361-
n2u(col.expression.toJS('p', query.defaultTableid, query.defcols))
362-
);
363-
} else if (col.aggregatorid === 'COUNT') {
364-
ss.push("'" + escapeq(col.as) + "':1");
365-
// Nothing
361+
if (re_aggrWithExpression.test(col.aggregatorid)) {
362+
ss.push(
363+
"'" +
364+
escapeq(col.as) +
365+
"':" +
366+
n2u(col.expression.toJS('p', query.defaultTableid, query.defcols))
367+
);
368+
} else if (col.aggregatorid === 'COUNT') {
369+
ss.push("'" + escapeq(col.as) + "':1");
370+
}
366371
}
367-
// todo: confirm that no default action must be implemented
368-
369-
// query.selectColumns[col.aggregatorid+'('+escapeq(col.expression.toString())+')'] = thtd;
370372

373+
// Add column definition for both window and regular aggregates
371374
var coldef = {
372375
columnid: col.as || col.columnid || col.toString(),
373-
// dbtypeid:tcol.dbtypeid,
374-
// dbsize:tcol.dbsize,
375-
// dbpecision:tcol.dbprecision,
376-
// dbenum: tcol.dbenum,
377376
};
378-
// console.log(2);
379377
query.columns.push(coldef);
380378
query.xcolumns[coldef.columnid] = coldef;
381-
382-
// else if (col.aggregatorid == 'MAX') {
383-
// ss.push((col.as || col.columnid)+':'+col.toJS("p.",query.defaultTableid))
384-
// } else if (col.aggregatorid == 'MIN') {
385-
// ss.push((col.as || col.columnid)+':'+col.toJS("p.",query.defaultTableid))
386-
// }
387379
} else {
388380
// console.log(203,col.as,col.columnid,col.toString());
389381
// Check if this is an arrow expression and we're outputting to OBJECT

src/50expression.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,12 @@
834834
}
835835

836836
findAggregator(query) {
837+
// Skip adding window aggregates (aggregates with OVER clause) to selectGroup
838+
// They will be handled separately as window functions
839+
if (this.over) {
840+
return;
841+
}
842+
837843
// Check if an identical aggregate already exists in selectGroup
838844
let existingAggr = query.selectGroup.find(agg => agg.toString() === this.toString());
839845

test/test2408.js

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
if (typeof exports === 'object') {
2+
var assert = require('assert');
3+
var alasql = require('..');
4+
}
5+
6+
describe('Test 2362 - Window Aggregate Functions (COUNT/MAX/MIN/SUM/AVG) with PARTITION BY', function () {
7+
it('1. COUNT(*) OVER (PARTITION BY) should return per-row values', function () {
8+
var data = [
9+
{dept: 'Sales', emp: 'John', salary: 1000},
10+
{dept: 'Sales', emp: 'Jane', salary: 1200},
11+
{dept: 'IT', emp: 'Bob', salary: 1500},
12+
{dept: 'IT', emp: 'Alice', salary: 1600},
13+
];
14+
15+
var res = alasql(
16+
'SELECT dept, emp, COUNT(*) OVER (PARTITION BY dept) AS dept_count FROM ? ORDER BY dept, emp',
17+
[data]
18+
);
19+
20+
assert.deepEqual(res, [
21+
{dept: 'IT', emp: 'Alice', dept_count: 2},
22+
{dept: 'IT', emp: 'Bob', dept_count: 2},
23+
{dept: 'Sales', emp: 'Jane', dept_count: 2},
24+
{dept: 'Sales', emp: 'John', dept_count: 2},
25+
]);
26+
});
27+
28+
it('2. MAX() OVER (PARTITION BY) should return per-row values', function () {
29+
var data = [
30+
{dept: 'Sales', emp: 'John', salary: 1000},
31+
{dept: 'Sales', emp: 'Jane', salary: 1200},
32+
{dept: 'IT', emp: 'Bob', salary: 1500},
33+
{dept: 'IT', emp: 'Alice', salary: 1600},
34+
];
35+
36+
var res = alasql(
37+
'SELECT dept, emp, salary, MAX(salary) OVER (PARTITION BY dept) AS max_dept_salary FROM ? ORDER BY dept, emp',
38+
[data]
39+
);
40+
41+
assert.deepEqual(res, [
42+
{dept: 'IT', emp: 'Alice', salary: 1600, max_dept_salary: 1600},
43+
{dept: 'IT', emp: 'Bob', salary: 1500, max_dept_salary: 1600},
44+
{dept: 'Sales', emp: 'Jane', salary: 1200, max_dept_salary: 1200},
45+
{dept: 'Sales', emp: 'John', salary: 1000, max_dept_salary: 1200},
46+
]);
47+
});
48+
49+
it('3. MIN() OVER (PARTITION BY) should return per-row values', function () {
50+
var data = [
51+
{dept: 'Sales', emp: 'John', salary: 1000},
52+
{dept: 'Sales', emp: 'Jane', salary: 1200},
53+
{dept: 'IT', emp: 'Bob', salary: 1500},
54+
{dept: 'IT', emp: 'Alice', salary: 1600},
55+
];
56+
57+
var res = alasql(
58+
'SELECT dept, emp, salary, MIN(salary) OVER (PARTITION BY dept) AS min_dept_salary FROM ? ORDER BY dept, emp',
59+
[data]
60+
);
61+
62+
assert.deepEqual(res, [
63+
{dept: 'IT', emp: 'Alice', salary: 1600, min_dept_salary: 1500},
64+
{dept: 'IT', emp: 'Bob', salary: 1500, min_dept_salary: 1500},
65+
{dept: 'Sales', emp: 'Jane', salary: 1200, min_dept_salary: 1000},
66+
{dept: 'Sales', emp: 'John', salary: 1000, min_dept_salary: 1000},
67+
]);
68+
});
69+
70+
it('4. SUM() OVER (PARTITION BY) should return per-row values', function () {
71+
var data = [
72+
{dept: 'Sales', emp: 'John', salary: 1000},
73+
{dept: 'Sales', emp: 'Jane', salary: 1200},
74+
{dept: 'IT', emp: 'Bob', salary: 1500},
75+
{dept: 'IT', emp: 'Alice', salary: 1600},
76+
];
77+
78+
var res = alasql(
79+
'SELECT dept, emp, salary, SUM(salary) OVER (PARTITION BY dept) AS total_dept_salary FROM ? ORDER BY dept, emp',
80+
[data]
81+
);
82+
83+
assert.deepEqual(res, [
84+
{dept: 'IT', emp: 'Alice', salary: 1600, total_dept_salary: 3100},
85+
{dept: 'IT', emp: 'Bob', salary: 1500, total_dept_salary: 3100},
86+
{dept: 'Sales', emp: 'Jane', salary: 1200, total_dept_salary: 2200},
87+
{dept: 'Sales', emp: 'John', salary: 1000, total_dept_salary: 2200},
88+
]);
89+
});
90+
91+
it('5. AVG() OVER (PARTITION BY) should return per-row values', function () {
92+
var data = [
93+
{dept: 'Sales', emp: 'John', salary: 1000},
94+
{dept: 'Sales', emp: 'Jane', salary: 1200},
95+
{dept: 'IT', emp: 'Bob', salary: 1500},
96+
{dept: 'IT', emp: 'Alice', salary: 1600},
97+
];
98+
99+
var res = alasql(
100+
'SELECT dept, emp, salary, AVG(salary) OVER (PARTITION BY dept) AS avg_dept_salary FROM ? ORDER BY dept, emp',
101+
[data]
102+
);
103+
104+
assert.deepEqual(res, [
105+
{dept: 'IT', emp: 'Alice', salary: 1600, avg_dept_salary: 1550},
106+
{dept: 'IT', emp: 'Bob', salary: 1500, avg_dept_salary: 1550},
107+
{dept: 'Sales', emp: 'Jane', salary: 1200, avg_dept_salary: 1100},
108+
{dept: 'Sales', emp: 'John', salary: 1000, avg_dept_salary: 1100},
109+
]);
110+
});
111+
112+
it('6. Multiple window aggregates in same query', function () {
113+
var data = [
114+
{category: 'A', amount: 10},
115+
{category: 'A', amount: 20},
116+
{category: 'B', amount: 30},
117+
{category: 'B', amount: 40},
118+
];
119+
120+
var res = alasql(
121+
'SELECT category, amount, COUNT(*) OVER (PARTITION BY category) AS cnt, ' +
122+
'SUM(amount) OVER (PARTITION BY category) AS sum_amt, ' +
123+
'AVG(amount) OVER (PARTITION BY category) AS avg_amt ' +
124+
'FROM ? ORDER BY category, amount',
125+
[data]
126+
);
127+
128+
assert.deepEqual(res, [
129+
{category: 'A', amount: 10, cnt: 2, sum_amt: 30, avg_amt: 15},
130+
{category: 'A', amount: 20, cnt: 2, sum_amt: 30, avg_amt: 15},
131+
{category: 'B', amount: 30, cnt: 2, sum_amt: 70, avg_amt: 35},
132+
{category: 'B', amount: 40, cnt: 2, sum_amt: 70, avg_amt: 35},
133+
]);
134+
});
135+
136+
it('7. Window aggregate with multi-column PARTITION BY', function () {
137+
var data = [
138+
{dept: 'IT', team: 'A', score: 100},
139+
{dept: 'IT', team: 'A', score: 95},
140+
{dept: 'IT', team: 'B', score: 90},
141+
{dept: 'Sales', team: 'A', score: 85},
142+
];
143+
144+
var res = alasql(
145+
'SELECT dept, team, score, MAX(score) OVER (PARTITION BY dept, team) AS max_score FROM ? ORDER BY dept, team, score DESC',
146+
[data]
147+
);
148+
149+
assert.deepEqual(res, [
150+
{dept: 'IT', team: 'A', score: 100, max_score: 100},
151+
{dept: 'IT', team: 'A', score: 95, max_score: 100},
152+
{dept: 'IT', team: 'B', score: 90, max_score: 90},
153+
{dept: 'Sales', team: 'A', score: 85, max_score: 85},
154+
]);
155+
});
156+
157+
it('8. COUNT with expression in OVER clause', function () {
158+
var data = [
159+
{category: 'A', val: 10},
160+
{category: 'A', val: null},
161+
{category: 'B', val: 30},
162+
];
163+
164+
var res = alasql(
165+
'SELECT category, val, COUNT(val) OVER (PARTITION BY category) AS cnt FROM ? ORDER BY category',
166+
[data]
167+
);
168+
169+
assert.deepEqual(res, [
170+
{category: 'A', val: 10, cnt: 1},
171+
{category: 'A', val: null, cnt: 1},
172+
{category: 'B', val: 30, cnt: 1},
173+
]);
174+
});
175+
});

0 commit comments

Comments
 (0)