Skip to content

Commit 38ecf79

Browse files
Copilot and mathiasrw authored
Support import of multi-sheet XLSX to fix #848 (#2207)
Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: mathiasrw <[email protected]> Co-authored-by: Mathias Wulff <[email protected]>
1 parent 1298c4b commit 38ecf79

File tree

3 files changed

+235
-61
lines changed

3 files changed

+235
-61
lines changed

src/84from.js

Lines changed: 99 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,71 @@ function XLSXLSX(X, filename, opts, cb, idx, query) {
440440
return text;
441441
}
442442
}
443+
444+
/**
 * Convert one worksheet of a parsed workbook into an array of row objects.
 *
 * @param {Object} workbook - Parsed workbook; `workbook.Sheets[sheetid]` must exist
 *   (this function does not check for a missing sheet).
 * @param {string} sheetid - Key of the worksheet inside `workbook.Sheets`.
 * @param {Object} sheetOpt - Import options. Only two properties are read here:
 *   `range`   - optional cell range such as "A1:C10", a single cell address such as "B2",
 *               or the name of a sheet property holding a range string (e.g. "!ref");
 *   `headers` - when truthy, the first row of the range supplies the property names.
 * @returns {Array<Object>} One object per row. Keys are the header texts (or the column
 *   letters when `headers` is falsy); cells missing from the sheet are omitted.
 *   An empty sheet yields an empty array.
 */
function processSheet(workbook, sheetid, sheetOpt) {
	var sheet = workbook.Sheets[sheetid];
	var sheetRes = [];
	var range;
	var i, j, col, row;

	if (typeof sheetOpt.range === 'undefined') {
		range = sheet['!ref'];
	} else {
		range = sheetOpt.range;
		// The option may name a sheet property that holds the real range string.
		// Only strings are substituted: a cell address like "A1" resolves to a cell
		// object, and in that case the address itself is the range to use.
		if (typeof sheet[range] === 'string') {
			range = sheet[range];
		}
	}

	// if range has some value then data is present in the current sheet
	// else current sheet is empty
	if (range) {
		var rg = range.split(':');
		// A one-cell reference ("A1") has no ':' part - treat it as "A1:A1"
		if (rg.length < 2) {
			rg[1] = rg[0];
		}
		var col0 = rg[0].match(/[A-Z]+/)[0];
		var row0 = +rg[0].match(/[0-9]+/)[0];
		var col1 = rg[1].match(/[A-Z]+/)[0];
		var row1 = +rg[1].match(/[0-9]+/)[0];

		// Map every column letter of the range to the property name used in the rows
		var hh = {};
		var xlscnCol0 = alasql.utils.xlscn(col0);
		var xlscnCol1 = alasql.utils.xlscn(col1);
		for (j = xlscnCol0; j <= xlscnCol1; j++) {
			col = alasql.utils.xlsnc(j);
			if (sheetOpt.headers) {
				if (sheet[col + '' + row0]) {
					hh[col] = getHeaderText(sheet[col + '' + row0].v);
				} else {
					// No header cell: fall back to the column letter
					hh[col] = getHeaderText(col);
				}
			} else {
				hh[col] = col;
			}
		}

		// The header row is not data
		if (sheetOpt.headers) {
			row0++;
		}

		for (i = row0; i <= row1; i++) {
			row = {};
			for (j = xlscnCol0; j <= xlscnCol1; j++) {
				col = alasql.utils.xlsnc(j);
				if (sheet[col + '' + i]) {
					row[hh[col]] = sheet[col + '' + i].v;
				}
			}
			sheetRes.push(row);
		}
	}

	// Remove last empty line (issue #548)
	if (
		sheetRes.length > 0 &&
		sheetRes[sheetRes.length - 1] &&
		Object.keys(sheetRes[sheetRes.length - 1]).length === 0
	) {
		sheetRes.pop();
	}

	return sheetRes;
}
507+
443508
filename = alasql.utils.autoExtFilename(filename, 'xls', opts);
444509
alasql.utils.loadBinaryFile(
445510
filename,
@@ -460,71 +525,44 @@ function XLSXLSX(X, filename, opts, cb, idx, query) {
460525
...opts,
461526
});
462527
}
463-
// console.log(workbook);
464-
var sheetid;
465-
if (typeof opt.sheetid === 'undefined') {
466-
sheetid = workbook.SheetNames[0];
467-
} else if (typeof opt.sheetid === 'number') {
468-
sheetid = workbook.SheetNames[opt.sheetid];
469-
} else {
470-
sheetid = opt.sheetid;
471-
}
472-
var range;
473-
var res = [];
474-
if (typeof opt.range === 'undefined') {
475-
range = workbook.Sheets[sheetid]['!ref'];
476-
} else {
477-
range = opt.range;
478-
if (workbook.Sheets[sheetid][range]) {
479-
range = workbook.Sheets[sheetid][range];
480-
}
481-
}
482-
// if range has some value then data is present in the current sheet
483-
// else current sheet is empty
484-
if (range) {
485-
var rg = range.split(':');
486-
var col0 = rg[0].match(/[A-Z]+/)[0];
487-
var row0 = +rg[0].match(/[0-9]+/)[0];
488-
var col1 = rg[1].match(/[A-Z]+/)[0];
489-
var row1 = +rg[1].match(/[0-9]+/)[0];
490-
// console.log(114,rg,col0,col1,row0,row1);
491-
// console.log(114,rg,alasql.utils.xlscn(col0),alasql.utils.xlscn(col1));
492-
493-
var hh = {};
494-
var xlscnCol0 = alasql.utils.xlscn(col0);
495-
var xlscnCol1 = alasql.utils.xlscn(col1);
496-
for (var j = xlscnCol0; j <= xlscnCol1; j++) {
497-
var col = alasql.utils.xlsnc(j);
498-
if (opt.headers) {
499-
if (workbook.Sheets[sheetid][col + '' + row0]) {
500-
hh[col] = getHeaderText(workbook.Sheets[sheetid][col + '' + row0].v);
501-
} else {
502-
hh[col] = getHeaderText(col);
503-
}
504-
} else {
505-
hh[col] = col;
506-
}
507-
}
508-
if (opt.headers) {
509-
row0++;
510-
}
511-
for (var i = row0; i <= row1; i++) {
512-
var row = {};
513-
for (var j = xlscnCol0; j <= xlscnCol1; j++) {
514-
var col = alasql.utils.xlsnc(j);
515-
if (workbook.Sheets[sheetid][col + '' + i]) {
516-
row[hh[col]] = workbook.Sheets[sheetid][col + '' + i].v;
528+
529+
// Check if we should process multiple sheets
530+
var shouldProcessMultipleSheets =
531+
opt.sheetid === '*' || (Array.isArray(opt.sheetid) && opt.sheetid.length > 0);
532+
533+
if (shouldProcessMultipleSheets) {
534+
// Process multiple sheets and combine into a single array
535+
res = [];
536+
var sheetsToProcess = opt.sheetid === '*' ? workbook.SheetNames : opt.sheetid;
537+
538+
for (var s = 0; s < sheetsToProcess.length; s++) {
539+
var currentSheetId =
540+
opt.sheetid === '*'
541+
? sheetsToProcess[s]
542+
: typeof sheetsToProcess[s] === 'number'
543+
? workbook.SheetNames[sheetsToProcess[s]]
544+
: sheetsToProcess[s];
545+
546+
if (workbook.Sheets[currentSheetId]) {
547+
var sheetData = processSheet(workbook, currentSheetId, opt);
548+
// Add sheet name to each row
549+
for (var r = 0; r < sheetData.length; r++) {
550+
sheetData[r]._sheet = currentSheetId;
517551
}
552+
res = res.concat(sheetData);
518553
}
519-
res.push(row);
520554
}
521555
} else {
522-
res.push([]);
523-
}
524-
525-
// Remove last empty line (issue #548)
526-
if (res.length > 0 && res[res.length - 1] && Object.keys(res[res.length - 1]).length == 0) {
527-
res.pop();
556+
// Process single sheet (original behavior)
557+
var sheetid;
558+
if (typeof opt.sheetid === 'undefined') {
559+
sheetid = workbook.SheetNames[0];
560+
} else if (typeof opt.sheetid === 'number') {
561+
sheetid = workbook.SheetNames[opt.sheetid];
562+
} else {
563+
sheetid = opt.sheetid;
564+
}
565+
res = processSheet(workbook, sheetid, opt);
528566
}
529567

530568
if (cb) {

test/test848.js

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
// Mocha tests for issue #848: importing several worksheets of one XLSX file.
// With sheetid "*" or an array of sheet names/indexes the rows of all selected
// sheets come back as one flat array, each row tagged with a `_sheet` property.
// The fixture test848.xlsx is expected to hold Sheet1 (3 rows) and Sheet2 (4 rows).
if (typeof exports === 'object') {
	var assert = require('assert');
	var alasql = require('..');
	var path = require('path');
	var dirname = path.normalize(__dirname) + '/';
} else {
	var dirname = './';
}

describe('Test 848 - Multi-sheet XLSX import', function () {
	it('1. Import all sheets using sheetid: "*"', function (done) {
		alasql(
			'select * from xlsx("' + dirname + 'test848.xlsx", {headers:true, sheetid:"*"})',
			[],
			function (data) {
				// Should return a flat array with _sheet property on each row
				assert(Array.isArray(data));
				assert(data.length > 0);
				// Check that _sheet property exists
				assert(data[0]._sheet);
				// Should have rows from both sheets
				var sheet1Rows = data.filter(function (row) {
					return row._sheet === 'Sheet1';
				});
				var sheet2Rows = data.filter(function (row) {
					return row._sheet === 'Sheet2';
				});
				assert.strictEqual(sheet1Rows.length, 3);
				assert.strictEqual(sheet2Rows.length, 4);
				done();
			}
		);
	});

	it('2. Import multiple specific sheets using sheetid array', function (done) {
		alasql(
			'select * from xlsx("' + dirname + 'test848.xlsx", {headers:true, sheetid:?})',
			[['Sheet1', 'Sheet2']],
			function (data) {
				// Should return a flat array with _sheet property
				assert(Array.isArray(data));
				assert.strictEqual(data.length, 7); // 3 + 4 rows
				assert(data[0]._sheet);
				done();
			}
		);
	});

	it('3. Import specific sheets by index using sheetid array', function (done) {
		alasql(
			'select * from xlsx("' + dirname + 'test848.xlsx", {headers:true, sheetid:?})',
			[[0, 1]],
			function (data) {
				// Should return a flat array with _sheet property
				assert(Array.isArray(data));
				assert.strictEqual(data.length, 7); // 3 + 4 rows
				assert.strictEqual(data[0]._sheet, 'Sheet1');
				done();
			}
		);
	});

	it('4. Original single sheet behavior should still work', function (done) {
		alasql(
			'select * from xlsx("' + dirname + 'test848.xlsx", {headers:true})',
			[],
			function (data) {
				// Should return an array (original behavior)
				assert(Array.isArray(data));
				assert.strictEqual(data.length, 3);
				// Should not have _sheet property
				assert(!data[0]._sheet);
				done();
			}
		);
	});

	it('5. Original single sheet with explicit sheetid should still work', function (done) {
		alasql(
			'select * from xlsx("' + dirname + 'test848.xlsx", {headers:true, sheetid:"Sheet2"})',
			[],
			function (data) {
				// Should return an array (original behavior)
				assert(Array.isArray(data));
				assert.strictEqual(data.length, 4);
				assert.strictEqual(data[3].five, 800);
				// Should not have _sheet property
				assert(!data[0]._sheet);
				done();
			}
		);
	});

	it('6. Query rows from specific sheet using WHERE clause', function (done) {
		alasql(
			'select * from xlsx("' +
				dirname +
				'test848.xlsx", {headers:true, sheetid:"*"}) WHERE _sheet = "Sheet2"',
			[],
			function (data) {
				// Should only return rows from Sheet2
				assert(Array.isArray(data));
				assert.strictEqual(data.length, 4);
				assert(
					data.every(function (row) {
						return row._sheet === 'Sheet2';
					})
				);
				done();
			}
		);
	});

	it('7. Count rows per sheet', function (done) {
		// First get the data, then query it
		alasql(
			'SELECT * FROM xlsx("' + dirname + 'test848.xlsx", {headers:true, sheetid:"*"})',
			[],
			function (allData) {
				// Now count using a separate query
				var counts = alasql('SELECT [_sheet], COUNT(*) FROM ? GROUP BY [_sheet]', [allData]);
				assert(Array.isArray(counts));
				assert.strictEqual(counts.length, 2);
				var sheet1 = counts.find(function (row) {
					return row._sheet === 'Sheet1';
				});
				var sheet2 = counts.find(function (row) {
					return row._sheet === 'Sheet2';
				});
				// Fail with an assertion (not a TypeError) when a group is missing
				assert(sheet1, 'no group for Sheet1');
				assert(sheet2, 'no group for Sheet2');
				assert.strictEqual(sheet1['COUNT(*)'], 3);
				assert.strictEqual(sheet2['COUNT(*)'], 4);
				done();
			}
		);
	});
});

test/test848.xlsx

37.5 KB
Binary file not shown.

0 commit comments

Comments
 (0)