Skip to content

Commit e4ad005

Browse files
Copilot and mathiasrw authored
Fix CSV importer to respect column-type to close #1181 (#2403)
Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: mathiasrw <[email protected]>
1 parent c4b46f3 commit e4ad005

File tree

5 files changed

+300
-35
lines changed

5 files changed

+300
-35
lines changed

src/15utility.js

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1347,5 +1347,64 @@ var getXLSX = function () {
13471347
return XLSX;
13481348
};
13491349

1350+
/**
 * Type converter regex patterns.
 *
 * Each pattern is tested against a column's `dbtypeid` to pick a conversion
 * family. They are checked in the order str, int, num, bool, date, and the
 * first match wins.
 */
var reTypeConverter = {
	// Names containing "string" or "text", or ending in "char" (CHAR, VARCHAR, ...)
	str: /string|char$|text/i,
	// Names starting or ending with "int" (INT, INTEGER, BIGINT, ...)
	int: /^int|int$/i,
	num: /float|double|real|^num|decimal|money/i,
	bool: /^bool/i,
	date: /^date|^time/i,
};

/**
 * Parse a value as a finite number, rejecting partial matches.
 *
 * Unlike parseInt/parseFloat, which stop at the first unparsable character
 * ("1,234.50" -> 1, "12abc" -> 12), the whole trimmed string has to be a valid
 * numeric literal. Blank strings are rejected too, because Number('') is 0.
 *
 * @param {*} value - Candidate value
 * @return {number} The finite number, or NaN when the value is not one
 */
var parseStrictNumber = function (value) {
	if (typeof value === 'number') {
		return Number.isFinite(value) ? value : NaN;
	}
	if (typeof value !== 'string') {
		return NaN;
	}
	var trimmed = value.trim();
	if (trimmed.length === 0) {
		return NaN;
	}
	var parsed = Number(trimmed);
	return Number.isFinite(parsed) ? parsed : NaN;
};

/**
 * Convert a value to the appropriate type based on column definition.
 *
 * Whenever a conversion cannot be carried out (non-numeric text for a numeric
 * column, unparsable text for a date column) the original value is returned
 * unchanged instead of a truncated number or an Invalid Date.
 *
 * @param {*} value - The value to convert
 * @param {string} dbtypeid - The database type (INT, FLOAT, STRING, etc.)
 * @return {*} The converted value, or the original value when no conversion applies
 */
utils.typeConverter = function (value, dbtypeid) {
	// If value is null or undefined, return as is
	if (value === null || value === undefined) {
		return value;
	}

	// If no type specified, try to auto-convert if it looks like a number
	if (!dbtypeid) {
		// Whitespace-only strings are skipped: Number('   ') is 0, which would
		// invent a value that is not in the data.
		if (
			alasql.options.csvStringToNumber &&
			typeof value === 'string' &&
			value.trim().length > 0
		) {
			var autoVal = Number(value);
			if (!Number.isNaN(autoVal)) {
				return autoVal;
			}
		}
		return value;
	}

	// Check type using regex patterns
	if (reTypeConverter.str.test(dbtypeid)) {
		return String(value);
	}
	if (reTypeConverter.int.test(dbtypeid)) {
		var intVal = parseStrictNumber(value);
		// Fractions are dropped ("99.99" -> 99)
		return Number.isNaN(intVal) ? value : Math.trunc(intVal);
	}
	if (reTypeConverter.num.test(dbtypeid)) {
		var numVal = parseStrictNumber(value);
		return Number.isNaN(numVal) ? value : numVal;
	}
	if (reTypeConverter.bool.test(dbtypeid)) {
		if (typeof value === 'string') {
			// Only true/1/yes (any case, surrounding blanks ignored) are truthy
			return /^(true|1|yes)$/i.test(value.trim());
		}
		return Boolean(value);
	}
	if (reTypeConverter.date.test(dbtypeid)) {
		if (value instanceof Date) {
			return value;
		}
		var dateVal = new Date(value);
		// getTime() is NaN for an Invalid Date; keep the raw value in that case
		return Number.isNaN(dateVal.getTime()) ? value : dateVal;
	}

	// Unknown type, return as is
	return value;
};
1408+
13501409
// set AlaSQl path
13511410
alasql.path = alasql.utils.findAlaSQLPath();

src/17alasql.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ alasql.options = {
131131

132132
/** Whether GETDATE() and NOW() return dates as string. If false, then a Date object is returned */
133133
dateAsString: true,
134+
135+
/** Automatically convert string values to numbers when reading from CSV files. Set to false to preserve string types */
136+
csvStringToNumber: true,
134137
};
135138

136139
//alasql.options.worker = false;

src/40select.js

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -301,12 +301,19 @@ yy.Select = class Select {
301301
cb
302302
);`;
303303
} else {
304-
// Into AlaSQL tables
305-
query.intofns = `alasql
306-
.databases[${JSON.stringify(this.into.databaseid || databaseid)}]
307-
.tables[${JSON.stringify(this.into.tableid)}]
308-
.data.push(r);
309-
`;
304+
// Into AlaSQL tables - convert types based on column definitions
305+
var dbid = this.into.databaseid || databaseid;
306+
var tblid = this.into.tableid;
307+
query.intofns = `
308+
var db = alasql.databases[${JSON.stringify(dbid)}];
309+
var table = db.tables[${JSON.stringify(tblid)}];
310+
var converted = {};
311+
for (var key in r) {
312+
var colDef = table.xcolumns && table.xcolumns[key];
313+
converted[key] = alasql.utils.typeConverter(r[key], colDef ? colDef.dbtypeid : null);
314+
}
315+
table.data.push(converted);
316+
`;
310317
}
311318
} else if (this.into instanceof yy.VarValue) {
312319
//

src/84from.js

Lines changed: 3 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -333,50 +333,24 @@ alasql.from.CSV = function (contents, opts, cb, idx, query) {
333333
var r = {};
334334
hs.forEach(function (h, idx) {
335335
r[h] = a[idx];
336-
// Please avoid === here
337-
if (
338-
!opt.raw &&
339-
typeof r[h] !== 'undefined' &&
340-
r[h].length !== 0 &&
341-
r[h].trim() == +r[h]
342-
) {
343-
// jshint ignore:line
344-
r[h] = +r[h];
345-
}
336+
// Keep as string - type conversion happens at INSERT time based on column definitions
346337
});
347338
rows.push(r);
348339
}
349340
} else {
350341
var r = {};
351342
hs.forEach(function (h, idx) {
352343
r[h] = a[idx];
353-
if (
354-
!opt.raw &&
355-
typeof r[h] !== 'undefined' &&
356-
r[h].length !== 0 &&
357-
r[h].trim() == +r[h]
358-
) {
359-
// jshint ignore:line
360-
r[h] = +r[h];
361-
}
344+
// Keep as string - type conversion happens at INSERT time based on column definitions
362345
});
363346
rows.push(r);
364347
}
365348
n++;
366349
} else {
367350
var r = {};
368-
// different bug here, if headers are not defined, the numerical values will not be parsed
351+
// Keep as string - type conversion happens at INSERT time based on column definitions
369352
a.forEach(function (v, idx) {
370353
r[idx] = a[idx];
371-
if (
372-
!opt.raw &&
373-
typeof r[idx] !== 'undefined' &&
374-
r[idx].length !== 0 &&
375-
r[idx].trim() == +r[idx]
376-
) {
377-
// jshint ignore:line
378-
r[idx] = +r[idx];
379-
}
380354
});
381355
rows.push(r);
382356
}

test/test-csv-string-type.js

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
if (typeof exports === 'object') {
2+
var assert = require('assert');
3+
var alasql = require('..');
4+
var fs = require('fs');
5+
}
6+
7+
describe('Test CSV string type preservation and column type conversion', function () {
8+
const test = 'csvstringtype';
9+
10+
before(function () {
11+
alasql('create database test' + test);
12+
alasql('use test' + test);
13+
});
14+
15+
after(function () {
16+
alasql('drop database test' + test);
17+
});
18+
19+
it('A) CSV parser always keeps values as strings', function () {
20+
var csvData = '"117.20";"500"\n"88.33";"600"';
21+
var res = alasql('SELECT * FROM CSV(?, {separator:";", headers:false})', [csvData]);
22+
assert.deepEqual(res, [
23+
{0: '117.20', 1: '500'},
24+
{0: '88.33', 1: '600'},
25+
]);
26+
});
27+
28+
it('B) STRING type - preserves string values', function () {
29+
alasql('CREATE TABLE test_string (id STRING, name STRING)');
30+
var csvData = '"id";"name"\n"117.20";"test"\n"88.33";"item"';
31+
alasql('SELECT * INTO test_string FROM CSV(?, {separator:";"})', [csvData]);
32+
var res = alasql('SELECT * FROM test_string');
33+
assert.deepEqual(res, [
34+
{id: '117.20', name: 'test'},
35+
{id: '88.33', name: 'item'},
36+
]);
37+
alasql('DROP TABLE test_string');
38+
});
39+
40+
it('C) STRING type - converts numbers to strings', function () {
41+
alasql('CREATE TABLE test_str_num (code STRING, qty STRING)');
42+
var csvData = '"code";"qty"\n"123";"456"';
43+
alasql('SELECT * INTO test_str_num FROM CSV(?, {separator:";"})', [csvData]);
44+
var res = alasql('SELECT * FROM test_str_num');
45+
assert.deepEqual(res, [{code: '123', qty: '456'}]);
46+
alasql('DROP TABLE test_str_num');
47+
});
48+
49+
it('D) INT type - converts strings to integers', function () {
50+
alasql('CREATE TABLE test_int (id INT, qty INT)');
51+
var csvData = '"id";"qty"\n"123";"456"';
52+
alasql('SELECT * INTO test_int FROM CSV(?, {separator:";"})', [csvData]);
53+
var res = alasql('SELECT * FROM test_int');
54+
assert.deepEqual(res, [{id: 123, qty: 456}]);
55+
alasql('DROP TABLE test_int');
56+
});
57+
58+
it('E) INT type - truncates decimal values', function () {
59+
alasql('CREATE TABLE test_int_dec (amount INT)');
60+
var csvData = '"amount"\n"99.99"\n"123.45"';
61+
alasql('SELECT * INTO test_int_dec FROM CSV(?, {separator:";"})', [csvData]);
62+
var res = alasql('SELECT * FROM test_int_dec');
63+
assert.deepEqual(res, [{amount: 99}, {amount: 123}]);
64+
alasql('DROP TABLE test_int_dec');
65+
});
66+
67+
it('F) FLOAT type - preserves decimal precision', function () {
68+
alasql('CREATE TABLE test_float (price FLOAT, cost FLOAT)');
69+
var csvData = '"price";"cost"\n"99.99";"123.45"';
70+
alasql('SELECT * INTO test_float FROM CSV(?, {separator:";"})', [csvData]);
71+
var res = alasql('SELECT * FROM test_float');
72+
assert.deepEqual(res, [{price: 99.99, cost: 123.45}]);
73+
alasql('DROP TABLE test_float');
74+
});
75+
76+
it('G) FLOAT type - converts string decimals to numbers', function () {
77+
alasql('CREATE TABLE test_float_str (amount FLOAT)');
78+
var csvData = '"amount"\n"117.20"\n"88.33"';
79+
alasql('SELECT * INTO test_float_str FROM CSV(?, {separator:";"})', [csvData]);
80+
var res = alasql('SELECT * FROM test_float_str');
81+
assert.deepEqual(res, [{amount: 117.2}, {amount: 88.33}]);
82+
alasql('DROP TABLE test_float_str');
83+
});
84+
85+
it('H) BOOLEAN type - converts string true/false', function () {
86+
alasql('CREATE TABLE test_bool (active BOOLEAN, enabled BOOLEAN)');
87+
var csvData = '"active";"enabled"\n"true";"false"\n"1";"0"';
88+
alasql('SELECT * INTO test_bool FROM CSV(?, {separator:";"})', [csvData]);
89+
var res = alasql('SELECT * FROM test_bool');
90+
assert.deepEqual(res, [
91+
{active: true, enabled: false},
92+
{active: true, enabled: false},
93+
]);
94+
alasql('DROP TABLE test_bool');
95+
});
96+
97+
it('I) BOOLEAN type - handles yes/no strings', function () {
98+
alasql('CREATE TABLE test_bool_yn (flag BOOLEAN)');
99+
var csvData = '"flag"\n"yes"\n"no"';
100+
alasql('SELECT * INTO test_bool_yn FROM CSV(?, {separator:";"})', [csvData]);
101+
var res = alasql('SELECT * FROM test_bool_yn');
102+
assert.deepEqual(res, [{flag: true}, {flag: false}]);
103+
alasql('DROP TABLE test_bool_yn');
104+
});
105+
106+
it('J) DATE type - converts string dates', function () {
107+
alasql('CREATE TABLE test_date (created DATE)');
108+
var csvData = '"created"\n"2023-01-15"';
109+
alasql('SELECT * INTO test_date FROM CSV(?, {separator:";"})', [csvData]);
110+
var res = alasql('SELECT * FROM test_date');
111+
assert.deepEqual(res, [{created: new Date('2023-01-15')}]);
112+
alasql('DROP TABLE test_date');
113+
});
114+
115+
it('K) Mixed types - all conversions work together', function () {
116+
alasql('CREATE TABLE test_mixed (id STRING, qty INT, price FLOAT, active BOOLEAN)');
117+
var csvData = '"id";"qty";"price";"active"\n"117.20";"10";"99.99";"true"';
118+
alasql('SELECT * INTO test_mixed FROM CSV(?, {separator:";"})', [csvData]);
119+
var res = alasql('SELECT * FROM test_mixed');
120+
assert.deepEqual(res, [{id: '117.20', qty: 10, price: 99.99, active: true}]);
121+
alasql('DROP TABLE test_mixed');
122+
});
123+
124+
it('L) No column definitions with csvStringToNumber=true - auto-converts', function () {
125+
alasql.options.csvStringToNumber = true;
126+
alasql('CREATE TABLE test_nodef');
127+
var csvData = '"id";"amount"\n"117.20";"500"';
128+
alasql('SELECT * INTO test_nodef FROM CSV(?, {separator:";"})', [csvData]);
129+
var res = alasql('SELECT * FROM test_nodef');
130+
assert.deepEqual(res, [{id: 117.2, amount: 500}]);
131+
alasql('DROP TABLE test_nodef');
132+
});
133+
134+
it('M) No column definitions with csvStringToNumber=false - preserves strings', function () {
	// The option is global state: remember the current value and restore it in
	// `finally`, so a failing assertion cannot leak `false` into later tests.
	var previous = alasql.options.csvStringToNumber;
	alasql.options.csvStringToNumber = false;
	try {
		alasql('CREATE TABLE test_nodef2');
		var csvData = '"id";"amount"\n"117.20";"500"';
		alasql('SELECT * INTO test_nodef2 FROM CSV(?, {separator:";"})', [csvData]);
		var res = alasql('SELECT * FROM test_nodef2');
		assert.deepEqual(res, [{id: '117.20', amount: '500'}]);
		alasql('DROP TABLE test_nodef2');
	} finally {
		alasql.options.csvStringToNumber = previous;
	}
});
144+
145+
it('N) VARCHAR and CHAR types work like STRING', function () {
146+
alasql('CREATE TABLE test_varchar (name VARCHAR, code CHAR)');
147+
var csvData = '"name";"code"\n"123.45";"ABC"';
148+
alasql('SELECT * INTO test_varchar FROM CSV(?, {separator:";"})', [csvData]);
149+
var res = alasql('SELECT * FROM test_varchar');
150+
assert.deepEqual(res, [{name: '123.45', code: 'ABC'}]);
151+
alasql('DROP TABLE test_varchar');
152+
});
153+
154+
it('O) NULL and undefined values are preserved', function () {
	// CSV text cannot carry null/undefined, so check the converter directly
	// for the behaviour the test title promises.
	assert.strictEqual(alasql.utils.typeConverter(null, 'INT'), null);
	assert.strictEqual(alasql.utils.typeConverter(undefined, 'STRING'), undefined);

	alasql('CREATE TABLE test_nulls (id STRING, qty INT)');
	var csvData = '"id";"qty"\n"A001";""';
	alasql('SELECT * INTO test_nulls FROM CSV(?, {separator:";"})', [csvData]);
	var res = alasql('SELECT * FROM test_nulls');
	// An empty CSV field cannot be converted to an integer, so the converter
	// hands back the original empty string for the INT column.
	assert.deepEqual(res, [{id: 'A001', qty: ''}]);
	alasql('DROP TABLE test_nulls');
});
163+
164+
it('P) TSV also respects column types', function () {
165+
alasql('CREATE TABLE test_tsv (id STRING, amount INT)');
166+
var tsvData = 'id\tamount\n117.20\t500';
167+
alasql('SELECT * INTO test_tsv FROM TSV(?)', [tsvData]);
168+
var res = alasql('SELECT * FROM test_tsv');
169+
assert.deepEqual(res, [{id: '117.20', amount: 500}]);
170+
alasql('DROP TABLE test_tsv');
171+
});
172+
173+
it('Q) Direct SELECT from CSV without INSERT returns strings', function () {
174+
var csvData = '"id";"name"\n"117.20";"test"';
175+
var res = alasql('SELECT * FROM CSV(?, {separator:";"})', [csvData]);
176+
assert.deepEqual(res, [{id: '117.20', name: 'test'}]);
177+
});
178+
179+
it('R) Unquoted CSV data - preserves strings when column is STRING', function () {
180+
alasql('CREATE TABLE test_unquoted (id STRING, amount INT)');
181+
var csvData = 'id;amount\n117.20;500\n88.33;600';
182+
alasql('SELECT * INTO test_unquoted FROM CSV(?, {separator:";"})', [csvData]);
183+
var res = alasql('SELECT * FROM test_unquoted');
184+
assert.deepEqual(res, [
185+
{id: '117.20', amount: 500},
186+
{id: '88.33', amount: 600},
187+
]);
188+
alasql('DROP TABLE test_unquoted');
189+
});
190+
191+
it('S) Unquoted CSV data - converts to numbers when column is INT', function () {
192+
alasql('CREATE TABLE test_unquoted_int (qty INT, price INT)');
193+
var csvData = 'qty;price\n123;456\n789;012';
194+
alasql('SELECT * INTO test_unquoted_int FROM CSV(?, {separator:";"})', [csvData]);
195+
var res = alasql('SELECT * FROM test_unquoted_int');
196+
assert.deepEqual(res, [
197+
{qty: 123, price: 456},
198+
{qty: 789, price: 12},
199+
]);
200+
alasql('DROP TABLE test_unquoted_int');
201+
});
202+
203+
it('T) Unquoted CSV data without column definitions - auto-converts with csvStringToNumber=true', function () {
204+
alasql.options.csvStringToNumber = true;
205+
alasql('CREATE TABLE test_unquoted_nodef');
206+
var csvData = 'id;amount\n117.20;500';
207+
alasql('SELECT * INTO test_unquoted_nodef FROM CSV(?, {separator:";"})', [csvData]);
208+
var res = alasql('SELECT * FROM test_unquoted_nodef');
209+
assert.deepEqual(res, [{id: 117.2, amount: 500}]);
210+
alasql('DROP TABLE test_unquoted_nodef');
211+
});
212+
213+
it('U) Unquoted CSV data with invalid numbers returns original value', function () {
214+
alasql('CREATE TABLE test_invalid (id STRING, code INT, amount FLOAT)');
215+
var csvData = 'id;code;amount\nABC;xyz;notanumber';
216+
alasql('SELECT * INTO test_invalid FROM CSV(?, {separator:";"})', [csvData]);
217+
var res = alasql('SELECT * FROM test_invalid');
218+
// Invalid conversions return original string value
219+
assert.deepEqual(res, [{id: 'ABC', code: 'xyz', amount: 'notanumber'}]);
220+
alasql('DROP TABLE test_invalid');
221+
});
222+
});

0 commit comments

Comments
 (0)