Skip to content

Commit ad1ce0d

Browse files
committed
parsers expose original book type
1 parent 6126261 commit ad1ce0d

28 files changed

+1014
-606
lines changed

bits/40_harb.js

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,11 @@ function dbf_to_sheet(buf, opts)/*:Worksheet*/ {
235235
}
236236

237237
function dbf_to_workbook(buf, opts)/*:Workbook*/ {
238-
try { return sheet_to_workbook(dbf_to_sheet(buf, opts), opts); }
238+
try {
239+
var o = sheet_to_workbook(dbf_to_sheet(buf, opts), opts);
240+
o.bookType = "dbf";
241+
return o;
242+
}
239243
catch(e) { if(opts && opts.WTF) throw e; }
240244
return ({SheetNames:[],Sheets:{}});
241245
}
@@ -546,6 +550,7 @@ var SYLK = /*#__PURE__*/(function() {
546550
keys(ws).forEach(function(k) { o[k] = ws[k]; });
547551
var outwb = sheet_to_workbook(o, opts);
548552
keys(wb).forEach(function(k) { outwb[k] = wb[k]; });
553+
outwb.bookType = "sylk";
549554
return outwb;
550555
}
551556

@@ -664,7 +669,11 @@ var DIF = /*#__PURE__*/(function() {
664669
}
665670

666671
function dif_to_sheet(str/*:string*/, opts)/*:Worksheet*/ { return aoa_to_sheet(dif_to_aoa(str, opts), opts); }
667-
function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ { return sheet_to_workbook(dif_to_sheet(str, opts), opts); }
672+
function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ {
673+
var o = sheet_to_workbook(dif_to_sheet(str, opts), opts);
674+
o.bookType = "dif";
675+
return o;
676+
}
668677

669678
var sheet_to_dif = /*#__PURE__*/(function() {
670679
var push_field = function pf(o/*:Array<string>*/, topic/*:string*/, v/*:number*/, n/*:number*/, s/*:string*/) {

bits/45_rtf.js

Lines changed: 94 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,97 @@
1-
function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ {
2-
switch(opts.type) {
3-
case 'base64': return rtf_to_book_str(Base64_decode(d), opts);
4-
case 'binary': return rtf_to_book_str(d, opts);
5-
case 'buffer': return rtf_to_book_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts);
6-
case 'array': return rtf_to_book_str(cc2str(d), opts);
7-
}
8-
throw new Error("Unrecognized type " + opts.type);
1+
function rtf_to_sheet(d, opts) {
2+
switch (opts.type) {
3+
case "base64":
4+
return rtf_to_sheet_str(Base64_decode(d), opts);
5+
case "binary":
6+
return rtf_to_sheet_str(d, opts);
7+
case "buffer":
8+
return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString("binary") : a2s(d), opts);
9+
case "array":
10+
return rtf_to_sheet_str(cc2str(d), opts);
11+
}
12+
throw new Error("Unrecognized type " + opts.type);
913
}
10-
11-
/* TODO: RTF technically can store multiple tables, even if Excel does not */
12-
function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ {
13-
var o = opts || {};
14-
var sname = o.sheet || "Sheet1";
15-
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
16-
var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} };
17-
wb.Sheets[sname] = ws;
18-
19-
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
20-
if(!rows.length) throw new Error("RTF missing table");
21-
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
22-
rows.forEach(function(rowtf, R) {
23-
if(Array.isArray(ws)) ws[R] = [];
24-
var rtfre = /\\[\w\-]+\b/g;
25-
var last_index = 0;
26-
var res;
27-
var C = -1;
28-
var payload = [];
29-
while((res = rtfre.exec(rowtf))) {
30-
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
31-
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
32-
if(data.length) payload.push(data);
33-
switch(res[0]) {
34-
case "\\cell":
35-
++C;
36-
if(payload.length) {
37-
// TODO: value parsing, including codepage adjustments
38-
var cell = {v: payload.join(""), t:"s"};
39-
if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; }
40-
else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); }
41-
42-
if(Array.isArray(ws)) ws[R][C] = cell;
43-
else ws[encode_cell({r:R, c:C})] = cell;
44-
}
45-
payload = [];
46-
break;
47-
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
48-
payload.push("\n");
49-
break;
50-
}
51-
last_index = rtfre.lastIndex;
52-
}
53-
if(C > range.e.c) range.e.c = C;
54-
});
55-
ws['!ref'] = encode_range(range);
56-
return wb;
14+
function rtf_to_sheet_str(str, opts) {
15+
var o = opts || {};
16+
var ws = o.dense ? [] : {};
17+
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
18+
if (!rows)
19+
throw new Error("RTF missing table");
20+
var range = { s: { c: 0, r: 0 }, e: { c: 0, r: rows.length - 1 } };
21+
rows.forEach(function(rowtf, R) {
22+
if (Array.isArray(ws))
23+
ws[R] = [];
24+
var rtfre = /\\[\w\-]+\b/g;
25+
var last_index = 0;
26+
var res;
27+
var C = -1;
28+
var payload = [];
29+
while ((res = rtfre.exec(rowtf)) != null) {
30+
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
31+
if (data.charCodeAt(0) == 32)
32+
data = data.slice(1);
33+
if (data.length)
34+
payload.push(data);
35+
switch (res[0]) {
36+
case "\\cell":
37+
++C;
38+
if (payload.length) {
39+
var cell = { v: payload.join(""), t: "s" };
40+
if (cell.v == "TRUE" || cell.v == "FALSE") {
41+
cell.v = cell.v == "TRUE";
42+
cell.t = "b";
43+
} else if (!isNaN(fuzzynum(cell.v))) {
44+
cell.t = "n";
45+
if (o.cellText !== false)
46+
cell.w = cell.v;
47+
cell.v = fuzzynum(cell.v);
48+
}
49+
if (Array.isArray(ws))
50+
ws[R][C] = cell;
51+
else
52+
ws[encode_cell({ r: R, c: C })] = cell;
53+
}
54+
payload = [];
55+
break;
56+
case "\\par":
57+
payload.push("\n");
58+
break;
59+
}
60+
last_index = rtfre.lastIndex;
61+
}
62+
if (C > range.e.c)
63+
range.e.c = C;
64+
});
65+
ws["!ref"] = encode_range(range);
66+
return ws;
5767
}
58-
59-
/* TODO: standardize sheet names as titles for tables */
60-
function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ {
61-
var o = ["{\\rtf1\\ansi"];
62-
var r = safe_decode_range(ws['!ref']), cell/*:Cell*/;
63-
var dense = Array.isArray(ws);
64-
for(var R = r.s.r; R <= r.e.r; ++R) {
65-
o.push("\\trowd\\trautofit1");
66-
for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
67-
o.push("\\pard\\intbl");
68-
for(C = r.s.c; C <= r.e.c; ++C) {
69-
var coord = encode_cell({r:R,c:C});
70-
cell = dense ? (ws[R]||[])[C]: ws[coord];
71-
if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
72-
o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par "));
73-
o.push("\\cell");
74-
}
75-
o.push("\\pard\\intbl\\row");
76-
}
77-
return o.join("") + "}";
68+
function rtf_to_workbook(d, opts) {
69+
var wb = sheet_to_workbook(rtf_to_sheet(d, opts), opts);
70+
wb.bookType = "rtf";
71+
return wb;
72+
}
73+
function sheet_to_rtf(ws, opts) {
74+
var o = ["{\\rtf1\\ansi"];
75+
if (!ws["!ref"])
76+
return o[0] + "}";
77+
var r = safe_decode_range(ws["!ref"]), cell;
78+
var dense = Array.isArray(ws);
79+
for (var R = r.s.r; R <= r.e.r; ++R) {
80+
o.push("\\trowd\\trautofit1");
81+
for (var C = r.s.c; C <= r.e.c; ++C)
82+
o.push("\\cellx" + (C + 1));
83+
o.push("\\pard\\intbl");
84+
for (C = r.s.c; C <= r.e.c; ++C) {
85+
var coord = encode_cell({ r: R, c: C });
86+
cell = dense ? (ws[R] || [])[C] : ws[coord];
87+
if (!cell || cell.v == null && (!cell.f || cell.F)) {
88+
o.push(" \\cell");
89+
continue;
90+
}
91+
o.push(" " + (cell.w || (format_cell(cell), cell.w) || "").replace(/[\r\n]/g, "\\par "));
92+
o.push("\\cell");
93+
}
94+
o.push("\\pard\\intbl\\row");
95+
}
96+
return o.join("") + "}";
7897
}

bits/75_xlml.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,7 @@ function parse_xlml_xml(d, _opts)/*:Workbook*/ {
919919
out.SSF = dup(table_fmt);
920920
out.Props = Props;
921921
out.Custprops = Custprops;
922+
out.bookType = "xlml";
922923
return out;
923924
}
924925

bits/79_html.js

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,14 @@ var HTML_END = '</body></html>';
9797
function html_to_workbook(str/*:string*/, opts)/*:Workbook*/ {
9898
var mtch = str.match(/<table[\s\S]*?>[\s\S]*?<\/table>/gi);
9999
if(!mtch || mtch.length == 0) throw new Error("Invalid HTML: could not find <table>");
100-
if(mtch.length == 1) return sheet_to_workbook(html_to_sheet(mtch[0], opts), opts);
100+
if(mtch.length == 1) {
101+
var w = sheet_to_workbook(html_to_sheet(mtch[0], opts), opts);
102+
w.bookType = "html";
103+
return w;
104+
}
101105
var wb = book_new();
102106
mtch.forEach(function(s, idx) { book_append_sheet(wb, html_to_sheet(s, opts), "Sheet" + (idx+1)); });
107+
wb.bookType = "html";
103108
return wb;
104109
}
105110

@@ -215,7 +220,9 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
215220
}
216221

217222
function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ {
218-
return sheet_to_workbook(parse_dom_table(table, opts), opts);
223+
var o = sheet_to_workbook(parse_dom_table(table, opts), opts);
224+
//o.bookType = "dom"; // TODO: define a type for this
225+
return o;
219226
}
220227

221228
function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ {

bits/80_parseods.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -770,9 +770,12 @@ function parse_ods(zip/*:ZIPFile*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
770770
if(!content) throw new Error("Missing content.xml in ODS / UOF file");
771771
var wb = parse_content_xml(utf8read(content), opts, Styles);
772772
if(safegetzipfile(zip, 'meta.xml')) wb.Props = parse_core_props(getzipdata(zip, 'meta.xml'));
773+
wb.bookType = "ods";
773774
return wb;
774775
}
775776
function parse_fods(data/*:string*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
776-
return parse_content_xml(data, opts);
777+
var wb = parse_content_xml(data, opts);
778+
wb.bookType = "fods";
779+
return wb;
777780
}
778781

bits/83_numbers.js

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ function parse_old_storage(buf, sst, rsst, v) {
396396
var ret;
397397
switch (buf[2]) {
398398
case 0:
399-
break;
399+
return void 0;
400400
case 2:
401401
ret = { t: "n", v: ieee };
402402
break;
@@ -456,7 +456,7 @@ function parse_new_storage(buf, sst, rsst) {
456456
var ret;
457457
switch (buf[1]) {
458458
case 0:
459-
break;
459+
return void 0;
460460
case 2:
461461
ret = { t: "n", v: d128 };
462462
break;
@@ -761,6 +761,7 @@ function parse_TN_DocumentArchive(M, root) {
761761
});
762762
if (out.SheetNames.length == 0)
763763
throw new Error("Empty NUMBERS file");
764+
out.bookType = "numbers";
764765
return out;
765766
}
766767
function parse_numbers_iwa(cfb) {
@@ -961,15 +962,21 @@ function write_numbers_iwa(wb, opts) {
961962
throw new Error("Too many messages");
962963
}
963964
var entry = CFB.find(cfb, dependents[1].location);
965+
if (!entry)
966+
throw "Could not find ".concat(dependents[1].location, " in Numbers template");
964967
var x = parse_iwa_file(decompress_iwa_file(entry.content));
965968
var docroot;
966969
for (var xi = 0; xi < x.length; ++xi) {
967970
var packet = x[xi];
968971
if (packet.id == 1)
969972
docroot = packet;
970973
}
974+
if (docroot == null)
975+
throw "Could not find message ".concat(1, " in Numbers template");
971976
var sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data);
972977
entry = CFB.find(cfb, dependents[sheetrootref].location);
978+
if (!entry)
979+
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
973980
x = parse_iwa_file(decompress_iwa_file(entry.content));
974981
for (xi = 0; xi < x.length; ++xi) {
975982
packet = x[xi];
@@ -985,6 +992,8 @@ function write_numbers_iwa(wb, opts) {
985992
entry.size = entry.content.length;
986993
sheetrootref = parse_TSP_Reference(sheetref[2][0].data);
987994
entry = CFB.find(cfb, dependents[sheetrootref].location);
995+
if (!entry)
996+
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
988997
x = parse_iwa_file(decompress_iwa_file(entry.content));
989998
for (xi = 0; xi < x.length; ++xi) {
990999
packet = x[xi];
@@ -993,6 +1002,8 @@ function write_numbers_iwa(wb, opts) {
9931002
}
9941003
sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data);
9951004
entry = CFB.find(cfb, dependents[sheetrootref].location);
1005+
if (!entry)
1006+
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
9961007
x = parse_iwa_file(decompress_iwa_file(entry.content));
9971008
for (xi = 0; xi < x.length; ++xi) {
9981009
packet = x[xi];
@@ -1005,6 +1016,8 @@ function write_numbers_iwa(wb, opts) {
10051016
pb[7][0].data = write_varint49(range.e.c + 1);
10061017
var cruidsref = parse_TSP_Reference(pb[46][0].data);
10071018
var oldbucket = CFB.find(cfb, dependents[cruidsref].location);
1019+
if (!oldbucket)
1020+
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
10081021
var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
10091022
{
10101023
for (var j = 0; j < _x.length; ++j) {
@@ -1047,6 +1060,8 @@ function write_numbers_iwa(wb, opts) {
10471060
var row_headers = parse_shallow(store[1][0].data);
10481061
var row_header_ref = parse_TSP_Reference(row_headers[2][0].data);
10491062
oldbucket = CFB.find(cfb, dependents[row_header_ref].location);
1063+
if (!oldbucket)
1064+
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
10501065
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
10511066
{
10521067
if (_x[0].id != row_header_ref)
@@ -1065,6 +1080,8 @@ function write_numbers_iwa(wb, opts) {
10651080
oldbucket.size = oldbucket.content.length;
10661081
var col_header_ref = parse_TSP_Reference(store[2][0].data);
10671082
oldbucket = CFB.find(cfb, dependents[col_header_ref].location);
1083+
if (!oldbucket)
1084+
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
10681085
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
10691086
{
10701087
if (_x[0].id != col_header_ref)
@@ -1109,13 +1126,17 @@ function write_numbers_iwa(wb, opts) {
11091126
var sstref = parse_TSP_Reference(store[4][0].data);
11101127
(function() {
11111128
var sentry = CFB.find(cfb, dependents[sstref].location);
1129+
if (!sentry)
1130+
throw "Could not find ".concat(dependents[sstref].location, " in Numbers template");
11121131
var sx = parse_iwa_file(decompress_iwa_file(sentry.content));
11131132
var sstroot;
11141133
for (var sxi = 0; sxi < sx.length; ++sxi) {
11151134
var packet2 = sx[sxi];
11161135
if (packet2.id == sstref)
11171136
sstroot = packet2;
11181137
}
1138+
if (sstroot == null)
1139+
throw "Could not find message ".concat(sstref, " in Numbers template");
11191140
var sstdata = parse_shallow(sstroot.messages[0].data);
11201141
{
11211142
sstdata[3] = [];
@@ -1141,6 +1162,8 @@ function write_numbers_iwa(wb, opts) {
11411162
var tileref = parse_TSP_Reference(tl[2][0].data);
11421163
(function() {
11431164
var tentry = CFB.find(cfb, dependents[tileref].location);
1165+
if (!tentry)
1166+
throw "Could not find ".concat(dependents[tileref].location, " in Numbers template");
11441167
var tx = parse_iwa_file(decompress_iwa_file(tentry.content));
11451168
var tileroot;
11461169
for (var sxi = 0; sxi < tx.length; ++sxi) {

bits/85_parsezip.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,8 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
247247
if(dir.vba.length > 0) out.vbaraw = getzipdata(zip,strip_front_slash(dir.vba[0]),true);
248248
else if(dir.defaults && dir.defaults.bin === CT_VBA) out.vbaraw = getzipdata(zip, 'xl/vbaProject.bin',true);
249249
}
250+
// TODO: pass back content types metadata for xlsm/xlsx resolution
251+
out.bookType = xlsb ? "xlsb" : "xlsx";
250252
return out;
251253
}
252254

demos/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ can be installed with Bash on Windows or with `cygwin`.
2020
**JavaScript APIs**
2121
- [`XMLHttpRequest and fetch`](xhr/)
2222
- [`Clipboard Data`](https://docs.sheetjs.com/docs/getting-started/demos/clipboard)
23-
- [`Typed Arrays and Math`](array/)
23+
- [`Typed Arrays for Machine Learning`](https://docs.sheetjs.com/docs/getting-started/demos/ml)
2424

2525
**Frameworks**
2626
- [`angularjs`](angular/)

0 commit comments

Comments
 (0)