Skip to content

Commit 6126261

Browse files
committed
parse number values from RTF cells
1 parent efa36be commit 6126261

22 files changed

+345
-428
lines changed

Makefile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,15 @@ $(TESTESMFMT): test-esm_%:
165165
FMTS=$* make test-esm
166166

167167
TESTDENOFMT=$(patsubst %,test-deno_%,$(FMT))
168-
.PHONY: $(TESTESMFMT)
168+
.PHONY: $(TESTDENOFMT)
169169
$(TESTDENOFMT): test-deno_%:
170170
FMTS=$* make test-deno
171171

172+
TESTDENOCPFMT=$(patsubst %,test-denocp_%,$(FMT))
173+
.PHONY: $(TESTDENOCPFMT)
174+
$(TESTDENOCPFMT): test-denocp_%:
175+
FMTS=$* make test-denocp
176+
172177
.PHONY: travis
173178
travis: ## Run test suite with minimal output
174179
mocha -R dot -t 30000

bits/45_rtf.js

Lines changed: 71 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,82 +1,78 @@
1-
var RTF = /*#__PURE__*/(function() {
2-
function rtf_to_sheet(d/*:RawData*/, opts)/*:Worksheet*/ {
3-
switch(opts.type) {
4-
case 'base64': return rtf_to_sheet_str(Base64_decode(d), opts);
5-
case 'binary': return rtf_to_sheet_str(d, opts);
6-
case 'buffer': return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts);
7-
case 'array': return rtf_to_sheet_str(cc2str(d), opts);
8-
}
9-
throw new Error("Unrecognized type " + opts.type);
1+
function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ {
2+
switch(opts.type) {
3+
case 'base64': return rtf_to_book_str(Base64_decode(d), opts);
4+
case 'binary': return rtf_to_book_str(d, opts);
5+
case 'buffer': return rtf_to_book_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts);
6+
case 'array': return rtf_to_book_str(cc2str(d), opts);
107
}
8+
throw new Error("Unrecognized type " + opts.type);
9+
}
1110

12-
/* TODO: this is a stub */
13-
function rtf_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ {
14-
var o = opts || {};
15-
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
16-
17-
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
18-
if(!rows.length) throw new Error("RTF missing table");
19-
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
20-
rows.forEach(function(rowtf, R) {
21-
if(Array.isArray(ws)) ws[R] = [];
22-
var rtfre = /\\[\w\-]+\b/g;
23-
var last_index = 0;
24-
var res;
25-
var C = -1;
26-
var payload = [];
27-
while((res = rtfre.exec(rowtf))) {
28-
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
29-
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
30-
if(data.length) payload.push(data);
31-
switch(res[0]) {
32-
case "\\cell":
33-
++C;
34-
if(payload.length) {
35-
// TODO: value parsing, including codepage adjustments
36-
var cell = {v: payload.join(""), t:"s"};
37-
if(Array.isArray(ws)) ws[R][C] = cell;
38-
else ws[encode_cell({r:R, c:C})] = cell;
39-
}
40-
payload = [];
41-
break;
42-
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
43-
payload.push("\n");
44-
break;
45-
}
46-
last_index = rtfre.lastIndex;
47-
}
48-
if(C > range.e.c) range.e.c = C;
49-
});
50-
ws['!ref'] = encode_range(range);
51-
return ws;
52-
}
11+
/* TODO: RTF technically can store multiple tables, even if Excel does not */
12+
function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ {
13+
var o = opts || {};
14+
var sname = o.sheet || "Sheet1";
15+
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
16+
var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} };
17+
wb.Sheets[sname] = ws;
5318

54-
function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ { return sheet_to_workbook(rtf_to_sheet(d, opts), opts); }
19+
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
20+
if(!rows.length) throw new Error("RTF missing table");
21+
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
22+
rows.forEach(function(rowtf, R) {
23+
if(Array.isArray(ws)) ws[R] = [];
24+
var rtfre = /\\[\w\-]+\b/g;
25+
var last_index = 0;
26+
var res;
27+
var C = -1;
28+
var payload = [];
29+
while((res = rtfre.exec(rowtf))) {
30+
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
31+
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
32+
if(data.length) payload.push(data);
33+
switch(res[0]) {
34+
case "\\cell":
35+
++C;
36+
if(payload.length) {
37+
// TODO: value parsing, including codepage adjustments
38+
var cell = {v: payload.join(""), t:"s"};
39+
if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; }
40+
else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); }
5541

56-
/* TODO: this is a stub */
57-
function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ {
58-
var o = ["{\\rtf1\\ansi"];
59-
var r = safe_decode_range(ws['!ref']), cell/*:Cell*/;
60-
var dense = Array.isArray(ws);
61-
for(var R = r.s.r; R <= r.e.r; ++R) {
62-
o.push("\\trowd\\trautofit1");
63-
for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
64-
o.push("\\pard\\intbl");
65-
for(C = r.s.c; C <= r.e.c; ++C) {
66-
var coord = encode_cell({r:R,c:C});
67-
cell = dense ? (ws[R]||[])[C]: ws[coord];
68-
if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
69-
o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par "));
70-
o.push("\\cell");
42+
if(Array.isArray(ws)) ws[R][C] = cell;
43+
else ws[encode_cell({r:R, c:C})] = cell;
44+
}
45+
payload = [];
46+
break;
47+
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
48+
payload.push("\n");
49+
break;
7150
}
72-
o.push("\\pard\\intbl\\row");
51+
last_index = rtfre.lastIndex;
7352
}
74-
return o.join("") + "}";
75-
}
53+
if(C > range.e.c) range.e.c = C;
54+
});
55+
ws['!ref'] = encode_range(range);
56+
return wb;
57+
}
7658

77-
return {
78-
to_workbook: rtf_to_workbook,
79-
to_sheet: rtf_to_sheet,
80-
from_sheet: sheet_to_rtf
81-
};
82-
})();
59+
/* TODO: standardize sheet names as titles for tables */
60+
function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ {
61+
var o = ["{\\rtf1\\ansi"];
62+
var r = safe_decode_range(ws['!ref']), cell/*:Cell*/;
63+
var dense = Array.isArray(ws);
64+
for(var R = r.s.r; R <= r.e.r; ++R) {
65+
o.push("\\trowd\\trautofit1");
66+
for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
67+
o.push("\\pard\\intbl");
68+
for(C = r.s.c; C <= r.e.c; ++C) {
69+
var coord = encode_cell({r:R,c:C});
70+
cell = dense ? (ws[R]||[])[C]: ws[coord];
71+
if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
72+
o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par "));
73+
o.push("\\cell");
74+
}
75+
o.push("\\pard\\intbl\\row");
76+
}
77+
return o.join("") + "}";
78+
}

bits/87_read.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
105105
}
106106
break;
107107
case 0x03: case 0x83: case 0x8B: case 0x8C: return DBF.to_workbook(d, o);
108-
case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return RTF.to_workbook(d, o); break;
108+
case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return rtf_to_workbook(d, o); break;
109109
case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o);
110110
case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break;
111111
case 0x08: if(n[1] === 0xE7) throw new Error("Unsupported Multiplan 1.x file!"); break;

bits/88_write.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ function writeSync(wb/*:Workbook*/, opts/*:?WriteOpts*/) {
142142
case 'dif': return write_string_type(DIF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
143143
case 'dbf': return write_binary_type(DBF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
144144
case 'prn': return write_string_type(PRN.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
145-
case 'rtf': return write_string_type(RTF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
145+
case 'rtf': return write_string_type(sheet_to_rtf(wb.Sheets[wb.SheetNames[idx]], o), o);
146146
case 'eth': return write_string_type(ETH.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
147147
case 'fods': return write_string_type(write_ods(wb, o), o);
148148
case 'wk1': return write_binary_type(WK_.sheet_to_wk1(wb.Sheets[wb.SheetNames[idx]], o), o);

demos/README.md

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,37 +17,40 @@ can be installed with Bash on Windows or with `cygwin`.
1717

1818
### Included Demos
1919

20-
**Frameworks and APIs**
20+
**JavaScript APIs**
21+
- [`XMLHttpRequest and fetch`](xhr/)
22+
- [`Clipboard Data`](https://docs.sheetjs.com/docs/getting-started/demos/clipboard)
23+
- [`Typed Arrays and Math`](array/)
24+
25+
**Frameworks**
2126
- [`angularjs`](angular/)
2227
- [`angular and ionic`](angular2/)
2328
- [`knockout`](knockout/)
2429
- [`meteor`](meteor/)
2530
- [`react, react-native, next`](react/)
2631
- [`vue 2.x, weex, nuxt`](vue/)
27-
- [`XMLHttpRequest and fetch`](xhr/)
28-
- [`nodejs server`](server/)
29-
- [`databases and key/value stores`](database/)
30-
- [`typed arrays and math`](array/)
3132

3233
**Front-End UI Components**
3334
- [`canvas-datagrid`](datagrid/)
3435
- [`x-spreadsheet`](xspreadsheet/)
3536
- [`react-data-grid`](react/modify/)
36-
- [`vue3-table-light`](/vue/modify/)
37+
- [`vue3-table-light`](vue/modify/)
3738

3839
**Platforms and Integrations**
39-
- [`deno`](deno/)
40+
- [`NodeJS Server-Side Processing`](server/)
41+
- [`Deno`](deno/)
4042
- [`electron application`](electron/)
41-
- [`nw.js application`](nwjs/)
43+
- [`NW.js`](nwjs/)
4244
- [`Chrome / Chromium extensions`](chrome/)
4345
- [`Google Sheets API`](https://docs.sheetjs.com/docs/getting-started/demos/gsheet)
4446
- [`ExtendScript for Adobe Apps`](https://docs.sheetjs.com/docs/getting-started/demos/extendscript)
4547
- [`NetSuite SuiteScript`](https://docs.sheetjs.com/docs/getting-started/demos/netsuite)
4648
- [`SalesForce Lightning Web Components`](https://docs.sheetjs.com/docs/getting-started/demos/salesforce)
4749
- [`Excel JavaScript API`](https://docs.sheetjs.com/docs/getting-started/demos/excel)
48-
- [`Headless Browsers`](headless/)
50+
- [`Headless Automation`](https://docs.sheetjs.com/docs/getting-started/demos/headless)
4951
- [`Swift JSC and other engines`](altjs/)
5052
- [`"serverless" functions`](function/)
53+
- [`databases and key/value stores`](database/)
5154
- [`internet explorer`](oldie/)
5255

5356
**Bundlers and Tooling**

demos/headless/.eslintrc

Lines changed: 0 additions & 11 deletions
This file was deleted.

demos/headless/.gitignore

Lines changed: 0 additions & 1 deletion
This file was deleted.

demos/headless/README.md

Lines changed: 3 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,8 @@
11
# Headless Browsers
22

3-
The library, eschewing unstable and nascent ECMAScript features, plays nicely
4-
with most headless browsers. This demo shows a few common headless scenarios.
5-
6-
NodeJS does not ship with its own layout engine. For advanced HTML exports, a
7-
headless browser is generally indistinguishable from a browser process.
8-
9-
## Chromium Automation with Puppeteer
10-
11-
[Puppeteer](https://pptr.dev/) enables headless Chromium automation.
12-
13-
[`html.js`](./html.js) shows a dedicated script for converting an HTML file to
14-
XLSB using puppeteer. The first argument is the path to the HTML file. The
15-
script writes to `output.xlsb`:
16-
17-
```bash
18-
# read from test.html and write to output.xlsb
19-
$ node html.js test.html
20-
```
21-
22-
The script pulls up the webpage using headless Chromium and adds a script tag
23-
reference to the standalone browser build. That will make the `XLSX` variable
24-
available to future scripts added in the page! The browser context is not able
25-
to save the file using `writeFile`, so the demo generates the XLSB spreadsheet
26-
bytes with the `base64` type, sends the string back to the main process, and
27-
uses `fs.writeFileSync` to write the file.
28-
29-
## WebKit Automation with PhantomJS
30-
31-
This was tested using [PhantomJS 2.1.1](https://phantomjs.org/download.html)
32-
33-
```bash
34-
$ phantomjs phantomjs.js
35-
```
36-
37-
The flow is similar to the Puppeteer flow (scrape table and generate workbook in
38-
website context, copy string back, write string to file from main process).
39-
40-
The `binary` type generates strings that can be written in PhantomJS using the
41-
`fs.write` method with mode `"wb"`.
42-
43-
## wkhtmltopdf
44-
45-
This was tested in wkhtmltopdf 0.12.4, installed using the official binaries:
46-
47-
```bash
48-
$ wkhtmltopdf --javascript-delay 20000 http://oss.sheetjs.com/sheetjs/tests/ test.pdf
49-
```
3+
[The new demo](https://docs.sheetjs.com/docs/getting-started/demos/headless)
4+
has a more focused table export example as well as a demo script for Chromium
5+
automation with Puppeteer and multi-browser automation with Playwright.
506

517

528
[![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/js-xlsx?pixel)](https://github.com/SheetJS/js-xlsx)

demos/headless/html.js

Lines changed: 0 additions & 51 deletions
This file was deleted.

demos/headless/phantomjs.js

Lines changed: 0 additions & 35 deletions
This file was deleted.

0 commit comments

Comments
 (0)