Skip to content

Commit 08f5678

Browse files
committed
ODS read/write number formats
1 parent 4cc0412 commit 08f5678

File tree

2 files changed

+432
-35
lines changed

2 files changed

+432
-35
lines changed

bits/80_parseods.js

Lines changed: 249 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,236 @@ var number_formats_ods = {
2727
quarter: ["\\Qm", "m\\\"th quarter\""]
2828
};
2929

30+
/* Note: ODS can stick styles in content.xml or styles.xml, FODS blurs lines */
/**
 * Scan an XML string for ODF number format definitions (<number:*-style>)
 * and translate each one into an Excel-style number format string.
 *
 * @param d     XML text: a whole styles document or a fragment holding one
 *              or more number format definitions
 * @param _opts parse options (not read by this function)
 * @param _nfm  optional existing map; it is updated in place and returned
 * @returns     map from the definition's `name` attribute to the format string
 */
function parse_ods_styles(d/*:string*/, _opts, _nfm) {
	var number_format_map = _nfm || {};
	var str = xlml_normalize(d);
	xlmlregex.lastIndex = 0; // shared global regex: always restart from the top
	str = str.replace(/<!--([\s\S]*?)-->/mg,"").replace(/<!DOCTYPE[^\[]*\[[^\]]*\]>/gm,"");
	/* NFtag: opening tag of the current definition; NF: format built so far;
	   tidx: offset just past the most recent text-bearing open tag;
	   etpos: insertion offset for <number:embedded-text>;
	   infmt: true between the open and close tag of a definition */
	var Rn, NFtag, NF = "", tNF = "", y, mapped, etpos = 0, tidx = -1, infmt = false, payload = "";
	while((Rn = xlmlregex.exec(str))) {
		Rn[3] = Rn[3].replace(/_.*$/,"");

		/* Number Format Definitions */
		switch(Rn[3]) {
			case 'number-style': // <number:number-style> 16.29.2
			case 'currency-style': // <number:currency-style> 16.29.8
			case 'percentage-style': // <number:percentage-style> 16.29.10
			case 'date-style': // <number:date-style> 16.29.11
			case 'time-style': // <number:time-style> 16.29.19
			case 'text-style': // <number:text-style> 16.29.26
				if(Rn[1]==='/') {
					/* a close tag with no open definition has nothing to commit */
					if(!infmt) continue;
					infmt = false;
					/* truncate-on-overflow="false": bracket the first run of the
					   largest time unit present (h, else m, else s) */
					if(NFtag['truncate-on-overflow'] == "false") {
						if(NF.match(/h/)) NF = NF.replace(/h+/, "[$&]");
						else if(NF.match(/m/)) NF = NF.replace(/m+/, "[$&]");
						else if(NF.match(/s/)) NF = NF.replace(/s+/, "[$&]");
					}
					number_format_map[NFtag.name] = NF;
					NF = "";
				} else if(Rn[0].charAt(Rn[0].length-2) !== '/') {
					infmt = true;
					NF = "";
					NFtag = parsexmltag(Rn[0], false);
				}
				continue;

			// LibreOffice bug https://bugs.documentfoundation.org/show_bug.cgi?id=149484
			case 'boolean-style': // <number:boolean-style> 16.29.24
				if(Rn[1]==='/') {
					if(!infmt) continue;
					infmt = false;
					number_format_map[NFtag.name] = "General";
					NF = "";
				} else if(Rn[0].charAt(Rn[0].length-2) !== '/') {
					infmt = true;
					NF = "";
					NFtag = parsexmltag(Rn[0], false);
				}
				continue;
		}

		/* Everything below is a child of a definition.  The same local names
		   (map, text-properties, ...) can occur elsewhere in a styles document;
		   outside a definition their output would be discarded at the next
		   opening tag anyway, so skip them. */
		if(!infmt) continue;

		/* Number Format Elements */
		switch(Rn[3]) {
			case 'boolean': // <number:boolean> 16.29.25
				NF += "General"; // ODF spec is unfortunately underspecified here
				break;

			case 'text': // <number:text> 16.29.27
				if(Rn[1]==='/') {
					payload = str.slice(tidx, xlmlregex.lastIndex - Rn[0].length);
					// NOTE: Excel has a different interpretation of "%%" and friends
					if(payload == "%" && NFtag[0] == '<number:percentage-style') NF += "%";
					else NF += '"' + payload.replace(/"/g, '""') + '"';
				} else if(Rn[0].charAt(Rn[0].length-2) !== '/') {
					tidx = xlmlregex.lastIndex;
				} break;

			case 'day': { // <number:day> 16.29.12
				y = parsexmltag(Rn[0], false);
				switch(y["style"]) {
					case "short": NF += "d"; break;
					case "long": NF += "dd"; break;
					default: NF += "dd"; break; // TODO: error condition
				}
			} break;

			case 'day-of-week': { // <number:day-of-week> 16.29.16
				y = parsexmltag(Rn[0], false);
				switch(y["style"]) {
					case "short": NF += "ddd"; break;
					case "long": NF += "dddd"; break;
					default: NF += "ddd"; break;
				}
			} break;

			case 'era': { // <number:era> 16.29.15 TODO: proper mapping
				y = parsexmltag(Rn[0], false);
				switch(y["style"]) {
					case "short": NF += "ee"; break;
					case "long": NF += "eeee"; break;
					default: NF += "eeee"; break; // TODO: error condition
				}
			} break;

			case 'hours': { // <number:hours> 16.29.20
				y = parsexmltag(Rn[0], false);
				switch(y["style"]) {
					case "short": NF += "h"; break;
					case "long": NF += "hh"; break;
					default: NF += "hh"; break; // TODO: error condition
				}
			} break;

			case 'minutes': { // <number:minutes> 16.29.21
				y = parsexmltag(Rn[0], false);
				switch(y["style"]) {
					case "short": NF += "m"; break;
					case "long": NF += "mm"; break;
					default: NF += "mm"; break; // TODO: error condition
				}
			} break;

			case 'month': { // <number:month> 16.29.13
				y = parsexmltag(Rn[0], false);
				/* textual months get two extra "m" (m -> mmm, mm -> mmmm).  The
				   attribute is a boolean, so an explicit textual="false" must
				   not count as set. */
				if(parsexmlbool(y["textual"])) NF += "mm";
				switch(y["style"]) {
					case "short": NF += "m"; break;
					case "long": NF += "mm"; break;
					default: NF += "m"; break;
				}
			} break;

			case 'seconds': { // <number:seconds> 16.29.22
				y = parsexmltag(Rn[0], false);
				switch(y["style"]) {
					case "short": NF += "s"; break;
					case "long": NF += "ss"; break;
					default: NF += "ss"; break; // TODO: error condition
				}
				if(y["decimal-places"]) NF += "." + fill("0", +y["decimal-places"]);
			} break;

			case 'year': { // <number:year> 16.29.14
				y = parsexmltag(Rn[0], false);
				switch(y["style"]) {
					case "short": NF += "yy"; break;
					case "long": NF += "yyyy"; break;
					default: NF += "yy"; break; // TODO: error condition
				}
			} break;

			case 'am-pm': // <number:am-pm> 16.29.23
				NF += "AM/PM"; // LO autocorrects A/P -> AM/PM
				break;

			case 'week-of-year': // <number:week-of-year> 16.29.17
			case 'quarter': // <number:quarter> 16.29.18
				console.error("Excel does not support ODS format token " + Rn[3]);
				break;

			case 'fill-character': // <number:fill-character> 16.29.5
				if(Rn[1]==='/') {
					payload = str.slice(tidx, xlmlregex.lastIndex - Rn[0].length);
					// NOTE: Excel has a different interpretation of "%%" and friends
					NF += '"' + payload.replace(/"/g, '""') + '"*';
				} else if(Rn[0].charAt(Rn[0].length-2) !== '/') {
					tidx = xlmlregex.lastIndex;
				} break;

			case 'scientific-number': // <number:scientific-number> 16.29.6
				// TODO: find a mapping for all parameters
				y = parsexmltag(Rn[0], false);
				NF += "0." + fill("0", +y["min-decimal-places"] || +y["decimal-places"] || 2) + fill("?", +y["decimal-places"] - +y["min-decimal-places"] || 0) + "E" + (parsexmlbool(y["forced-exponent-sign"]) ? "+" : "") + fill("0", +y["min-exponent-digits"] || 2);
				break;

			case 'fraction': // <number:fraction> 16.29.7
				// TODO: find a mapping for all parameters
				y = parsexmltag(Rn[0], false);
				if(!+y["min-integer-digits"]) NF += "#";
				else NF += fill("0", +y["min-integer-digits"]);
				NF += " ";
				NF += fill("?", +y["min-numerator-digits"] || 1);
				NF += "/";
				if(+y["denominator-value"]) NF += y["denominator-value"];
				else NF += fill("?", +y["min-denominator-digits"] || 1);
				break;

			case 'currency-symbol': // <number:currency-symbol> 16.29.9
				// TODO: localization with [$-...]
				if(Rn[1]==='/') {
					NF += '"' + str.slice(tidx, xlmlregex.lastIndex - Rn[0].length).replace(/"/g, '""') + '"';
				} else if(Rn[0].charAt(Rn[0].length-2) !== '/') {
					tidx = xlmlregex.lastIndex;
				} else NF += "$"; // self-closing tag carries no symbol text
				break;

			case 'text-properties': // <style:text-properties> 16.29.29
				y = parsexmltag(Rn[0], false);
				switch((y["color"]||"").toLowerCase().replace("#", "")) {
					case "ff0000": case "red": NF = "[Red]" + NF; break;
				}
				break;

			case 'text-content': // <number:text-content> 16.29.28
				NF += "@";
				break;

			case 'map': // <style:map> 16.3
				// TODO: handle more complex maps
				y = parsexmltag(Rn[0], false);
				if(unescapexml(y["condition"]) == "value()>=0") {
					/* the referenced format becomes the leading section; if it is
					   not in the map, prepending it would put the literal text
					   "undefined" into the format */
					mapped = number_format_map[y["apply-style-name"]];
					if(mapped != null) NF = mapped + ";" + NF;
					else console.error("ODS number format references unknown style: " + y["apply-style-name"]);
				}
				else console.error("ODS number format may be incorrect: " + y["condition"]);
				break;

			case 'number': // <number:number> 16.29.3
				// TODO: handle all the attributes
				if(Rn[1]==='/') break;
				y = parsexmltag(Rn[0], false);
				tNF = "";
				tNF += fill("0", +y["min-integer-digits"] || 1);
				if(parsexmlbool(y["grouping"])) tNF = commaify(fill("#", Math.max(0, 4 - tNF.length)) + tNF);
				if(+y["min-decimal-places"] || +y["decimal-places"]) tNF += ".";
				if(+y["min-decimal-places"]) tNF += fill("0", +y["min-decimal-places"] || 1);
				if(+y["decimal-places"] - (+y["min-decimal-places"]||0)) tNF += fill("#", +y["decimal-places"] - (+y["min-decimal-places"]||0));
				NF += tNF;
				break;

			case 'embedded-text': // <number:embedded-text> 16.29.4
				// TODO: verify interplay with grouping et al
				if(Rn[1]==='/') {
					/* etpos is zero (append) or negative (offset from the end of NF) */
					if(etpos == 0) NF += '"' + str.slice(tidx, xlmlregex.lastIndex - Rn[0].length).replace(/"/g, '""') + '"';
					else NF = NF.slice(0, etpos) + '"' + str.slice(tidx, xlmlregex.lastIndex - Rn[0].length).replace(/"/g, '""') + '"' + NF.slice(etpos);
				} else if(Rn[0].charAt(Rn[0].length-2) !== '/') {
					tidx = xlmlregex.lastIndex;
					etpos = -+parsexmltag(Rn[0], false)["position"] || 0;
				} break;
		}
	}
	return number_format_map;
}
30252

31-
function parse_content_xml(d/*:string*/, _opts)/*:Workbook*/ {
253+
function parse_content_xml(d/*:string*/, _opts, _nfm)/*:Workbook*/ {
32254
var opts = _opts || {};
33255
if(DENSE != null && opts.dense == null) opts.dense = DENSE;
34256
var str = xlml_normalize(d);
35257
var state/*:Array<any>*/ = [], tmp;
36258
var tag/*:: = {}*/;
37-
var NFtag = {name:""}, NF = "", pidx = 0;
259+
var nfidx, NF = "", pidx = 0;
38260
var sheetag/*:: = {name:"", '名称':""}*/;
39261
var rowtag/*:: = {'行号':""}*/;
40262
var Sheets = {}, SheetNames/*:Array<string>*/ = [];
@@ -45,7 +267,7 @@ function parse_content_xml(d/*:string*/, _opts)/*:Workbook*/ {
45267
var textR = [];
46268
var R = -1, C = -1, range = {s: {r:1000000,c:10000000}, e: {r:0, c:0}};
47269
var row_ol = 0;
48-
var number_format_map = {};
270+
var number_format_map = _nfm || {}, styles = {};
49271
var merges/*:Array<Range>*/ = [], mrange = {}, mR = 0, mC = 0;
50272
var rowinfo/*:Array<RowInfo>*/ = [], rowpeat = 1, colpeat = 1;
51273
var arrayf/*:Array<[Range, string]>*/ = [];
@@ -56,7 +278,7 @@ function parse_content_xml(d/*:string*/, _opts)/*:Workbook*/ {
56278
var creator = "", creatoridx = 0;
57279
var isstub = false, intable = false;
58280
var i = 0;
59-
var baddate = 1;
281+
var baddate = 0;
60282
xlmlregex.lastIndex = 0;
61283
str = str.replace(/<!--([\s\S]*?)-->/mg,"").replace(/<!DOCTYPE[^\[]*\[[^\]]*\]>/gm,"");
62284
while((Rn = xlmlregex.exec(str))) switch((Rn[3]=Rn[3].replace(/_.*$/,""))) {
@@ -114,6 +336,7 @@ function parse_content_xml(d/*:string*/, _opts)/*:Workbook*/ {
114336
colpeat = parseInt(ctag['number-columns-repeated']||"1", 10);
115337
q = ({t:'z', v:null/*:: , z:null, w:"",c:[]*/}/*:any*/);
116338
if(ctag.formula && opts.cellFormula != false) q.f = ods_to_csf_formula(unescapexml(ctag.formula));
339+
if(ctag["style-name"] && styles[ctag["style-name"]]) q.z = styles[ctag["style-name"]];
117340
if((ctag['数据类型'] || ctag['value-type']) == "string") {
118341
q.t = "s"; q.v = unescapexml(ctag['string-value'] || "");
119342
if(opts.dense) {
@@ -136,6 +359,7 @@ function parse_content_xml(d/*:string*/, _opts)/*:Workbook*/ {
136359
ctag = parsexmltag(Rn[0], false);
137360
comments = []; comment = ({}/*:any*/);
138361
q = ({t:ctag['数据类型'] || ctag['value-type'], v:null/*:: , z:null, w:"",c:[]*/}/*:any*/);
362+
if(ctag["style-name"] && styles[ctag["style-name"]]) q.z = styles[ctag["style-name"]];
139363
if(opts.cellFormula) {
140364
if(ctag.formula) ctag.formula = unescapexml(ctag.formula);
141365
if(ctag['number-matrix-columns-spanned'] && ctag['number-matrix-rows-spanned']) {
@@ -163,16 +387,16 @@ function parse_content_xml(d/*:string*/, _opts)/*:Workbook*/ {
163387

164388
/* 19.385 office:value-type */
165389
switch(q.t) {
166-
case 'boolean': q.t = 'b'; q.v = parsexmlbool(ctag['boolean-value']); break;
390+
case 'boolean': q.t = 'b'; q.v = parsexmlbool(ctag['boolean-value']) || (+ctag['boolean-value'] >= 1); break;
167391
case 'float': q.t = 'n'; q.v = parseFloat(ctag.value); break;
168392
case 'percentage': q.t = 'n'; q.v = parseFloat(ctag.value); break;
169393
case 'currency': q.t = 'n'; q.v = parseFloat(ctag.value); break;
170394
case 'date': q.t = 'd'; q.v = parseDate(ctag['date-value']);
171395
if(!opts.cellDates) { q.t = 'n'; q.v = datenum(q.v, WB.WBProps.date1904) - baddate; }
172-
q.z = 'm/d/yy'; break;
396+
if(!q.z) q.z = 'm/d/yy'; break;
173397
case 'time': q.t = 'n'; q.v = parse_isodur(ctag['time-value'])/86400;
174398
if(opts.cellDates) { q.t = 'd'; q.v = numdate(q.v); }
175-
q.z = 'HH:MM:SS'; break;
399+
if(!q.z) q.z = 'HH:MM:SS'; break;
176400
case 'number': q.t = 'n'; q.v = parseFloat(ctag['数据数值']); break;
177401
default:
178402
if(q.t === 'string' || q.t === 'text' || !q.t) {
@@ -267,23 +491,24 @@ function parse_content_xml(d/*:string*/, _opts)/*:Workbook*/ {
267491
textp = ""; textpidx = 0; textR = [];
268492
break;
269493

270-
case 'scientific-number': // TODO: <number:scientific-number>
271-
break;
272-
case 'currency-symbol': // TODO: <number:currency-symbol>
273-
break;
274-
case 'currency-style': // TODO: <number:currency-style>
494+
case 'scientific-number': // <number:scientific-number>
495+
case 'currency-symbol': // <number:currency-symbol>
496+
case 'fill-character': // 16.29.5 <number:fill-character>
275497
break;
498+
499+
case 'text-style': // 16.27.25 <number:text-style>
500+
case 'boolean-style': // 16.27.23 <number:boolean-style>
276501
case 'number-style': // 16.27.2 <number:number-style>
502+
case 'currency-style': // 16.29.8 <number:currency-style>
277503
case 'percentage-style': // 16.27.9 <number:percentage-style>
278504
case 'date-style': // 16.27.10 <number:date-style>
279505
case 'time-style': // 16.27.18 <number:time-style>
280506
if(Rn[1]==='/'){
281-
number_format_map[NFtag.name] = NF;
282-
if((tmp=state.pop())[0]!==Rn[3]) throw "Bad state: "+tmp;
507+
var xlmlidx = xlmlregex.lastIndex;
508+
parse_ods_styles(str.slice(nfidx, xlmlregex.lastIndex), _opts, number_format_map);
509+
xlmlregex.lastIndex = xlmlidx;
283510
} else if(Rn[0].charAt(Rn[0].length-2) !== '/') {
284-
NF = "";
285-
NFtag = parsexmltag(Rn[0], false);
286-
state.push([Rn[3], true]);
511+
nfidx = xlmlregex.lastIndex - Rn[0].length;
287512
} break;
288513

289514
case 'script': break; // 3.13 <office:script>
@@ -292,8 +517,10 @@ function parse_content_xml(d/*:string*/, _opts)/*:Workbook*/ {
292517

293518
case 'default-style': // TODO: <style:default-style>
294519
case 'page-layout': break; // TODO: <style:page-layout>
295-
case 'style': // 16.2 <style:style>
296-
break;
520+
case 'style': { // 16.2 <style:style>
521+
var styletag = parsexmltag(Rn[0], false);
522+
if(styletag["family"] == "table-cell" && number_format_map[styletag["data-style-name"]]) styles[styletag["name"]] = number_format_map[styletag["data-style-name"]];
523+
} break;
297524
case 'map': break; // 16.3 <style:map>
298525
case 'font-face': break; // 16.21 <style:font-face>
299526

@@ -331,9 +558,7 @@ function parse_content_xml(d/*:string*/, _opts)/*:Workbook*/ {
331558
NF += number_formats_ods[Rn[3]][tag.style==='long'?1:0]; break;
332559
} break;
333560

334-
case 'boolean-style': break; // 16.27.23 <number:boolean-style>
335561
case 'boolean': break; // 16.27.24 <number:boolean>
336-
case 'text-style': break; // 16.27.25 <number:text-style>
337562
case 'text': // 16.27.26 <number:text>
338563
if(Rn[0].slice(-2) === "/>") break;
339564
else if(Rn[1]==="/") switch(state[state.length-1][0]) {
@@ -564,9 +789,11 @@ function parse_content_xml(d/*:string*/, _opts)/*:Workbook*/ {
564789
/**
 * Build a workbook from an ODS zip container.
 *
 * Reads the manifest (when present), collects number formats from styles.xml
 * (when present) and hands them to the content.xml parser, then attaches
 * document properties from meta.xml (when present).
 *
 * Throws an Error when content.xml cannot be read from the container.
 */
function parse_ods(zip/*:ZIPFile*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
	opts = opts || ({}/*:any*/);

	var manifest_path = 'META-INF/manifest.xml';
	if(safegetzipfile(zip, manifest_path)) parse_manifest(getzipdata(zip, manifest_path), opts);

	/* styles.xml is read before content.xml so its formats reach the content parser */
	var styles_xml = getzipstr(zip, 'styles.xml');
	var nfmap = styles_xml && parse_ods_styles(utf8read(styles_xml), opts);

	var content_xml = getzipstr(zip, 'content.xml');
	if(!content_xml) throw new Error("Missing content.xml in ODS / UOF file");
	var wb = parse_content_xml(utf8read(content_xml), opts, nfmap);

	var meta_path = 'meta.xml';
	if(safegetzipfile(zip, meta_path)) wb.Props = parse_core_props(getzipdata(zip, meta_path));
	return wb;
}

0 commit comments

Comments
 (0)