Skip to content

Commit ee2b8cd

Browse files
author
Olivier Chédru
authored
Merge pull request #162 from mbrocchieri/parsing-error
Add option to not throw exception when parsing error and support defa…
2 parents ba61c91 + e7f1e67 commit ee2b8cd

File tree

10 files changed

+133
-9
lines changed

10 files changed

+133
-9
lines changed

fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/OPCPackage.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,16 @@ private static String relsNameFor(String entryName) {
6161
}
6262

6363
private Map<String, String> readWorkbookPartsIds(String workbookRelsEntryName) throws IOException, XMLStreamException {
64+
String xlFolder = workbookRelsEntryName.substring(0, workbookRelsEntryName.indexOf("_rel"));
6465
Map<String, String> partsIdById = new HashMap<>();
6566
SimpleXmlReader rels = new SimpleXmlReader(factory, getRequiredEntryContent(workbookRelsEntryName));
6667
while (rels.goTo("Relationship")) {
6768
String id = rels.getAttribute("Id");
6869
String target = rels.getAttribute("Target");
70+
// if name does not start with /, it is a relative path
71+
if (!target.startsWith("/")) {
72+
target = xlFolder + target;
73+
} // else it is an absolute path
6974
partsIdById.put(id, target);
7075
}
7176
return partsIdById;
@@ -89,6 +94,11 @@ private PartEntryNames extractPartEntriesFromContentTypes() throws XMLStreamExce
8994
break;
9095
}
9196
}
97+
if (entries.workbook == null) {
98+
// in case of a default workbook path, we get this:
99+
// <Default Extension="xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml" />
100+
entries.workbook = "/xl/workbook.xml";
101+
}
92102
}
93103
return entries;
94104
}
@@ -173,7 +183,7 @@ public InputStream getSheetContent(Sheet sheet) throws IOException {
173183
sheet.getIndex(), sheet.getName(), sheet.getId());
174184
throw new ExcelReaderException(msg);
175185
}
176-
return getRequiredEntryContent("xl/" + name);
186+
return getRequiredEntryContent(name);
177187
}
178188

179189
public List<String> getFormatList() {

fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/ReadableWorkbook.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,32 +30,33 @@ public class ReadableWorkbook implements Closeable {
3030

3131
private final OPCPackage pkg;
3232
private final SST sst;
33+
private final ReadingOptions readingOptions;
3334

3435
private boolean date1904;
3536
private final List<Sheet> sheets = new ArrayList<>();
3637
private Integer activeTab;
3738

3839
public ReadableWorkbook(File inputFile) throws IOException {
39-
this(OPCPackage.open(inputFile));
40+
this(OPCPackage.open(inputFile), ReadingOptions.DEFAULT_READING_OPTIONS);
4041
}
4142

4243
/**
4344
* Note: will load the whole xlsx file into memory,
4445
* (but will not uncompress it in memory)
4546
*/
4647
public ReadableWorkbook(InputStream inputStream) throws IOException {
47-
this(inputStream, false);
48+
this(inputStream, ReadingOptions.DEFAULT_READING_OPTIONS);
4849
}
4950

5051
/**
5152
* Note: will load the whole xlsx file into memory,
5253
* (but will not uncompress it in memory)
5354
*/
54-
public ReadableWorkbook(InputStream inputStream, boolean withStyle) throws IOException {
55-
this(OPCPackage.open(inputStream, withStyle));
55+
public ReadableWorkbook(InputStream inputStream, ReadingOptions readingOptions) throws IOException {
56+
this(OPCPackage.open(inputStream, readingOptions.isWithCellFormat()), readingOptions);
5657
}
5758

58-
private ReadableWorkbook(OPCPackage pkg) throws IOException {
59+
private ReadableWorkbook(OPCPackage pkg, ReadingOptions readingOptions) throws IOException {
5960

6061
try {
6162
this.pkg = pkg;
@@ -68,6 +69,7 @@ private ReadableWorkbook(OPCPackage pkg) throws IOException {
6869
} catch (XMLStreamException e) {
6970
throw new ExcelReaderException(e);
7071
}
72+
this.readingOptions = readingOptions;
7173
}
7274

7375
@Override
@@ -166,6 +168,10 @@ public static boolean isOLE2Header(byte[] bytes) {
166168
return HeaderSignatures.isHeader(bytes, HeaderSignatures.OLE_2_SIGNATURE);
167169
}
168170

171+
ReadingOptions getReadingOptions() {
172+
return readingOptions;
173+
}
174+
169175
private static Runnable asUncheckedRunnable(Closeable c) {
170176
return () -> {
171177
try {
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package org.dhatim.fastexcel.reader;
2+
3+
public class ReadingOptions {
4+
public static final ReadingOptions DEFAULT_READING_OPTIONS = new ReadingOptions(false, false);
5+
private final boolean withCellFormat;
6+
private final boolean cellInErrorIfParseError;
7+
8+
/**
9+
* @param withCellFormat If true, extract cell formatting
10+
* @param cellInErrorIfParseError If true, cell type is ERROR if it is not possible to parse cell value.
11+
* If false, an exception is thrown when there is a parsing error
12+
*/
13+
public ReadingOptions(boolean withCellFormat, boolean cellInErrorIfParseError) {
14+
this.withCellFormat = withCellFormat;
15+
this.cellInErrorIfParseError = cellInErrorIfParseError;
16+
}
17+
18+
/**
19+
* @return true if cell formatting is extracted
20+
*/
21+
public boolean isWithCellFormat() {
22+
return withCellFormat;
23+
}
24+
25+
/**
26+
* @return true if the cell type is set to ERROR when the cell value cannot be parsed,
27+
* false if an exception is thrown when there is a parsing error
28+
*/
29+
public boolean isCellInErrorIfParseError() {
30+
return cellInErrorIfParseError;
31+
}
32+
}

fastexcel-reader/src/main/java/org/dhatim/fastexcel/reader/RowSpliterator.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,15 @@ private Cell parseOther(CellAddress addr, String type, String dataFormatId, Stri
136136
while (r.goTo(() -> r.isStartElement("v") || r.isEndElement("c") || r.isStartElement("f"))) {
137137
if ("v".equals(r.getLocalName())) {
138138
rawValue = r.getValueUntilEndElement("v");
139-
value = "".equals(rawValue) ? null : parser.apply(rawValue);
139+
try {
140+
value = "".equals(rawValue) ? null : parser.apply(rawValue);
141+
} catch (ExcelReaderException e) {
142+
if (workbook.getReadingOptions().isCellInErrorIfParseError()) {
143+
definedType = CellType.ERROR;
144+
} else {
145+
throw e;
146+
}
147+
}
140148
} else if ("f".equals(r.getLocalName())) {
141149
String ref = r.getAttribute("ref");
142150
String t = r.getAttribute("t");

fastexcel-reader/src/test/java/org/dhatim/fastexcel/reader/InvalidOPCPackageTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ void expectErrors() throws IOException {
1414
expectError("/invalid/only-content-types.xlsx", "/xl/_rels/custom1-workbook.xml.rels not found");
1515
expectError("/invalid/no-workbook-rels.xlsx", "/xl/_rels/custom1-workbook.xml.rels not found");
1616
expectError("/invalid/no-workbook-xml.xlsx", "/xl/custom1-workbook.xml not found");
17-
expectError("/invalid/no-sheet.xlsx", "xl/worksheets/custom3-sheet1.xml not found");
17+
expectError("/invalid/no-sheet.xlsx", "/xl/worksheets/custom3-sheet1.xml not found");
1818
expectError("/invalid/missing-sheet-entry.xlsx", "Sheet#0 'Feuil1' is missing an entry in workbook rels (for id: 'rId42')");
1919
}
2020

fastexcel-reader/src/test/java/org/dhatim/fastexcel/reader/SimpleReaderTest.java

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,18 @@
1717

1818
import org.junit.jupiter.api.Test;
1919

20+
import java.io.FileInputStream;
2021
import java.io.IOException;
2122
import java.io.InputStream;
2223
import java.math.BigDecimal;
2324
import java.time.LocalDateTime;
25+
import java.util.Iterator;
2426
import java.util.stream.Stream;
2527

2628
import static org.assertj.core.api.Assertions.assertThat;
29+
import static org.junit.jupiter.api.Assertions.assertEquals;
30+
import static org.junit.jupiter.api.Assertions.assertTrue;
31+
import static org.junit.jupiter.api.Assertions.fail;
2732

2833
class SimpleReaderTest {
2934

@@ -65,5 +70,68 @@ void test() throws IOException {
6570
}
6671
}
6772

73+
@Test
74+
void testWithParseErrorOnNumber() throws IOException {
75+
try (InputStream is = Resources.open("/xlsx/parseError.xlsx");
76+
ReadableWorkbook wb = new ReadableWorkbook(is, ReadingOptions.DEFAULT_READING_OPTIONS)) {
77+
Sheet sheet = wb.getFirstSheet();
78+
try (Stream<Row> rows = sheet.openStream()) {
79+
Iterator<Row> it = rows.iterator();
80+
try {
81+
it.hasNext();
82+
fail("Must throw an exception");
83+
} catch (ExcelReaderException e) {
84+
// OK
85+
}
86+
}
87+
}
88+
6889

90+
try (InputStream is = Resources.open("/xlsx/parseError.xlsx");
91+
ReadableWorkbook wb = new ReadableWorkbook(is, new ReadingOptions(false, true))) {
92+
Sheet sheet = wb.getFirstSheet();
93+
try (Stream<Row> rows = sheet.openStream()) {
94+
Iterator<Row> it = rows.iterator();
95+
assertTrue(it.hasNext());
96+
Iterator<Cell> cellIt = it.next().iterator();
97+
assertTrue(cellIt.hasNext());
98+
Cell cell = cellIt.next();
99+
assertEquals(CellType.ERROR, cell.getType());
100+
}
101+
}
102+
}
103+
104+
@Test
105+
public void testDefaultWorkbookPath() throws IOException {
106+
try (InputStream is = Resources.open("/xlsx/DefaultContentType.xlsx");
107+
ReadableWorkbook wb = new ReadableWorkbook(is, new ReadingOptions(false, true))) {
108+
Sheet sheet = wb.getFirstSheet();
109+
try (Stream<Row> rows = sheet.openStream()) {
110+
Iterator<Row> it = rows.iterator();
111+
assertTrue(it.hasNext());
112+
Iterator<Cell> cellIt = it.next().iterator();
113+
assertTrue(cellIt.hasNext());
114+
Cell cell = cellIt.next();
115+
assertEquals(CellType.NUMBER, cell.getType());
116+
assertEquals(BigDecimal.ONE, cell.getValue());
117+
}
118+
}
119+
}
120+
121+
@Test
122+
public void testDefaultWorkbookPath2() throws IOException {
123+
try (InputStream is = Resources.open("/xlsx/absolutePath.xlsx");
124+
ReadableWorkbook wb = new ReadableWorkbook(is, new ReadingOptions(false, true))) {
125+
Sheet sheet = wb.getFirstSheet();
126+
try (Stream<Row> rows = sheet.openStream()) {
127+
Iterator<Row> it = rows.iterator();
128+
assertTrue(it.hasNext());
129+
Iterator<Cell> cellIt = it.next().iterator();
130+
assertTrue(cellIt.hasNext());
131+
Cell cell = cellIt.next();
132+
assertEquals(CellType.NUMBER, cell.getType());
133+
assertEquals(BigDecimal.ONE, cell.getValue());
134+
}
135+
}
136+
}
69137
}

fastexcel-reader/src/test/java/org/dhatim/fastexcel/reader/WithFormatTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ public class WithFormatTest {
1616
@Test
1717
void testFile() throws IOException {
1818
try (InputStream inputStream = open("/xlsx/withStyle.xlsx");
19-
ReadableWorkbook excel = new ReadableWorkbook(inputStream, true)) {
19+
ReadableWorkbook excel = new ReadableWorkbook(inputStream, new ReadingOptions(true, false))) {
2020
Optional<Sheet> sheet = excel.getActiveSheet();
2121
assertTrue(sheet.isPresent());
2222
Iterator<Row> it = sheet.get().openStream().iterator();
5.25 KB
Binary file not shown.
5.25 KB
Binary file not shown.
5.25 KB
Binary file not shown.

0 commit comments

Comments
 (0)