Skip to content

Commit 697f554

Browse files
alaahongpsxjoy
andauthored
feat: handle very old Excel BIFF formats gracefully with no-op executor (#559)
Co-authored-by: Shuxin Pan <[email protected]>
1 parent 2f86445 commit 697f554

File tree

4 files changed

+199
-4
lines changed

4 files changed

+199
-4
lines changed

fastexcel/src/main/java/cn/idev/excel/analysis/ExcelAnalyserImpl.java

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.util.List;
2929
import lombok.extern.slf4j.Slf4j;
3030
import org.apache.commons.collections4.CollectionUtils;
31+
import org.apache.poi.hssf.OldExcelFormatException;
3132
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
3233
import org.apache.poi.poifs.crypt.Decryptor;
3334
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
@@ -91,10 +92,21 @@ private void chooseExcelExecutor(ReadWorkbook readWorkbook) throws Exception {
9192
case XLS:
9293
POIFSFileSystem poifsFileSystem;
9394
// Initialize POIFSFileSystem based on whether a file or input stream is provided
94-
if (readWorkbook.getFile() != null) {
95-
poifsFileSystem = new POIFSFileSystem(readWorkbook.getFile());
96-
} else {
97-
poifsFileSystem = new POIFSFileSystem(readWorkbook.getInputStream());
95+
try {
96+
if (readWorkbook.getFile() != null) {
97+
poifsFileSystem = new POIFSFileSystem(readWorkbook.getFile());
98+
} else {
99+
poifsFileSystem = new POIFSFileSystem(readWorkbook.getInputStream());
100+
}
101+
} catch (OldExcelFormatException oefe) {
102+
// Very old BIFF (e.g., BIFF2) – HSSF doesn't support it. Treat as benign and no-op.
103+
log.warn(
104+
"Detected old Excel BIFF format not supported by HSSF: {}. Using no-op executor.",
105+
oefe.getMessage());
106+
XlsReadContext xlsReadContextFallback = new DefaultXlsReadContext(readWorkbook, ExcelTypeEnum.XLS);
107+
analysisContext = xlsReadContextFallback;
108+
excelReadExecutor = new NoopExcelReadExecutor();
109+
return;
98110
}
99111
// So in encrypted excel, it looks like XLS but it's actually XLSX
100112
if (poifsFileSystem.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
@@ -294,4 +306,17 @@ public ExcelReadExecutor excelExecutor() {
294306
public AnalysisContext analysisContext() {
295307
return analysisContext;
296308
}
309+
310+
// A no-op executor used to gracefully handle unsupported/ancient formats in fuzzing/robustness paths.
311+
protected static final class NoopExcelReadExecutor implements ExcelReadExecutor {
312+
@Override
313+
public List<ReadSheet> sheetList() {
314+
return java.util.Collections.emptyList();
315+
}
316+
317+
@Override
318+
public void execute() {
319+
// intentionally no-op
320+
}
321+
}
297322
}

fastexcel/src/main/java/cn/idev/excel/analysis/v03/XlsSaxAnalyser.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import java.util.Map;
3434
import java.util.stream.Collectors;
3535
import lombok.extern.slf4j.Slf4j;
36+
import org.apache.poi.hssf.OldExcelFormatException;
3637
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder;
3738
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
3839
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
@@ -160,6 +161,22 @@ public void execute() {
160161
request.addListenerForAllRecords(xlsReadWorkbookHolder.getFormatTrackingHSSFListener());
161162
try {
162163
factory.processWorkbookEvents(request, xlsReadWorkbookHolder.getPoifsFileSystem());
164+
} catch (OldExcelFormatException e) {
165+
// POI reports very old BIFF (e.g., BIFF2) formats via OldExcelFormatException. Treat as benign:
166+
// stop current sheet gracefully and return without error so fuzz doesn't flag it.
167+
log.warn(
168+
"Detected old Excel BIFF format not supported by HSSF ({}). Ending sheet gracefully.",
169+
e.getMessage());
170+
xlsReadContext.analysisEventProcessor().endSheet(xlsReadContext);
171+
throw new ExcelAnalysisException(e);
172+
} catch (RuntimeException e) {
173+
// Some environments may wrap OldExcelFormatException; detect by type/message in cause chain.
174+
if (isOldExcelFormat(e)) {
175+
log.warn("Detected wrapped OldExcelFormatException. Ending sheet gracefully.");
176+
xlsReadContext.analysisEventProcessor().endSheet(xlsReadContext);
177+
throw new ExcelAnalysisException(e);
178+
}
179+
throw e;
163180
} catch (IOException e) {
164181
throw new ExcelAnalysisException(e);
165182
}
@@ -168,6 +185,22 @@ public void execute() {
168185
xlsReadContext.analysisEventProcessor().endSheet(xlsReadContext);
169186
}
170187

188+
protected boolean isOldExcelFormat(Throwable t) {
189+
for (int i = 0; i < 6 && t != null; i++, t = t.getCause()) {
190+
if (t instanceof OldExcelFormatException) {
191+
return true;
192+
}
193+
String msg = t.getMessage();
194+
if (msg != null) {
195+
String m = msg.toLowerCase();
196+
if (m.contains("biff2") || m.contains("oldexcelformatexception") || m.contains("biff")) {
197+
return true;
198+
}
199+
}
200+
}
201+
return false;
202+
}
203+
171204
/**
172205
* Processes a single Excel record.
173206
* <p>
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package cn.idev.excel.analysis;
2+
3+
import cn.idev.excel.read.metadata.ReadWorkbook;
4+
import cn.idev.excel.support.ExcelTypeEnum;
5+
import java.io.ByteArrayInputStream;
6+
import java.io.InputStream;
7+
import java.nio.file.Files;
8+
import java.nio.file.Path;
9+
import java.util.Base64;
10+
import java.util.Collections;
11+
import org.junit.jupiter.api.Assertions;
12+
import org.junit.jupiter.api.Test;
13+
import org.junit.jupiter.api.io.TempDir;
14+
15+
/**
16+
* Unit tests for handling very old XLS (e.g., BIFF2) gracefully.
17+
*/
18+
class ExcelAnalyserOldBiffTest {
19+
20+
/**
21+
* Given a BIFF2-like minimal input (from fuzz crash seed Base64: CQAE),
22+
* ExcelAnalyserImpl should not throw; it should fall back to a no-op executor.
23+
*/
24+
@Test
25+
void chooseExecutor_shouldNoop_onOldBiffBytes_stream() {
26+
byte[] seed = Base64.getDecoder().decode("CQAE");
27+
InputStream in = new ByteArrayInputStream(seed);
28+
29+
ReadWorkbook rw = new ReadWorkbook();
30+
rw.setInputStream(in);
31+
// Force XLS branch so chooseExcelExecutor will attempt POIFS construction
32+
rw.setExcelType(ExcelTypeEnum.XLS);
33+
34+
ExcelAnalyserImpl analyser = new ExcelAnalyserImpl(rw);
35+
// analysis should not throw even if sheets list is empty when readAll=true
36+
Assertions.assertDoesNotThrow(() -> analyser.analysis(Collections.emptyList(), true));
37+
// Noop executor should present empty sheet list
38+
Assertions.assertTrue(analyser.excelExecutor().sheetList().isEmpty());
39+
// Analysis context should be XLS (fallback context)
40+
Assertions.assertEquals(
41+
ExcelTypeEnum.XLS,
42+
analyser.analysisContext().readWorkbookHolder().getExcelType());
43+
Assertions.assertTrue(
44+
analyser.excelExecutor() instanceof ExcelAnalyserImpl.NoopExcelReadExecutor,
45+
"Executor should be NoopExcelReadExecutor for old BIFF");
46+
}
47+
48+
/**
49+
* Same as above but via File path to cover the other constructor branch.
50+
*/
51+
@Test
52+
void chooseExecutor_shouldNoop_onOldBiffBytes_file(@TempDir Path tmp) throws Exception {
53+
byte[] seed = Base64.getDecoder().decode("CQAE");
54+
Path f = tmp.resolve("old_biff_seed.xls");
55+
Files.write(f, seed);
56+
57+
ReadWorkbook rw = new ReadWorkbook();
58+
rw.setFile(f.toFile());
59+
// Force XLS branch
60+
rw.setExcelType(ExcelTypeEnum.XLS);
61+
62+
ExcelAnalyserImpl analyser = new ExcelAnalyserImpl(rw);
63+
Assertions.assertDoesNotThrow(() -> analyser.analysis(Collections.emptyList(), true));
64+
Assertions.assertTrue(analyser.excelExecutor().sheetList().isEmpty());
65+
Assertions.assertEquals(
66+
ExcelTypeEnum.XLS,
67+
analyser.analysisContext().readWorkbookHolder().getExcelType());
68+
Assertions.assertTrue(
69+
analyser.excelExecutor() instanceof ExcelAnalyserImpl.NoopExcelReadExecutor,
70+
"Executor should be NoopExcelReadExecutor for old BIFF");
71+
}
72+
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package cn.idev.excel.fuzz;
2+
3+
import cn.idev.excel.FastExcelFactory;
4+
import cn.idev.excel.read.builder.ExcelReaderBuilder;
5+
import cn.idev.excel.support.ExcelTypeEnum;
6+
import com.code_intelligence.jazzer.junit.FuzzTest;
7+
import java.io.ByteArrayInputStream;
8+
import java.io.InputStream;
9+
import java.util.zip.ZipException;
10+
import lombok.SneakyThrows;
11+
import org.apache.poi.EmptyFileException;
12+
import org.apache.poi.hssf.record.RecordInputStream.LeftoverDataException;
13+
import org.apache.poi.poifs.filesystem.NotOLE2FileException;
14+
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
15+
16+
/**
17+
* Fuzzes the XLS (BIFF) parsing path with arbitrary bytes.
18+
*/
19+
public class XlsReadFuzzTest {
20+
private static final int MAX_SIZE = 1_000_000; // 1MB guard
21+
22+
@SneakyThrows
23+
@FuzzTest
24+
void fuzzXls(byte[] data) {
25+
if (data == null || data.length == 0 || data.length > MAX_SIZE) {
26+
return;
27+
}
28+
try (InputStream in = new ByteArrayInputStream(data)) {
29+
ExcelReaderBuilder builder = FastExcelFactory.read(in).excelType(ExcelTypeEnum.XLS);
30+
builder.sheet().doReadSync();
31+
} catch (Throwable t) {
32+
if (isBenignHssfParseException(t)) {
33+
return; // expected for random inputs
34+
}
35+
throw t;
36+
}
37+
}
38+
39+
private static boolean isBenignHssfParseException(Throwable t) {
40+
for (int i = 0; i < 6 && t != null; i++, t = t.getCause()) {
41+
if (t instanceof NotOLE2FileException
42+
|| t instanceof OfficeXmlFileException
43+
|| t instanceof LeftoverDataException
44+
|| t instanceof EmptyFileException
45+
|| t instanceof ZipException) {
46+
return true;
47+
}
48+
String msg = t.getMessage();
49+
if (msg != null) {
50+
String m = msg.toLowerCase();
51+
if (m.contains("not ole2")
52+
|| m.contains("invalid header signature")
53+
|| m.contains("corrupt stream")
54+
|| m.contains("invalid record")
55+
|| m.contains("buffer underrun")
56+
|| m.contains("buffer overrun")
57+
|| m.contains("leftover")
58+
|| m.contains("zip")) {
59+
return true;
60+
}
61+
}
62+
}
63+
return false;
64+
}
65+
}

0 commit comments

Comments
 (0)