Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.poi.hssf.OldExcelFormatException;
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
Expand Down Expand Up @@ -91,10 +92,21 @@ private void chooseExcelExecutor(ReadWorkbook readWorkbook) throws Exception {
case XLS:
POIFSFileSystem poifsFileSystem;
// Initialize POIFSFileSystem based on whether a file or input stream is provided
if (readWorkbook.getFile() != null) {
poifsFileSystem = new POIFSFileSystem(readWorkbook.getFile());
} else {
poifsFileSystem = new POIFSFileSystem(readWorkbook.getInputStream());
try {
if (readWorkbook.getFile() != null) {
poifsFileSystem = new POIFSFileSystem(readWorkbook.getFile());
} else {
poifsFileSystem = new POIFSFileSystem(readWorkbook.getInputStream());
}
} catch (OldExcelFormatException oefe) {
// Very old BIFF (e.g., BIFF2) – HSSF doesn't support it. Treat as benign and no-op.
log.warn(
"Detected old Excel BIFF format not supported by HSSF: {}. Using no-op executor.",
oefe.getMessage());
XlsReadContext xlsReadContextFallback = new DefaultXlsReadContext(readWorkbook, ExcelTypeEnum.XLS);
analysisContext = xlsReadContextFallback;
excelReadExecutor = new NoopExcelReadExecutor();
return;
}
// So in encrypted excel, it looks like XLS but it's actually XLSX
if (poifsFileSystem.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
Expand Down Expand Up @@ -294,4 +306,17 @@ public ExcelReadExecutor excelExecutor() {
public AnalysisContext analysisContext() {
return analysisContext;
}

// A no-op executor used to gracefully handle unsupported/ancient formats in fuzzing/robustness paths.
protected static final class NoopExcelReadExecutor implements ExcelReadExecutor {
@Override
public List<ReadSheet> sheetList() {
return java.util.Collections.emptyList();
}

@Override
public void execute() {
// intentionally no-op
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.util.Map;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.hssf.OldExcelFormatException;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
Expand Down Expand Up @@ -160,6 +161,22 @@ public void execute() {
request.addListenerForAllRecords(xlsReadWorkbookHolder.getFormatTrackingHSSFListener());
try {
factory.processWorkbookEvents(request, xlsReadWorkbookHolder.getPoifsFileSystem());
} catch (OldExcelFormatException e) {
// POI reports very old BIFF (e.g., BIFF2) formats via OldExcelFormatException. Treat as benign:
// stop current sheet gracefully and return without error so fuzz doesn't flag it.
log.warn(
"Detected old Excel BIFF format not supported by HSSF ({}). Ending sheet gracefully.",
e.getMessage());
xlsReadContext.analysisEventProcessor().endSheet(xlsReadContext);
throw new ExcelAnalysisException(e);
} catch (RuntimeException e) {
// Some environments may wrap OldExcelFormatException; detect by type/message in cause chain.
if (isOldExcelFormat(e)) {
log.warn("Detected wrapped OldExcelFormatException. Ending sheet gracefully.");
xlsReadContext.analysisEventProcessor().endSheet(xlsReadContext);
throw new ExcelAnalysisException(e);
}
throw e;
} catch (IOException e) {
throw new ExcelAnalysisException(e);
}
Expand All @@ -168,6 +185,22 @@ public void execute() {
xlsReadContext.analysisEventProcessor().endSheet(xlsReadContext);
}

protected boolean isOldExcelFormat(Throwable t) {
Copy link

Copilot AI Sep 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This magic number '6' for the loop depth should be extracted as a named constant to improve code readability and maintainability.

Copilot uses AI. Check for mistakes.
for (int i = 0; i < 6 && t != null; i++, t = t.getCause()) {
if (t instanceof OldExcelFormatException) {
return true;
}
String msg = t.getMessage();
if (msg != null) {
String m = msg.toLowerCase();
if (m.contains("biff2") || m.contains("oldexcelformatexception") || m.contains("biff")) {
return true;
}
}
}
return false;
}
Comment on lines +188 to +202
Copy link

Copilot AI Sep 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The string contains check for 'biff' is too broad and could match unrelated error messages. Consider making it more specific, such as 'old biff' or 'unsupported biff', to avoid false positives.

Copilot uses AI. Check for mistakes.

/**
* Processes a single Excel record.
* <p>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package cn.idev.excel.analysis;

import cn.idev.excel.read.metadata.ReadWorkbook;
import cn.idev.excel.support.ExcelTypeEnum;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Base64;
import java.util.Collections;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

/**
* Unit tests for handling very old XLS (e.g., BIFF2) gracefully.
*/
class ExcelAnalyserOldBiffTest {

/**
* Given a BIFF2-like minimal input (from fuzz crash seed Base64: CQAE),
* ExcelAnalyserImpl should not throw; it should fall back to a no-op executor.
*/
@Test
void chooseExecutor_shouldNoop_onOldBiffBytes_stream() {
byte[] seed = Base64.getDecoder().decode("CQAE");
InputStream in = new ByteArrayInputStream(seed);

ReadWorkbook rw = new ReadWorkbook();
rw.setInputStream(in);
// Force XLS branch so chooseExcelExecutor will attempt POIFS construction
rw.setExcelType(ExcelTypeEnum.XLS);

ExcelAnalyserImpl analyser = new ExcelAnalyserImpl(rw);
// analysis should not throw even if sheets list is empty when readAll=true
Assertions.assertDoesNotThrow(() -> analyser.analysis(Collections.emptyList(), true));
// Noop executor should present empty sheet list
Assertions.assertTrue(analyser.excelExecutor().sheetList().isEmpty());
// Analysis context should be XLS (fallback context)
Assertions.assertEquals(
ExcelTypeEnum.XLS,
analyser.analysisContext().readWorkbookHolder().getExcelType());
Assertions.assertTrue(
analyser.excelExecutor() instanceof ExcelAnalyserImpl.NoopExcelReadExecutor,
"Executor should be NoopExcelReadExecutor for old BIFF");
}

/**
* Same as above but via File path to cover the other constructor branch.
*/
@Test
void chooseExecutor_shouldNoop_onOldBiffBytes_file(@TempDir Path tmp) throws Exception {
byte[] seed = Base64.getDecoder().decode("CQAE");
Path f = tmp.resolve("old_biff_seed.xls");
Files.write(f, seed);

ReadWorkbook rw = new ReadWorkbook();
rw.setFile(f.toFile());
// Force XLS branch
rw.setExcelType(ExcelTypeEnum.XLS);

ExcelAnalyserImpl analyser = new ExcelAnalyserImpl(rw);
Assertions.assertDoesNotThrow(() -> analyser.analysis(Collections.emptyList(), true));
Assertions.assertTrue(analyser.excelExecutor().sheetList().isEmpty());
Assertions.assertEquals(
ExcelTypeEnum.XLS,
analyser.analysisContext().readWorkbookHolder().getExcelType());
Assertions.assertTrue(
analyser.excelExecutor() instanceof ExcelAnalyserImpl.NoopExcelReadExecutor,
"Executor should be NoopExcelReadExecutor for old BIFF");
}
}
65 changes: 65 additions & 0 deletions fastexcel/src/test/java/cn/idev/excel/fuzz/XlsReadFuzzTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package cn.idev.excel.fuzz;

import cn.idev.excel.FastExcelFactory;
import cn.idev.excel.read.builder.ExcelReaderBuilder;
import cn.idev.excel.support.ExcelTypeEnum;
import com.code_intelligence.jazzer.junit.FuzzTest;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.zip.ZipException;
import lombok.SneakyThrows;
import org.apache.poi.EmptyFileException;
import org.apache.poi.hssf.record.RecordInputStream.LeftoverDataException;
import org.apache.poi.poifs.filesystem.NotOLE2FileException;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;

/**
* Fuzzes the XLS (BIFF) parsing path with arbitrary bytes.
*/
public class XlsReadFuzzTest {
private static final int MAX_SIZE = 1_000_000; // 1MB guard

@SneakyThrows
@FuzzTest
void fuzzXls(byte[] data) {
if (data == null || data.length == 0 || data.length > MAX_SIZE) {
return;
}
try (InputStream in = new ByteArrayInputStream(data)) {
ExcelReaderBuilder builder = FastExcelFactory.read(in).excelType(ExcelTypeEnum.XLS);
builder.sheet().doReadSync();
} catch (Throwable t) {
if (isBenignHssfParseException(t)) {
return; // expected for random inputs
}
throw t;
}
}

private static boolean isBenignHssfParseException(Throwable t) {
for (int i = 0; i < 6 && t != null; i++, t = t.getCause()) {
if (t instanceof NotOLE2FileException
|| t instanceof OfficeXmlFileException
|| t instanceof LeftoverDataException
|| t instanceof EmptyFileException
|| t instanceof ZipException) {
return true;
}
String msg = t.getMessage();
if (msg != null) {
String m = msg.toLowerCase();
if (m.contains("not ole2")
|| m.contains("invalid header signature")
|| m.contains("corrupt stream")
|| m.contains("invalid record")
|| m.contains("buffer underrun")
|| m.contains("buffer overrun")
|| m.contains("leftover")
|| m.contains("zip")) {
return true;
}
}
}
return false;
}
}