Skip to content

Commit 2d84428

Browse files
deleialaahongpsxjoy
authored
feat: add a parameter named autoStrip (#567)
* feat: add strip and isBlankChar methods for whitespace handling * feat: add auto strip feature for sheet name and content * feat: add parameter utility class for autoTrim and autoStrip * fix: improve StringUtils.strip() and isBlankChar() methods * refactor: optimize sheet name comparison logic * feat: add autoStrip parameter and improve related functionalities * test: add unit tests for StringUtils * test: add unit tests for auto strip parameters * refactor: reorganize imports in AutoStripParameterTest * test: update string utils test for zero width joiner character * refactor: reorganize the comment * refactor: update method comments for isBlankChar --------- Co-authored-by: ian zhang <[email protected]> Co-authored-by: Shuxin Pan <[email protected]>
1 parent 651d2a4 commit 2d84428

File tree

17 files changed

+476
-48
lines changed

17 files changed

+476
-48
lines changed

fastexcel/src/main/java/cn/idev/excel/analysis/csv/CsvExcelReadExecutor.java

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -146,18 +146,21 @@ private CSVParser csvParser() throws IOException {
146146
// As a fallback, build the CSV parser using the input stream.
147147
return buildCsvParser(csvFormat, csvReadWorkbookHolder.getInputStream(), byteOrderMark);
148148
}
149+
149150
/**
150151
* Builds and returns a CSVParser instance based on the provided CSVFormat, InputStream, and ByteOrderMarkEnum.
151152
*
152-
* @param csvFormat The format configuration for parsing the CSV file.
153-
* @param inputStream The input stream from which the CSV data will be read.
154-
* @param byteOrderMark The enumeration representing the Byte Order Mark (BOM) of the file's character set.
155-
* @return A CSVParser instance configured to parse the CSV data.
156-
* @throws IOException If an I/O error occurs while creating the parser or reading from the input stream.
157-
*
153+
* <p>
158154
* This method checks if the byteOrderMark is null. If it is null, it creates a CSVParser using the provided
159155
* input stream and charset. Otherwise, it wraps the input stream with a BOMInputStream to handle files with a
160156
* Byte Order Mark, ensuring proper decoding of the file content.
157+
* </p>
158+
*
159+
* @param csvFormat The format configuration for parsing the CSV file.
160+
* @param inputStream The input stream from which the CSV data will be read.
161+
* @param byteOrderMark The enumeration representing the Byte Order Mark (BOM) of the file's character set.
162+
* @return A CSVParser instance configured to parse the CSV data.
163+
* @throws IOException If an I/O error occurs while creating the parser or reading from the input stream.
161164
*/
162165
private CSVParser buildCsvParser(CSVFormat csvFormat, InputStream inputStream, ByteOrderMarkEnum byteOrderMark)
163166
throws IOException {
@@ -173,26 +176,28 @@ private CSVParser buildCsvParser(CSVFormat csvFormat, InputStream inputStream, B
173176
/**
174177
* Processes a single CSV record and maps its content to a structured format for further analysis.
175178
*
176-
* @param record The CSV record to be processed.
179+
* @param record The CSV record to be processed.
177180
* @param rowIndex The index of the current row being processed.
178-
* This method performs the following steps:
179-
* 1. Initializes a `LinkedHashMap` to store cell data, ensuring the order of columns is preserved.
180-
* 2. Iterates through each cell in the CSV record using an iterator.
181-
* 3. For each cell, creates a `ReadCellData` object and sets its metadata (row index, column index, type, and value).
182-
* - If the cell is not blank, it is treated as a string and optionally trimmed based on the `autoTrim` configuration.
183-
* - If the cell is blank, it is marked as empty.
184-
* 4. Adds the processed cell data to the `cellMap`.
185-
* 5. Determines the row type: if the `cellMap` is empty, the row is marked as `EMPTY`; otherwise, it is marked as `DATA`.
186-
* 6. Creates a `ReadRowHolder` object with the row's metadata and cell map, and stores it in the context.
187-
* 7. Updates the context's sheet holder with the cell map and row index.
188-
* 8. Notifies the analysis event processor that the row processing has ended.
181+
* This method performs the following steps:
182+
* 1. Initializes a `LinkedHashMap` to store cell data, ensuring the order of columns is preserved.
183+
* 2. Iterates through each cell in the CSV record using an iterator.
184+
* 3. For each cell, creates a `ReadCellData` object and sets its metadata (row index, column index, type, and value).
185+
* - If the cell is not blank, it is treated as a string; the value is stripped when `autoStrip` is enabled, otherwise trimmed when `autoTrim` is enabled, otherwise kept as-is.
186+
* - If the cell is blank, it is marked as empty.
187+
* 4. Adds the processed cell data to the `cellMap`.
188+
* 5. Determines the row type: if the `cellMap` is empty, the row is marked as `EMPTY`; otherwise, it is marked as `DATA`.
189+
* 6. Creates a `ReadRowHolder` object with the row's metadata and cell map, and stores it in the context.
190+
* 7. Updates the context's sheet holder with the cell map and row index.
191+
* 8. Notifies the analysis event processor that the row processing has ended.
189192
*/
190193
private void dealRecord(CSVRecord record, int rowIndex) {
191194
Map<Integer, Cell> cellMap = new LinkedHashMap<>();
192195
Iterator<String> cellIterator = record.iterator();
193196
int columnIndex = 0;
194197
Boolean autoTrim =
195-
csvReadContext.currentReadHolder().globalConfiguration().getAutoTrim();
198+
csvReadContext.csvReadWorkbookHolder().globalConfiguration().getAutoTrim();
199+
Boolean autoStrip =
200+
csvReadContext.csvReadWorkbookHolder().globalConfiguration().getAutoStrip();
196201
while (cellIterator.hasNext()) {
197202
String cellString = cellIterator.next();
198203
ReadCellData<String> readCellData = new ReadCellData<>();
@@ -202,7 +207,13 @@ private void dealRecord(CSVRecord record, int rowIndex) {
202207
// In CSV, both <code>,,</code> and <code>,"",</code> are read as an empty string.
203208
if (StringUtils.isNotBlank(cellString)) {
204209
readCellData.setType(CellDataTypeEnum.STRING);
205-
readCellData.setStringValue(autoTrim ? cellString.trim() : cellString);
210+
if (autoStrip) {
211+
readCellData.setStringValue(StringUtils.strip(cellString));
212+
} else if (autoTrim) {
213+
readCellData.setStringValue(cellString.trim());
214+
} else {
215+
readCellData.setStringValue(cellString);
216+
}
206217
} else {
207218
readCellData.setType(CellDataTypeEnum.EMPTY);
208219
}

fastexcel/src/main/java/cn/idev/excel/analysis/v03/handlers/LabelRecordHandler.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import cn.idev.excel.analysis.v03.IgnorableXlsRecordHandler;
44
import cn.idev.excel.context.xls.XlsReadContext;
55
import cn.idev.excel.enums.RowTypeEnum;
6+
import cn.idev.excel.metadata.GlobalConfiguration;
67
import cn.idev.excel.metadata.data.ReadCellData;
8+
import cn.idev.excel.util.StringUtils;
79
import org.apache.poi.hssf.record.LabelRecord;
810
import org.apache.poi.hssf.record.Record;
911

@@ -15,9 +17,14 @@ public class LabelRecordHandler extends AbstractXlsRecordHandler implements Igno
1517
public void processRecord(XlsReadContext xlsReadContext, Record record) {
1618
LabelRecord lrec = (LabelRecord) record;
1719
String data = lrec.getValue();
18-
if (data != null
19-
&& xlsReadContext.currentReadHolder().globalConfiguration().getAutoTrim()) {
20-
data = data.trim();
20+
if (data != null) {
21+
GlobalConfiguration globalConfiguration =
22+
xlsReadContext.currentReadHolder().globalConfiguration();
23+
if (globalConfiguration.getAutoStrip()) {
24+
data = StringUtils.strip(data);
25+
} else if (globalConfiguration.getAutoTrim()) {
26+
data = data.trim();
27+
}
2128
}
2229
xlsReadContext
2330
.xlsReadSheetHolder()

fastexcel/src/main/java/cn/idev/excel/analysis/v03/handlers/LabelSstRecordHandler.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
import cn.idev.excel.context.xls.XlsReadContext;
66
import cn.idev.excel.enums.RowTypeEnum;
77
import cn.idev.excel.metadata.Cell;
8+
import cn.idev.excel.metadata.GlobalConfiguration;
89
import cn.idev.excel.metadata.data.ReadCellData;
10+
import cn.idev.excel.util.StringUtils;
911
import java.util.Map;
1012
import org.apache.poi.hssf.record.LabelSSTRecord;
1113
import org.apache.poi.hssf.record.Record;
@@ -31,7 +33,12 @@ public void processRecord(XlsReadContext xlsReadContext, Record record) {
3133
(int) lsrec.getColumn(), ReadCellData.newEmptyInstance(lsrec.getRow(), (int) lsrec.getColumn()));
3234
return;
3335
}
34-
if (xlsReadContext.currentReadHolder().globalConfiguration().getAutoTrim()) {
36+
37+
GlobalConfiguration globalConfiguration =
38+
xlsReadContext.currentReadHolder().globalConfiguration();
39+
if (globalConfiguration.getAutoStrip()) {
40+
data = StringUtils.strip(data);
41+
} else if (globalConfiguration.getAutoTrim()) {
3542
data = data.trim();
3643
}
3744
cellMap.put((int) lsrec.getColumn(), ReadCellData.newInstance(data, lsrec.getRow(), (int) lsrec.getColumn()));

fastexcel/src/main/java/cn/idev/excel/analysis/v07/handlers/CellTagHandler.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import cn.idev.excel.constant.FastExcelConstants;
55
import cn.idev.excel.context.xlsx.XlsxReadContext;
66
import cn.idev.excel.enums.CellDataTypeEnum;
7+
import cn.idev.excel.metadata.GlobalConfiguration;
78
import cn.idev.excel.metadata.data.ReadCellData;
89
import cn.idev.excel.read.metadata.holder.xlsx.XlsxReadSheetHolder;
910
import cn.idev.excel.util.BooleanUtils;
@@ -95,9 +96,14 @@ public void endElement(XlsxReadContext xlsxReadContext, String name) {
9596
throw new IllegalStateException("Cannot set values now");
9697
}
9798

98-
if (tempCellData.getStringValue() != null
99-
&& xlsxReadContext.currentReadHolder().globalConfiguration().getAutoTrim()) {
100-
tempCellData.setStringValue(tempCellData.getStringValue().trim());
99+
if (tempCellData.getStringValue() != null) {
100+
GlobalConfiguration globalConfiguration =
101+
xlsxReadContext.currentReadHolder().globalConfiguration();
102+
if (globalConfiguration.getAutoStrip()) {
103+
tempCellData.setStringValue(StringUtils.strip(tempCellData.getStringValue()));
104+
} else if (globalConfiguration.getAutoTrim()) {
105+
tempCellData.setStringValue(tempCellData.getStringValue().trim());
106+
}
101107
}
102108

103109
tempCellData.checkEmpty();

fastexcel/src/main/java/cn/idev/excel/metadata/AbstractHolder.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,15 @@ public AbstractHolder(BasicParameter basicParameter, AbstractHolder prentAbstrac
6565
globalConfiguration.setAutoTrim(basicParameter.getAutoTrim());
6666
}
6767

68+
if (basicParameter.getAutoStrip() == null) {
69+
if (prentAbstractHolder != null) {
70+
globalConfiguration.setAutoStrip(
71+
prentAbstractHolder.getGlobalConfiguration().getAutoStrip());
72+
}
73+
} else {
74+
globalConfiguration.setAutoStrip(basicParameter.getAutoStrip());
75+
}
76+
6877
if (basicParameter.getUse1904windowing() == null) {
6978
if (prentAbstractHolder != null) {
7079
globalConfiguration.setUse1904windowing(

fastexcel/src/main/java/cn/idev/excel/metadata/AbstractParameterBuilder.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ public T registerConverter(Converter<?> converter) {
5858

5959
/**
6060
* true if date uses 1904 windowing, or false if using 1900 date windowing.
61-
*
61+
* <p>
6262
* default is false
6363
*
6464
* @param use1904windowing
@@ -83,7 +83,7 @@ public T locale(Locale locale) {
8383

8484
/**
8585
* The cache used when parsing fields such as head.
86-
*
86+
* <p>
8787
* default is THREAD_LOCAL.
8888
*
8989
* @since 3.3.0
@@ -104,6 +104,17 @@ public T autoTrim(Boolean autoTrim) {
104104
return self();
105105
}
106106

107+
/**
108+
* Automatically strips sheet names and cell content; when enabled, it takes precedence over autoTrim.
109+
*
110+
* @param autoStrip whether to strip sheet names and cell content; {@code null} inherits the parent holder's setting
111+
* @return this builder, to allow call chaining
112+
*/
113+
public T autoStrip(Boolean autoStrip) {
114+
parameter().setAutoStrip(autoStrip);
115+
return self();
116+
}
117+
107118
@SuppressWarnings("unchecked")
108119
protected T self() {
109120
return (T) this;

fastexcel/src/main/java/cn/idev/excel/metadata/BasicParameter.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,8 @@ public class BasicParameter {
6060
* default is THREAD_LOCAL.
6161
*/
6262
private CacheLocationEnum filedCacheLocation;
63+
/**
64+
* Whether to automatically strip sheet names and cell content; {@code null} inherits the parent holder's setting.
65+
*/
66+
private Boolean autoStrip;
6367
}

fastexcel/src/main/java/cn/idev/excel/metadata/GlobalConfiguration.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ public class GlobalConfiguration {
2323
* true if date uses 1904 windowing, or false if using 1900 date windowing.
2424
*
2525
* default is false
26-
*
27-
* @return
2826
*/
2927
private Boolean use1904windowing;
3028
/**
@@ -47,8 +45,16 @@ public class GlobalConfiguration {
4745
*/
4846
private CacheLocationEnum filedCacheLocation;
4947

48+
/**
49+
* Whether to automatically strip sheet names and cell content.
50+
*
51+
* default is false
52+
*/
53+
private Boolean autoStrip;
54+
5055
public GlobalConfiguration() {
5156
this.autoTrim = Boolean.TRUE;
57+
this.autoStrip = Boolean.FALSE;
5258
this.use1904windowing = Boolean.FALSE;
5359
this.locale = Locale.getDefault();
5460
this.useScientificFormat = Boolean.FALSE;

fastexcel/src/main/java/cn/idev/excel/read/builder/CsvReaderBuilder.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,9 @@ public CsvReaderBuilder escape(Character escape) {
107107
}
108108

109109
private ExcelReader buildExcelReader() {
110-
if (this.readWorkbook.getAutoTrim() != null) {
111-
this.csvFormatBuilder.setTrim(this.readWorkbook.getAutoTrim());
112-
}
110+
this.csvFormatBuilder.setTrim(this.readWorkbook.getAutoTrim() == null
111+
|| this.readWorkbook.getAutoTrim()
112+
|| Boolean.TRUE.equals(this.readWorkbook.getAutoStrip()));
113113
if (this.readWorkbook.getIgnoreEmptyRow() != null) {
114114
this.csvFormatBuilder.setIgnoreEmptyLines(this.readWorkbook.getIgnoreEmptyRow());
115115
}

fastexcel/src/main/java/cn/idev/excel/read/metadata/ReadSheet.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ public void copyBasicParameter(ReadSheet other) {
9696
this.setClazz(other.getClazz());
9797
this.setCustomConverterList(other.getCustomConverterList());
9898
this.setAutoTrim(other.getAutoTrim());
99+
this.setAutoStrip(other.getAutoStrip());
99100
this.setUse1904windowing(other.getUse1904windowing());
100101
this.setNumRows(other.getNumRows());
101102
this.setHidden(other.isHidden());

0 commit comments

Comments
 (0)