@@ -98,6 +98,8 @@ private fun setWorkbookTempDirectory() {
98
98
* when set to false, it operates as [NameRepairStrategy.MAKE_UNIQUE],
99
99
* ensuring unique column names will make the columns be named according to excel columns, like "A", "B", "C" etc.
100
100
* for unstructured data.
101
+ * @param parseEmptyAsNull when set to true, empty strings in cells are parsed as null (default true).
102
+ * These cells are ignored when inferring the column’s type.
101
103
*/
102
104
public fun DataFrame.Companion.readExcel (
103
105
url : URL ,
@@ -108,6 +110,7 @@ public fun DataFrame.Companion.readExcel(
108
110
rowsCount : Int? = null,
109
111
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
110
112
firstRowIsHeader : Boolean = true,
113
+ parseEmptyAsNull : Boolean = true,
111
114
): AnyFrame {
112
115
setWorkbookTempDirectory()
113
116
val wb = WorkbookFactory .create(url.openStream())
@@ -121,6 +124,7 @@ public fun DataFrame.Companion.readExcel(
121
124
rowsCount,
122
125
nameRepairStrategy,
123
126
firstRowIsHeader,
127
+ parseEmptyAsNull
124
128
)
125
129
}
126
130
}
@@ -138,6 +142,8 @@ public fun DataFrame.Companion.readExcel(
138
142
* when set to false, it operates as [NameRepairStrategy.MAKE_UNIQUE],
139
143
* ensuring unique column names will make the columns be named according to excel columns, like "A", "B", "C" etc.
140
144
* for unstructured data.
145
+ * @param parseEmptyAsNull when set to true, empty strings in cells are parsed as null (default true).
146
+ * These cells are ignored when inferring the column’s type.
141
147
*/
142
148
public fun DataFrame.Companion.readExcel (
143
149
file : File ,
@@ -148,6 +154,7 @@ public fun DataFrame.Companion.readExcel(
148
154
rowsCount : Int? = null,
149
155
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
150
156
firstRowIsHeader : Boolean = true,
157
+ parseEmptyAsNull : Boolean = true,
151
158
): AnyFrame {
152
159
setWorkbookTempDirectory()
153
160
@Suppress(" ktlint:standard:comment-wrapping" )
@@ -162,6 +169,7 @@ public fun DataFrame.Companion.readExcel(
162
169
rowsCount,
163
170
nameRepairStrategy,
164
171
firstRowIsHeader,
172
+ parseEmptyAsNull
165
173
)
166
174
}
167
175
}
@@ -179,6 +187,8 @@ public fun DataFrame.Companion.readExcel(
179
187
* when set to false, it operates as [NameRepairStrategy.MAKE_UNIQUE],
180
188
* ensuring unique column names will make the columns be named according to excel columns, like "A", "B", "C" etc.
181
189
* for unstructured data.
190
+ * @param parseEmptyAsNull when set to true, empty strings in cells are parsed as null (default true).
191
+ * These cells are ignored when inferring the column’s type.
182
192
*/
183
193
public fun DataFrame.Companion.readExcel (
184
194
fileOrUrl : String ,
@@ -189,6 +199,7 @@ public fun DataFrame.Companion.readExcel(
189
199
rowsCount : Int? = null,
190
200
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
191
201
firstRowIsHeader : Boolean = true,
202
+ parseEmptyAsNull : Boolean = true,
192
203
): AnyFrame =
193
204
readExcel(
194
205
asUrl(fileOrUrl),
@@ -199,6 +210,7 @@ public fun DataFrame.Companion.readExcel(
199
210
rowsCount,
200
211
nameRepairStrategy,
201
212
firstRowIsHeader,
213
+ parseEmptyAsNull
202
214
)
203
215
204
216
/* *
@@ -214,6 +226,8 @@ public fun DataFrame.Companion.readExcel(
214
226
* when set to false, it operates as [NameRepairStrategy.MAKE_UNIQUE],
215
227
* ensuring unique column names will make the columns be named according to excel columns, like "A", "B", "C" etc.
216
228
* for unstructured data.
229
+ * @param parseEmptyAsNull when set to true, empty strings in cells are parsed as null (default true).
230
+ * These cells are ignored when inferring the column’s type.
217
231
*/
218
232
public fun DataFrame.Companion.readExcel (
219
233
inputStream : InputStream ,
@@ -224,6 +238,7 @@ public fun DataFrame.Companion.readExcel(
224
238
rowsCount : Int? = null,
225
239
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
226
240
firstRowIsHeader : Boolean = true,
241
+ parseEmptyAsNull : Boolean = true,
227
242
): AnyFrame {
228
243
setWorkbookTempDirectory()
229
244
val wb = WorkbookFactory .create(inputStream)
@@ -237,6 +252,7 @@ public fun DataFrame.Companion.readExcel(
237
252
rowsCount,
238
253
nameRepairStrategy,
239
254
firstRowIsHeader,
255
+ parseEmptyAsNull
240
256
)
241
257
}
242
258
}
@@ -255,6 +271,8 @@ public fun DataFrame.Companion.readExcel(
255
271
* when set to false, it operates as [NameRepairStrategy.MAKE_UNIQUE],
256
272
* ensuring unique column names will make the columns be named according to excel columns, like "A", "B", "C" etc.
257
273
* for unstructured data.
274
+ * @param parseEmptyAsNull when set to true, empty strings in cells are parsed as null (default true).
275
+ * These cells are ignored when inferring the column’s type.
258
276
*/
259
277
public fun DataFrame.Companion.readExcel (
260
278
wb : Workbook ,
@@ -265,11 +283,12 @@ public fun DataFrame.Companion.readExcel(
265
283
rowsCount : Int? = null,
266
284
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
267
285
firstRowIsHeader : Boolean = true,
286
+ parseEmptyAsNull : Boolean = true,
268
287
): AnyFrame {
269
288
val sheet: Sheet = sheetName
270
289
?.let { wb.getSheet(it) ? : error(" Sheet with name $sheetName not found" ) }
271
290
? : wb.getSheetAt(0 )
272
- return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy, firstRowIsHeader)
291
+ return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy, firstRowIsHeader, parseEmptyAsNull )
273
292
}
274
293
275
294
/* *
@@ -312,6 +331,7 @@ public fun DataFrame.Companion.readExcel(
312
331
rowsCount : Int? = null,
313
332
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
314
333
firstRowIsHeader : Boolean = true,
334
+ parseEmptyAsNull : Boolean = true,
315
335
): AnyFrame {
316
336
val columnIndexes: Iterable <Int > = when {
317
337
columns != null -> getColumnIndices(columns)
@@ -364,12 +384,18 @@ public fun DataFrame.Companion.readExcel(
364
384
)
365
385
columnNameCounters[nameFromCell] =
366
386
columnNameCounters.getOrDefault(nameFromCell, 0 ) + 1 // increase the counter for specific column name
367
// Choose the reader once per column: neither `formattingOptions` nor `index` changes
// between cells of the same column, so the membership test does not need to run per cell.
// `columnFormatter` is non-null only when this column is listed in `formattingOptions.columnIndices`.
val columnFormatter = formattingOptions?.takeIf { index in it.columnIndices }?.formatter

// Reads a single cell of this column.
// - A missing cell (null) yields null.
// - Otherwise the value is the formatter's text (formatted columns) or `cellValue(...)`.
// - When `parseEmptyAsNull` is true, an empty-string result is replaced by null.
// NOTE(review): the code this replaced passed a null cell to the formatter for formatted
// columns; here a missing cell is always null, even when `parseEmptyAsNull` is false —
// confirm that this is the intended behaviour.
val getCellValue: (Cell?) -> Any? = { cell ->
    if (cell == null) {
        null
    } else {
        val rawValue: Any? = if (columnFormatter != null) {
            columnFormatter.formatCellValue(cell)
        } else {
            cell.cellValue(sheet.sheetName)
        }
        if (parseEmptyAsNull && rawValue is String && rawValue.isEmpty()) null else rawValue
    }
}
374
400
val values: List <Any ?> = valueRowsRange.map {
375
401
val row: Row ? = sheet.getRow(it)
0 commit comments