@@ -93,11 +93,11 @@ private fun setWorkbookTempDirectory() {
93
93
* @param skipRows number of rows before header
94
94
* @param rowsCount number of rows to read.
95
95
* @param nameRepairStrategy handling of column names.
96
- * @param withDefaultHeader make default header
97
96
* The default behavior is [NameRepairStrategy.CHECK_UNIQUE].
98
- * However, when withDefaultHeader is set to true,
99
- * it operates as [NameRepairStrategy.MAKE_UNIQUE],
100
- * ensuring unique column names are generated for unstructured data.
97
+ * @param firstRowIsHeader when set to true, the first row (after skipRows) is used as the header.
98
+ * When set to false, [NameRepairStrategy.MAKE_UNIQUE] is used instead of [nameRepairStrategy],
99
+ * and the columns are named after their Excel column letters, like "A", "B", "C", etc.
100
+ * This is intended for unstructured data without a header row.
101
101
*/
102
102
public fun DataFrame.Companion.readExcel (
103
103
url : URL ,
@@ -107,7 +107,7 @@ public fun DataFrame.Companion.readExcel(
107
107
stringColumns : StringColumns ? = null,
108
108
rowsCount : Int? = null,
109
109
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
110
- withDefaultHeader : Boolean = false ,
110
+ firstRowIsHeader : Boolean = true ,
111
111
): AnyFrame {
112
112
setWorkbookTempDirectory()
113
113
val wb = WorkbookFactory .create(url.openStream())
@@ -120,7 +120,7 @@ public fun DataFrame.Companion.readExcel(
120
120
stringColumns?.toFormattingOptions(),
121
121
rowsCount,
122
122
nameRepairStrategy,
123
- withDefaultHeader ,
123
+ firstRowIsHeader ,
124
124
)
125
125
}
126
126
}
@@ -133,11 +133,11 @@ public fun DataFrame.Companion.readExcel(
133
133
* @param skipRows number of rows before header
134
134
* @param rowsCount number of rows to read.
135
135
* @param nameRepairStrategy handling of column names.
136
- * @param withDefaultHeader make default header
137
136
* The default behavior is [NameRepairStrategy.CHECK_UNIQUE].
138
- * However, when withDefaultHeader is set to true,
139
- * it operates as [NameRepairStrategy.MAKE_UNIQUE],
140
- * ensuring unique column names are generated for unstructured data.
137
+ * @param firstRowIsHeader when set to true, the first row (after skipRows) is used as the header.
138
+ * When set to false, [NameRepairStrategy.MAKE_UNIQUE] is used instead of [nameRepairStrategy],
139
+ * and the columns are named after their Excel column letters, like "A", "B", "C", etc.
140
+ * This is intended for unstructured data without a header row.
141
141
*/
142
142
public fun DataFrame.Companion.readExcel (
143
143
file : File ,
@@ -147,7 +147,7 @@ public fun DataFrame.Companion.readExcel(
147
147
stringColumns : StringColumns ? = null,
148
148
rowsCount : Int? = null,
149
149
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
150
- withDefaultHeader : Boolean = false ,
150
+ firstRowIsHeader : Boolean = true ,
151
151
): AnyFrame {
152
152
setWorkbookTempDirectory()
153
153
val wb = WorkbookFactory .create(file)
@@ -160,7 +160,7 @@ public fun DataFrame.Companion.readExcel(
160
160
stringColumns?.toFormattingOptions(),
161
161
rowsCount,
162
162
nameRepairStrategy,
163
- withDefaultHeader ,
163
+ firstRowIsHeader ,
164
164
)
165
165
}
166
166
}
@@ -173,11 +173,11 @@ public fun DataFrame.Companion.readExcel(
173
173
* @param skipRows number of rows before header
174
174
* @param rowsCount number of rows to read.
175
175
* @param nameRepairStrategy handling of column names.
176
- * @param withDefaultHeader make default header
177
176
* The default behavior is [NameRepairStrategy.CHECK_UNIQUE].
178
- * However, when withDefaultHeader is set to true,
179
- * it operates as [NameRepairStrategy.MAKE_UNIQUE],
180
- * ensuring unique column names are generated for unstructured data.
177
+ * @param firstRowIsHeader when set to true, the first row (after skipRows) is used as the header.
178
+ * When set to false, [NameRepairStrategy.MAKE_UNIQUE] is used instead of [nameRepairStrategy],
179
+ * and the columns are named after their Excel column letters, like "A", "B", "C", etc.
180
+ * This is intended for unstructured data without a header row.
181
181
*/
182
182
@Refine
183
183
@Interpretable(" ReadExcel" )
@@ -189,7 +189,7 @@ public fun DataFrame.Companion.readExcel(
189
189
stringColumns : StringColumns ? = null,
190
190
rowsCount : Int? = null,
191
191
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
192
- withDefaultHeader : Boolean = false ,
192
+ firstRowIsHeader : Boolean = true ,
193
193
): AnyFrame =
194
194
readExcel(
195
195
asURL(fileOrUrl),
@@ -199,7 +199,7 @@ public fun DataFrame.Companion.readExcel(
199
199
stringColumns,
200
200
rowsCount,
201
201
nameRepairStrategy,
202
- withDefaultHeader ,
202
+ firstRowIsHeader ,
203
203
)
204
204
205
205
/**
@@ -210,11 +210,11 @@ public fun DataFrame.Companion.readExcel(
210
210
* @param skipRows number of rows before header
211
211
* @param rowsCount number of rows to read.
212
212
* @param nameRepairStrategy handling of column names.
213
- * @param withDefaultHeader make default header
214
213
* The default behavior is [NameRepairStrategy.CHECK_UNIQUE].
215
- * However, when withDefaultHeader is set to true,
216
- * it operates as [NameRepairStrategy.MAKE_UNIQUE],
217
- * ensuring unique column names are generated for unstructured data.
214
+ * @param firstRowIsHeader when set to true, the first row (after skipRows) is used as the header.
215
+ * When set to false, [NameRepairStrategy.MAKE_UNIQUE] is used instead of [nameRepairStrategy],
216
+ * and the columns are named after their Excel column letters, like "A", "B", "C", etc.
217
+ * This is intended for unstructured data without a header row.
218
218
*/
219
219
public fun DataFrame.Companion.readExcel (
220
220
inputStream : InputStream ,
@@ -224,7 +224,7 @@ public fun DataFrame.Companion.readExcel(
224
224
stringColumns : StringColumns ? = null,
225
225
rowsCount : Int? = null,
226
226
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
227
- withDefaultHeader : Boolean = false ,
227
+ firstRowIsHeader : Boolean = true ,
228
228
): AnyFrame {
229
229
setWorkbookTempDirectory()
230
230
val wb = WorkbookFactory .create(inputStream)
@@ -237,7 +237,7 @@ public fun DataFrame.Companion.readExcel(
237
237
stringColumns?.toFormattingOptions(),
238
238
rowsCount,
239
239
nameRepairStrategy,
240
- withDefaultHeader ,
240
+ firstRowIsHeader ,
241
241
)
242
242
}
243
243
}
@@ -251,11 +251,11 @@ public fun DataFrame.Companion.readExcel(
251
251
* @param skipRows number of rows before header
252
252
* @param rowsCount number of rows to read.
253
253
* @param nameRepairStrategy handling of column names.
254
- * @param withDefaultHeader make default header
255
254
* The default behavior is [NameRepairStrategy.CHECK_UNIQUE].
256
- * However, when withDefaultHeader is set to true,
257
- * it operates as [NameRepairStrategy.MAKE_UNIQUE],
258
- * ensuring unique column names are generated for unstructured data.
255
+ * @param firstRowIsHeader when set to true, the first row (after skipRows) is used as the header.
256
+ * When set to false, [NameRepairStrategy.MAKE_UNIQUE] is used instead of [nameRepairStrategy],
257
+ * and the columns are named after their Excel column letters, like "A", "B", "C", etc.
258
+ * This is intended for unstructured data without a header row.
259
259
*/
260
260
public fun DataFrame.Companion.readExcel (
261
261
wb : Workbook ,
@@ -265,12 +265,12 @@ public fun DataFrame.Companion.readExcel(
265
265
formattingOptions : FormattingOptions ? = null,
266
266
rowsCount : Int? = null,
267
267
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
268
- withDefaultHeader : Boolean = false ,
268
+ firstRowIsHeader : Boolean = true ,
269
269
): AnyFrame {
270
270
val sheet: Sheet = sheetName
271
271
?.let { wb.getSheet(it) ? : error(" Sheet with name $sheetName not found" ) }
272
272
? : wb.getSheetAt(0 )
273
- return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy, withDefaultHeader )
273
+ return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy, firstRowIsHeader )
274
274
}
275
275
276
276
/**
@@ -301,11 +301,11 @@ public class FormattingOptions(range: String, public val formatter: DataFormatte
301
301
* @param skipRows number of rows before header
302
302
* @param rowsCount number of rows to read.
303
303
* @param nameRepairStrategy handling of column names.
304
- * @param withDefaultHeader make default header
305
304
* The default behavior is [NameRepairStrategy.CHECK_UNIQUE].
306
- * However, when withDefaultHeader is set to true,
307
- * it operates as [NameRepairStrategy.MAKE_UNIQUE],
308
- * ensuring unique column names are generated for unstructured data.
305
+ * @param firstRowIsHeader when set to true, the first row (after skipRows) is used as the header.
306
+ * When set to false, [NameRepairStrategy.MAKE_UNIQUE] is used instead of [nameRepairStrategy],
307
+ * and the columns are named after their Excel column letters, like "A", "B", "C", etc.
308
+ * This is intended for unstructured data without a header row.
309
309
*/
310
310
public fun DataFrame.Companion.readExcel (
311
311
sheet : Sheet ,
@@ -314,20 +314,12 @@ public fun DataFrame.Companion.readExcel(
314
314
skipRows : Int = 0,
315
315
rowsCount : Int? = null,
316
316
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
317
- withDefaultHeader : Boolean = false ,
317
+ firstRowIsHeader : Boolean = true ,
318
318
): AnyFrame {
319
319
val columnIndexes: Iterable <Int > = when {
320
- withDefaultHeader -> {
321
- val notEmptyRow = sheet.rowIterator().asSequence().find { it != null }
322
- checkNotNull(notEmptyRow) {
323
- " There are no defined cells"
324
- }
325
- notEmptyRow.firstCellNum until notEmptyRow.lastCellNum
326
- }
327
-
328
320
columns != null -> getColumnIndices(columns)
329
321
330
- else -> {
322
+ firstRowIsHeader -> {
331
323
val headerRow = checkNotNull(sheet.getRow(skipRows)) {
332
324
" Row number ${skipRows + 1 } (1-based index) is not defined on the sheet ${sheet.sheetName} "
333
325
}
@@ -337,13 +329,21 @@ public fun DataFrame.Companion.readExcel(
337
329
}
338
330
headerRow.firstCellNum until headerRow.lastCellNum
339
331
}
332
+
333
+ else -> {
334
+ val notEmptyRow = sheet.rowIterator().asSequence().find { it != null }
335
+ checkNotNull(notEmptyRow) {
336
+ " There are no defined cells"
337
+ }
338
+ notEmptyRow.firstCellNum until notEmptyRow.lastCellNum
339
+ }
340
340
}
341
341
342
- val headerRow: Row ? = if (withDefaultHeader) {
342
+ val headerRow: Row ? = if (firstRowIsHeader) {
343
+ sheet.getRow(skipRows)
344
+ } else {
343
345
sheet.shiftRows(0 , sheet.lastRowNum, 1 )
344
346
sheet.createRow(0 )
345
- } else {
346
- sheet.getRow(skipRows)
347
347
}
348
348
349
349
val first = skipRows + 1
@@ -363,7 +363,7 @@ public fun DataFrame.Companion.readExcel(
363
363
val name = repairNameIfRequired(
364
364
nameFromCell,
365
365
columnNameCounters,
366
- if (withDefaultHeader) NameRepairStrategy . MAKE_UNIQUE else nameRepairStrategy ,
366
+ if (firstRowIsHeader) nameRepairStrategy else NameRepairStrategy . MAKE_UNIQUE ,
367
367
)
368
368
columnNameCounters[nameFromCell] =
369
369
columnNameCounters.getOrDefault(nameFromCell, 0 ) + 1 // increase the counter for specific column name
0 commit comments