@@ -6,6 +6,7 @@ import kotlinx.datetime.toKotlinLocalDateTime
6
6
import org.apache.poi.hssf.usermodel.HSSFWorkbook
7
7
import org.apache.poi.ss.usermodel.Cell
8
8
import org.apache.poi.ss.usermodel.CellType
9
+ import org.apache.poi.ss.usermodel.DataFormatter
9
10
import org.apache.poi.ss.usermodel.DateUtil
10
11
import org.apache.poi.ss.usermodel.RichTextString
11
12
import org.apache.poi.ss.usermodel.Row
@@ -83,6 +84,8 @@ private fun setWorkbookTempDirectory() {
83
84
/* *
84
85
* @param sheetName sheet to read. By default, the first sheet in the document
85
86
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
87
+ * @param stringColumns range of columns to read as String regardless of the cell type.
88
+ * For example, by default a numeric cell with value "3" will be parsed as Double with value 3.0. With this option, it will be simply "3".
86
89
* @param skipRows number of rows before header
87
90
* @param rowsCount number of rows to read.
88
91
* @param nameRepairStrategy handling of column names.
@@ -93,17 +96,22 @@ public fun DataFrame.Companion.readExcel(
93
96
sheetName : String? = null,
94
97
skipRows : Int = 0,
95
98
columns : String? = null,
99
+ stringColumns : StringColumns ? = null,
96
100
rowsCount : Int? = null,
97
101
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
98
102
): AnyFrame {
99
103
setWorkbookTempDirectory()
100
104
val wb = WorkbookFactory .create(url.openStream())
101
- return wb.use { readExcel(wb, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
105
+ return wb.use {
106
+ readExcel(wb, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
107
+ }
102
108
}
103
109
104
110
/* *
105
111
* @param sheetName sheet to read. By default, the first sheet in the document
106
112
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
113
+ * @param stringColumns range of columns to read as String regardless of the cell type.
114
+ * For example, by default a numeric cell with value "3" will be parsed as Double with value 3.0. With this option, it will be simply "3".
107
115
* @param skipRows number of rows before header
108
116
* @param rowsCount number of rows to read.
109
117
* @param nameRepairStrategy handling of column names.
@@ -114,17 +122,22 @@ public fun DataFrame.Companion.readExcel(
114
122
sheetName : String? = null,
115
123
skipRows : Int = 0,
116
124
columns : String? = null,
125
+ stringColumns : StringColumns ? = null,
117
126
rowsCount : Int? = null,
118
127
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
119
128
): AnyFrame {
120
129
setWorkbookTempDirectory()
121
130
val wb = WorkbookFactory .create(file)
122
- return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
131
+ return wb.use {
132
+ readExcel(it, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
133
+ }
123
134
}
124
135
125
136
/* *
126
137
* @param sheetName sheet to read. By default, the first sheet in the document
127
138
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
139
+ * @param stringColumns range of columns to read as String regardless of the cell type.
140
+ * For example, by default a numeric cell with value "3" will be parsed as Double with value 3.0. With this option, it will be simply "3".
128
141
* @param skipRows number of rows before header
129
142
* @param rowsCount number of rows to read.
130
143
* @param nameRepairStrategy handling of column names.
@@ -135,13 +148,17 @@ public fun DataFrame.Companion.readExcel(
135
148
sheetName : String? = null,
136
149
skipRows : Int = 0,
137
150
columns : String? = null,
151
+ stringColumns : StringColumns ? = null,
138
152
rowsCount : Int? = null,
139
153
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
140
- ): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, rowsCount, nameRepairStrategy)
154
+ ): AnyFrame =
155
+ readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, stringColumns, rowsCount, nameRepairStrategy)
141
156
142
157
/* *
143
158
* @param sheetName sheet to read. By default, the first sheet in the document
144
159
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
160
+ * @param stringColumns range of columns to read as String regardless of the cell type.
161
+ * For example, by default a numeric cell with value "3" will be parsed as Double with value 3.0. With this option, it will be simply "3".
145
162
* @param skipRows number of rows before header
146
163
* @param rowsCount number of rows to read.
147
164
* @param nameRepairStrategy handling of column names.
@@ -152,17 +169,23 @@ public fun DataFrame.Companion.readExcel(
152
169
sheetName : String? = null,
153
170
skipRows : Int = 0,
154
171
columns : String? = null,
172
+ stringColumns : StringColumns ? = null,
155
173
rowsCount : Int? = null,
156
174
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
157
175
): AnyFrame {
158
176
setWorkbookTempDirectory()
159
177
val wb = WorkbookFactory .create(inputStream)
160
- return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
178
+ return wb.use {
179
+ readExcel(it, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
180
+ }
161
181
}
162
182
163
183
/* *
164
184
* @param sheetName sheet to read. By default, the first sheet in the document
165
185
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
186
+ * @param formattingOptions columns to read as String regardless of the cell type, together with the formatter used to convert them.
187
+ * For example, by default a numeric cell with value "3" will be parsed as Double with value 3.0. With this option, it will be simply "3".
188
+ * See also [FormattingOptions.formatter] and [DataFormatter.formatCellValue].
166
189
* @param skipRows number of rows before header
167
190
* @param rowsCount number of rows to read.
168
191
* @param nameRepairStrategy handling of column names.
@@ -173,18 +196,39 @@ public fun DataFrame.Companion.readExcel(
173
196
sheetName : String? = null,
174
197
skipRows : Int = 0,
175
198
columns : String? = null,
199
+ formattingOptions : FormattingOptions ? = null,
176
200
rowsCount : Int? = null,
177
201
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
178
202
): AnyFrame {
179
203
val sheet: Sheet = sheetName
180
204
?.let { wb.getSheet(it) ? : error(" Sheet with name $sheetName not found" ) }
181
205
? : wb.getSheetAt(0 )
182
- return readExcel(sheet, columns, skipRows, rowsCount, nameRepairStrategy)
206
+ return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy)
207
+ }
208
+
209
/**
 * Wrapper around a column specification naming the columns whose cells should be read as [String].
 *
 * @property range comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
 */
@JvmInline
public value class StringColumns(
    public val range: String,
)
214
+
215
/**
 * Builds [FormattingOptions] for the columns described by [StringColumns.range].
 *
 * @param formatter formatter handed over to the resulting [FormattingOptions]; a new [DataFormatter] by default
 * @return options combining [StringColumns.range] with [formatter]
 */
public fun StringColumns.toFormattingOptions(formatter: DataFormatter = DataFormatter()): FormattingOptions {
    return FormattingOptions(range = range, formatter = formatter)
}
217
+
218
/**
 * Describes which columns are read as formatted strings and which [DataFormatter] produces those strings.
 *
 * @param range comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
 * @property formatter formatter applied to the cells of the selected columns; a new [DataFormatter] by default
 * @property columnIndices distinct column indices parsed from [range]
 */
public class FormattingOptions(
    range: String,
    public val formatter: DataFormatter = DataFormatter(),
) {
    public val columnIndices: Set<Int> = getColumnIndices(range).toSet()
}
184
225
185
226
/* *
186
227
* @param sheet sheet to read.
187
228
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
229
+ * @param formattingOptions columns to read as String regardless of the cell type, together with the formatter used to convert them.
230
+ * For example, by default a numeric cell with value "3" will be parsed as Double with value 3.0. With this option, it will be simply "3".
231
+ * See also [FormattingOptions.formatter] and [DataFormatter.formatCellValue].
188
232
* @param skipRows number of rows before header
189
233
* @param rowsCount number of rows to read.
190
234
* @param nameRepairStrategy handling of column names.
@@ -193,19 +237,13 @@ public fun DataFrame.Companion.readExcel(
193
237
public fun DataFrame.Companion.readExcel (
194
238
sheet : Sheet ,
195
239
columns : String? = null,
240
+ formattingOptions : FormattingOptions ? = null,
196
241
skipRows : Int = 0,
197
242
rowsCount : Int? = null,
198
243
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
199
244
): AnyFrame {
200
245
val columnIndexes: Iterable <Int > = if (columns != null ) {
201
- columns.split(" ," ).flatMap {
202
- if (it.contains(" :" )) {
203
- val (start, end) = it.split(" :" ).map { CellReference .convertColStringToIndex(it) }
204
- start.. end
205
- } else {
206
- listOf (CellReference .convertColStringToIndex(it))
207
- }
208
- }
246
+ getColumnIndices(columns)
209
247
} else {
210
248
val headerRow = checkNotNull(sheet.getRow(skipRows)) {
211
249
" Row number ${skipRows + 1 } (1-based index) is not defined on the sheet ${sheet.sheetName} "
@@ -235,17 +273,32 @@ public fun DataFrame.Companion.readExcel(
235
273
val name = repairNameIfRequired(nameFromCell, columnNameCounters, nameRepairStrategy)
236
274
columnNameCounters[nameFromCell] =
237
275
columnNameCounters.getOrDefault(nameFromCell, 0 ) + 1 // increase the counter for specific column name
276
+ val getCellValue: (Cell ? ) -> Any? = when {
277
+ formattingOptions != null && index in formattingOptions.columnIndices -> { cell: Cell ? ->
278
+ formattingOptions.formatter.formatCellValue(cell)
279
+ }
238
280
281
+ else -> { cell -> cell.cellValue(sheet.sheetName) }
282
+ }
239
283
val values: List <Any ?> = valueRowsRange.map {
240
284
val row: Row ? = sheet.getRow(it)
241
285
val cell: Cell ? = row?.getCell(index)
242
- cell.cellValue(sheet.sheetName )
286
+ getCellValue(cell )
243
287
}
244
288
DataColumn .createWithTypeInference(name, values)
245
289
}
246
290
return dataFrameOf(columns)
247
291
}
248
292
293
/**
 * Converts a column specification into column indices.
 *
 * The specification is a comma separated list of Excel column letters and column ranges
 * (e.g. "A:E" or "A,C,E:F"). Whitespace around letters, commas and colons is ignored,
 * so "A, C, E:F" is accepted as well.
 *
 * @param columns comma separated list of column letters and `start:end` ranges
 * @return indices, as produced by [CellReference.convertColStringToIndex], in the order they appear
 * in [columns]; a range contributes every index from its start to its end inclusive.
 * Duplicates are not removed.
 */
private fun getColumnIndices(columns: String): List<Int> =
    columns.split(",").flatMap { token ->
        if (token.contains(":")) {
            // Trim each bound: the letters are converted as-is, so stray spaces must not reach the conversion.
            val (start, end) = token.split(":").map { bound ->
                CellReference.convertColStringToIndex(bound.trim())
            }
            start..end
        } else {
            listOf(CellReference.convertColStringToIndex(token.trim()))
        }
    }
301
+
249
302
/* *
250
303
* This is a universal function for name repairing
251
304
* and should be moved to the API module later,
@@ -324,7 +377,7 @@ public fun <T> DataFrame<T>.writeExcel(
324
377
keepFile : Boolean = false,
325
378
) {
326
379
val factory =
327
- if (keepFile){
380
+ if (keepFile) {
328
381
when (workBookType) {
329
382
WorkBookType .XLS -> HSSFWorkbook (file.inputStream())
330
383
WorkBookType .XLSX -> XSSFWorkbook (file.inputStream())
0 commit comments