@@ -6,6 +6,7 @@ import kotlinx.datetime.toKotlinLocalDateTime
66import org.apache.poi.hssf.usermodel.HSSFWorkbook
77import org.apache.poi.ss.usermodel.Cell
88import org.apache.poi.ss.usermodel.CellType
9+ import org.apache.poi.ss.usermodel.DataFormatter
910import org.apache.poi.ss.usermodel.DateUtil
1011import org.apache.poi.ss.usermodel.RichTextString
1112import org.apache.poi.ss.usermodel.Row
@@ -83,6 +84,8 @@ private fun setWorkbookTempDirectory() {
8384/* *
8485 * @param sheetName sheet to read. By default, the first sheet in the document
8586 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
87+ * @param stringColumns range of columns to read as String regardless of the cell type.
88+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the String "3".
8689 * @param skipRows number of rows before header
8790 * @param rowsCount number of rows to read.
8891 * @param nameRepairStrategy handling of column names.
@@ -93,17 +96,22 @@ public fun DataFrame.Companion.readExcel(
9396 sheetName : String? = null,
9497 skipRows : Int = 0,
9598 columns : String? = null,
99+ stringColumns : StringColumns ? = null,
96100 rowsCount : Int? = null,
97101 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
98102): AnyFrame {
99103 setWorkbookTempDirectory()
100104 val wb = WorkbookFactory .create(url.openStream())
101- return wb.use { readExcel(wb, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
105+ return wb.use {
106+ readExcel(wb, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
107+ }
102108}
103109
104110/* *
105111 * @param sheetName sheet to read. By default, the first sheet in the document
106112 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
113+ * @param stringColumns range of columns to read as String regardless of the cell type.
114+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the String "3".
107115 * @param skipRows number of rows before header
108116 * @param rowsCount number of rows to read.
109117 * @param nameRepairStrategy handling of column names.
@@ -114,17 +122,22 @@ public fun DataFrame.Companion.readExcel(
114122 sheetName : String? = null,
115123 skipRows : Int = 0,
116124 columns : String? = null,
125+ stringColumns : StringColumns ? = null,
117126 rowsCount : Int? = null,
118127 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
119128): AnyFrame {
120129 setWorkbookTempDirectory()
121130 val wb = WorkbookFactory .create(file)
122- return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
131+ return wb.use {
132+ readExcel(it, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
133+ }
123134}
124135
125136/* *
126137 * @param sheetName sheet to read. By default, the first sheet in the document
127138 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
139+ * @param stringColumns range of columns to read as String regardless of the cell type.
140+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the String "3".
128141 * @param skipRows number of rows before header
129142 * @param rowsCount number of rows to read.
130143 * @param nameRepairStrategy handling of column names.
@@ -135,13 +148,17 @@ public fun DataFrame.Companion.readExcel(
135148 sheetName : String? = null,
136149 skipRows : Int = 0,
137150 columns : String? = null,
151+ stringColumns : StringColumns ? = null,
138152 rowsCount : Int? = null,
139153 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
140- ): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, rowsCount, nameRepairStrategy)
154+ ): AnyFrame =
155+ readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, stringColumns, rowsCount, nameRepairStrategy)
141156
142157/* *
143158 * @param sheetName sheet to read. By default, the first sheet in the document
144159 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
160+ * @param stringColumns range of columns to read as String regardless of the cell type.
161+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the String "3".
145162 * @param skipRows number of rows before header
146163 * @param rowsCount number of rows to read.
147164 * @param nameRepairStrategy handling of column names.
@@ -152,17 +169,23 @@ public fun DataFrame.Companion.readExcel(
152169 sheetName : String? = null,
153170 skipRows : Int = 0,
154171 columns : String? = null,
172+ stringColumns : StringColumns ? = null,
155173 rowsCount : Int? = null,
156174 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
157175): AnyFrame {
158176 setWorkbookTempDirectory()
159177 val wb = WorkbookFactory .create(inputStream)
160- return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
178+ return wb.use {
179+ readExcel(it, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
180+ }
161181}
162182
163183/* *
164184 * @param sheetName sheet to read. By default, the first sheet in the document
165185 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
186+ * @param formattingOptions columns to read as String regardless of the cell type, together with the formatter used to convert their cells.
187+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the String "3".
188+ * See also [FormattingOptions.formatter] and [DataFormatter.formatCellValue].
166189 * @param skipRows number of rows before header
167190 * @param rowsCount number of rows to read.
168191 * @param nameRepairStrategy handling of column names.
@@ -173,18 +196,39 @@ public fun DataFrame.Companion.readExcel(
173196 sheetName : String? = null,
174197 skipRows : Int = 0,
175198 columns : String? = null,
199+ formattingOptions : FormattingOptions ? = null,
176200 rowsCount : Int? = null,
177201 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
178202): AnyFrame {
179203 val sheet: Sheet = sheetName
180204 ?.let { wb.getSheet(it) ? : error(" Sheet with name $sheetName not found" ) }
181205 ? : wb.getSheetAt(0 )
182- return readExcel(sheet, columns, skipRows, rowsCount, nameRepairStrategy)
206+ return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy)
207+ }
208+
/**
 * Type-safe wrapper around a column specification naming the columns that must be read as String.
 *
 * @param range comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
 */
@JvmInline
public value class StringColumns(
    public val range: String,
)
214+
/**
 * Builds [FormattingOptions] covering the columns of this [StringColumns.range].
 *
 * @param formatter formatter stored in the resulting options; a new [DataFormatter] by default
 * @return options combining this range with [formatter]
 */
public fun StringColumns.toFormattingOptions(formatter: DataFormatter = DataFormatter()): FormattingOptions {
    return FormattingOptions(range = range, formatter = formatter)
}
217+
/**
 * Describes which columns are read as formatted text and which formatter produces that text.
 *
 * @param range comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
 * @param formatter formatter applied to the cells of the selected columns,
 * see [DataFormatter.formatCellValue]
 */
public class FormattingOptions(range: String, public val formatter: DataFormatter = DataFormatter()) {
    /** Column indices parsed from `range`, with duplicates removed. */
    public val columnIndices: Set<Int>

    init {
        // Parsed eagerly so that the indices are computed once, at construction time.
        columnIndices = getColumnIndices(range).toSet()
    }
}
184225
185226/* *
186227 * @param sheet sheet to read.
187228 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
229+ * @param formattingOptions columns to read as String regardless of the cell type, together with the formatter used to convert their cells.
230+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the String "3".
231+ * See also [FormattingOptions.formatter] and [DataFormatter.formatCellValue].
188232 * @param skipRows number of rows before header
189233 * @param rowsCount number of rows to read.
190234 * @param nameRepairStrategy handling of column names.
@@ -193,19 +237,13 @@ public fun DataFrame.Companion.readExcel(
193237public fun DataFrame.Companion.readExcel (
194238 sheet : Sheet ,
195239 columns : String? = null,
240+ formattingOptions : FormattingOptions ? = null,
196241 skipRows : Int = 0,
197242 rowsCount : Int? = null,
198243 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
199244): AnyFrame {
200245 val columnIndexes: Iterable <Int > = if (columns != null ) {
201- columns.split(" ," ).flatMap {
202- if (it.contains(" :" )) {
203- val (start, end) = it.split(" :" ).map { CellReference .convertColStringToIndex(it) }
204- start.. end
205- } else {
206- listOf (CellReference .convertColStringToIndex(it))
207- }
208- }
246+ getColumnIndices(columns)
209247 } else {
210248 val headerRow = checkNotNull(sheet.getRow(skipRows)) {
211249 " Row number ${skipRows + 1 } (1-based index) is not defined on the sheet ${sheet.sheetName} "
@@ -235,17 +273,32 @@ public fun DataFrame.Companion.readExcel(
235273 val name = repairNameIfRequired(nameFromCell, columnNameCounters, nameRepairStrategy)
236274 columnNameCounters[nameFromCell] =
237275 columnNameCounters.getOrDefault(nameFromCell, 0 ) + 1 // increase the counter for specific column name
276+ val getCellValue: (Cell ? ) -> Any? = when {
277+ formattingOptions != null && index in formattingOptions.columnIndices -> { cell: Cell ? ->
278+ formattingOptions.formatter.formatCellValue(cell)
279+ }
238280
281+ else -> { cell -> cell.cellValue(sheet.sheetName) }
282+ }
239283 val values: List <Any ?> = valueRowsRange.map {
240284 val row: Row ? = sheet.getRow(it)
241285 val cell: Cell ? = row?.getCell(index)
242- cell.cellValue(sheet.sheetName )
286+ getCellValue(cell )
243287 }
244288 DataColumn .createWithTypeInference(name, values)
245289 }
246290 return dataFrameOf(columns)
247291}
248292
/**
 * Converts a column specification into the list of column indices it denotes.
 *
 * Each comma separated item is either a single column letter or a `start:end` range.
 * Whitespace around letters and separators is ignored, so "A, C, E:F" is read like "A,C,E:F".
 *
 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
 * @return indices in the order the items are listed; a range contributes every index from its start to its end
 * @throws IllegalArgumentException if a range item does not consist of exactly two parts separated by ':'
 */
private fun getColumnIndices(columns: String): List<Int> =
    columns.split(",").flatMap { item ->
        if (":" in item) {
            val bounds = item.split(":").map { it.trim() }
            // Destructuring alone would silently ignore extra parts such as the "C" in "A:B:C".
            require(bounds.size == 2) {
                "Invalid column range '${item.trim()}': expected two column letters separated by ':'"
            }
            val (start, end) = bounds.map { CellReference.convertColStringToIndex(it) }
            start..end
        } else {
            listOf(CellReference.convertColStringToIndex(item.trim()))
        }
    }
301+
249302/* *
250303 * This is a universal function for name repairing
251304 * and should be moved to the API module later,
@@ -324,7 +377,7 @@ public fun <T> DataFrame<T>.writeExcel(
324377 keepFile : Boolean = false,
325378) {
326379 val factory =
327- if (keepFile){
380+ if (keepFile) {
328381 when (workBookType) {
329382 WorkBookType .XLS -> HSSFWorkbook (file.inputStream())
330383 WorkBookType .XLSX -> XSSFWorkbook (file.inputStream())
0 commit comments