@@ -6,6 +6,7 @@ import kotlinx.datetime.toKotlinLocalDateTime
66import org.apache.poi.hssf.usermodel.HSSFWorkbook
77import org.apache.poi.ss.usermodel.Cell
88import org.apache.poi.ss.usermodel.CellType
9+ import org.apache.poi.ss.usermodel.DataFormatter
910import org.apache.poi.ss.usermodel.DateUtil
1011import org.apache.poi.ss.usermodel.RichTextString
1112import org.apache.poi.ss.usermodel.Row
@@ -83,6 +84,8 @@ private fun setWorkbookTempDirectory() {
8384/* *
8485 * @param sheetName sheet to read. By default, the first sheet in the document
8586 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
87+ * @param stringColumns columns to read as String regardless of the cell type.
88+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the string "3".
8689 * @param skipRows number of rows before header
8790 * @param rowsCount number of rows to read.
8891 * @param nameRepairStrategy handling of column names.
@@ -93,17 +96,22 @@ public fun DataFrame.Companion.readExcel(
9396 sheetName : String? = null,
9497 skipRows : Int = 0,
9598 columns : String? = null,
99+ stringColumns : StringColumns ? = null,
96100 rowsCount : Int? = null,
97101 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
98102): AnyFrame {
99103 setWorkbookTempDirectory()
100104 val wb = WorkbookFactory .create(url.openStream())
101- return wb.use { readExcel(wb, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
105+ return wb.use {
106+ readExcel(wb, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
107+ }
102108}
103109
104110/* *
105111 * @param sheetName sheet to read. By default, the first sheet in the document
106112 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
113+ * @param stringColumns columns to read as String regardless of the cell type.
114+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the string "3".
107115 * @param skipRows number of rows before header
108116 * @param rowsCount number of rows to read.
109117 * @param nameRepairStrategy handling of column names.
@@ -114,17 +122,22 @@ public fun DataFrame.Companion.readExcel(
114122 sheetName : String? = null,
115123 skipRows : Int = 0,
116124 columns : String? = null,
125+ stringColumns : StringColumns ? = null,
117126 rowsCount : Int? = null,
118127 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
119128): AnyFrame {
120129 setWorkbookTempDirectory()
121130 val wb = WorkbookFactory .create(file)
122- return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
131+ return wb.use {
132+ readExcel(it, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
133+ }
123134}
124135
125136/* *
126137 * @param sheetName sheet to read. By default, the first sheet in the document
127138 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
139+ * @param stringColumns columns to read as String regardless of the cell type.
140+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the string "3".
128141 * @param skipRows number of rows before header
129142 * @param rowsCount number of rows to read.
130143 * @param nameRepairStrategy handling of column names.
@@ -135,13 +148,17 @@ public fun DataFrame.Companion.readExcel(
135148 sheetName : String? = null,
136149 skipRows : Int = 0,
137150 columns : String? = null,
151+ stringColumns : StringColumns ? = null,
138152 rowsCount : Int? = null,
139153 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
140- ): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, rowsCount, nameRepairStrategy)
154+ ): AnyFrame =
155+ readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, stringColumns, rowsCount, nameRepairStrategy)
141156
142157/* *
143158 * @param sheetName sheet to read. By default, the first sheet in the document
144159 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
160+ * @param stringColumns columns to read as String regardless of the cell type.
161+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the string "3".
145162 * @param skipRows number of rows before header
146163 * @param rowsCount number of rows to read.
147164 * @param nameRepairStrategy handling of column names.
@@ -152,17 +169,23 @@ public fun DataFrame.Companion.readExcel(
152169 sheetName : String? = null,
153170 skipRows : Int = 0,
154171 columns : String? = null,
172+ stringColumns : StringColumns ? = null,
155173 rowsCount : Int? = null,
156174 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
157175): AnyFrame {
158176 setWorkbookTempDirectory()
159177 val wb = WorkbookFactory .create(inputStream)
160- return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
178+ return wb.use {
179+ readExcel(it, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
180+ }
161181}
162182
163183/* *
164184 * @param sheetName sheet to read. By default, the first sheet in the document
165185 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
186+ * @param formattingOptions columns to read as String regardless of the cell type, together with the formatter used to render them.
187+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the string "3".
188+ * See also [FormattingOptions.formatter] and [DataFormatter.formatCellValue].
166189 * @param skipRows number of rows before header
167190 * @param rowsCount number of rows to read.
168191 * @param nameRepairStrategy handling of column names.
@@ -173,18 +196,37 @@ public fun DataFrame.Companion.readExcel(
173196 sheetName : String? = null,
174197 skipRows : Int = 0,
175198 columns : String? = null,
199+ formattingOptions : FormattingOptions ? = null,
176200 rowsCount : Int? = null,
177201 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
178202): AnyFrame {
179203 val sheet: Sheet = sheetName
180204 ?.let { wb.getSheet(it) ? : error(" Sheet with name $sheetName not found" ) }
181205 ? : wb.getSheetAt(0 )
182- return readExcel(sheet, columns, skipRows, rowsCount, nameRepairStrategy)
206+ return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy)
207+ }
208+
/**
 * Selection of Excel columns that should be read as String regardless of the cell type.
 *
 * @property range Excel column letters and column ranges, separated by commas (e.g. “A:E” or “A,C,E:F”)
 */
public class StringColumns(
    public val range: String,
)
213+
/**
 * Builds [FormattingOptions] from this column selection.
 *
 * @param formatter formatter handed to the created options; a fresh [DataFormatter] when omitted
 * @return options created from this selection's [StringColumns.range] and the given [formatter]
 */
public fun StringColumns.toFormattingOptions(
    formatter: DataFormatter = DataFormatter(),
): FormattingOptions {
    return FormattingOptions(range = range, formatter = formatter)
}
216+
/**
 * Describes which columns are rendered through a [DataFormatter] and which formatter is used.
 *
 * @param range Excel column letters and column ranges, separated by commas (e.g. “A:E” or “A,C,E:F”)
 * @property formatter formatter whose [DataFormatter.formatCellValue] produces the value of cells in the selected columns
 * @property columnIndices distinct column indices obtained by expanding [range]
 */
public class FormattingOptions(
    range: String,
    public val formatter: DataFormatter = DataFormatter(),
) {
    public val columnIndices: Set<Int> = getColumnIndices(range).toSet()
}
184223
185224/* *
186225 * @param sheet sheet to read.
187226 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
227+ * @param formattingOptions columns to read as String regardless of the cell type, together with the formatter used to render them.
228+ * For example, by default a numeric cell with value "3" is parsed as a Double with value 3.0. With this option, it is read as the string "3".
229+ * See also [FormattingOptions.formatter] and [DataFormatter.formatCellValue].
188230 * @param skipRows number of rows before header
189231 * @param rowsCount number of rows to read.
190232 * @param nameRepairStrategy handling of column names.
@@ -193,19 +235,13 @@ public fun DataFrame.Companion.readExcel(
193235public fun DataFrame.Companion.readExcel (
194236 sheet : Sheet ,
195237 columns : String? = null,
238+ formattingOptions : FormattingOptions ? = null,
196239 skipRows : Int = 0,
197240 rowsCount : Int? = null,
198241 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
199242): AnyFrame {
200243 val columnIndexes: Iterable <Int > = if (columns != null ) {
201- columns.split(" ," ).flatMap {
202- if (it.contains(" :" )) {
203- val (start, end) = it.split(" :" ).map { CellReference .convertColStringToIndex(it) }
204- start.. end
205- } else {
206- listOf (CellReference .convertColStringToIndex(it))
207- }
208- }
244+ getColumnIndices(columns)
209245 } else {
210246 val headerRow = checkNotNull(sheet.getRow(skipRows)) {
211247 " Row number ${skipRows + 1 } (1-based index) is not defined on the sheet ${sheet.sheetName} "
@@ -235,17 +271,32 @@ public fun DataFrame.Companion.readExcel(
235271 val name = repairNameIfRequired(nameFromCell, columnNameCounters, nameRepairStrategy)
236272 columnNameCounters[nameFromCell] =
237273 columnNameCounters.getOrDefault(nameFromCell, 0 ) + 1 // increase the counter for specific column name
274+ val getCellValue: (Cell ? ) -> Any? = when {
275+ formattingOptions != null && index in formattingOptions.columnIndices -> { cell: Cell ? ->
276+ formattingOptions.formatter.formatCellValue(cell)
277+ }
238278
279+ else -> { cell -> cell.cellValue(sheet.sheetName) }
280+ }
239281 val values: List <Any ?> = valueRowsRange.map {
240282 val row: Row ? = sheet.getRow(it)
241283 val cell: Cell ? = row?.getCell(index)
242- cell.cellValue(sheet.sheetName )
284+ getCellValue(cell )
243285 }
244286 DataColumn .createWithTypeInference(name, values)
245287 }
246288 return dataFrameOf(columns)
247289}
248290
/**
 * Expands a column specification into the list of column indices it denotes.
 *
 * The specification is a comma separated list of Excel column letters and column ranges
 * (e.g. “A:E” or “A,C,E:F”). A range is expanded inclusively from its first to its second bound.
 * Indices are returned in the order they are written; duplicates are not removed.
 *
 * Whitespace around a column letter is ignored, so "A, C, E : F" is read like "A,C,E:F".
 * Without trimming, the blank characters would be fed into
 * [CellReference.convertColStringToIndex] and produce a meaningless index.
 *
 * @param columns comma separated list of Excel column letters and column ranges
 * @return column indices as computed by [CellReference.convertColStringToIndex]
 */
private fun getColumnIndices(columns: String): List<Int> =
    columns.split(",").flatMap { token ->
        if (token.contains(":")) {
            // Only the first two bounds of a range token are used.
            val (start, end) = token.split(":").map { bound ->
                CellReference.convertColStringToIndex(bound.trim())
            }
            start..end
        } else {
            listOf(CellReference.convertColStringToIndex(token.trim()))
        }
    }
299+
249300/* *
250301 * This is a universal function for name repairing
251302 * and should be moved to the API module later,
@@ -324,7 +375,7 @@ public fun <T> DataFrame<T>.writeExcel(
324375 keepFile : Boolean = false,
325376) {
326377 val factory =
327- if (keepFile){
378+ if (keepFile) {
328379 when (workBookType) {
329380 WorkBookType .XLS -> HSSFWorkbook (file.inputStream())
330381 WorkBookType .XLSX -> XSSFWorkbook (file.inputStream())
0 commit comments