@@ -6,6 +6,7 @@ import kotlinx.datetime.toKotlinLocalDateTime
66import org.apache.poi.hssf.usermodel.HSSFWorkbook
77import org.apache.poi.ss.usermodel.Cell
88import org.apache.poi.ss.usermodel.CellType
9+ import org.apache.poi.ss.usermodel.DataFormatter
910import org.apache.poi.ss.usermodel.DateUtil
1011import org.apache.poi.ss.usermodel.RichTextString
1112import org.apache.poi.ss.usermodel.Row
@@ -83,6 +84,8 @@ private fun setWorkbookTempDirectory() {
8384/* *
8485 * @param sheetName sheet to read. By default, the first sheet in the document
8586 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
87+ * @param stringColumns columns to read as String regardless of the cell type.
88+ * For example, a numeric cell with value "3" is parsed by default as a Double with value 3.0. With this option, it is read as the string "3".
8689 * @param skipRows number of rows before header
8790 * @param rowsCount number of rows to read.
8891 * @param nameRepairStrategy handling of column names.
@@ -93,17 +96,22 @@ public fun DataFrame.Companion.readExcel(
9396 sheetName : String? = null,
9497 skipRows : Int = 0,
9598 columns : String? = null,
99+ stringColumns : StringColumns ? = null,
96100 rowsCount : Int? = null,
97101 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
98102): AnyFrame {
99103 setWorkbookTempDirectory()
100104 val wb = WorkbookFactory .create(url.openStream())
101- return wb.use { readExcel(wb, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
105+ return wb.use {
106+ readExcel(wb, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
107+ }
102108}
103109
104110/* *
105111 * @param sheetName sheet to read. By default, the first sheet in the document
106112 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
113+ * @param stringColumns columns to read as String regardless of the cell type.
114+ * For example, a numeric cell with value "3" is parsed by default as a Double with value 3.0. With this option, it is read as the string "3".
107115 * @param skipRows number of rows before header
108116 * @param rowsCount number of rows to read.
109117 * @param nameRepairStrategy handling of column names.
@@ -114,17 +122,22 @@ public fun DataFrame.Companion.readExcel(
114122 sheetName : String? = null,
115123 skipRows : Int = 0,
116124 columns : String? = null,
125+ stringColumns : StringColumns ? = null,
117126 rowsCount : Int? = null,
118127 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
119128): AnyFrame {
120129 setWorkbookTempDirectory()
121130 val wb = WorkbookFactory .create(file)
122- return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
131+ return wb.use {
132+ readExcel(it, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
133+ }
123134}
124135
125136/* *
126137 * @param sheetName sheet to read. By default, the first sheet in the document
127138 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
139+ * @param stringColumns columns to read as String regardless of the cell type.
140+ * For example, a numeric cell with value "3" is parsed by default as a Double with value 3.0. With this option, it is read as the string "3".
128141 * @param skipRows number of rows before header
129142 * @param rowsCount number of rows to read.
130143 * @param nameRepairStrategy handling of column names.
@@ -135,13 +148,17 @@ public fun DataFrame.Companion.readExcel(
135148 sheetName : String? = null,
136149 skipRows : Int = 0,
137150 columns : String? = null,
151+ stringColumns : StringColumns ? = null,
138152 rowsCount : Int? = null,
139153 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
140- ): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, rowsCount, nameRepairStrategy)
154+ ): AnyFrame =
155+ readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, stringColumns, rowsCount, nameRepairStrategy)
141156
142157/* *
143158 * @param sheetName sheet to read. By default, the first sheet in the document
144159 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
160+ * @param stringColumns columns to read as String regardless of the cell type.
161+ * For example, a numeric cell with value "3" is parsed by default as a Double with value 3.0. With this option, it is read as the string "3".
145162 * @param skipRows number of rows before header
146163 * @param rowsCount number of rows to read.
147164 * @param nameRepairStrategy handling of column names.
@@ -152,17 +169,22 @@ public fun DataFrame.Companion.readExcel(
152169 sheetName : String? = null,
153170 skipRows : Int = 0,
154171 columns : String? = null,
172+ stringColumns : StringColumns ? = null,
155173 rowsCount : Int? = null,
156174 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
157175): AnyFrame {
158176 setWorkbookTempDirectory()
159177 val wb = WorkbookFactory .create(inputStream)
160- return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
178+ return wb.use {
179+ readExcel(it, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
180+ }
161181}
162182
163183/* *
164184 * @param sheetName sheet to read. By default, the first sheet in the document
165185 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
186+ * @param formattingOptions columns to read as String regardless of the cell type, together with the formatter applied to them.
187+ * For example, a numeric cell with value "3" is parsed by default as a Double with value 3.0. With this option, it is read as the string "3".
166188 * @param skipRows number of rows before header
167189 * @param rowsCount number of rows to read.
168190 * @param nameRepairStrategy handling of column names.
@@ -173,18 +195,37 @@ public fun DataFrame.Companion.readExcel(
173195 sheetName : String? = null,
174196 skipRows : Int = 0,
175197 columns : String? = null,
198+ formattingOptions : FormattingOptions ? = null,
176199 rowsCount : Int? = null,
177200 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
178201): AnyFrame {
179202 val sheet: Sheet = sheetName
180203 ?.let { wb.getSheet(it) ? : error(" Sheet with name $sheetName not found" ) }
181204 ? : wb.getSheetAt(0 )
182- return readExcel(sheet, columns, skipRows, rowsCount, nameRepairStrategy)
205+ return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy)
206+ }
207+
/**
 * Holder for a selection of Excel columns given as text.
 *
 * @property range comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
 */
public class StringColumns(
    public val range: String,
)
212+
/**
 * Builds [FormattingOptions] from this selection's [StringColumns.range].
 *
 * @param formatter formatter handed to the resulting [FormattingOptions]; a new [DataFormatter] by default
 * @return options built from the same range and the given [formatter]
 */
public fun StringColumns.toFormattingOptions(formatter: DataFormatter = DataFormatter()): FormattingOptions {
    return FormattingOptions(range = this.range, formatter = formatter)
}
215+
/**
 * A column selection paired with the [DataFormatter] to use for those columns.
 *
 * @param range comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
 * @param formatter formatter exposed through [formatter]; a new [DataFormatter] by default
 */
public class FormattingOptions(range: String, public val formatter: DataFormatter = DataFormatter()) {
    /** Distinct column indices obtained by passing `range` to `getColumnIndices`. */
    public val columnIndices: Set<Int>

    init {
        val parsedIndices = getColumnIndices(range)
        columnIndices = parsedIndices.toSet()
    }
}
184222
185223/* *
186224 * @param sheet sheet to read.
187225 * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
226+ * @param formattingOptions columns to read as String regardless of the cell type, together with the formatter applied to them.
227+ * See also [FormattingOptions.formatter] and [DataFormatter.formatCellValue].
228+ * For example, a numeric cell with value "3" is parsed by default as a Double with value 3.0. With this option, it is read as the string "3".
188229 * @param skipRows number of rows before header
189230 * @param rowsCount number of rows to read.
190231 * @param nameRepairStrategy handling of column names.
@@ -193,19 +234,13 @@ public fun DataFrame.Companion.readExcel(
193234public fun DataFrame.Companion.readExcel (
194235 sheet : Sheet ,
195236 columns : String? = null,
237+ formattingOptions : FormattingOptions ? = null,
196238 skipRows : Int = 0,
197239 rowsCount : Int? = null,
198240 nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
199241): AnyFrame {
200242 val columnIndexes: Iterable <Int > = if (columns != null ) {
201- columns.split(" ," ).flatMap {
202- if (it.contains(" :" )) {
203- val (start, end) = it.split(" :" ).map { CellReference .convertColStringToIndex(it) }
204- start.. end
205- } else {
206- listOf (CellReference .convertColStringToIndex(it))
207- }
208- }
243+ getColumnIndices(columns)
209244 } else {
210245 val headerRow = checkNotNull(sheet.getRow(skipRows)) {
211246 " Row number ${skipRows + 1 } (1-based index) is not defined on the sheet ${sheet.sheetName} "
@@ -235,17 +270,32 @@ public fun DataFrame.Companion.readExcel(
235270 val name = repairNameIfRequired(nameFromCell, columnNameCounters, nameRepairStrategy)
236271 columnNameCounters[nameFromCell] =
237272 columnNameCounters.getOrDefault(nameFromCell, 0 ) + 1 // increase the counter for specific column name
273+ val getCellValue: (Cell ? ) -> Any? = when {
274+ formattingOptions != null && index in formattingOptions.columnIndices -> { cell: Cell ? ->
275+ formattingOptions.formatter.formatCellValue(cell)
276+ }
238277
278+ else -> { cell -> cell.cellValue(sheet.sheetName) }
279+ }
239280 val values: List <Any ?> = valueRowsRange.map {
240281 val row: Row ? = sheet.getRow(it)
241282 val cell: Cell ? = row?.getCell(index)
242- cell.cellValue(sheet.sheetName )
283+ getCellValue(cell )
243284 }
244285 DataColumn .createWithTypeInference(name, values)
245286 }
246287 return dataFrameOf(columns)
247288}
248289
/**
 * Parses a comma separated list of Excel column letters and column ranges
 * (e.g. "A:E" or "A,C,E:F") into column indices.
 *
 * Each letter is converted with [CellReference.convertColStringToIndex]; a range
 * "X:Y" expands to every index from X to Y inclusive. Whitespace around letters
 * and ranges is ignored, so "A, C, E:F" is accepted as well.
 *
 * @param columns column letters and ranges separated by commas
 * @return indices in the order they are listed; duplicates are kept
 */
private fun getColumnIndices(columns: String): List<Int> =
    columns.split(",").flatMap { token ->
        // Trim so that a space after a comma is not fed into the letter-to-index conversion.
        val part = token.trim()
        if (":" in part) {
            val (start, end) = part.split(":").map { CellReference.convertColStringToIndex(it.trim()) }
            start..end
        } else {
            listOf(CellReference.convertColStringToIndex(part))
        }
    }
298+
249299/* *
250300 * This is a universal function for name repairing
251301 * and should be moved to the API module later,
@@ -324,7 +374,7 @@ public fun <T> DataFrame<T>.writeExcel(
324374 keepFile : Boolean = false,
325375) {
326376 val factory =
327- if (keepFile){
377+ if (keepFile) {
328378 when (workBookType) {
329379 WorkBookType .XLS -> HSSFWorkbook (file.inputStream())
330380 WorkBookType .XLSX -> XSSFWorkbook (file.inputStream())
0 commit comments