Skip to content

Commit 24ec80e

Browse files
authored
Merge pull request #135 from Kotlin/fix-read-excel
#132 Add `skipRows` parameter to enable reading data with header from…
2 parents 8775fdf + 76fe0c6 commit 24ec80e

File tree

3 files changed

+57
-9
lines changed

3 files changed

+57
-9
lines changed

dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,58 +52,100 @@ internal class DefaultReadExcelMethod(path: String?) : AbstractDefaultReadMethod
5252

5353
private const val readExcel = "readExcel"
5454

55+
/**
56+
* @param sheetName name of the sheet to read. By default, the first sheet in the document is read.
57+
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
58+
* @param skipRows number of rows to skip before the header row. By default 0, i.e. the header is on the first row.
59+
* @param rowsCount number of data rows to read after the header row. By default, all rows after the header are read.
60+
*/
5561
public fun DataFrame.Companion.readExcel(
5662
url: URL,
5763
sheetName: String? = null,
64+
skipRows: Int = 0,
5865
columns: String? = null,
5966
rowsCount: Int? = null
6067
): AnyFrame {
6168
val wb = WorkbookFactory.create(url.openStream())
62-
return wb.use { readExcel(wb, sheetName, columns, rowsCount) }
69+
return wb.use { readExcel(wb, sheetName, skipRows, columns, rowsCount) }
6370
}
6471

72+
/**
73+
* @param sheetName name of the sheet to read. By default, the first sheet in the document is read.
74+
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
75+
* @param skipRows number of rows to skip before the header row. By default 0, i.e. the header is on the first row.
76+
* @param rowsCount number of data rows to read after the header row. By default, all rows after the header are read.
77+
*/
6578
public fun DataFrame.Companion.readExcel(
6679
file: File,
6780
sheetName: String? = null,
81+
skipRows: Int = 0,
6882
columns: String? = null,
6983
rowsCount: Int? = null
7084
): AnyFrame {
7185
val wb = WorkbookFactory.create(file)
72-
return wb.use { readExcel(it, sheetName, columns, rowsCount) }
86+
return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount) }
7387
}
7488

89+
/**
90+
* @param sheetName name of the sheet to read. By default, the first sheet in the document is read.
91+
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
92+
* @param skipRows number of rows to skip before the header row. By default 0, i.e. the header is on the first row.
93+
* @param rowsCount number of data rows to read after the header row. By default, all rows after the header are read.
94+
*/
7595
public fun DataFrame.Companion.readExcel(
7696
fileOrUrl: String,
7797
sheetName: String? = null,
98+
skipRows: Int = 0,
7899
columns: String? = null,
79100
rowsCount: Int? = null
80-
): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, columns, rowsCount)
101+
): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, rowsCount)
81102

103+
/**
104+
* @param sheetName name of the sheet to read. By default, the first sheet in the document is read.
105+
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
106+
* @param skipRows number of rows to skip before the header row. By default 0, i.e. the header is on the first row.
107+
* @param rowsCount number of data rows to read after the header row. By default, all rows after the header are read.
108+
*/
82109
public fun DataFrame.Companion.readExcel(
83110
inputStream: InputStream,
84111
sheetName: String? = null,
112+
skipRows: Int = 0,
85113
columns: String? = null,
86114
rowsCount: Int? = null
87115
): AnyFrame {
88116
val wb = WorkbookFactory.create(inputStream)
89-
return wb.use { readExcel(it, sheetName, columns, rowsCount) }
117+
return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount) }
90118
}
91119

120+
/**
121+
* @param sheetName name of the sheet to read. By default, the first sheet in the document is read.
122+
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
123+
* @param skipRows number of rows to skip before the header row. By default 0, i.e. the header is on the first row.
124+
* @param rowsCount number of data rows to read after the header row. By default, all rows after the header are read.
125+
*/
92126
public fun DataFrame.Companion.readExcel(
93127
wb: Workbook,
94128
sheetName: String? = null,
129+
skipRows: Int = 0,
95130
columns: String? = null,
96131
rowsCount: Int? = null
97132
): AnyFrame {
98133
val sheet: Sheet = sheetName
99134
?.let { wb.getSheet(it) ?: error("Sheet with name $sheetName not found") }
100135
?: wb.getSheetAt(0)
101-
return readExcel(sheet, columns, rowsCount)
136+
return readExcel(sheet, columns, skipRows, rowsCount)
102137
}
103138

139+
/**
140+
* @param sheet sheet to read.
141+
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
142+
* @param skipRows number of rows to skip before the header row. By default 0, i.e. the header is on the first row.
143+
* @param rowsCount number of data rows to read after the header row. By default, all rows after the header are read.
144+
*/
104145
public fun DataFrame.Companion.readExcel(
105146
sheet: Sheet,
106147
columns: String? = null,
148+
skipRows: Int = 0,
107149
rowsCount: Int? = null
108150
): AnyFrame {
109151
val columnIndexes = if (columns != null) {
@@ -119,16 +161,16 @@ public fun DataFrame.Companion.readExcel(
119161
sheet.getRow(0).map { it.columnIndex }
120162
}
121163

122-
val headerRow = sheet.getRow(0)
123-
val valueRows = sheet.drop(1).let { if (rowsCount != null) it.take(rowsCount) else it }
164+
val headerRow = sheet.getRow(skipRows)
165+
val valueRows = sheet.drop(1 + skipRows).let { if (rowsCount != null) it.take(rowsCount) else it }
124166
val columns = columnIndexes.map { index ->
125167
val headerCell = headerRow.getCell(index)
126168
val name = if (headerCell?.cellType == CellType.NUMERIC) {
127169
headerCell.numericCellValue.toString() // Support numeric-named columns
128170
} else {
129171
headerCell?.stringCellValue ?: CellReference.convertNumToColString(index) // Use Excel column names if no data
130172
}
131-
val values = valueRows.map {
173+
val values: List<Any?> = valueRows.map {
132174
val cell: Cell? = it.getCell(index)
133175
when (cell?.cellType) {
134176
CellType._NONE -> error("Cell ${cell.address} of sheet ${sheet.sheetName} has a CellType that should only be used internally. This is a bug, please report https://github.com/Kotlin/dataframe/issues")

dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class XlsxTest {
3636

3737
@Test
3838
fun `column with empty header`() {
39-
val df = DataFrame.readExcel(testResource("sample2.xlsx"), "Sheet1", "A:C")
39+
val df = DataFrame.readExcel(testResource("sample2.xlsx"), "Sheet1", columns = "A:C")
4040
df shouldBe dataFrameOf("col1", "col2", "C")(1.0, null, 3.0)
4141
}
4242

@@ -78,4 +78,10 @@ class XlsxTest {
7878
df.writeExcel(temp)
7979
DataFrame.readExcel(temp) shouldBe df
8080
}
81+
82+
@Test
83+
fun `read header on second row`() {
84+
val df = DataFrame.readExcel(testResource("custom_header_position.xlsx"), skipRows = 1)
85+
df.columnNames() shouldBe listOf("header1", "header2")
86+
}
8187
}
Binary file not shown.

0 commit comments

Comments
 (0)