@@ -53,12 +53,12 @@ public fun DataFrame.Companion.readDelimStr(
53
53
colTypes : Map <String , ColType > = mapOf(),
54
54
skipLines : Int = 0,
55
55
readLines : Int? = null
56
- ): DataFrame <* > = readDelim( StringReader (text), CSVType .DEFAULT .format.withHeader(), colTypes, skipLines, readLines)
56
+ ): DataFrame <* > = StringReader (text).use { readDelim(it , CSVType .DEFAULT .format.withHeader(), colTypes, skipLines, readLines) }
57
57
58
58
public fun DataFrame.Companion.read (
59
59
fileOrUrl : String ,
60
60
delimiter : Char ,
61
- headers : List <String > = listOf(),
61
+ header : List <String > = listOf(),
62
62
colTypes : Map <String , ColType > = mapOf(),
63
63
skipLines : Int = 0,
64
64
readLines : Int? = null,
@@ -68,7 +68,7 @@ public fun DataFrame.Companion.read(
68
68
catchHttpResponse(asURL(fileOrUrl)) {
69
69
readDelim(
70
70
it, delimiter,
71
- headers , isCompressed(fileOrUrl),
71
+ header , isCompressed(fileOrUrl),
72
72
getCSVType(fileOrUrl), colTypes,
73
73
skipLines, readLines,
74
74
duplicate, charset
@@ -78,7 +78,7 @@ public fun DataFrame.Companion.read(
78
78
public fun DataFrame.Companion.readCSV (
79
79
fileOrUrl : String ,
80
80
delimiter : Char = ',',
81
- headers : List <String > = listOf(),
81
+ header : List <String > = listOf(),
82
82
colTypes : Map <String , ColType > = mapOf(),
83
83
skipLines : Int = 0,
84
84
readLines : Int? = null,
@@ -89,7 +89,7 @@ public fun DataFrame.Companion.readCSV(
89
89
catchHttpResponse(asURL(fileOrUrl)) {
90
90
readDelim(
91
91
it, delimiter,
92
- headers , isCompressed(fileOrUrl),
92
+ header , isCompressed(fileOrUrl),
93
93
CSVType .DEFAULT , colTypes,
94
94
skipLines, readLines,
95
95
duplicate, charset,
@@ -100,7 +100,7 @@ public fun DataFrame.Companion.readCSV(
100
100
public fun DataFrame.Companion.readCSV (
101
101
file : File ,
102
102
delimiter : Char = ',',
103
- headers : List <String > = listOf(),
103
+ header : List <String > = listOf(),
104
104
colTypes : Map <String , ColType > = mapOf(),
105
105
skipLines : Int = 0,
106
106
readLines : Int? = null,
@@ -110,7 +110,7 @@ public fun DataFrame.Companion.readCSV(
110
110
): DataFrame <* > =
111
111
readDelim(
112
112
FileInputStream (file), delimiter,
113
- headers , isCompressed(file),
113
+ header , isCompressed(file),
114
114
CSVType .DEFAULT , colTypes,
115
115
skipLines, readLines,
116
116
duplicate, charset,
@@ -120,23 +120,43 @@ public fun DataFrame.Companion.readCSV(
120
120
/**
 * Reads delimiter-separated data from [url] and returns it as a [DataFrame].
 *
 * The stream is opened by this function via [URL.openStream], so it is also closed by this
 * function: the read is wrapped in `use`, which closes the stream both on normal return and
 * when an exception is thrown. All parsing is delegated to the stream-based `readCSV` overload.
 *
 * @param url location of the data; also passed to `isCompressed` to derive the compression flag.
 * @param delimiter field separator character.
 * @param header column names, forwarded unchanged to the stream-based overload.
 * @param colTypes per-column type overrides, forwarded unchanged.
 * @param skipLines forwarded unchanged.
 * @param readLines forwarded unchanged; `null` by default.
 * @param duplicate forwarded unchanged.
 * @param charset character set used to decode the stream.
 * @param parserOptions forwarded unchanged.
 * @return the frame produced by the stream-based overload.
 */
public fun DataFrame.Companion.readCSV(
    url: URL,
    delimiter: Char = ',',
    header: List<String> = listOf(),
    colTypes: Map<String, ColType> = mapOf(),
    skipLines: Int = 0,
    readLines: Int? = null,
    duplicate: Boolean = true,
    charset: Charset = Charsets.UTF_8,
    parserOptions: ParserOptions? = null
): DataFrame<*> =
    // The stream is created here, so nobody else can be expected to release it.
    url.openStream().use { stream ->
        readCSV(
            stream, delimiter,
            header, isCompressed(url),
            colTypes,
            skipLines, readLines,
            duplicate, charset,
            parserOptions
        )
    }
139
139
140
/**
 * Reads delimiter-separated data from an already opened [stream] and returns it as a [DataFrame].
 *
 * This overload is a thin forwarder to `readDelim` that fixes the CSV flavour to
 * `CSVType.DEFAULT`; every other argument is passed through untouched. The function itself
 * does not close [stream].
 *
 * @param stream source of the raw bytes.
 * @param delimiter field separator character.
 * @param header column names, forwarded unchanged.
 * @param isCompressed compression flag, forwarded unchanged.
 * @param colTypes per-column type overrides, forwarded unchanged.
 * @param skipLines forwarded unchanged.
 * @param readLines forwarded unchanged; `null` by default.
 * @param duplicate forwarded unchanged.
 * @param charset character set used to decode [stream].
 * @param parserOptions forwarded unchanged.
 * @return the frame produced by `readDelim`.
 */
public fun DataFrame.Companion.readCSV(
    stream: InputStream,
    delimiter: Char = ',',
    header: List<String> = listOf(),
    isCompressed: Boolean = false,
    colTypes: Map<String, ColType> = mapOf(),
    skipLines: Int = 0,
    readLines: Int? = null,
    duplicate: Boolean = true,
    charset: Charset = Charsets.UTF_8,
    parserOptions: ParserOptions? = null
): DataFrame<*> {
    return readDelim(
        inStream = stream,
        delimiter = delimiter,
        header = header,
        isCompressed = isCompressed,
        csvType = CSVType.DEFAULT,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        duplicate = duplicate,
        charset = charset,
        parserOptions = parserOptions
    )
}
159
+
140
160
private fun getCSVType (path : String ): CSVType =
141
161
when (path.substringAfterLast(' .' ).toLowerCase()) {
142
162
" csv" -> CSVType .DEFAULT
@@ -160,13 +180,13 @@ internal fun asURL(fileOrUrl: String): URL = (
160
180
}
161
181
).toURL()
162
182
163
/**
 * Builds the [CSVFormat] used for parsing.
 *
 * Starts from the base format carried by [type] and then applies, in order, the [delimiter],
 * the explicit [header] names and the [duplicate] flag for duplicate header names.
 */
private fun getFormat(type: CSVType, delimiter: Char, header: List<String>, duplicate: Boolean): CSVFormat {
    val headerNames = header.toTypedArray()
    val withDelimiter = type.format.withDelimiter(delimiter)
    val withHeader = withDelimiter.withHeader(*headerNames)
    return withHeader.withAllowDuplicateHeaderNames(duplicate)
}
165
185
166
186
public fun DataFrame.Companion.readDelim (
167
187
inStream : InputStream ,
168
188
delimiter : Char = ',',
169
- headers : List <String > = listOf(),
189
+ header : List <String > = listOf(),
170
190
isCompressed : Boolean = false,
171
191
csvType : CSVType ,
172
192
colTypes : Map <String , ColType > = mapOf(),
@@ -181,7 +201,14 @@ public fun DataFrame.Companion.readDelim(
181
201
} else {
182
202
BufferedReader (InputStreamReader (inStream, charset))
183
203
}.run {
184
- readDelim(this , getFormat(csvType, delimiter, headers, duplicate), colTypes, skipLines, readLines, parserOptions)
204
+ readDelim(
205
+ this ,
206
+ getFormat(csvType, delimiter, header, duplicate),
207
+ colTypes,
208
+ skipLines,
209
+ readLines,
210
+ parserOptions
211
+ )
185
212
}
186
213
187
214
public enum class ColType {
@@ -222,47 +249,46 @@ public fun DataFrame.Companion.readDelim(
222
249
repeat(skipLines) { reader.readLine() }
223
250
}
224
251
225
- format.parse(reader).use { csvParser ->
226
- val records = if (readLines == null ) {
227
- csvParser.records
228
- } else {
229
- require(readLines >= 0 ) { " `readLines` must not be negative" }
230
- val records = ArrayList <CSVRecord >(readLines)
231
- val iter = csvParser.iterator()
232
- var count = readLines ? : 0
233
- while (iter.hasNext() && 0 < count-- ) {
234
- records.add(iter.next())
235
- }
236
- records
252
+ val csvParser = format.parse(reader)
253
+ val records = if (readLines == null ) {
254
+ csvParser.records
255
+ } else {
256
+ require(readLines >= 0 ) { " `readLines` must not be negative" }
257
+ val records = ArrayList <CSVRecord >(readLines)
258
+ val iter = csvParser.iterator()
259
+ var count = readLines ? : 0
260
+ while (iter.hasNext() && 0 < count-- ) {
261
+ records.add(iter.next())
237
262
}
263
+ records
264
+ }
238
265
239
- val columnNames = csvParser.headerNames.takeIf { it.isNotEmpty() }
240
- ? : (1 .. records[0 ].count()).map { index -> " X$index " }
266
+ val columnNames = csvParser.headerNames.takeIf { it.isNotEmpty() }
267
+ ? : (1 .. records[0 ].count()).map { index -> " X$index " }
241
268
242
- val generator = ColumnNameGenerator ()
243
- val uniqueNames = columnNames.map { generator.addUnique(it) }
269
+ val generator = ColumnNameGenerator ()
270
+ val uniqueNames = columnNames.map { generator.addUnique(it) }
244
271
245
- val cols = uniqueNames.mapIndexed { colIndex, colName ->
246
- val defaultColType = colTypes[" .default" ]
247
- val colType = colTypes[colName] ? : defaultColType
248
- var hasNulls = false
249
- val values = records.map {
250
- it[colIndex].ifEmpty {
251
- hasNulls = true
252
- null
253
- }
272
+ val cols = uniqueNames.mapIndexed { colIndex, colName ->
273
+ val defaultColType = colTypes[" .default" ]
274
+ val colType = colTypes[colName] ? : defaultColType
275
+ var hasNulls = false
276
+ val values = records.map {
277
+ it[colIndex].ifEmpty {
278
+ hasNulls = true
279
+ null
254
280
}
255
- val column = DataColumn .createValueColumn(colName, values, typeOf< String >().withNullability(hasNulls))
256
- when (colType) {
257
- null -> column.tryParse(parserOptions)
258
- else -> {
259
- val parser = Parsers [colType.toType()] !!
260
- column.parse(parser, parserOptions)
261
- }
281
+ }
282
+ val column = DataColumn .createValueColumn(colName, values, typeOf< String >().withNullability(hasNulls))
283
+ when (colType) {
284
+ null -> column.tryParse(parserOptions)
285
+ else -> {
286
+ val parser = Parsers [colType.toType()] !!
287
+ column.parse(parser, parserOptions)
262
288
}
263
289
}
264
- return cols.toDataFrame()
265
290
}
291
+ return cols.toDataFrame()
266
292
}
267
293
268
294
public fun AnyFrame.writeCSV (file : File , format : CSVFormat = CSVFormat .DEFAULT .withHeader()): Unit =
0 commit comments