|
1 | 1 | import io.kotest.assertions.throwables.shouldThrow
|
2 | 2 | import io.kotest.matchers.shouldBe
|
| 3 | +import org.apache.arrow.vector.types.pojo.Schema |
3 | 4 | import org.apache.arrow.vector.util.Text
|
4 | 5 | import org.jetbrains.kotlinx.dataframe.DataColumn
|
5 | 6 | import org.jetbrains.kotlinx.dataframe.DataFrame
|
6 | 7 | import org.jetbrains.kotlinx.dataframe.api.NullabilityOptions
|
7 | 8 | import org.jetbrains.kotlinx.dataframe.api.columnOf
|
8 | 9 | import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
|
9 | 10 | import org.jetbrains.kotlinx.dataframe.api.toColumn
|
10 |
| -import org.jetbrains.kotlinx.dataframe.io.arrowWriter |
11 |
| -import org.jetbrains.kotlinx.dataframe.io.readArrowFeather |
12 |
| -import org.jetbrains.kotlinx.dataframe.io.readArrowIPC |
| 11 | +import org.jetbrains.kotlinx.dataframe.io.* |
13 | 12 | import org.junit.Test
|
14 | 13 | import java.io.File
|
15 | 14 | import java.net.URL
|
16 | 15 | import java.time.LocalDate
|
| 16 | +import java.time.LocalDateTime |
17 | 17 | import kotlin.reflect.typeOf
|
18 | 18 |
|
19 | 19 | internal class ArrowKtTest {
|
@@ -93,75 +93,38 @@ internal class ArrowKtTest {
|
93 | 93 | assertEstimations(DataFrame.readArrowIPC(testArrowIPC("test-illegal.arrow"), NullabilityOptions.Widening), true, true)
|
94 | 94 | }
|
95 | 95 |
|
96 |
| - val cities = dataFrameOf( |
97 |
| - DataColumn.createValueColumn("name", listOf( |
98 |
| - "Berlin", |
99 |
| - "Hamburg", |
100 |
| - "New York", |
101 |
| - "Washington", |
102 |
| - "Saint Petersburg", |
103 |
| - "Vatican" |
104 |
| - )), |
105 |
| - DataColumn.createValueColumn("affiliation", listOf( |
106 |
| - "Germany", |
107 |
| - "Germany", |
108 |
| - "The USA", |
109 |
| - "The USA", |
110 |
| - "Russia", |
111 |
| - null |
112 |
| - )), |
113 |
| - DataColumn.createValueColumn("is_capital", listOf( |
114 |
| - true, |
115 |
| - false, |
116 |
| - false, |
117 |
| - true, |
118 |
| - false, |
119 |
| - null |
120 |
| - )), |
121 |
| - DataColumn.createValueColumn("population", listOf( |
122 |
| - 3_769_495, |
123 |
| - 1_845_229, |
124 |
| - 8_467_513, |
125 |
| - 689_545, |
126 |
| - 5_377_503, |
127 |
| - 825 |
128 |
| - )), |
129 |
| - DataColumn.createValueColumn("area", listOf( |
130 |
| - 891.7, |
131 |
| - 755.22, |
132 |
| - 1223.59, |
133 |
| - 177.0, |
134 |
| - 1439.0, |
135 |
| - 0.44 |
136 |
| - )), |
137 |
| - DataColumn.createValueColumn("settled", listOf( |
138 |
| - LocalDate.of(1237, 1, 1), |
139 |
| - LocalDate.of(1189, 5, 7), |
140 |
| - LocalDate.of(1624, 1, 1), |
141 |
| - LocalDate.of(1790, 7, 16), |
142 |
| - LocalDate.of(1703, 5, 27), |
143 |
| - LocalDate.of(1929, 2, 11) |
144 |
| - )) |
145 |
| - ) |
146 | 96 |
|
/**
 * Round-trips [citiesExampleFrame] through both Arrow outputs used here and verifies the result.
 *
 * The frame is written once to a temporary Feather file and once to an in-memory IPC byte array;
 * each output is read back and compared with the source frame column by column.
 */
@Test
fun testWritingGeneral() {
    fun assertEstimation(citiesDeserialized: DataFrame<*>) {
        // These columns are expected to come back equal to the source columns.
        listOf("name", "affiliation", "is_capital", "population", "area").forEach { columnName ->
            citiesDeserialized[columnName] shouldBe citiesExampleFrame[columnName]
        }

        // citiesExampleFrame["settled"].type() refers to FlexibleTypeImpl(LocalDate..LocalDate?) and does not
        // match typeOf<LocalDate>(), so the type and the values are checked separately.
        citiesDeserialized["settled"].type() shouldBe typeOf<LocalDate>()
        citiesDeserialized["settled"].values() shouldBe citiesExampleFrame["settled"].values()

        // citiesExampleFrame["page_in_wiki"].type() is URI, not supported by Arrow directly:
        // the column is expected to be read back as the string form of each value.
        citiesDeserialized["page_in_wiki"].type() shouldBe typeOf<String>()
        citiesDeserialized["page_in_wiki"].values() shouldBe
            citiesExampleFrame["page_in_wiki"].values().map { it.toString() }
    }

    // Register the temp file for removal so repeated test runs do not accumulate files.
    val testFile = File.createTempFile("cities", "arrow").apply { deleteOnExit() }
    citiesExampleFrame.writeArrowFeather(testFile)
    assertEstimation(DataFrame.readArrowFeather(testFile))

    val testByteArray = citiesExampleFrame.arrowWriter().saveArrowIPCToByteArray()
    assertEstimation(DataFrame.readArrowIPC(testByteArray))
}
|
166 | 118 |
|
/**
 * Writes [citiesExampleFrame] to a Feather file using an explicit target schema parsed from
 * [citiesExampleSchema], reads it back with [NullabilityOptions.Checking] and verifies the result.
 *
 * NOTE(review): the expectations below presumably mirror what `citiesExampleSchema` declares
 * (the schema itself is defined outside this test) — confirm against its definition.
 */
@Test
fun testWritingBySchema() {
    // Register the temp file for removal so repeated test runs do not accumulate files.
    val testFile = File.createTempFile("cities", "arrow").apply { deleteOnExit() }
    citiesExampleFrame.arrowWriter(Schema.fromJSON(citiesExampleSchema)).writeArrowFeather(testFile)

    val citiesDeserialized = DataFrame.readArrowFeather(testFile, NullabilityOptions.Checking)

    // Column types after writing by schema.
    citiesDeserialized["population"].type() shouldBe typeOf<Long?>()
    citiesDeserialized["area"].type() shouldBe typeOf<Float>()
    citiesDeserialized["settled"].type() shouldBe typeOf<LocalDateTime>()

    // "page_in_wiki" must be absent from the written data: merely accessing it has to throw.
    // No inner assertion is used, so an unexpectedly present column is reported as
    // "no exception thrown" rather than as an unrelated AssertionError.
    shouldThrow<IllegalArgumentException> { citiesDeserialized["page_in_wiki"] }

    // "film_in_youtube" must be read back as a column holding only nulls, one per source row.
    val expectedFilmColumn = DataColumn.createValueColumn(
        "film_in_youtube",
        List<String?>(citiesExampleFrame.rowsCount()) { null },
    )
    citiesDeserialized["film_in_youtube"] shouldBe expectedFilmColumn
}
167 | 130 | }
|
0 commit comments