Skip to content

Commit 90d5d35

Browse files
authored
Initialize DataFrames from series (chitralverma#92)
* suppress linting for java generics * introduce dataframes from series * fmt fix * add doc strings * fix scopes for LazyFrame and Series
1 parent 347952b commit 90d5d35

File tree

8 files changed

+218
-11
lines changed

8 files changed

+218
-11
lines changed

core/src/main/scala/org/polars/scala/polars/api/DataFrame.scala

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.polars.scala.polars.api
22

3+
import java.util
34
import java.util.Collections
45

56
import scala.annotation.varargs
@@ -170,4 +171,69 @@ class DataFrame private (private[polars] val ptr: Long) {
170171
object DataFrame {

  private[polars] def withPtr(ptr: Long) = new DataFrame(ptr)

  /** Create a [[org.polars.scala.polars.api.DataFrame]] whose columns are the given
    * [[org.polars.scala.polars.api.Series]]. Each series contributes one column: the series
    * name becomes the column name and the series values become the column values.
    *
    * @param series
    *   First series; it is placed ahead of the elements of `more`
    * @param more
    *   Remaining series, supplied as a scala or java array
    *
    * @return
    *   [[org.polars.scala.polars.api.DataFrame]] built from the supplied
    *   [[org.polars.scala.polars.api.Series]]
    */
  def fromSeries(series: Series, more: Array[Series]): DataFrame = {
    // Only the native handles of the series are passed to the JNI layer.
    val handles = (series +: more).map(_.ptr)
    DataFrame.withPtr(data_frame.fromSeries(handles))
  }

  /** Create a [[org.polars.scala.polars.api.DataFrame]] whose columns are the given
    * [[org.polars.scala.polars.api.Series]]. Each series contributes one column: the series
    * name becomes the column name and the series values become the column values.
    *
    * @param series
    *   First series
    * @param more
    *   Remaining series, supplied as a scala iterable
    *
    * @return
    *   [[org.polars.scala.polars.api.DataFrame]] built from the supplied
    *   [[org.polars.scala.polars.api.Series]]
    */
  def fromSeries(series: Series, more: Iterable[Series]): DataFrame = {
    val asArray = more.toArray
    fromSeries(series, asArray)
  }

  /** Create a [[org.polars.scala.polars.api.DataFrame]] whose columns are the given
    * [[org.polars.scala.polars.api.Series]]. Each series contributes one column: the series
    * name becomes the column name and the series values become the column values.
    *
    * @param series
    *   First series
    * @param more
    *   Remaining series, supplied as a java iterable
    *
    * @return
    *   [[org.polars.scala.polars.api.DataFrame]] built from the supplied
    *   [[org.polars.scala.polars.api.Series]]
    */
  def fromSeries(series: Series, more: java.lang.Iterable[Series]): DataFrame = {
    val asScalaIterable = more.asScala
    fromSeries(series, asScalaIterable)
  }

  /** Create a [[org.polars.scala.polars.api.DataFrame]] whose columns are the given
    * [[org.polars.scala.polars.api.Series]]. Each series contributes one column: the series
    * name becomes the column name and the series values become the column values.
    *
    * @param series
    *   First series
    * @param more
    *   Remaining series, supplied as scala varargs
    *
    * @return
    *   [[org.polars.scala.polars.api.DataFrame]] built from the supplied
    *   [[org.polars.scala.polars.api.Series]]
    */
  def fromSeries(series: Series, more: Series*): DataFrame =
    // `more` is a Seq here, so this call resolves to the scala-iterable overload.
    fromSeries(series, more)

}

core/src/main/scala/org/polars/scala/polars/api/JSeries.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
class JSeries {
1717
final static String EmptyString = "";
1818

19+
@SuppressWarnings({ "unchecked", "rawtypes" })
1920
static Series ofList(String name, Iterable<Iterable> values) {
2021
Iterator<Iterable> valuesIter = values.iterator();
2122
List<Series> sList = new ArrayList<>();

core/src/main/scala/org/polars/scala/polars/api/LazyFrame.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,5 +262,5 @@ class LazyFrame private (private[polars] val ptr: Long) {
262262

263263
object LazyFrame {
264264

265-
def withPtr(ptr: Long) = new LazyFrame(ptr)
265+
private[polars] def withPtr(ptr: Long) = new LazyFrame(ptr)
266266
}

core/src/main/scala/org/polars/scala/polars/api/Series.scala

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -469,45 +469,45 @@ object Series {
469469
* @param name
470470
* Name of Series
471471
* @param values
472-
* Values of Series as a scala or java array
472+
* Values of Series as a java iterable
473473
*
474474
* @return
475475
* Nested Series of provided type. If `values` is empty, empty series is returned retaining
476476
* type. Nested collections in `values` must not be empty or this will result in
477477
* [[java.lang.ArrayIndexOutOfBoundsException]].
478478
*/
479-
def ofList[T](name: String, values: Array[Array[T]]): Series =
480-
Series.ofList(name, values.map(_.toSeq).toSeq)
479+
def ofList(name: String, values: java.lang.Iterable[java.lang.Iterable[_]]): Series =
480+
JSeries.ofList(name, values)
481481

482482
/** Initialize new nested series by name and values of provided type.
483483
*
484484
* @param name
485485
* Name of Series
486486
* @param values
487-
* Values of Series as a java iterable
487+
* Values of Series as a scala iterable
488488
*
489489
* @return
490490
* Nested Series of provided type. If `values` is empty, empty series is returned retaining
491491
* type. Nested collections in `values` must not be empty or this will result in
492492
* [[java.lang.ArrayIndexOutOfBoundsException]].
493493
*/
494-
def ofList(name: String, values: java.lang.Iterable[java.lang.Iterable[_]]): Series =
495-
JSeries.ofList(name, values)
494+
def ofList(name: String, values: Iterable[Iterable[_]]): Series =
495+
Series.ofList(name, values.map(_.asJava.asInstanceOf[java.lang.Iterable[_]]).asJava)
496496

497497
/** Initialize new nested series by name and values of provided type.
498498
*
499499
* @param name
500500
* Name of Series
501501
* @param values
502-
* Values of Series as a scala iterable
502+
* Values of Series as a scala or java array
503503
*
504504
* @return
505505
* Nested Series of provided type. If `values` is empty, empty series is returned retaining
506506
* type. Nested collections in `values` must not be empty or this will result in
507507
* [[java.lang.ArrayIndexOutOfBoundsException]].
508508
*/
509-
def ofList(name: String, values: Iterable[Iterable[_]]): Series =
510-
Series.ofList(name, values.map(_.asJava.asInstanceOf[java.lang.Iterable[_]]).asJava)
509+
def ofList[T](name: String, values: Array[Array[T]]): Series =
510+
Series.ofList(name, values.map(_.toSeq).toSeq)
511511

512-
def withPtr(ptr: Long) = new Series(ptr)
512+
private[polars] def withPtr(ptr: Long) = new Series(ptr)
513513
}

core/src/main/scala/org/polars/scala/polars/internal/jni/data_frame.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,6 @@ private[polars] object data_frame extends Natively {
1616

1717
@native def tail(ptr: Long, n: Long): Long
1818

19+
@native def fromSeries(ptrs: Array[Long]): Long
20+
1921
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package examples.java;
2+
3+
import java.util.Arrays;
4+
import org.polars.scala.polars.api.DataFrame;
5+
import org.polars.scala.polars.api.Series;
6+
7+
public class InstantiateDataFrame {
8+
public static void main(String[] args) {
9+
10+
/* Values as Java array(s) */
11+
12+
DataFrame.fromSeries(
13+
Series.ofInt("i32_col", new int[] {1, 2, 3}),
14+
new Series[] {
15+
Series.ofLong("i64_col", new long[] {1L, 2L, 3L}),
16+
Series.ofBoolean("bool_col", new boolean[] {true, false, true}),
17+
Series.ofList(
18+
"nested_str_col",
19+
new String[][] {{"a", "b", "c"}, {"a", "b", "c"}, {"a", "b", "c"}})
20+
})
21+
.show();
22+
23+
DataFrame.fromSeries(
24+
Series.ofInt("i32_col", new Integer[] {1, 2, 3}),
25+
new Series[] {
26+
Series.ofLong("i64_col", new Long[] {1L, 2L, 3L}),
27+
Series.ofBoolean("bool_col", new Boolean[] {true, false, true}),
28+
Series.ofFloat("f32_col", new Float[] {1F, 2F, 3F})
29+
})
30+
.show();
31+
32+
/* Values as Java lists(s) */
33+
34+
DataFrame.fromSeries(
35+
Series.ofInt("i32_col", Arrays.asList(1, 2, 3)),
36+
new Series[] {
37+
Series.ofLong("i64_col", Arrays.asList(1L, 2L, 3L)),
38+
Series.ofBoolean("bool_col", Arrays.asList(true, false, true)),
39+
Series.ofFloat("f32_col", Arrays.asList(1F, 2F, 3F))
40+
})
41+
.show();
42+
43+
/* Values as a mix of Java lists(s) and array(s) */
44+
45+
DataFrame.fromSeries(
46+
Series.ofInt("i32_col", Arrays.asList(1, 2, 3)),
47+
new Series[] {
48+
Series.ofLong("i64_col", new Long[] {1L, 2L, 3L}),
49+
Series.ofBoolean("bool_col", new Boolean[] {true, false, true}),
50+
Series.ofFloat("f32_col", Arrays.asList(1F, 2F, 3F))
51+
})
52+
.show();
53+
}
54+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
package examples.scala
2+
3+
import org.polars.scala.polars.api.{DataFrame, Series}
4+
5+
/** Example: building DataFrames from Series created out of Scala arrays and sequences. */
object InstantiateDataFrame {

  def main(args: Array[String]): Unit = {
    /* A single series is enough to form a DataFrame */
    val onlyBool = Series.ofBoolean("bool_col", Array[Boolean](true, false, true))
    DataFrame.fromSeries(onlyBool).show()

    /* Several series passed as varargs */
    val varargsFrame = DataFrame.fromSeries(
      Series.ofInt("i32_col", Array[Int](1, 2, 3)),
      Series.ofLong("i64_col", Array[Long](1L, 2L, 3L)),
      Series.ofBoolean("bool_col", Array[Boolean](true, false, true)),
      Series.ofList(
        "nested_str_col",
        Array[Array[String]](Array("a", "b", "c"), Array("a", "b", "c"), Array("a", "b", "c"))
      )
    )
    varargsFrame.show()

    /* Values as Scala array(s) */
    val arrayFirst = Series.ofInt("i32_col", Array[Int](1, 2, 3))
    val arrayRest = Array[Series](
      Series.ofLong("i64_col", Array[Long](1L, 2L, 3L)),
      Series.ofBoolean("bool_col", Array[Boolean](true, false, true)),
      Series.ofList(
        "nested_str_col",
        Array[Array[String]](Array("a", "b", "c"), Array("a", "b", "c"), Array("a", "b", "c"))
      )
    )
    DataFrame.fromSeries(arrayFirst, arrayRest).show()

    /* Values as Scala sequence(s) */
    val seqFirst = Series.ofInt("i32_col", Seq(1, 2, 3))
    val seqRest = Array[Series](
      Series.ofLong("i64_col", Seq(1L, 2L, 3L)),
      Series.ofBoolean("bool_col", Seq(true, false, true)),
      Series.ofFloat("f32_col", Seq(1f, 2f, 3f))
    )
    DataFrame.fromSeries(seqFirst, seqRest).show()

    /* Values as a mix of Scala sequence(s) and array(s) */
    val mixedFirst = Series.ofInt("i32_col", Seq(1, 2, 3))
    val mixedRest = Array[Series](
      Series.ofLong("i64_col", Array[Long](1L, 2L, 3L)),
      Series.ofBoolean("bool_col", Array[Boolean](true, false, true)),
      Series.ofFloat("f32_col", Seq(1f, 2f, 3f))
    )
    DataFrame.fromSeries(mixedFirst, mixedRest).show()
  }

}

native/src/internal_jni/frame.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use polars_core::utils::concat_df;
1111

1212
use crate::internal_jni::utils::*;
1313
use crate::j_data_frame::JDataFrame;
14+
use crate::j_series::JSeries;
1415

1516
#[jni_fn("org.polars.scala.polars.internal.jni.data_frame$")]
1617
pub fn schemaString(mut _env: JNIEnv, _object: JObject, ldf_ptr: jlong) -> jstring {
@@ -76,3 +77,22 @@ pub fn tail(mut env: JNIEnv, object: JObject, ptr: jlong, n: jlong) -> jlong {
7677

7778
j_df.tail(&mut env, object, n as usize)
7879
}
80+
81+
#[jni_fn("org.polars.scala.polars.internal.jni.data_frame$")]
82+
pub fn fromSeries(mut env: JNIEnv, callback_obj: JObject, ptrs: JLongArray) -> jlong {
83+
let arr = unsafe { env.get_array_elements(&ptrs, NoCopyBack).unwrap() };
84+
let data: Vec<Series> = unsafe {
85+
std::slice::from_raw_parts(arr.as_ptr(), arr.len())
86+
.to_vec()
87+
.iter()
88+
.map(|p| p.to_i64().unwrap())
89+
.map(|ptr| {
90+
let j_series = &mut *(ptr as *mut JSeries);
91+
j_series.to_owned().series
92+
})
93+
.collect()
94+
};
95+
96+
let df = DataFrame::new(data);
97+
df_to_ptr(&mut env, callback_obj, df)
98+
}

0 commit comments

Comments
 (0)