Skip to content

Commit 6e5badc

Browse files
authored
Update dependencies and support more data types (chitralverma#105)
* Update dependencies * fix schema issues * update cargo.lock
1 parent 4cf4665 commit 6e5badc

File tree

16 files changed

+532
-346
lines changed

16 files changed

+532
-346
lines changed

.scalafmt.conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
version = 3.8.0
1+
version = 3.8.1
22
runner.dialect = scala213
33
project.git = true
44
maxColumn = 98

core/src/main/scala/org/polars/scala/polars/api/DataFrame.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.polars.scala.polars.api
22

3-
import java.util
43
import java.util.Collections
54

65
import scala.annotation.varargs

core/src/main/scala/org/polars/scala/polars/api/types/DataTypes.scala

Lines changed: 67 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import java.time.ZoneId
44
import java.util.Locale
55
import java.util.concurrent.TimeUnit
66

7+
import scala.util.Try
78
import scala.util.matching.Regex
89

910
trait DataType {
@@ -31,10 +32,72 @@ case object DoubleType extends BasicDataType
3132

3233
case object DateType extends BasicDataType
3334

34-
// todo: validate the timeunit and timezone and re-enable this later
35-
//case class DateTimeType(precision: TimeUnit, timezone: ZoneId) extends DataType
35+
/**
 * Time data type carrying an optional precision.
 *
 * The precision is derived from `unitStr` by a case-insensitive substring check for "nano",
 * "micro" or "milli" (in that order). Any other value, including `null`, yields no precision.
 *
 * @param unitStr
 *   textual description of the time unit; may be `null` when the unit is unknown.
 */
case class TimeType(protected val unitStr: String) extends DataType {

  /** Parsed precision, or `None` when `unitStr` is `null` or not recognised. */
  val timeUnit: Option[TimeUnit] =
    // Option(...) guards against a null unit (e.g. a non-textual JSON node upstream),
    // which previously caused a NullPointerException during construction.
    Option(unitStr).map(_.toLowerCase(Locale.ROOT)).flatMap {
      case s if s.contains("nano") => Some(TimeUnit.NANOSECONDS)
      case s if s.contains("micro") => Some(TimeUnit.MICROSECONDS)
      case s if s.contains("milli") => Some(TimeUnit.MILLISECONDS)
      case _ => None
    }

  /** Returns `time[ns]`, `time[us]` or `time[ms]`, or plain `time` when no precision is known. */
  override def simpleName: String = timeUnit match {
    case Some(TimeUnit.NANOSECONDS) => "time[ns]"
    case Some(TimeUnit.MICROSECONDS) => "time[us]"
    case Some(TimeUnit.MILLISECONDS) => "time[ms]"
    case _ => "time"
  }
}
51+
52+
/**
 * Date-time data type with an optional precision and an optional time zone.
 *
 * @param unitStr
 *   textual description of the time unit; `null` or an unrecognised value means no precision.
 * @param tzStr
 *   zone identifier handed to `ZoneId.of`; a `null` or unparsable value means no time zone.
 */
case class DateTimeType(protected val unitStr: String, protected val tzStr: String)
    extends DataType {

  /** Precision parsed from `unitStr` by case-insensitive substring match, if any. */
  val timeUnit: Option[TimeUnit] = Option(unitStr).flatMap { raw =>
    val unit = raw.toLowerCase(Locale.ROOT)
    if (unit.contains("nano")) Some(TimeUnit.NANOSECONDS)
    else if (unit.contains("micro")) Some(TimeUnit.MICROSECONDS)
    else if (unit.contains("milli")) Some(TimeUnit.MILLISECONDS)
    else None
  }

  /** Time zone parsed from `tzStr`; any failure raised by `ZoneId.of` collapses to `None`. */
  val timeZone: Option[ZoneId] = Try(ZoneId.of(tzStr)).toOption

  /**
   * Returns `datetime`, `datetime[<unit>]`, `datetime[<zone>]` or `datetime[<unit>, <zone>]`,
   * depending on which of the two optional parts are available.
   */
  override def simpleName: String = {
    val unitLabel: Option[String] = timeUnit.collect {
      case TimeUnit.NANOSECONDS => "ns"
      case TimeUnit.MICROSECONDS => "us"
      case TimeUnit.MILLISECONDS => "ms"
    }
    val zoneLabel: Option[String] = timeZone.map(_.toString)

    // Unit (when present) always precedes the zone inside the brackets.
    (unitLabel.toList ++ zoneLabel) match {
      case Nil => "datetime"
      case parts => parts.mkString("datetime[", ", ", "]")
    }
  }
}
84+
85+
/**
 * Duration data type carrying an optional precision.
 *
 * The precision is derived from `unitStr` by a case-insensitive substring check for "nano",
 * "micro" or "milli" (in that order). Any other value, including `null`, yields no precision.
 *
 * @param unitStr
 *   textual description of the time unit; may be `null` when the unit is unknown.
 */
case class Duration(protected val unitStr: String) extends DataType {

  /** Parsed precision, or `None` when `unitStr` is `null` or not recognised. */
  val timeUnit: Option[TimeUnit] =
    // Option(...) guards against a null unit (e.g. a non-textual JSON node upstream),
    // which previously caused a NullPointerException during construction.
    Option(unitStr).map(_.toLowerCase(Locale.ROOT)).flatMap {
      case s if s.contains("nano") => Some(TimeUnit.NANOSECONDS)
      case s if s.contains("micro") => Some(TimeUnit.MICROSECONDS)
      case s if s.contains("milli") => Some(TimeUnit.MILLISECONDS)
      case _ => None
    }

  /**
   * Returns `duration[ns]`, `duration[us]` or `duration[ms]`, or plain `duration` when no
   * precision is known.
   */
  override def simpleName: String = timeUnit match {
    case Some(TimeUnit.NANOSECONDS) => "duration[ns]"
    case Some(TimeUnit.MICROSECONDS) => "duration[us]"
    case Some(TimeUnit.MILLISECONDS) => "duration[ms]"
    case _ => "duration"
  }
}
38101

39102
case class ListType(tpe: DataType) extends DataType {
40103
override def simpleName: String = "list"
@@ -63,7 +126,7 @@ object DataType {
63126
private[polars] final val LongRegex: Regex = """^(?i)Int64|UInt64$""".r
64127
private[polars] final val FloatRegex: Regex = """^(?i)Float32$""".r
65128
private[polars] final val DoubleRegex: Regex = """^(?i)Float64$""".r
66-
private[polars] final val DateRegex: Regex = """^(?i)Date$""".r
129+
private[polars] final val DateRegex: Regex = """^(?i)Date|Date32|Date64$""".r
67130

68131
def fromBasicType(typeStr: String): DataType = typeStr match {
69132
case StringRegex() => StringType

core/src/main/scala/org/polars/scala/polars/api/types/Schema.scala

Lines changed: 46 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
package org.polars.scala.polars.api.types
22

3-
import java.time.ZoneId
4-
import java.util.Locale
5-
import java.util.concurrent.TimeUnit
6-
73
import scala.jdk.CollectionConverters._
84
import scala.util.Try
95

@@ -52,40 +48,54 @@ class Schema private (private[polars] val json: String) {
5248
case (name, node, _ @JsonNodeType.STRING) =>
5349
Field(name, DataType.fromBasicType(node.textValue()))
5450

51+
// For Time Type
52+
case (name, node, _ @JsonNodeType.OBJECT)
53+
if node.hasNonNull("Time") || node.hasNonNull("Time32") || node.hasNonNull("Time64") =>
54+
Seq(node.get("Time"), node.get("Time32"), node.get("Time64"))
55+
.map(Option(_))
56+
.collectFirst { case Some(v) => v } match {
57+
case Some(timeUnit) => Field(name, TimeType(timeUnit.textValue()))
58+
59+
case None =>
60+
throw new IllegalArgumentException("Invalid time cannot be parsed.")
61+
}
62+
63+
// For Duration Type
64+
case (name, node, _ @JsonNodeType.OBJECT) if node.hasNonNull("Duration") =>
65+
val timeUnit = node.get("Duration")
66+
Field(name, Duration(timeUnit.textValue()))
67+
5568
// For DateTime Type
56-
case (name, node, _ @JsonNodeType.OBJECT) if node.has("Datetime") =>
57-
// todo: validate the timeunit and timezone and re-enable this later
58-
59-
// val dtNode = node.get("Datetime")
60-
//
61-
// val (tu, tz) = dtNode.iterator().asScala.map(_.textValue()).toSeq match {
62-
// case Seq(null, null) =>
63-
// (TimeUnit.MICROSECONDS, ZoneId.of("UTC"))
64-
// case Seq(null, tz) if tz.nonEmpty =>
65-
// (TimeUnit.MICROSECONDS, ZoneId.of(tz))
66-
// case Seq(tu, null) if tu.nonEmpty =>
67-
// (TimeUnit.valueOf(tu.toUpperCase(Locale.ROOT)), ZoneId.of("UTC"))
68-
// case Seq(tu, tz) if tu.nonEmpty && tz.nonEmpty =>
69-
// (TimeUnit.valueOf(tu.toUpperCase(Locale.ROOT)), ZoneId.of(tz))
70-
// case _ =>
71-
// (TimeUnit.MICROSECONDS, ZoneId.of("UTC"))
72-
// }
73-
74-
Field(name, DateTimeType)
69+
case (name, node, _ @JsonNodeType.OBJECT) if node.hasNonNull("Timestamp") =>
70+
node.get("Timestamp").elements().asScala.map(_.asText(null)).toSeq match {
71+
case Seq(tu, tz) =>
72+
Field(name, DateTimeType(tu, tz))
73+
case _ =>
74+
Field(name, DateTimeType(null, null))
75+
}
7576

7677
// For (Nested) List Type
77-
case (name, node, _ @JsonNodeType.OBJECT) if node.has("List") =>
78-
val listNode = node.get("List")
79-
Field(name, ListType(toField((name, listNode, listNode.getNodeType)).dataType))
78+
case (name, node, _ @JsonNodeType.OBJECT)
79+
if node.hasNonNull("List") || node.hasNonNull("LargeList") =>
80+
Seq(node.get("List"), node.get("LargeList"))
81+
.map(Option(_))
82+
.collectFirst { case Some(v) => v } match {
83+
case Some(listNode) =>
84+
val listNodeType = listNode.get("data_type")
85+
Field(name, ListType(toField((name, listNodeType, listNodeType.getNodeType)).dataType))
86+
87+
case None =>
88+
throw new IllegalArgumentException("Invalid list cannot be parsed as a JSON.")
89+
}
8090

8191
// For (Nested) Struct Type
8292
case (name, node, _ @JsonNodeType.OBJECT) if node.has("Struct") =>
8393
val structNode = node.get("Struct")
8494
val structFields = structNode.iterator().asScala
8595
val sf = structFields.map {
86-
case node: JsonNode if node.fieldNames().asScala.toSet == Set("name", "dtype") =>
96+
case node: JsonNode if node.hasNonNull("name") && node.hasNonNull("data_type") =>
8797
val structFieldName: String = node.get("name").textValue()
88-
val structFieldType: JsonNode = node.get("dtype")
98+
val structFieldType: JsonNode = node.get("data_type")
8999

90100
Field(
91101
structFieldName,
@@ -106,10 +116,14 @@ class Schema private (private[polars] val json: String) {
106116
case None =>
107117
throw new IllegalArgumentException("Provided schema string cannot be parsed as a JSON.")
108118

109-
case Some(node: JsonNode) if node.has("inner") =>
110-
val fields = node.get("inner").fields().asScala
111-
_fields = fields.map(f => toField(f.getKey, f.getValue, f.getValue.getNodeType)).toArray
112-
_fieldNames = node.fieldNames().asScala.toArray
119+
// Schema root with a non-null "fields" array: each element supplies a "name" and a "data_type".
case Some(node: JsonNode) if node.hasNonNull("fields") =>
  val fields = node.get("fields").elements().asScala.toList
  _fields = fields
    .map(f =>
      toField(f.get("name").textValue(), f.get("data_type"), f.get("data_type").getNodeType)
    )
    .toArray
  // Use textValue() rather than toString: toString on a JSON text node yields the quoted JSON
  // form (e.g. "\"col\""), which would not match the names stored in `_fields` above.
  _fieldNames = fields.map(f => f.get("name").textValue()).toArray
113127

114128
case _ =>
115129
throw new IllegalArgumentException("Provided schema string is an invalid JSON.")
10.1 KB
Binary file not shown.

0 commit comments

Comments
 (0)