diff --git a/build.sbt b/build.sbt index 9948954..be0709d 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ name := "spark-json-schema" -version in ThisBuild := "0.6.3" +version in ThisBuild := "0.6.4" organization := "org.zalando" scalaVersion := "2.12.10" diff --git a/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala b/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala index 909d42e..cba925e 100644 --- a/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala +++ b/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala @@ -20,7 +20,7 @@ import scala.io.Source * given in the dataset. * */ -case class SchemaType(typeName: String, nullable: Boolean) +case class SchemaType(typeName: String, nullable: Boolean, precision: Option[Int] = None, range: Option[Int] = None) private case class NullableDataType(dataType: DataType, nullable: Boolean) object SchemaConverter { @@ -33,15 +33,25 @@ object SchemaConverter { val SchemaRoot = "/" val Definitions = "definitions" val Reference = "$ref" + val Decimal = "decimal" + val Precision = "precision" + val Range = "range" val TypeMap = Map( "string" -> StringType, "number" -> DoubleType, "float" -> FloatType, "integer" -> LongType, "boolean" -> BooleanType, + "decimal" -> DecimalType, + "timestamp" -> DataTypes.TimestampType, "object" -> StructType, "array" -> ArrayType ) + object DecimalNames { + val Decimal = "decimal" + val Precision = "precision" + val Range = "range" + } var definitions: JsObject = JsObject(Seq.empty) private var isStrictTypingEnabled: Boolean = true @@ -85,21 +95,35 @@ object SchemaConverter { def getJsonId(json: JsValue): Option[String] = (json \ SchemaFieldId).asOpt[String] + def getDecimal(json: JsValue, nullable: Boolean): SchemaType = { + ((json \ DecimalNames.Precision).toOption, (json \ DecimalNames.Range).toOption) match { + case (Some(prec), Some(range)) => + SchemaType(DecimalNames.Decimal, nullable, Some(prec.as[Int]), Some(range.as[Int])) 
+ case (None, None) => SchemaType(DecimalNames.Decimal, nullable) + case _ => throw new IllegalArgumentException("decimal type needs either both precision and range or none of them") + } + } + + def getSimpleType(json: JsValue, typeName: String, nullable: Boolean): SchemaType = { + if (typeName == DecimalNames.Decimal) getDecimal(json, nullable) + else SchemaType(typeName, nullable) + } + def getJsonType(json: JsObject, name: String): SchemaType = { val id = getJsonId(json).getOrElse(name) (json \ SchemaFieldType).getOrElse(JsNull) match { - case JsString(s) => SchemaType(s, nullable = false) + case JsString(s) => getSimpleType(json, s, nullable = false) case JsArray(array) => val nullable = array.contains(JsString("null")) array.size match { case 1 if nullable => throw new IllegalArgumentException("Null type only is not supported") case 1 => - SchemaType(array.apply(0).as[String], nullable = nullable) + getSimpleType(json, array.apply(0).as[String], nullable = nullable) case 2 if nullable => array.find(_ != JsString("null")) - .map(i => SchemaType(i.as[String], nullable = nullable)) + .map(i => getSimpleType(json, i.as[String], nullable = nullable)) .getOrElse { throw new IllegalArgumentException( s"Incorrect definition of a nullable parameter at <$id>" @@ -178,8 +202,17 @@ object SchemaConverter { private def getFieldType(json: JsObject, name: String): NullableDataType = { val fieldType = getJsonType(json, name) + assert( + TypeMap.keySet.contains(fieldType.typeName), + s"Unknown field type {${fieldType.typeName}}, possible values are: ${TypeMap.keySet}" + ) TypeMap(fieldType.typeName) match { + case DecimalType => (fieldType.precision, fieldType.range) match { + case (Some(prec), Some(range)) => NullableDataType(DataTypes.createDecimalType(prec, range), fieldType.nullable) + case _ => NullableDataType(DataTypes.createDecimalType(), fieldType.nullable) + } + case dataType: DataType => NullableDataType(dataType, fieldType.nullable) diff --git 
a/src/test/resources/testJsonSchema.json b/src/test/resources/testJsonSchema.json index b48c51a..d64582b 100644 --- a/src/test/resources/testJsonSchema.json +++ b/src/test/resources/testJsonSchema.json @@ -153,6 +153,38 @@ "description": "to do", "name": "boolean" }, + "decimal": { + "id": "testSchema/decimal", + "type": "decimal", + "title": "Test decimal schema.", + "description": "to do", + "name": "decimal", + "precision": 38, + "range": 18 + }, + "decimal_default": { + "id": "testSchema/decimal_default", + "type": "decimal", + "title": "Test decimal schema.", + "description": "to do", + "name": "decimal_default" + }, + "decimal_nullable": { + "id": "testSchema/decimal_nullable", + "type": ["decimal", "null"], + "title": "Test decimal schema.", + "description": "to do", + "name": "decimal_nullable", + "precision": 38, + "range": 18 + }, + "timestamp": { + "id": "testSchema/timestamp", + "type": "timestamp", + "title": "Test timestamp schema.", + "description": "to do", + "name": "timestamp" + }, "additionalProperty": { "id": "testSchema/additionalProperty", "type": "string", diff --git a/src/test/resources/testJsonSchema3.json b/src/test/resources/testJsonSchema3.json index b434a9a..003f36b 100644 --- a/src/test/resources/testJsonSchema3.json +++ b/src/test/resources/testJsonSchema3.json @@ -77,6 +77,22 @@ "boolean": { "type": "boolean" }, + "decimal": { + "type": "decimal", + "precision": 38, + "range": 18 + }, + "decimal_default": { + "type": "decimal" + }, + "decimal_nullable": { + "type": ["decimal", "null"], + "precision": 38, + "range": 18 + }, + "timestamp": { + "type": "timestamp" + }, "additionalProperty": { "type": "string" } diff --git a/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala b/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala index 418ad15..701d6d9 100644 --- a/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala +++
b/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala @@ -30,6 +30,10 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { StructField("float", FloatType, nullable = false), StructField("nullable", DoubleType, nullable = true), StructField("boolean", BooleanType, nullable = false), + StructField("decimal", DecimalType(38, 18), nullable = false), + StructField("decimal_default", DecimalType(10, 0), nullable = false), + StructField("decimal_nullable", DecimalType(38, 18), nullable = true), + StructField("timestamp", DataTypes.TimestampType, nullable = false), StructField("additionalProperty", StringType, nullable = false) )) @@ -435,7 +439,7 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { } test("null type only should fail") { - assertThrows[NoSuchElementException] { + assertThrows[AssertionError] { val schema = SchemaConverter.convertContent( """ { @@ -470,4 +474,37 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { } } + test("decimal type with only one of precision or range should fail") { + assertThrows[IllegalArgumentException] { + val schema = SchemaConverter.convertContent( + """ + { + "type": "object", + "properties": { + "decimal": { + "type": "decimal", + "range": 18 + } + } + } + """ + ) + } + assertThrows[IllegalArgumentException] { + val schema = SchemaConverter.convertContent( + """ + { + "type": "object", + "properties": { + "decimal": { + "type": "decimal", + "precision": 38 + } + } + } + """ + ) + } + } + }