diff --git a/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala b/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala index 909d42e..616b902 100644 --- a/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala +++ b/src/main/scala/org/zalando/spark/jsonschema/SchemaConverter.scala @@ -14,7 +14,7 @@ import scala.io.Source * to the type given in the schema. If this is not possible the whole row will be null (!). * A field can be null if its type is a 2-element array, one of which is "null". The converted * schema doesn't check for 'enum' fields, i.e. fields which are limited to a given set. - * It also doesn't check for required fields or if additional properties are set to true + * It also doesn't check for additional properties are set to true * or false. If a field is specified in the schema, than you can select it and it will * be null if missing. If a field is not in the schema, it cannot be selected even if * given in the dataset. @@ -24,7 +24,6 @@ case class SchemaType(typeName: String, nullable: Boolean) private case class NullableDataType(dataType: DataType, nullable: Boolean) object SchemaConverter { - val SchemaFieldName = "name" val SchemaFieldType = "type" val SchemaFieldId = "id" @@ -33,6 +32,7 @@ object SchemaConverter { val SchemaRoot = "/" val Definitions = "definitions" val Reference = "$ref" + val Required = "required" val TypeMap = Map( "string" -> StringType, "number" -> DoubleType, @@ -44,35 +44,31 @@ object SchemaConverter { ) var definitions: JsObject = JsObject(Seq.empty) private var isStrictTypingEnabled: Boolean = true - def disableStrictTyping(): SchemaConverter.type = { setStrictTyping(false) } - def enableStrictTyping(): SchemaConverter.type = { setStrictTyping(true) } - private def setStrictTyping(b: Boolean) = { isStrictTypingEnabled = b this } - def convertContent(schemaContent: String): StructType = convert(parseSchemaJson(schemaContent)) - def convert(inputPath: String): StructType = convert(loadSchemaJson(inputPath)) - def convert(inputSchema: JsObject): StructType = { definitions = (inputSchema \ Definitions).asOpt[JsObject].getOrElse(definitions) val name = getJsonName(inputSchema).getOrElse(SchemaRoot) val typeName = getJsonType(inputSchema, name).typeName if (name == SchemaRoot && typeName == "object") { - val properties = (inputSchema \ SchemaStructContents).asOpt[JsObject].getOrElse( + //TODO validation do something with this + (inputSchema \ SchemaStructContents).asOpt[JsObject].getOrElse( throw new NoSuchElementException( s"Root level of schema needs to have a [$SchemaStructContents]-field" ) ) - convertJsonStruct(new StructType, properties, properties.keys.toList) + //End validation do something with this + convertJsonStruct(new StructType, inputSchema) } else { throw new IllegalArgumentException( s"schema needs root level called <$SchemaRoot> and root type . " + @@ -80,14 +76,10 @@ object SchemaConverter { ) } } - def getJsonName(json: JsValue): Option[String] = (json \ SchemaFieldName).asOpt[String] - def getJsonId(json: JsValue): Option[String] = (json \ SchemaFieldId).asOpt[String] - def getJsonType(json: JsObject, name: String): SchemaType = { val id = getJsonId(json).getOrElse(name) - (json \ SchemaFieldType).getOrElse(JsNull) match { case JsString(s) => SchemaType(s, nullable = false) case JsArray(array) => @@ -96,7 +88,7 @@ object SchemaConverter { case 1 if nullable => throw new IllegalArgumentException("Null type only is not supported") case 1 => - SchemaType(array.apply(0).as[String], nullable = nullable) + SchemaType(array.head.as[String], nullable = nullable) case 2 if nullable => array.find(_ != JsString("null")) .map(i => SchemaType(i.as[String], nullable = nullable)) @@ -119,33 +111,38 @@ object SchemaConverter { ) } } - - private def parseSchemaJson(schemaContent: String) = Json.parse(schemaContent).as[JsObject] - + private def parseSchemaJson(schemaContent: String): JsObject = Json.parse(schemaContent).as[JsObject] def loadSchemaJson(filePath: String): JsObject = { Option(getClass.getResource(filePath)) match { case Some(relPath) => parseSchemaJson(Source.fromURL(relPath).mkString) case None => throw new IllegalArgumentException(s"Path can not be reached: $filePath") } } - - @tailrec - private def convertJsonStruct(schema: StructType, json: JsObject, jsonKeys: List[String]): StructType = { - jsonKeys match { - case Nil => schema - case head :: tail => - val enrichedSchema = addJsonField(schema, (json \ head).as[JsObject], head) - convertJsonStruct(enrichedSchema, json, tail) + private def convertJsonStruct(schema: StructType, objectDefinition: JsObject): StructType = { + val properties = (JsPath \ SchemaStructContents).asSingleJson(objectDefinition) match { + case JsDefined(v) => v.as[JsObject] + case _: JsUndefined => JsObject(Seq.empty) + } + val requiredProperies: Seq[String] = ((JsPath \ Required).asSingleJson(objectDefinition) match { + case JsDefined(v) => v.as[JsArray] + case _: JsUndefined => JsArray(Seq.empty) + }).as[Seq[String]] + properties.keys.toList.foldLeft(schema) { + (seedSchema, key) => + addJsonField( + seedSchema, + (properties \ key).as[JsObject], + key, + requiredProperies.exists(k => k.equals(key)) + ) } } - def traversePath(loc: List[String], path: JsPath): JsPath = { loc match { case head :: tail => traversePath(tail, path \ head) case Nil => path } } - private def checkRefs(inputJson: JsObject): JsObject = { val schemaRef = (inputJson \ Reference).asOpt[JsString] schemaRef match { @@ -167,42 +164,28 @@ object SchemaConverter { case None => inputJson } } - - private def addJsonField(schema: StructType, inputJson: JsObject, name: String): StructType = { - + private def addJsonField(schema: StructType, inputJson: JsObject, name: String, isRequired: Boolean): StructType = { val json = checkRefs(inputJson) val fieldType = getFieldType(json, name) - - schema.add(getJsonName(json).getOrElse(name), fieldType.dataType, nullable = fieldType.nullable) + schema.add(getJsonName(json).getOrElse(name), fieldType.dataType, nullable = fieldType.nullable || (!isRequired)) } - private def getFieldType(json: JsObject, name: String): NullableDataType = { val fieldType = getJsonType(json, name) TypeMap(fieldType.typeName) match { - case dataType: DataType => NullableDataType(dataType, fieldType.nullable) - case ArrayType => val innerJson = checkRefs((json \ SchemaArrayContents).as[JsObject]) val innerJsonType = getFieldType(innerJson, "") val dataType = ArrayType(innerJsonType.dataType, innerJsonType.nullable) NullableDataType(dataType, fieldType.nullable) - case StructType => - val dataType = getDataType(json, JsPath \ SchemaStructContents) + val dataType = getDataType(json) NullableDataType(dataType, fieldType.nullable) } } - - private def getDataType(inputJson: JsObject, contentPath: JsPath): DataType = { + private def getDataType(inputJson: JsObject): DataType = { val json = checkRefs(inputJson) - - val content = contentPath.asSingleJson(json) match { - case JsDefined(v) => v.as[JsObject] - case _: JsUndefined => JsObject(Seq.empty) - } - - convertJsonStruct(new StructType, content, content.keys.toList) + convertJsonStruct(new StructType, inputJson) } } diff --git a/src/test/resources/testJsonSchema4.json b/src/test/resources/testJsonSchema4.json index a2f2913..8894e4f 100644 --- a/src/test/resources/testJsonSchema4.json +++ b/src/test/resources/testJsonSchema4.json @@ -1,6 +1,7 @@ { "$schema": "smallTestSchema", "type": "object", + "required": ["name", "addressA"], "properties": { "name": { "type": "string" diff --git a/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala b/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala index 2c99a7a..5aa9581 100644 --- a/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala +++ b/src/test/scala/org/zalando/spark/jsonschema/SchemaConverterTest.scala @@ -15,22 +15,22 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { StructField("item2", StringType, nullable = false) )), nullable = false), StructField("array", ArrayType(StructType(Array( - StructField("itemProperty1", StringType, nullable = false), - StructField("itemProperty2", DoubleType, nullable = false) + StructField("itemProperty1", StringType, nullable = true), + StructField("itemProperty2", DoubleType, nullable = true) )), containsNull = false), nullable = false), StructField("structure", StructType(Array( StructField("nestedArray", ArrayType(StructType(Array( - StructField("key", StringType, nullable = false), - StructField("value", LongType, nullable = false) - )), containsNull = false), nullable = false) + StructField("key", StringType, nullable = true), + StructField("value", LongType, nullable = true) + )), containsNull = false), nullable = true) )), nullable = false), StructField("integer", LongType, nullable = false), StructField("string", StringType, nullable = false), StructField("number", DoubleType, nullable = false), - StructField("float", FloatType, nullable = false), + StructField("float", FloatType, nullable = true), StructField("nullable", DoubleType, nullable = true), StructField("boolean", BooleanType, nullable = false), - StructField("additionalProperty", StringType, nullable = false) + StructField("additionalProperty", StringType, nullable = true) )) before { @@ -69,7 +69,9 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { StructField("name", StringType, nullable = true) ))) assert(dbNoSchema.select("name").collect()(0)(0) === "aaa") - intercept[AnalysisException] { dbNoSchema.select("address") } + intercept[AnalysisException] { + dbNoSchema.select("address") + } assert(dbNoSchema.select("foo").collect()(0)(0) === "bar") // with SchemaConverter @@ -78,7 +80,9 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { assert(dbWithSchema.select("name").collect()(0)(0) === "aaa") assert(Try(dbWithSchema.select("address.zip").collect()(0)(0)).isSuccess) assert(Try(dbWithSchema.select("address.foo").collect()(0)(0)).isFailure) - intercept[AnalysisException] { dbWithSchema.select("foo") } + intercept[AnalysisException] { + dbWithSchema.select("foo") + } } test("schema should support references") { @@ -91,7 +95,7 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { )), nullable = false), StructField("addressB", StructType(Array( StructField("zip", StringType, nullable = true) - )), nullable = false) + )), nullable = true) )) assert(schema === expected) @@ -103,6 +107,7 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { { "$schema": "smallTestSchema", "type": "object", + "required": ["address"], "properties": { "address": { "type": "object" @@ -133,6 +138,7 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { { "$$schema": "smallTestSchema", "type": "object", + "required": ["array"], "properties": { "array" : { "type" : "array", @@ -159,6 +165,7 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { { "$$schema": "smallTestSchema", "type": "object", + "required": ["array"], "properties": { "array" : { "type" : "array", @@ -187,6 +194,7 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { { "$$schema": "smallTestSchema", "type": "object", + "required": ["array"], "properties": { "array" : { "type" : "array", @@ -212,11 +220,13 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { { "$$schema": "smallTestSchema", "type": "object", + "required": ["array"], "properties": { "array" : { "type" : "array", "items": { "type": "object", + "required": ["name"], "properties": { "name" : { "type" : "string" @@ -236,21 +246,20 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { } test("Array of unknown type should fail") { - assertThrows[IllegalArgumentException] { val schema = SchemaConverter.convertContent( """ - { - "$$schema": "smallTestSchema", - "type": "object", - "properties": { - "array" : { - "type" : "array", - "items" : {} + { + "$$schema": "smallTestSchema", + "type": "object", + "properties": { + "array" : { + "type" : "array", + "items" : {} + } } } - } - """ + """ ) } } @@ -259,19 +268,19 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { assertThrows[IllegalArgumentException] { val schema = SchemaConverter.convertContent( """ - { - "$$schema": "smallTestSchema", - "type": "object", - "properties": { - "array" : { - "type" : "array", - "items" : { - "type" : ["string", "integer"] + { + "$$schema": "smallTestSchema", + "type": "object", + "properties": { + "array" : { + "type" : "array", + "items" : { + "type" : ["string", "integer"] + } } } } - } - """ + """ ) } } @@ -284,12 +293,14 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { "integer" -> LongType, "boolean" -> BooleanType ) - typeMap.foreach { case (name, atype) => - val schema = SchemaConverter.convertContent( - s""" + typeMap.foreach { + case (name, atype) => + val schema = SchemaConverter.convertContent( + s""" { "$$schema": "smallTestSchema", "type": "object", + "required": ["array"], "properties": { "array" : { "type" : "array", @@ -300,13 +311,13 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { } } """ - ) + ) - val expected = StructType(Array( - StructField("array", ArrayType(atype, containsNull = true), nullable = false) - )) + val expected = StructType(Array( + StructField("array", ArrayType(atype, containsNull = true), nullable = false) + )) - assert(schema === expected) + assert(schema === expected) } } @@ -316,6 +327,7 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { { "$$schema": "smallTestSchema", "type": "object", + "required": ["array"], "properties": { "array" : { "type" : "array", @@ -341,11 +353,13 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { { "$$schema": "smallTestSchema", "type": "object", + "required": ["array"], "properties": { "array" : { "type" : "array", "items" : { "type" : ["object", "null"], + "required": ["prop"], "properties" : { "prop" : { "type" : "string" @@ -396,16 +410,16 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { assertThrows[IllegalArgumentException] { val schema = SchemaConverter.convertContent( """ - { - "$$schema": "smallTestSchema", - "type": "object", - "properties": { - "prop" : { - "type" : ["integer", "float"] + { + "$$schema": "smallTestSchema", + "type": "object", + "properties": { + "prop" : { + "type" : ["integer", "float"] + } } } - } - """ + """ ) } @@ -417,6 +431,7 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { { "$$schema": "smallTestSchema", "type": "object", + "required": ["prop"], "properties": { "prop" : { "type" : ["integer", "float"] @@ -437,16 +452,16 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { assertThrows[NoSuchElementException] { val schema = SchemaConverter.convertContent( """ - { - "$$schema": "smallTestSchema", - "type": "object", - "properties": { - "prop" : { - "type" : "null" + { + "$$schema": "smallTestSchema", + "type": "object", + "properties": { + "prop" : { + "type" : "null" + } } } - } - """ + """ ) } } @@ -455,18 +470,34 @@ class SchemaConverterTest extends FunSuite with Matchers with BeforeAndAfter { assertThrows[IllegalArgumentException] { val schema = SchemaConverter.convertContent( """ - { - "$$schema": "smallTestSchema", - "type": "object", - "properties": { - "prop" : { - "type" : ["null"] + { + "$$schema": "smallTestSchema", + "type": "object", + "properties": { + "prop" : { + "type" : ["null"] + } } } - } - """ + """ ) } } + test("Non required fields should be null regardless null is included on property type") { + val schema = SchemaConverter.enableStrictTyping().convertContent( + """ + { + "type": "object", + "required": [], + "properties": { + "size": { + "type": "integer" + } + } + } + """ + ) + assert(schema.fields(0) == StructField("size", LongType, nullable = true)) + } }