|
16 | 16 | package za.co.absa.hyperdrive.ingestor.implementation.transformer.avro.confluent |
17 | 17 |
|
18 | 18 | import org.apache.commons.configuration2.BaseConfiguration |
19 | | -import org.scalatest.{FlatSpec, Matchers} |
| 19 | +import org.apache.spark.sql.execution.streaming.MemoryStream |
| 20 | +import org.apache.spark.sql.streaming.Trigger |
| 21 | +import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} |
| 22 | +import za.co.absa.abris.avro.read.confluent.SchemaManagerFactory |
| 23 | +import za.co.absa.abris.config.AbrisConfig |
| 24 | +import za.co.absa.commons.spark.SparkTestBase |
| 25 | +import za.co.absa.hyperdrive.ingestor.implementation.testutils.HyperdriveMockSchemaRegistryClient |
20 | 26 | import za.co.absa.hyperdrive.ingestor.implementation.transformer.avro.confluent.ConfluentAvroEncodingTransformer._ |
21 | 27 | import za.co.absa.hyperdrive.ingestor.implementation.utils.AbrisConfigUtil |
22 | 28 | import za.co.absa.hyperdrive.ingestor.implementation.writer.kafka.KafkaStreamWriter |
23 | 29 |
|
24 | | -class TestConfluentAvroEncodingTransformer extends FlatSpec with Matchers { |
| 30 | +class TestConfluentAvroEncodingTransformer extends FlatSpec with Matchers with BeforeAndAfter with SparkTestBase { |
25 | 31 |
|
26 | 32 | private val topic = "topic" |
27 | | - private val schemaRegistryURL = "http://localhost:8081" |
| 33 | + private val SchemaRegistryURL = "http://localhost:8081" |
28 | 34 |
|
29 | 35 | behavior of ConfluentAvroEncodingTransformer.getClass.getSimpleName |
30 | 36 |
|
| 37 | + before { |
| 38 | + val mockSchemaRegistryClient = new HyperdriveMockSchemaRegistryClient() |
| 39 | + SchemaManagerFactory.resetSRClientInstance() |
| 40 | + SchemaManagerFactory.addSRClientInstance(Map(AbrisConfig.SCHEMA_REGISTRY_URL -> SchemaRegistryURL), mockSchemaRegistryClient) |
| 41 | + } |
| 42 | + |
31 | 43 | it should "create avro stream encoder" in { |
32 | 44 | val config = new BaseConfiguration |
33 | 45 | config.addProperty(KafkaStreamWriter.KEY_TOPIC, topic) |
34 | | - config.addProperty(KEY_SCHEMA_REGISTRY_URL, schemaRegistryURL) |
| 46 | + config.addProperty(KEY_SCHEMA_REGISTRY_URL, SchemaRegistryURL) |
35 | 47 | config.addProperty(KEY_SCHEMA_REGISTRY_VALUE_NAMING_STRATEGY, AbrisConfigUtil.TopicNameStrategy) |
36 | 48 |
|
37 | 49 | val encoder = ConfluentAvroEncodingTransformer(config).asInstanceOf[ConfluentAvroEncodingTransformer] |
38 | 50 |
|
39 | 51 | encoder.config shouldBe config |
40 | 52 | encoder.withKey shouldBe false |
41 | 53 | } |
| 54 | + |
| 55 | + it should "encode the values" in { |
| 56 | + // given |
| 57 | + import spark.implicits._ |
| 58 | + val queryName = "dummyQuery" |
| 59 | + val input = MemoryStream[Int](1, spark.sqlContext) |
| 60 | + input.addData(1 to 100) |
| 61 | + val df = input.toDF() |
| 62 | + |
| 63 | + // when |
| 64 | + val config = new BaseConfiguration() |
| 65 | + config.addProperty(KafkaStreamWriter.KEY_TOPIC, topic) |
| 66 | + config.addProperty(KEY_SCHEMA_REGISTRY_URL, SchemaRegistryURL) |
| 67 | + config.addProperty(KEY_SCHEMA_REGISTRY_VALUE_NAMING_STRATEGY, AbrisConfigUtil.TopicNameStrategy) |
| 68 | + |
| 69 | + val encoder = ConfluentAvroEncodingTransformer(config) |
| 70 | + val transformedDf = encoder.transform(df) |
| 71 | + val query = transformedDf |
| 72 | + .writeStream |
| 73 | + .trigger(Trigger.Once) |
| 74 | + .queryName(queryName) |
| 75 | + .format("memory") |
| 76 | + .start() |
| 77 | + query.awaitTermination() |
| 78 | + |
| 79 | + // then |
| 80 | + val outputDf = spark.sql(s"select * from $queryName")
| 81 | + outputDf.count() shouldBe 100
| 82 | + val byteArrays = outputDf.select("value").map(_ (0).asInstanceOf[Array[Byte]]).collect()
| 83 | + byteArrays.distinct.length shouldBe byteArrays.length
| 84 | + }
42 | 85 | }
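
Note for reviewers, not part of the diff above: the final assertion only checks that the 100 encoded byte arrays are pairwise distinct. A stronger check would decode the column back and compare payloads. Below is a minimal sketch of that round trip using ABRiS's `from_avro`, assuming it runs inside the same test (so `topic`, `SchemaRegistryURL`, `outputDf`, and `spark.implicits._` are in scope), that the transformer registered the value schema under TopicNameStrategy against the mocked registry client from the `before` block, and that the generated Avro record exposes the original column as field `value`.

import org.apache.spark.sql.functions.col
import za.co.absa.abris.avro.functions.from_avro

// Reader config mirroring the writer side: resolve the latest schema for the
// topic through the (mocked) schema registry client registered in `before`.
val fromAvroConfig = AbrisConfig
  .fromConfluentAvro
  .downloadReaderSchemaByLatestVersion
  .andTopicNameStrategy(topic)
  .usingSchemaRegistry(SchemaRegistryURL)

// Decoding should recover the original payload: the integers 1 to 100.
// The nested field name `value` is an assumption about the generated schema.
val decodedDf = outputDf.select(from_avro(col("value"), fromAvroConfig).as("data"))
decodedDf.select("data.value").as[Int].collect() should contain theSameElementsAs (1 to 100)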