@@ -2,16 +2,20 @@ package kafka
22
33import (
44 "encoding/binary"
5+ "fmt"
56 "io/ioutil"
67 "math/big"
8+ "math/rand"
79 "reflect"
810 "strings"
911 "testing"
12+ "time"
1013
1114 "github.com/Shopify/sarama"
1215 "github.com/go-avro/avro"
1316 liavro "github.com/linkedin/goavro/v2"
1417 pdk "github.com/pilosa/pdk/v2"
18+ "github.com/pilosa/pdk/v2/kafka/csrc"
1519)
1620
1721func TestAvroToPDKSchema (t * testing.T ) {
@@ -103,47 +107,47 @@ func liDecodeTestSchema(t *testing.T, filename string) *liavro.Codec {
103107 return codec
104108}
105109
106- func TestKafkaSource (t * testing.T ) {
// tests is the table of Avro round-trip cases ranged over by the Kafka
// source tests. For each case, schemaFile names the schema used to
// encode the records in data, and exp holds the row each record is
// expected to produce: exp[i] is the expectation for data[i].
var tests = []struct {
	data       []map[string]interface{}
	schemaFile string
	exp        [][]interface{}
}{
	// Plain string fields, two records.
	{
		schemaFile: "simple.json",
		data: []map[string]interface{}{
			{"first": "hello", "last": "goodbye"},
			{"first": "one", "last": "two"},
		},
		exp: [][]interface{}{
			{"hello", "goodbye"},
			{"one", "two"},
		},
	},
	// String inputs where "last" and "middle" are expected back as []byte.
	{
		schemaFile: "stringtypes.json",
		data: []map[string]interface{}{
			{"first": "blah", "last": "goodbye", "middle": "123456789"},
		},
		exp: [][]interface{}{
			{"blah", []byte("goodbye"), []byte("123456789")},
		},
	},
	// Rational inputs expected back as uint64 values.
	{
		schemaFile: "decimal.json",
		data: []map[string]interface{}{
			{"somenum": &big.Rat{}},
			{"somenum": big.NewRat(10, 1)},
			{"somenum": big.NewRat(1, 1)},
			{"somenum": big.NewRat(5, 2)},
			{"somenum": big.NewRat(1234567890, 1)},
		},
		exp: [][]interface{}{
			{uint64(0)},
			{uint64(1000)},
			{uint64(100)},
			{uint64(250)},
			{uint64(123456789000)},
		},
	},
	// One record mixing a string, a string slice, ints, floats and a bool.
	{
		schemaFile: "othertypes.json",
		data: []map[string]interface{}{
			{
				"first":   "a",
				"second":  []string{"b", "c"},
				"third":   -8,
				"fourth":  99,
				"fifth":   99.9,
				"sixth":   101.1,
				"seventh": true,
			},
		},
		exp: [][]interface{}{
			{
				"a",
				[]interface{}{"b", "c"},
				int32(-8),
				int64(99),
				float32(99.9),
				float64(101.1),
				true,
			},
		},
	},
	// Union-typed fields: one record with every branch populated, and one
	// where every field is nil (directly or via the "null" branch).
	{
		schemaFile: "unions.json",
		data: []map[string]interface{}{
			{
				"first":  map[string]interface{}{"string": "a"},
				"second": map[string]interface{}{"boolean": true},
				"third":  map[string]interface{}{"long": 101},
				"fourth": map[string]interface{}{"bytes.decimal": big.NewRat(5, 2)},
			},
			{
				"first":  nil,
				"second": nil,
				"third":  map[string]interface{}{"null": nil},
				"fourth": nil,
			},
		},
		exp: [][]interface{}{
			{"a", true, int64(101), uint64(2500)},
			{nil, nil, nil, nil},
		},
	},
}
146+
147+ func TestKafkaSourceLocal (t * testing.T ) {
107148 // this is not an integration test, so we'll take steps to avoid
108149 // actually connecting to Kafka or Schema Registry.
109150
110- tests := []struct {
111- data []map [string ]interface {}
112- schemaFile string
113- exp [][]interface {}
114- }{
115- {
116- schemaFile : "simple.json" ,
117- data : []map [string ]interface {}{{"first" : "hello" , "last" : "goodbye" }, {"first" : "one" , "last" : "two" }},
118- exp : [][]interface {}{{"hello" , "goodbye" }, {"one" , "two" }},
119- },
120- {
121- schemaFile : "stringtypes.json" ,
122- data : []map [string ]interface {}{{"first" : "blah" , "last" : "goodbye" , "middle" : "123456789" }},
123- exp : [][]interface {}{{"blah" , []byte ("goodbye" ), []byte ("123456789" )}},
124- },
125- {
126- schemaFile : "decimal.json" ,
127- data : []map [string ]interface {}{{"somenum" : & big.Rat {}}, {"somenum" : big .NewRat (10 , 1 )}, {"somenum" : big .NewRat (1 , 1 )}, {"somenum" : big .NewRat (5 , 2 )}, {"somenum" : big .NewRat (1234567890 , 1 )}},
128- exp : [][]interface {}{{uint64 (0 )}, {uint64 (1000 )}, {uint64 (100 )}, {uint64 (250 )}, {uint64 (123456789000 )}},
129- },
130- {
131- schemaFile : "othertypes.json" ,
132- data : []map [string ]interface {}{{"first" : "a" , "second" : []string {"b" , "c" }, "third" : - 8 , "fourth" : 99 , "fifth" : 99.9 , "sixth" : 101.1 , "seventh" : true }},
133- exp : [][]interface {}{{"a" , []interface {}{"b" , "c" }, int32 (- 8 ), int64 (99 ), float32 (99.9 ), float64 (101.1 ), true }},
134- },
135- {
136- schemaFile : "unions.json" ,
137- data : []map [string ]interface {}{
138- {"first" : map [string ]interface {}{"string" : "a" }, "second" : map [string ]interface {}{"boolean" : true }, "third" : map [string ]interface {}{"long" : 101 }, "fourth" : map [string ]interface {}{"bytes.decimal" : big .NewRat (5 , 2 )}},
139- {"first" : nil , "second" : nil , "third" : map [string ]interface {}{"null" : nil }, "fourth" : nil },
140- },
141- exp : [][]interface {}{
142- {"a" , true , int64 (101 ), uint64 (2500 )},
143- {nil , nil , nil , nil }},
144- },
145- }
146-
147151 src := NewSource ()
148152 // note: we will not call Open on the source which would connect
149153 // to Kafka. Instead, we'll set the src.messages manually so we
@@ -203,6 +207,104 @@ func TestKafkaSource(t *testing.T) {
203207
204208}
205209
210+ // TestKafkaSource uses a real Kafka and Schema Registry. I downloaded
211+ // the tar archive of the Confluent Platform (self managed software)
212+ // from confluent.io/download (I got version 5.3.1). I ran `tar xzf`
213+ // on the file, changed into the directory, ran `curl -L
214+ // https://cnfl.io/cli | sh -s -- -b /Users/jaffee/bin` (that
215+ // directory is on my PATH), then ran `confluent local start
216+ // schema-registry`.
217+ //
218+ // I find that this test runs much faster after a `confluent local
219+ // destroy` followed by `confluent local start schema-registry`. The
220+ // difference is stark—10s of seconds—and I don't know why this should
221+ // be, but I think it has something to do with kafka rebalancing
222+ // itself when a new client joins.
223+ func TestKafkaSourceIntegration (t * testing.T ) {
224+ if testing .Short () {
225+ t .Skip ()
226+ }
227+ src := NewSource ()
228+ src .Topics = []string {"test" }
229+ src .Group = "group0"
230+ err := src .Open ()
231+ if err != nil {
232+ t .Fatalf ("opening source: %v" , err )
233+ }
234+
235+ schemaClient := csrc .NewClient ("localhost:8081" )
236+
237+ conf := sarama .NewConfig ()
238+ conf .Version = sarama .V0_10_0_0
239+ conf .Producer .Return .Successes = true
240+ producer , err := sarama .NewSyncProducer ([]string {"localhost:9092" }, conf )
241+ if err != nil {
242+ t .Fatalf ("getting new producer: %v" , err )
243+ }
244+ defer producer .Close ()
245+
246+ rnd := rand .New (rand .NewSource (time .Now ().UnixNano ()))
247+
248+ key := fmt .Sprintf ("%d" , rnd .Int ())
249+ for i , test := range tests {
250+ schemaStr := readTestSchema (t , test .schemaFile )
251+ resp , err := schemaClient .PostSubjects (fmt .Sprintf ("schema%d" , i ), schemaStr )
252+ if err != nil {
253+ t .Fatalf ("posting schema: %v" , err )
254+ }
255+ schemaID := resp .ID
256+ schema := liDecodeTestSchema (t , test .schemaFile )
257+ t .Run (test .schemaFile , func (t * testing.T ) {
258+
259+ for j , record := range test .data {
260+ buf := make ([]byte , 5 , 1000 )
261+ buf [0 ] = 0
262+ binary .BigEndian .PutUint32 (buf [1 :], uint32 (schemaID ))
263+ buf , err := schema .BinaryFromNative (buf , record )
264+ if err != nil {
265+ t .Errorf ("encoding:\n %+v\n err: %v" , record , err )
266+ }
267+
268+ // post buf to kafka
269+ _ , _ , err = producer .SendMessage (& sarama.ProducerMessage {Topic : "test" , Key : sarama .StringEncoder (key ), Value : sarama .ByteEncoder (buf )})
270+ if err != nil {
271+ t .Fatalf ("sending message to kafka: %v" , err )
272+ }
273+
274+ pdkRec , err := src .Record ()
275+ if j == 0 {
276+ if err != pdk .ErrSchemaChange {
277+ t .Errorf ("expected schema changed signal, got: %v" , err )
278+ }
279+ gotSchema := src .Schema ()
280+ if ! reflect .DeepEqual (gotSchema , expectedSchemas [test .schemaFile ]) {
281+ t .Errorf ("unexpected schema got/exp:\n %+v\n %+v" , gotSchema , expectedSchemas [test .schemaFile ])
282+ }
283+ } else if err != nil {
284+ t .Fatalf ("unexpected error getting record: %v" , err )
285+ }
286+ if pdkRec == nil {
287+ t .Fatalf ("should have a record" )
288+ }
289+ data := pdkRec .Data ()
290+ if ! reflect .DeepEqual (data , test .exp [j ]) {
291+ t .Errorf ("data mismatch exp/got:\n %+v\n %+v" , test .exp [j ], data )
292+ if len (data ) != len (test .exp [j ]) {
293+ t .Fatalf ("mismatched lengths exp/got %d/%d" , len (test .exp [j ]), len (data ))
294+ }
295+ for k := range test .exp [j ] {
296+ if ! reflect .DeepEqual (test.exp [j ][k ], data [k ]) {
297+ t .Errorf ("Mismatch at %d, exp/got\n %v of %[2]T\n %v of %[3]T" , k , test.exp [j ][k ], data [k ])
298+ }
299+ }
300+
301+ }
302+ }
303+ })
304+ }
305+
306+ }
307+
206308var expectedSchemas = map [string ][]pdk.Field {
207309 "simple.json" : []pdk.Field {pdk.StringField {NameVal : "first" }, pdk.StringField {NameVal : "last" }},
208310 "stringtypes.json" : []pdk.Field {pdk.StringField {NameVal : "first" }, pdk.StringField {NameVal : "last" }, pdk.StringField {NameVal : "middle" }},
0 commit comments