11use crate :: { dead_letters:: DeadLetter , MessageDeserializationError , MessageFormat } ;
22use async_trait:: async_trait;
3+ use dashmap:: DashMap ;
34use flate2:: read:: GzDecoder ;
45use schema_registry_converter:: async_impl:: {
56 easy_avro:: EasyAvroDecoder , easy_json:: EasyJsonDecoder , schema_registry:: SrSettings ,
67} ;
78use serde_json:: Value ;
8- use std:: { borrow:: BorrowMut , convert:: TryFrom , io:: Cursor , io:: Read , path:: PathBuf } ;
9+
10+ // use crate::avro_canonical_schema_workaround::parse_into_canonical_form;
11+ use apache_avro:: { rabin:: Rabin , GenericSingleObjectReader , Schema } ;
12+ use std:: {
13+ borrow:: BorrowMut ,
14+ convert:: { TryFrom , TryInto } ,
15+ io:: { Cursor , Read } ,
16+ path:: PathBuf ,
17+ } ;
18+
19+ use log:: debug;
920
1021#[ async_trait]
1122pub ( crate ) trait MessageDeserializer {
@@ -48,6 +59,10 @@ impl MessageDeserializerFactory {
4859 }
4960 }
5061 } ,
62+ MessageFormat :: SoeAvro ( path) => match SoeAvroDeserializer :: try_from_path ( path) {
63+ Ok ( s) => Ok ( Box :: new ( s) ) ,
64+ Err ( e) => Err ( e) ,
65+ } ,
5166 _ => Ok ( Box :: new ( DefaultDeserializer :: new ( decompress_gzip) ) ) ,
5267 }
5368 }
@@ -128,6 +143,11 @@ struct AvroDeserializer {
128143 decoder : EasyAvroDecoder ,
129144}
130145
146+ struct SoeAvroDeserializer {
147+ //Deserializer for avro single object encoding
148+ decoders : DashMap < i64 , GenericSingleObjectReader > ,
149+ }
150+
131151#[ derive( Default ) ]
132152struct AvroSchemaDeserializer {
133153 schema : Option < apache_avro:: Schema > ,
@@ -137,6 +157,58 @@ struct JsonDeserializer {
137157 decoder : EasyJsonDecoder ,
138158}
139159
160+ #[ async_trait]
161+ impl MessageDeserializer for SoeAvroDeserializer {
162+ async fn deserialize (
163+ & mut self ,
164+ message_bytes : & [ u8 ] ,
165+ ) -> Result < Value , MessageDeserializationError > {
166+ let key = Self :: extract_message_fingerprint ( message_bytes) . map_err ( |e| {
167+ MessageDeserializationError :: AvroDeserialization {
168+ dead_letter : DeadLetter :: from_failed_deserialization ( message_bytes, e. to_string ( ) ) ,
169+ }
170+ } ) ?;
171+
172+ let decoder =
173+ self . decoders
174+ . get ( & key)
175+ . ok_or ( MessageDeserializationError :: AvroDeserialization {
176+ dead_letter : DeadLetter :: from_failed_deserialization (
177+ message_bytes,
178+ format ! (
179+ "Unkown schema with fingerprint {}" ,
180+ & message_bytes[ 2 ..10 ]
181+ . iter( )
182+ . map( |byte| format!( "{:02x}" , byte) )
183+ . collect:: <Vec <String >>( )
184+ . join( "" )
185+ ) ,
186+ ) ,
187+ } ) ?;
188+ let mut reader = Cursor :: new ( message_bytes) ;
189+
190+ match decoder. read_value ( & mut reader) {
191+ Ok ( drs) => match Value :: try_from ( drs) {
192+ Ok ( v) => Ok ( v) ,
193+ Err ( e) => Err ( MessageDeserializationError :: AvroDeserialization {
194+ dead_letter : DeadLetter :: from_failed_deserialization (
195+ message_bytes,
196+ e. to_string ( ) ,
197+ ) ,
198+ } ) ,
199+ } ,
200+ Err ( e) => {
201+ return Err ( MessageDeserializationError :: AvroDeserialization {
202+ dead_letter : DeadLetter :: from_failed_deserialization (
203+ message_bytes,
204+ e. to_string ( ) ,
205+ ) ,
206+ } ) ;
207+ }
208+ }
209+ }
210+ }
211+
140212#[ async_trait]
141213impl MessageDeserializer for AvroDeserializer {
142214 async fn deserialize (
@@ -293,5 +365,79 @@ impl AvroDeserializer {
293365 }
294366}
295367
368+ impl SoeAvroDeserializer {
369+ pub ( crate ) fn try_from_path ( path : & PathBuf ) -> Result < Self , anyhow:: Error > {
370+ if path. is_file ( ) {
371+ let ( key, seo_reader) = Self :: read_single_schema_file ( path) ?;
372+ debug ! (
373+ "Loaded schema {:?} with key (i64 rep of fingerprint) {:?}" ,
374+ path, key
375+ ) ;
376+ let map: DashMap < i64 , GenericSingleObjectReader > = DashMap :: with_capacity ( 1 ) ;
377+ map. insert ( key, seo_reader) ;
378+ Ok ( SoeAvroDeserializer { decoders : map } )
379+ } else if path. is_dir ( ) {
380+ let decoders = path
381+ . read_dir ( ) ?
382+ . map ( |file| {
383+ let file_path = file?. path ( ) ;
384+ let value = Self :: read_single_schema_file ( & file_path) ?;
385+ Ok ( value)
386+ } )
387+ . collect :: < anyhow:: Result < DashMap < _ , _ > > > ( ) ?;
388+
389+ Ok ( SoeAvroDeserializer { decoders } )
390+ } else {
391+ Err ( anyhow:: format_err!( "Path '{:?}' does not exists" , path) )
392+ }
393+ }
394+
395+ fn read_single_schema_file (
396+ path : & PathBuf ,
397+ ) -> Result < ( i64 , GenericSingleObjectReader ) , anyhow:: Error > {
398+ match std:: fs:: read_to_string ( path) {
399+ Ok ( content) => match Schema :: parse_str ( & content) {
400+ Ok ( s) => {
401+ let fingerprint = s. fingerprint :: < Rabin > ( ) . bytes ;
402+ let fingerprint = fingerprint
403+ . try_into ( )
404+ . expect ( "Rabin fingerprints are 8 bytes" ) ;
405+ let key = Self :: fingerprint_to_i64 ( fingerprint) ;
406+ match GenericSingleObjectReader :: new ( s) {
407+ Ok ( decoder) => Ok ( ( key, decoder) ) ,
408+ Err ( e) => Err ( anyhow:: format_err!(
409+ "Schema file '{:?}'; Error: {}" ,
410+ path,
411+ e. to_string( )
412+ ) ) ,
413+ }
414+ }
415+ Err ( e) => Err ( anyhow:: format_err!(
416+ "Schema file '{:?}'; Error: {}" ,
417+ path,
418+ e. to_string( )
419+ ) ) ,
420+ } ,
421+ Err ( e) => Err ( anyhow:: format_err!(
422+ "Schema file '{:?}'; Error: {}" ,
423+ path,
424+ e. to_string( )
425+ ) ) ,
426+ }
427+ }
428+
429+ fn extract_message_fingerprint ( msg : & [ u8 ] ) -> Result < i64 , anyhow:: Error > {
430+ msg. get ( 2 ..10 )
431+ . ok_or ( anyhow:: anyhow!(
432+ "Message does not contain a valid fingerprint"
433+ ) )
434+ . map ( |x| Self :: fingerprint_to_i64 ( x. try_into ( ) . expect ( "Slice must be 8 bytes long" ) ) )
435+ }
436+
437+ fn fingerprint_to_i64 ( msg : [ u8 ; 8 ] ) -> i64 {
438+ i64:: from_le_bytes ( msg)
439+ }
440+ }
441+
296442#[ cfg( test) ]
297443mod tests { }
0 commit comments