@@ -67,6 +67,24 @@ def _schema_loads(schema_str):
6767 return Schema (schema_str , schema_type = 'AVRO' )
6868
6969
70+ def _resolve_named_schema (schema , schema_registry_client , named_schemas = None ):
71+ """
72+ Resolves named schemas referenced by the provided schema recursively.
73+ :param schema: Schema to resolve named schemas for.
74+ :param schema_registry_client: SchemaRegistryClient to use for retrieval.
75+ :param named_schemas: Dict of named schemas resolved recursively.
76+ :return: named_schemas dict.
77+ """
78+ if named_schemas is None :
79+ named_schemas = {}
80+ if schema .references is not None :
81+ for ref in schema .references :
82+ referenced_schema = schema_registry_client .get_version (ref .subject , ref .version )
83+ _resolve_named_schema (referenced_schema .schema , schema_registry_client , named_schemas )
84+ parse_schema (loads (referenced_schema .schema .schema_str ), named_schemas = named_schemas )
85+ return named_schemas
86+
87+
7088class AvroSerializer (Serializer ):
7189 """
7290 Serializer that outputs Avro binary encoded data with Confluent Schema Registry framing.
@@ -146,7 +164,7 @@ class AvroSerializer(Serializer):
146164 Args:
147165 schema_registry_client (SchemaRegistryClient): Schema Registry client instance.
148166
149- schema_str (str): Avro `Schema Declaration. <https://avro.apache.org/docs/current/spec.html#schemas>`_
167+ schema_str (str or Schema ): Avro `Schema Declaration. <https://avro.apache.org/docs/current/spec.html#schemas>`_ Accepts either a string or a `Schema`(Schema) instance. Note that string definitions cannot reference other schemas. For referencing other schemas, use a Schema instance.
150168
151169 to_dict (callable, optional): Callable(object, SerializationContext) -> dict. Converts object to a dict.
152170
@@ -155,15 +173,21 @@ class AvroSerializer(Serializer):
155173 __slots__ = ['_hash' , '_auto_register' , '_normalize_schemas' , '_use_latest_version' ,
156174 '_known_subjects' , '_parsed_schema' ,
157175 '_registry' , '_schema' , '_schema_id' , '_schema_name' ,
158- '_subject_name_func' , '_to_dict' ]
176+ '_subject_name_func' , '_to_dict' , '_named_schemas' ]
159177
160178 _default_conf = {'auto.register.schemas' : True ,
161179 'normalize.schemas' : False ,
162180 'use.latest.version' : False ,
163181 'subject.name.strategy' : topic_subject_name_strategy }
164182
165- def __init__ (self , schema_registry_client , schema_str ,
166- to_dict = None , conf = None ):
183+ def __init__ (self , schema_registry_client , schema_str , to_dict = None , conf = None ):
184+ if isinstance (schema_str , str ):
185+ schema = _schema_loads (schema_str )
186+ elif isinstance (schema_str , Schema ):
187+ schema = schema_str
188+ else :
189+ raise TypeError ('You must pass either schema string or schema object' )
190+
167191 self ._registry = schema_registry_client
168192 self ._schema_id = None
169193 self ._known_subjects = set ()
@@ -200,9 +224,9 @@ def __init__(self, schema_registry_client, schema_str,
200224 raise ValueError ("Unrecognized properties: {}"
201225 .format (", " .join (conf_copy .keys ())))
202226
203- schema = _schema_loads (schema_str )
204227 schema_dict = loads (schema .schema_str )
205- parsed_schema = parse_schema (schema_dict )
228+ self ._named_schemas = _resolve_named_schema (schema , schema_registry_client )
229+ parsed_schema = parse_schema (schema_dict , named_schemas = self ._named_schemas )
206230
207231 if isinstance (parsed_schema , list ):
208232 # if parsed_schema is a list, we have an Avro union and there
@@ -299,8 +323,9 @@ class AvroDeserializer(Deserializer):
299323 schema_registry_client (SchemaRegistryClient): Confluent Schema Registry
300324 client instance.
301325
302- schema_str (str, optional): The reader schema.
303- If not provided, the writer schema will be used as the reader schema.
326+ schema_str (str, Schema, optional): Avro reader schema declaration. Accepts either a string or a
327+ `Schema` instance. If not provided, the writer schema will be used as the reader schema. Note that string
328+ definitions cannot reference other schemas. For referencing other schemas, use a Schema instance.
304329
305330 from_dict (callable, optional): Callable(dict, SerializationContext) -> object.
306331 Converts a dict to an instance of some object.
@@ -315,13 +340,31 @@ class AvroDeserializer(Deserializer):
315340 `Apache Avro Schema Resolution <https://avro.apache.org/docs/1.8.2/spec.html#Schema+Resolution>`_
316341 """
317342
318- __slots__ = ['_reader_schema' , '_registry' , '_from_dict' , '_writer_schemas' , '_return_record_name' ]
343+ __slots__ = ['_reader_schema' , '_registry' , '_from_dict' , '_writer_schemas' , '_return_record_name' , '_schema' ,
344+ '_named_schemas' ]
319345
320346 def __init__ (self , schema_registry_client , schema_str = None , from_dict = None , return_record_name = False ):
347+ schema = None
348+ if schema_str is not None :
349+ if isinstance (schema_str , str ):
350+ schema = _schema_loads (schema_str )
351+ elif isinstance (schema_str , Schema ):
352+ schema = schema_str
353+ else :
354+ raise TypeError ('You must pass either schema string or schema object' )
355+
356+ self ._schema = schema
321357 self ._registry = schema_registry_client
322358 self ._writer_schemas = {}
323359
324- self ._reader_schema = parse_schema (loads (schema_str )) if schema_str else None
360+ if schema :
361+ schema_dict = loads (self ._schema .schema_str )
362+ self ._named_schemas = _resolve_named_schema (self ._schema , schema_registry_client )
363+ self ._reader_schema = parse_schema (schema_dict ,
364+ named_schemas = self ._named_schemas )
365+ else :
366+ self ._named_schemas = None
367+ self ._reader_schema = None
325368
326369 if from_dict is not None and not callable (from_dict ):
327370 raise ValueError ("from_dict must be callable with the signature "
@@ -370,10 +413,11 @@ def __call__(self, data, ctx):
370413 writer_schema = self ._writer_schemas .get (schema_id , None )
371414
372415 if writer_schema is None :
373- schema = self ._registry .get_schema (schema_id )
374- prepared_schema = _schema_loads (schema .schema_str )
416+ registered_schema = self ._registry .get_schema (schema_id )
417+ self ._named_schemas = _resolve_named_schema (registered_schema , self ._registry )
418+ prepared_schema = _schema_loads (registered_schema .schema_str )
375419 writer_schema = parse_schema (loads (
376- prepared_schema .schema_str ))
420+ prepared_schema .schema_str ), named_schemas = self . _named_schemas )
377421 self ._writer_schemas [schema_id ] = writer_schema
378422
379423 obj_dict = schemaless_reader (payload ,
0 commit comments