Skip to content

Commit aef7faa

Browse files
authored
DGS-19409 Ensure Avro serde caches per subject (#2387)
* DGS-19409 Ensure Avro serde caches per subject
* Add test
* Fix test
1 parent 00f1fa8 commit aef7faa

File tree

4 files changed: 70 additions and 81 deletions.

src/Confluent.SchemaRegistry.Serdes.Avro/GenericSerializerImpl.cs

Lines changed: 12 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -31,9 +31,10 @@ namespace Confluent.SchemaRegistry.Serdes
3131
{
3232
internal class GenericSerializerImpl : AsyncSerializer<GenericRecord, Avro.Schema>
3333
{
34-
private Dictionary<Avro.Schema, string> knownSchemas = new Dictionary<global::Avro.Schema, string>();
35-
private HashSet<KeyValuePair<string, string>> registeredSchemas = new HashSet<KeyValuePair<string, string>>();
36-
private Dictionary<string, int> schemaIds = new Dictionary<string, int>();
34+
private Dictionary<Avro.Schema, string> knownSchemas =
35+
new Dictionary<global::Avro.Schema, string>();
36+
private Dictionary<KeyValuePair<string, string>, int> registeredSchemas =
37+
new Dictionary<KeyValuePair<string, string>, int>();
3738

3839
public GenericSerializerImpl(
3940
ISchemaRegistryClient schemaRegistryClient,
@@ -99,12 +100,10 @@ public async Task<byte[]> Serialize(string topic, Headers headers, GenericRecord
99100
// something more sophisticated than the below + not allow
100101
// the misuse to keep happening without warning.
101102
if (knownSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
102-
registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas ||
103-
schemaIds.Count > schemaRegistryClient.MaxCachedSchemas)
103+
registeredSchemas.Count > schemaRegistryClient.MaxCachedSchemas)
104104
{
105105
knownSchemas.Clear();
106106
registeredSchemas.Clear();
107-
schemaIds.Clear();
108107
}
109108

110109
// Determine a schema string corresponding to the schema object.
@@ -139,41 +138,18 @@ public async Task<byte[]> Serialize(string topic, Headers headers, GenericRecord
139138
{
140139
schemaId = latestSchema.Id;
141140
}
142-
else if (!registeredSchemas.Contains(subjectSchemaPair))
141+
else if (!registeredSchemas.TryGetValue(subjectSchemaPair, out schemaId))
143142
{
144-
int newSchemaId;
145-
146143
// first usage: register/get schema to check compatibility
147-
if (autoRegisterSchema)
148-
{
149-
newSchemaId = await schemaRegistryClient
144+
schemaId = autoRegisterSchema
145+
? await schemaRegistryClient
150146
.RegisterSchemaAsync(subject, writerSchemaString, normalizeSchemas)
147+
.ConfigureAwait(continueOnCapturedContext: false)
148+
: await schemaRegistryClient
149+
.GetSchemaIdAsync(subject, writerSchemaString, normalizeSchemas)
151150
.ConfigureAwait(continueOnCapturedContext: false);
152-
}
153-
else
154-
{
155-
newSchemaId = await schemaRegistryClient.GetSchemaIdAsync(subject, writerSchemaString, normalizeSchemas)
156-
.ConfigureAwait(continueOnCapturedContext: false);
157-
}
158-
159-
if (!schemaIds.ContainsKey(writerSchemaString))
160-
{
161-
schemaIds.Add(writerSchemaString, newSchemaId);
162-
}
163-
else if (schemaIds[writerSchemaString] != newSchemaId)
164-
{
165-
schemaIds.Clear();
166-
registeredSchemas.Clear();
167-
throw new KafkaException(new Error(isKey ? ErrorCode.Local_KeySerialization : ErrorCode.Local_ValueSerialization, $"Duplicate schema registration encountered: Schema ids {schemaIds[writerSchemaString]} and {newSchemaId} are associated with the same schema."));
168-
}
169-
170-
registeredSchemas.Add(subjectSchemaPair);
171151

172-
schemaId = schemaIds[writerSchemaString];
173-
}
174-
else
175-
{
176-
schemaId = schemaIds[writerSchemaString];
152+
registeredSchemas.Add(subjectSchemaPair, schemaId);
177153
}
178154
}
179155
finally

src/Confluent.SchemaRegistry.Serdes.Avro/SpecificSerializerImpl.cs

Lines changed: 13 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -35,23 +35,8 @@ internal class SerializerSchemaData
3535
{
3636
private string writerSchemaString;
3737
private global::Avro.Schema writerSchema;
38-
39-
/// <remarks>
40-
/// A given schema is uniquely identified by a schema id, even when
41-
/// registered against multiple subjects.
42-
/// </remarks>
43-
private int? writerSchemaId;
44-
4538
private SpecificWriter<T> avroWriter;
4639

47-
private HashSet<string> subjectsRegistered = new HashSet<string>();
48-
49-
public HashSet<string> SubjectsRegistered
50-
{
51-
get => subjectsRegistered;
52-
set => subjectsRegistered = value;
53-
}
54-
5540
public string WriterSchemaString
5641
{
5742
get => writerSchemaString;
@@ -64,12 +49,6 @@ public Avro.Schema WriterSchema
6449
set => writerSchema = value;
6550
}
6651

67-
public int? WriterSchemaId
68-
{
69-
get => writerSchemaId;
70-
set => writerSchemaId = value;
71-
}
72-
7352
public SpecificWriter<T> AvroWriter
7453
{
7554
get => avroWriter;
@@ -79,20 +58,14 @@ public SpecificWriter<T> AvroWriter
7958

8059
private Dictionary<Type, SerializerSchemaData> multiSchemaData =
8160
new Dictionary<Type, SerializerSchemaData>();
82-
83-
private SerializerSchemaData singleSchemaData;
61+
private Dictionary<KeyValuePair<string, string>, int> registeredSchemas =
62+
new Dictionary<KeyValuePair<string, string>, int>();
8463

8564
public SpecificSerializerImpl(
8665
ISchemaRegistryClient schemaRegistryClient,
8766
AvroSerializerConfig config,
8867
RuleRegistry ruleRegistry) : base(schemaRegistryClient, config, ruleRegistry)
8968
{
90-
Type writerType = typeof(T);
91-
if (writerType != typeof(ISpecificRecord))
92-
{
93-
singleSchemaData = ExtractSchemaData(writerType);
94-
}
95-
9669
if (config == null) { return; }
9770

9871
if (config.BufferBytes != null) { this.initialBufferSize = config.BufferBytes.Value; }
@@ -177,24 +150,18 @@ public async Task<byte[]> Serialize(string topic, Headers headers, T data, bool
177150
{
178151
try
179152
{
153+
int schemaId;
180154
string subject;
181155
RegisteredSchema latestSchema = null;
182156
SerializerSchemaData currentSchemaData;
183157
await serdeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
184158
try
185159
{
186-
if (singleSchemaData == null)
187-
{
188-
var key = data.GetType();
189-
if (!multiSchemaData.TryGetValue(key, out currentSchemaData))
190-
{
191-
currentSchemaData = ExtractSchemaData(key);
192-
multiSchemaData[key] = currentSchemaData;
193-
}
194-
}
195-
else
160+
var key = data != null ? data.GetType() : typeof(Null);
161+
if (!multiSchemaData.TryGetValue(key, out currentSchemaData))
196162
{
197-
currentSchemaData = singleSchemaData;
163+
currentSchemaData = ExtractSchemaData(key);
164+
multiSchemaData[key] = currentSchemaData;
198165
}
199166

200167
string fullname = null;
@@ -204,25 +171,26 @@ public async Task<byte[]> Serialize(string topic, Headers headers, T data, bool
204171
}
205172

206173
subject = GetSubjectName(topic, isKey, fullname);
174+
var subjectSchemaPair = new KeyValuePair<string, string>(subject, currentSchemaData.WriterSchemaString);
207175
latestSchema = await GetReaderSchema(subject)
208176
.ConfigureAwait(continueOnCapturedContext: false);
209177

210178
if (latestSchema != null)
211179
{
212-
currentSchemaData.WriterSchemaId = latestSchema.Id;
180+
schemaId = latestSchema.Id;
213181
}
214-
else if (!currentSchemaData.SubjectsRegistered.Contains(subject))
182+
else if (!registeredSchemas.TryGetValue(subjectSchemaPair, out schemaId))
215183
{
216184
// first usage: register/get schema to check compatibility
217-
currentSchemaData.WriterSchemaId = autoRegisterSchema
185+
schemaId = autoRegisterSchema
218186
? await schemaRegistryClient
219187
.RegisterSchemaAsync(subject, currentSchemaData.WriterSchemaString, normalizeSchemas)
220188
.ConfigureAwait(continueOnCapturedContext: false)
221189
: await schemaRegistryClient
222190
.GetSchemaIdAsync(subject, currentSchemaData.WriterSchemaString, normalizeSchemas)
223191
.ConfigureAwait(continueOnCapturedContext: false);
224192

225-
currentSchemaData.SubjectsRegistered.Add(subject);
193+
registeredSchemas.Add(subjectSchemaPair, schemaId);
226194
}
227195
}
228196
finally
@@ -248,7 +216,7 @@ public async Task<byte[]> Serialize(string topic, Headers headers, T data, bool
248216
{
249217
stream.WriteByte(Constants.MagicByte);
250218

251-
writer.Write(IPAddress.HostToNetworkOrder(currentSchemaData.WriterSchemaId.Value));
219+
writer.Write(IPAddress.HostToNetworkOrder(schemaId));
252220
currentSchemaData.AvroWriter.Write(data, new BinaryEncoder(stream));
253221

254222
// TODO: maybe change the ISerializer interface so that this copy isn't necessary.

test/Confluent.SchemaRegistry.Serdes.UnitTests/BaseSerializeDeserialize.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@ public BaseSerializeDeserializeTests()
4747
schemaRegistryMock.Setup(x => x.RegisterSchemaAsync(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<bool>())).ReturnsAsync(
4848
(string subject, string schema, bool normalize) => store.TryGetValue(schema, out int id) ? id : store[schema] = store.Count + 1
4949
);
50+
schemaRegistryMock.Setup(x => x.GetSchemaIdAsync(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<bool>())).ReturnsAsync(
51+
(string subject, string schema, bool normalize) =>
52+
{
53+
return subjectStore[subject].First(x =>
54+
x.SchemaString == schema
55+
).Id;
56+
}
57+
);
5058
schemaRegistryMock.Setup(x => x.LookupSchemaAsync(It.IsAny<string>(), It.IsAny<Schema>(), It.IsAny<bool>(), It.IsAny<bool>())).ReturnsAsync(
5159
(string subject, Schema schema, bool ignoreDeleted, bool normalize) =>
5260
{

test/Confluent.SchemaRegistry.Serdes.UnitTests/SerializeDeserialize.cs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,43 @@ public void ISpecificRecord()
154154
Assert.Equal(user.favorite_number, result.favorite_number);
155155
}
156156

157+
[Fact]
158+
public void ISpecificRecordStrings()
159+
{
160+
var schemaStr = "{\"type\":\"string\"}";
161+
var schema = new RegisteredSchema("topic1-value", 1, 1, schemaStr, SchemaType.Avro, null);
162+
store[schemaStr] = 1;
163+
subjectStore["topic1-value"] = new List<RegisteredSchema> { schema };
164+
165+
schema = new RegisteredSchema("topic2-value", 1, 2, schemaStr, SchemaType.Avro, null);
166+
schema.Metadata = new Metadata(null, new Dictionary<string, string>
167+
{
168+
{ "confluent:version", "1" }
169+
}, null);
170+
store[schemaStr] = 2;
171+
subjectStore["topic2-value"] = new List<RegisteredSchema> { schema };
172+
173+
var config = new AvroSerializerConfig
174+
{
175+
AutoRegisterSchemas = false,
176+
SubjectNameStrategy = SubjectNameStrategy.Topic
177+
};
178+
var serializer = new AvroSerializer<String>(schemaRegistryClient, config);
179+
180+
Headers headers = new Headers();
181+
var bytes = serializer.SerializeAsync("hi", new SerializationContext(MessageComponentType.Value, "topic1", headers)).Result;
182+
Assert.Equal(1, bytes[4]);
183+
184+
bytes = serializer.SerializeAsync("world", new SerializationContext(MessageComponentType.Value, "topic2", headers)).Result;
185+
Assert.Equal(2, bytes[4]);
186+
187+
bytes = serializer.SerializeAsync("hi", new SerializationContext(MessageComponentType.Value, "topic1", headers)).Result;
188+
Assert.Equal(1, bytes[4]);
189+
190+
bytes = serializer.SerializeAsync("world", new SerializationContext(MessageComponentType.Value, "topic2", headers)).Result;
191+
Assert.Equal(2, bytes[4]);
192+
}
193+
157194
[Fact]
158195
public void ISpecificRecordRecordNameStrategy()
159196
{

0 commit comments

Comments
 (0)