Skip to content

Commit be4578a

Browse files
authored
[Schema Registry Avro] New Encoder Design (Azure#19842)
Fixes Azure#20061 ## Overview Revamps the schema registry encoder to work on messages instead of buffers based on the recommendation of the Azure messaging architect. This changes the APIs as follows: ```ts const buffer: NodeJS.Buffer = await serializer.serialize(value, schema); ``` becomes ```ts const message: MessageWithMetadata = await encoder.encodeMessageData(value, schema); ``` where `MessageWithMetadata` has a `body` field as well as a `contentType` field. The latter's format is `avro/binary+<Schema ID>`. For deserializing, the change is as follows: ```ts const deserializedValue = await serializer.deserialize(buffer); ``` becomes: ```ts const decodedObject = await encoder.decodeMessageData(message); ``` ## Improvement upon Azure#15959 This design introduces a new `messageAdapter` option in the encoder constructor to support processing of any message type (e.g. [cloud event](https://github.com/cloudevents/spec/blob/v1.0.1/spec.md)): ```ts const encoder = new SchemaRegistryAvroEncoder(schemaRegistryClient, { groupName, messageAdapter: adapter }); ``` where `adapter` is a message adapter that implements the following contract: ```ts interface MessageAdapter<MessageT> { produceMessage: (messageWithMetadata: MessageWithMetadata) => MessageT; consumeMessage: (message: MessageT) => MessageWithMetadata; } interface MessageWithMetadata { body: Uint8Array; contentType: string; } ``` For convenience, the PR adds a couple of adapter factories for Event Hubs' `EventData` and Event Grid's `SendCloudEventInput<Uint8Array>`. 
For example, the `createCloudEventAdapter` factory can be called to construct an adapter for the latter as follows: ```ts const adapter = createCloudEventAdapter({ type: "azure.sdk.eventgrid.samples.cloudevent", source: "/azure/sdk/schemaregistry/samples/withEventGrid", }); ``` Note that these adapter factories are exported by their respective messaging packages without explicitly implementing the contract, and the PR adds new encoder tests that check whether the produced adapters follow the contract. This organization could change in the future if we create a new core place for the contract to be imported from. See the newly added samples for how to send such messages with Event Hubs and Event Grid. Schema Registry commitment tracking: Azure#15959 Tracking issue: Azure#18608 First iteration design: Azure#18365
1 parent a3f10ce commit be4578a

26 files changed

+1219
-477
lines changed

sdk/core/core-amqp/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"clean": "rimraf dist dist-* temp types coverage coverage-browser .nyc_output *.tgz *.log test*.xml",
4444
"execute:samples": "echo skipped",
4545
"extract-api": "tsc -p . && api-extractor run --local",
46-
"format": "prettier --write --config ../../../.prettierrc.json --ignore-path ../../../.prettierignore \"src/**/*.ts\" \"test/**/*.ts\" \"samples-dev/**/*.ts\" \"*.{js,json}\"",
46+
"format": "prettier --write --config ../../../.prettierrc.json --ignore-path ../../../.prettierignore \"src/**/*.ts\" \"test/**/*.ts\" \"*.{js,json}\"",
4747
"integration-test:browser": "echo skipped",
4848
"integration-test:node": "echo skipped",
4949
"integration-test": "npm run integration-test:node && npm run integration-test:browser",

sdk/core/core-amqp/src/util/utils.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ export interface WebSocketOptions {
5858
* A constant that indicates whether the environment is node.js or browser based.
5959
*/
6060
export const isNode =
61-
!!process && !!process.version && !!process.versions && !!process.versions.node;
61+
typeof process !== "undefined" && Boolean(process.version) && Boolean(process.versions?.node);
6262

6363
/**
6464
* Defines an object with possible properties defined in T.

sdk/eventhub/event-hubs/review/event-hubs.api.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ export interface CreateBatchOptions extends OperationOptions {
6161
partitionKey?: string;
6262
}
6363

64+
// @public
65+
export function createEventDataAdapter(params?: EventDataAdapterParameters): MessageAdapter<EventData>;
66+
6467
// @public
6568
export const earliestEventPosition: EventPosition;
6669

@@ -79,6 +82,15 @@ export interface EventData {
7982
};
8083
}
8184

85+
// @public
86+
export interface EventDataAdapterParameters {
87+
correlationId?: string | number | Buffer;
88+
messageId?: string | number | Buffer;
89+
properties?: {
90+
[key: string]: any;
91+
};
92+
}
93+
8294
// @public
8395
export interface EventDataBatch {
8496
readonly count: number;
@@ -225,6 +237,18 @@ export interface LoadBalancingOptions {
225237
// @public
226238
export const logger: AzureLogger;
227239

240+
// @public
241+
export interface MessageAdapter<MessageT> {
242+
consumeMessage: (message: MessageT) => MessageWithMetadata;
243+
produceMessage: (messageWithMetadata: MessageWithMetadata) => MessageT;
244+
}
245+
246+
// @public
247+
export interface MessageWithMetadata {
248+
body: Uint8Array;
249+
contentType: string;
250+
}
251+
228252
export { MessagingError }
229253

230254
// @public
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT license.
3+
4+
import { EventData } from "./eventData";
5+
6+
/**
7+
* A message with payload and content type fields
8+
*
9+
* This interface is hidden because it is already exported by `@azure/schema-registry-avro`
10+
*
11+
* @hidden
12+
*/
13+
export interface MessageWithMetadata {
14+
/**
15+
* The message's binary data
16+
*/
17+
body: Uint8Array;
18+
/**
19+
* The message's content type
20+
*/
21+
contentType: string;
22+
}
23+
24+
/**
25+
* A message adapter interface that specifies methods for producing and consuming
26+
* messages with payloads and content type fields.
27+
*
28+
* This interface is hidden because it is already exported by `@azure/schema-registry-avro`
29+
*
30+
* @hidden
31+
*/
32+
export interface MessageAdapter<MessageT> {
33+
/**
34+
* defines how to create a message from a payload and a content type
35+
*/
36+
produceMessage: (messageWithMetadata: MessageWithMetadata) => MessageT;
37+
/**
38+
* defines how to access the payload and the content type of a message
39+
*/
40+
consumeMessage: (message: MessageT) => MessageWithMetadata;
41+
}
42+
43+
// This type should always be equivalent to Omit<Omit<EventData, "body">, "contentType">
44+
/**
45+
* Parameters to the `createEventDataAdapter` function that creates an event data adapter.
46+
*/
47+
export interface EventDataAdapterParameters {
48+
/**
49+
* The correlation identifier that allows an
50+
* application to specify a context for the message for the purposes of correlation, for example
51+
* reflecting the MessageId of a message that is being replied to.
52+
*/
53+
correlationId?: string | number | Buffer;
54+
55+
/**
56+
* The message identifier is an
57+
* application-defined value that uniquely identifies the message and its payload.
58+
*
59+
* Note: Numbers that are not whole integers are not allowed.
60+
*/
61+
messageId?: string | number | Buffer;
62+
63+
/**
64+
* Set of key value pairs that can be used to set properties specific to user application.
65+
*/
66+
properties?: {
67+
[key: string]: any;
68+
};
69+
}
70+
71+
/**
72+
* A function that constructs an event data adapter. That adapter can be used
73+
* with `@azure/schema-registry-avro` to encode and decode body in event data.
74+
*
75+
* @param params - parameters to create the event data
76+
* @returns An event data adapter that can produce and consume event data
77+
*/
78+
export function createEventDataAdapter(
79+
params: EventDataAdapterParameters = {}
80+
): MessageAdapter<EventData> {
81+
return {
82+
produceMessage: ({ body, contentType }: MessageWithMetadata) => {
83+
return {
84+
...params,
85+
body,
86+
contentType,
87+
};
88+
},
89+
consumeMessage: (message: EventData): MessageWithMetadata => {
90+
const { body, contentType } = message;
91+
if (body === undefined || !(body instanceof Uint8Array)) {
92+
throw new Error("Expected the body field to be defined and have a Uint8Array");
93+
}
94+
if (contentType === undefined) {
95+
throw new Error("Expected the contentType field to be defined");
96+
}
97+
return {
98+
body,
99+
contentType,
100+
};
101+
},
102+
};
103+
}

sdk/eventhub/event-hubs/src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,5 @@ export {
5151
parseEventHubConnectionString,
5252
EventHubConnectionStringProperties,
5353
} from "./util/connectionStringUtils";
54+
55+
export * from "./eventDataAdapter";

sdk/schemaregistry/schema-registry-avro/CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,13 @@
44

55
### Features Added
66

7+
- The serializer APIs have been revamped to work on messages instead of buffers, where the message payload is the pure encoded data. The schema ID is now part of the content type of that message. This change will improve the experience of using this encoder with the other messaging clients (e.g. Event Hubs, Service Bus, and Event Grid clients).
8+
- `decodeMessageData` now supports decoding using a different but compatible schema
9+
710
### Breaking Changes
11+
- The `SchemaRegistryAvroSerializer` class has been renamed to `SchemaRegistryAvroEncoder`
12+
- The `serialize` method has been renamed to `encodeMessageData` and it now returns a message
13+
- The `deserialize` method has been renamed to `decodeMessageData` and it now takes a message as input
814

915
### Bugs Fixed
1016

sdk/schemaregistry/schema-registry-avro/README.md

Lines changed: 38 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
# Azure Schema Registry Avro serializer client library for JavaScript
1+
# Azure Schema Registry Avro Encoder client library for JavaScript
22

33
Azure Schema Registry is a schema repository service hosted by Azure Event Hubs,
44
providing schema storage, versioning, and management. This package provides an
5-
Avro serializer capable of serializing and deserializing payloads containing
6-
Schema Registry schema identifiers and Avro-encoded data.
5+
Avro encoder capable of encoding and decoding payloads containing
6+
Avro-encoded data.
77

88
Key links:
99

@@ -31,64 +31,68 @@ npm install @azure/schema-registry-avro
3131

3232
## Key concepts
3333

34-
### SchemaRegistryAvroSerializer
34+
### SchemaRegistryAvroEncoder
3535

36-
Provides API to serialize to and deserialize from Avro Binary Encoding plus a
37-
header with schema ID. Uses
36+
Provides an API to encode to and decode from Avro Binary Encoding wrapped in a message
37+
with a content type field containing the schema ID. Uses
3838
`SchemaRegistryClient` from the [@azure/schema-registry](https://www.npmjs.com/package/@azure/schema-registry) package
3939
to get schema IDs from schema definition or vice versa. The provided API has an internal cache to avoid calling the schema registry service when possible.
4040

41-
### Message format
41+
### Messages
4242

43-
The same format is used by schema registry serializers across Azure SDK languages.
43+
By default, the encoder will create messages structured as follows:
4444

45-
Messages are encoded as follows:
45+
- `body`: a byte array containing data in the Avro Binary Encoding. Note that it
46+
is NOT an Avro Object Container File. The latter includes the schema, and creating
47+
it defeats the purpose of using this encoder to move the schema out of the
48+
message payload and into the schema registry.
4649

47-
- 4 bytes: Format Indicator
50+
- `contentType`: a string of the following format `avro/binary+<Schema ID>` where
51+
the `avro/binary` part signals that this message has an Avro-encoded payload
52+
and the `<Schema ID>` part is the Schema ID the Schema Registry service assigned
53+
to the schema used to encode this payload.
4854

49-
- Currently always zero to indicate format below.
50-
51-
- 32 bytes: Schema ID
52-
53-
- UTF-8 hexadecimal representation of GUID.
54-
- 32 hex digits, no hyphens.
55-
- Same format and byte order as string from Schema Registry service.
56-
57-
- Remaining bytes: Avro payload (in general, format-specific payload)
58-
59-
- Avro Binary Encoding
60-
- NOT Avro Object Container File, which includes the schema and defeats the
61-
purpose of this serialzer to move the schema out of the message payload and
62-
into the schema registry.
55+
Not all messaging services support the same message structure. To enable
56+
integration with such services, the encoder can act on custom message structures
57+
by setting the `messageAdapter` option in the constructor with a corresponding
58+
message producer and consumer. Azure messaging client libraries export default
59+
adapters for their message types.
6360

6461
## Examples
6562

66-
### Serialize and deserialize
63+
### Encode and decode an `@azure/event-hubs`'s `EventData`
6764

6865
```javascript
6966
const { DefaultAzureCredential } = require("@azure/identity");
67+
const { createEventDataAdapter } = require("@azure/event-hubs");
7068
const { SchemaRegistryClient } = require("@azure/schema-registry");
71-
const { SchemaRegistryAvroSerializer } = require("@azure/schema-registry-avro");
72-
73-
const client = new SchemaRegistryClient("<fully qualified namespace>", new DefaultAzureCredential());
74-
const serializer = new SchemaRegistryAvroSerializer(client, { groupName: "<group>" });
69+
const { SchemaRegistryAvroEncoder } = require("@azure/schema-registry-avro");
70+
71+
const client = new SchemaRegistryClient(
72+
"<fully qualified namespace>",
73+
new DefaultAzureCredential()
74+
);
75+
const encoder = new SchemaRegistryAvroEncoder(client, {
76+
groupName: "<group>",
77+
messageAdapter: createEventDataAdapter(),
78+
});
7579

7680
// Example Avro schema
7781
const schema = JSON.stringify({
7882
type: "record",
7983
name: "Rating",
8084
namespace: "my.example",
81-
fields: [{ name: "score", type: "int" }]
85+
fields: [{ name: "score", type: "int" }],
8286
});
8387

8488
// Example value that matches the Avro schema above
8589
const value = { score: 42 };
8690

87-
// Serialize value to buffer
88-
const buffer = await serializer.serialize(value, schema);
91+
// Encode value to a message
92+
const message = await encoder.encodeMessageData(value, schema);
8993

90-
// Deserialize buffer to value
91-
const deserializedValue = await serializer.deserialize(buffer);
94+
// Decode a message to value
95+
const decodedValue = await encoder.decodeMessageData(message);
9296
```
9397

9498
## Troubleshooting

sdk/schemaregistry/schema-registry-avro/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,9 @@
7979
"devDependencies": {
8080
"@azure/dev-tool": "^1.0.0",
8181
"@azure/eslint-plugin-azure-sdk": "^3.0.0",
82+
"@azure/event-hubs": "^5.7.0-beta.2",
8283
"@azure/identity": "^2.0.1",
84+
"@azure/test-utils": "^1.0.0",
8385
"@azure-tools/test-recorder": "^1.0.0",
8486
"@microsoft/api-extractor": "^7.18.11",
8587
"@rollup/plugin-commonjs": "11.0.2",

sdk/schemaregistry/schema-registry-avro/review/schema-registry-avro.api.md

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,37 @@
44
55
```ts
66

7-
/// <reference types="node" />
8-
97
import { SchemaRegistry } from '@azure/schema-registry';
108

119
// @public
12-
export class SchemaRegistryAvroSerializer {
13-
constructor(client: SchemaRegistry, options?: SchemaRegistryAvroSerializerOptions);
14-
deserialize(input: Buffer | Blob | Uint8Array): Promise<unknown>;
15-
serialize(value: unknown, schema: string): Promise<Uint8Array>;
10+
export interface DecodeMessageDataOptions {
11+
schema?: string;
12+
}
13+
14+
// @public
15+
export interface MessageAdapter<MessageT> {
16+
consumeMessage: (message: MessageT) => MessageWithMetadata;
17+
produceMessage: (messageWithMetadata: MessageWithMetadata) => MessageT;
18+
}
19+
20+
// @public
21+
export interface MessageWithMetadata {
22+
body: Uint8Array;
23+
contentType: string;
24+
}
25+
26+
// @public
27+
export class SchemaRegistryAvroEncoder<MessageT = MessageWithMetadata> {
28+
constructor(client: SchemaRegistry, options?: SchemaRegistryAvroEncoderOptions<MessageT>);
29+
decodeMessageData(message: MessageT, options?: DecodeMessageDataOptions): Promise<unknown>;
30+
encodeMessageData(value: unknown, schema: string): Promise<MessageT>;
1631
}
1732

1833
// @public
19-
export interface SchemaRegistryAvroSerializerOptions {
34+
export interface SchemaRegistryAvroEncoderOptions<MessageT> {
2035
autoRegisterSchemas?: boolean;
2136
groupName?: string;
37+
messageAdapter?: MessageAdapter<MessageT>;
2238
}
2339

2440
// (No @packageDocumentation comment for this package)

sdk/schemaregistry/schema-registry-avro/sample.env

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,12 @@ SCHEMA_REGISTRY_GROUP=<Group name for schemas in registry>
99
AZURE_TENANT_ID=<AD tenant id or name>
1010
AZURE_CLIENT_ID=<ID of the user/service principal to authenticate as>
1111
AZURE_CLIENT_SECRET=<client secret used to authenticate to Azure AD>
12+
13+
# Used in samples that use Event Hubs. Retrieve these values from an Event Hub in the Azure portal.
14+
EVENTHUB_CONNECTION_STRING=<Event Hub connection string>
15+
EVENTHUB_NAME=<Event Hub name>
16+
CONSUMER_GROUP_NAME=<Event Hub consumer group name>
17+
18+
# Used in samples that use Event Grid. Retrieve these values from an Event Grid topic in the Azure portal
19+
EVENT_GRID_TOPIC_ENDPOINT=<Event Grid topic endpoint>
20+
EVENT_GRID_TOPIC_API_KEY=<Event Grid topic API key>

0 commit comments

Comments
 (0)