diff --git a/codecs/vlen-bytes/README.md b/codecs/vlen-bytes/README.md new file mode 100644 index 0000000..d2d6b22 --- /dev/null +++ b/codecs/vlen-bytes/README.md @@ -0,0 +1,44 @@ +# Vlen-bytes codec + +Defines an `array -> bytes` codec that serializes variable-length byte string arrays. + +## Codec name + +The value of the `name` member in the codec object MUST be `vlen-bytes`. + +## Configuration parameters + +None. + +## Example + +For example, the array metadata below specifies that the array contains variable-length byte strings: + +```json +{ + "data_type": "bytes", + "codecs": [{ + "name": "vlen-bytes" + }] +} +``` + +## Format and algorithm + +This is an `array -> bytes` codec. + +This codec is only compatible with the [`"bytes"`](../../data-types/bytes/README.md) data type. + +In the encoded format, each chunk is prefixed with a 32-bit little-endian unsigned integer (u32le) that specifies the number of elements in the chunk. +This prefix is followed by a sequence of encoded elements in lexicographical order. +Each element in the sequence is encoded by a u32le representing the number of bytes, followed by the bytes themselves. + +See https://numcodecs.readthedocs.io/en/stable/other/vlen.html#vlenbytes for details about the encoding. + +## Change log + +No changes yet. 
+ +## Current maintainers + +* [zarr-python core development team](https://github.com/orgs/zarr-developers/teams/python-core-devs) diff --git a/codecs/vlen-bytes/schema.json b/codecs/vlen-bytes/schema.json new file mode 100644 index 0000000..021eecb --- /dev/null +++ b/codecs/vlen-bytes/schema.json @@ -0,0 +1,20 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "oneOf": [ + { + "type": "object", + "properties": { + "name": { + "const": "vlen-bytes" + }, + "configuration": { + "type": "object", + "additionalProperties": false + } + }, + "required": ["name"], + "additionalProperties": false + }, + { "const": "vlen-bytes" } + ] +} diff --git a/codecs/vlen-utf8/README.md b/codecs/vlen-utf8/README.md new file mode 100644 index 0000000..2b6e830 --- /dev/null +++ b/codecs/vlen-utf8/README.md @@ -0,0 +1,45 @@ +# Vlen-utf8 codec + +Defines an `array -> bytes` codec that serializes variable-length UTF-8 string arrays. + +## Codec name + +The value of the `name` member in the codec object MUST be `vlen-utf8`. + +## Configuration parameters + +None. + +## Example + +For example, the array metadata below specifies that the array contains variable-length UTF-8 strings: + +```json +{ + "data_type": "string", + "codecs": [{ + "name": "vlen-utf8" + }] +} +``` + +## Format and algorithm + +This is an `array -> bytes` codec. + +This codec is only compatible with the [`"string"`](../../data-types/string/README.md) data type. + +In the encoded format, each chunk is prefixed with a 32-bit little-endian unsigned integer (u32le) that specifies the number of elements in the chunk. +This prefix is followed by a sequence of encoded elements in lexicographical order. +Each element in the sequence is encoded by a u32le representing the number of bytes, followed by the bytes themselves. +The bytes for each element are obtained by encoding the element as UTF-8 bytes. + +See https://numcodecs.readthedocs.io/en/stable/other/vlen.html#vlenutf8 for details about the encoding. 
+ +## Change log + +No changes yet. + +## Current maintainers + +* [zarr-python core development team](https://github.com/orgs/zarr-developers/teams/python-core-devs) diff --git a/codecs/vlen-utf8/schema.json b/codecs/vlen-utf8/schema.json new file mode 100644 index 0000000..542126b --- /dev/null +++ b/codecs/vlen-utf8/schema.json @@ -0,0 +1,20 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "oneOf": [ + { + "type": "object", + "properties": { + "name": { + "const": "vlen-utf8" + }, + "configuration": { + "type": "object", + "additionalProperties": false + } + }, + "required": ["name"], + "additionalProperties": false + }, + { "const": "vlen-utf8" } + ] +} diff --git a/data-types/bytes/README.md b/data-types/bytes/README.md new file mode 100644 index 0000000..469fc93 --- /dev/null +++ b/data-types/bytes/README.md @@ -0,0 +1,33 @@ +# Bytes data type + +Defines a data type for variable-length byte strings. + +## Permitted fill values + +The value of the `fill_value` metadata key must be an array of byte values. + +## Example + +For example, the array metadata below specifies that the array contains variable-length byte strings: + +```json +{ + "data_type": "bytes", + "fill_value": [1, 2, 3], + "codecs": [{ + "name": "vlen-bytes" + }] +} +``` + +## Notes + +Currently, this data type is only compatible with the [`"vlen-bytes"`](../../codecs/vlen-bytes/README.md) codec. + +## Change log + +No changes yet. 
+ +## Current maintainers + +* [zarr-python core development team](https://github.com/orgs/zarr-developers/teams/python-core-devs) diff --git a/data-types/bytes/schema.json b/data-types/bytes/schema.json new file mode 100644 index 0000000..d9deb92 --- /dev/null +++ b/data-types/bytes/schema.json @@ -0,0 +1,20 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "oneOf": [ + { + "type": "object", + "properties": { + "name": { + "const": "bytes" + }, + "configuration": { + "type": "object", + "additionalProperties": false + } + }, + "required": ["name"], + "additionalProperties": false + }, + { "const": "bytes" } + ] +} diff --git a/data-types/string/README.md b/data-types/string/README.md new file mode 100644 index 0000000..3ee034f --- /dev/null +++ b/data-types/string/README.md @@ -0,0 +1,33 @@ +# String data type + +Defines a data type for variable-length UTF-8 strings. + +## Permitted fill values + +The value of the `fill_value` metadata key must be a Unicode string. + +## Example + +For example, the array metadata below specifies that the array contains variable-length UTF-8 strings: + +```json +{ + "data_type": "string", + "fill_value": "foo", + "codecs": [{ + "name": "vlen-utf8" + }] +} +``` + +## Notes + +Currently, this data type is only compatible with the [`"vlen-utf8"`](../../codecs/vlen-utf8/README.md) codec. + +## Change log + +No changes yet. 
+ +## Current maintainers + +* [zarr-python core development team](https://github.com/orgs/zarr-developers/teams/python-core-devs) diff --git a/data-types/string/schema.json b/data-types/string/schema.json new file mode 100644 index 0000000..58366e7 --- /dev/null +++ b/data-types/string/schema.json @@ -0,0 +1,20 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "oneOf": [ + { + "type": "object", + "properties": { + "name": { + "const": "string" + }, + "configuration": { + "type": "object", + "additionalProperties": false + } + }, + "required": ["name"], + "additionalProperties": false + }, + { "const": "string" } + ] +}