From c13784b19487f4a032e3178207e620d4084b2c5b Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Thu, 3 Apr 2025 17:03:10 -0400 Subject: [PATCH 1/7] Adds test that exception is thrown if bits ignored by padding are non-zero --- .../bson-binary-vector/tests/packed_bit.json | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/source/bson-binary-vector/tests/packed_bit.json b/source/bson-binary-vector/tests/packed_bit.json index a220e7e318..8823095632 100644 --- a/source/bson-binary-vector/tests/packed_bit.json +++ b/source/bson-binary-vector/tests/packed_bit.json @@ -14,30 +14,39 @@ { "description": "Simple Vector PACKED_BIT", "valid": true, - "vector": [127, 7], + "vector": [127, 8], "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", "padding": 0, - "canonical_bson": "1600000005766563746F7200040000000910007F0700" + "canonical_bson": "1600000005766563746F7200040000000910007F0800" }, { - "description": "Empty Vector PACKED_BIT", + "description": "PACKED_BIT with padding", "valid": true, - "vector": [], + "vector": [127, 8], "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", - "padding": 0, - "canonical_bson": "1400000005766563746F72000200000009100000" + "padding": 3, + "canonical_bson": "1600000005766563746F7200040000000910037F0800" }, { - "description": "PACKED_BIT with padding", - "valid": true, + "description": "PACKED_BIT with inconsistent padding", + "valid": false, "vector": [127, 7], "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", "padding": 3, "canonical_bson": "1600000005766563746F7200040000000910037F0700" }, + { + "description": "Empty Vector PACKED_BIT", + "valid": true, + "vector": [], + "dtype_hex": "0x10", + "dtype_alias": "PACKED_BIT", + "padding": 0, + "canonical_bson": "1400000005766563746F72000200000009100000" + }, { "description": "Overflow Vector PACKED_BIT", "valid": false, From 2d9565d8d073792dd38837a8a2b29fa17574d766 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Thu, 3 Apr 2025 19:08:27 -0400 Subject: 
[PATCH 2/7] Add line to Validation section: For a PACKED_BIT vector, bits lower the given padding (those ignored) must be zero. --- source/bson-binary-vector/bson-binary-vector.md | 1 + 1 file changed, 1 insertion(+) diff --git a/source/bson-binary-vector/bson-binary-vector.md b/source/bson-binary-vector/bson-binary-vector.md index 1b6fdaebbf..1e3e88bb7b 100644 --- a/source/bson-binary-vector/bson-binary-vector.md +++ b/source/bson-binary-vector/bson-binary-vector.md @@ -184,6 +184,7 @@ Drivers MUST validate vector metadata and raise an error if any invariant is vio - Padding MUST be 0 for all dtypes where padding doesn’t apply, and MUST be within \[0, 7\] for PACKED_BIT. - A PACKED_BIT vector MUST NOT be empty if padding is in the range \[1, 7\]. +- For a PACKED_BIT vector, bits lower the given padding (those ignored) must be zero. - When unpacking binary data into a FLOAT32 Vector structure, the length of the binary data following the dtype and padding MUST be a multiple of 4 bytes. From f270a25898198493aff0212c121ce71589643cd7 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Fri, 4 Apr 2025 15:50:16 -0400 Subject: [PATCH 3/7] Grammatical fix. --- source/bson-binary-vector/bson-binary-vector.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/bson-binary-vector/bson-binary-vector.md b/source/bson-binary-vector/bson-binary-vector.md index 1e3e88bb7b..f900ad2eb4 100644 --- a/source/bson-binary-vector/bson-binary-vector.md +++ b/source/bson-binary-vector/bson-binary-vector.md @@ -184,7 +184,7 @@ Drivers MUST validate vector metadata and raise an error if any invariant is vio - Padding MUST be 0 for all dtypes where padding doesn’t apply, and MUST be within \[0, 7\] for PACKED_BIT. - A PACKED_BIT vector MUST NOT be empty if padding is in the range \[1, 7\]. -- For a PACKED_BIT vector, bits lower the given padding (those ignored) must be zero. +- For a PACKED_BIT vector, bits lower than the given padding (those ignored) must be zero. 
- When unpacking binary data into a FLOAT32 Vector structure, the length of the binary data following the dtype and padding MUST be a multiple of 4 bytes. From d7ccb030ae26d7efa75da818cde003bbf57ab865 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Tue, 8 Apr 2025 08:58:20 -0400 Subject: [PATCH 4/7] Updated changelog and FAQ --- source/bson-binary-vector/bson-binary-vector.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/source/bson-binary-vector/bson-binary-vector.md b/source/bson-binary-vector/bson-binary-vector.md index f900ad2eb4..b9fd8b37d4 100644 --- a/source/bson-binary-vector/bson-binary-vector.md +++ b/source/bson-binary-vector/bson-binary-vector.md @@ -238,16 +238,26 @@ See the [README](tests/README.md) for tests. ## FAQ - What MongoDB Server version does this apply to? + - Files in the "specifications" repository have no version scheme. They are not tied to a MongoDB server version. + - In PACKED_BIT, why would one choose to use integers in \[0, 256)? + - This follows a well-established precedent for packing binary-valued arrays into bytes (8 bits), This technique is widely used across different fields, such as data compression, communication protocols, and file formats, where you want to store or transmit binary data more efficiently by grouping 8 bits into a single byte (uint8). For an example in Python, see [numpy.unpackbits](https://numpy.org/doc/2.0/reference/generated/numpy.unpackbits.html#numpy.unpackbits). +- In PACKED_BIT, why are ignored bits required to be zero? + + - To ensure the same data representation has the same encoding. For drivers supporting comparison operations, this + avoids comparing different unused bits. + ## Changelog +- 2025-04-08: In PACKED_BIT vectors, bits lower than the given padding (those ignored) must be zero. + - 2025-03-07: Update tests to use Extended JSON representation of +/-Infinity. (DRIVERS-3095) - 2025-02-04: Update validation for decoding into a FLOAT32 vector. 
From ea265ea3ed2631c5979b0b74427a37453dca43b3 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Tue, 8 Apr 2025 08:59:06 -0400 Subject: [PATCH 5/7] Reverted change to Simple Vector PACKED_BIT test case --- source/bson-binary-vector/tests/packed_bit.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/bson-binary-vector/tests/packed_bit.json b/source/bson-binary-vector/tests/packed_bit.json index 8823095632..f3493e8017 100644 --- a/source/bson-binary-vector/tests/packed_bit.json +++ b/source/bson-binary-vector/tests/packed_bit.json @@ -14,7 +14,7 @@ { "description": "Simple Vector PACKED_BIT", "valid": true, - "vector": [127, 8], + "vector": [127, 7], "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", "padding": 0, From 87f75e1a6d0758c6033ceb62460b05d0f2c1f913 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Tue, 8 Apr 2025 17:18:38 -0400 Subject: [PATCH 6/7] canonical_bson sync for Simple Vector PACKED_BIT test case --- source/bson-binary-vector/tests/packed_bit.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/bson-binary-vector/tests/packed_bit.json b/source/bson-binary-vector/tests/packed_bit.json index f3493e8017..3015acba66 100644 --- a/source/bson-binary-vector/tests/packed_bit.json +++ b/source/bson-binary-vector/tests/packed_bit.json @@ -18,7 +18,7 @@ "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", "padding": 0, - "canonical_bson": "1600000005766563746F7200040000000910007F0800" + "canonical_bson": "1600000005766563746F7200040000000910007F0700" }, { "description": "PACKED_BIT with padding", From 9461d620c8f089fd5a75e6006c9ec6cd3bf9eb86 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Tue, 8 Apr 2025 17:25:00 -0400 Subject: [PATCH 7/7] Updated wording according to pull-request feedback --- source/bson-binary-vector/bson-binary-vector.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/bson-binary-vector/bson-binary-vector.md 
b/source/bson-binary-vector/bson-binary-vector.md index b9fd8b37d4..a6421de0b5 100644 --- a/source/bson-binary-vector/bson-binary-vector.md +++ b/source/bson-binary-vector/bson-binary-vector.md @@ -184,7 +184,7 @@ Drivers MUST validate vector metadata and raise an error if any invariant is vio - Padding MUST be 0 for all dtypes where padding doesn’t apply, and MUST be within \[0, 7\] for PACKED_BIT. - A PACKED_BIT vector MUST NOT be empty if padding is in the range \[1, 7\]. -- For a PACKED_BIT vector, bits lower than the given padding (those ignored) must be zero. +- For a PACKED_BIT vector, ignored bits MUST be zero. - When unpacking binary data into a FLOAT32 Vector structure, the length of the binary data following the dtype and padding MUST be a multiple of 4 bytes. @@ -256,7 +256,7 @@ See the [README](tests/README.md) for tests. ## Changelog -- 2025-04-08: In PACKED_BIT vectors, bits lower than the given padding (those ignored) must be zero. +- 2025-04-08: In PACKED_BIT vectors, ignored bits must be zero. - 2025-03-07: Update tests to use Extended JSON representation of +/-Infinity. (DRIVERS-3095)