Skip to content

Commit 406eb38

Browse files
richard-viney authored and lpil committed
Add bit_array.pad_to_bytes. Pad bit arrays when encoding and in bytes_tree.
1 parent 4411f58 commit 406eb38

File tree

6 files changed

+206
-25
lines changed

6 files changed

+206
-25
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44

55
- The performance of `string.trim`, `string.trim_start`, and `string.trim_end`
66
has been improved on JavaScript.
7+
- The `base64_encode`, `base64_url_encode`, and `base16_encode` functions in the
8+
`bit_array` module no longer throw an exception when called with a bit array
9+
which is not a whole number of bytes. Instead, the bit array is now padded
10+
with zero bits prior to being encoded.
11+
- The `bit_array` module gains the `pad_to_bytes` function.
12+
- The `bytes_tree` module now pads unaligned bit arrays with zeros when they are
13+
added to the tree.
714

815
## v0.44.0 - 2024-11-25
916

src/gleam/bit_array.gleam

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ pub fn bit_size(x: BitArray) -> Int {
2323
@external(javascript, "../gleam_stdlib.mjs", "length")
2424
pub fn byte_size(x: BitArray) -> Int
2525

26+
/// Pads a bit array with zeros so that it is a whole number of bytes.
27+
///
28+
@external(erlang, "gleam_stdlib", "bit_array_pad_to_bytes")
29+
pub fn pad_to_bytes(x: BitArray) -> BitArray {
30+
x
31+
}
32+
2633
/// Creates a new bit array by joining two bit arrays.
2734
///
2835
/// ## Examples
@@ -104,6 +111,9 @@ fn unsafe_to_string(a: BitArray) -> String
104111
pub fn concat(bit_arrays: List(BitArray)) -> BitArray
105112

106113
/// Encodes a BitArray into a base 64 encoded string.
114+
///
115+
/// If the bit array does not contain a whole number of bytes then it is padded
116+
/// with zero bits prior to being encoded.
107117
///
108118
@external(erlang, "gleam_stdlib", "bit_array_base64_encode")
109119
@external(javascript, "../gleam_stdlib.mjs", "encode64")
@@ -123,15 +133,20 @@ pub fn base64_decode(encoded: String) -> Result(BitArray, Nil) {
123133
@external(javascript, "../gleam_stdlib.mjs", "decode64")
124134
fn decode64(a: String) -> Result(BitArray, Nil)
125135

126-
/// Encodes a `BitArray` into a base 64 encoded string with URL and filename safe alphabet.
136+
/// Encodes a `BitArray` into a base 64 encoded string with URL and filename
137+
/// safe alphabet.
138+
///
139+
/// If the bit array does not contain a whole number of bytes then it is padded
140+
/// with zero bits prior to being encoded.
127141
///
128142
pub fn base64_url_encode(input: BitArray, padding: Bool) -> String {
129143
base64_encode(input, padding)
130144
|> string.replace("+", "-")
131145
|> string.replace("/", "_")
132146
}
133147

134-
/// Decodes a base 64 encoded string with URL and filename safe alphabet into a `BitArray`.
148+
/// Decodes a base 64 encoded string with URL and filename safe alphabet into a
149+
/// `BitArray`.
135150
///
136151
pub fn base64_url_decode(encoded: String) -> Result(BitArray, Nil) {
137152
encoded
@@ -140,10 +155,17 @@ pub fn base64_url_decode(encoded: String) -> Result(BitArray, Nil) {
140155
|> base64_decode()
141156
}
142157

143-
@external(erlang, "binary", "encode_hex")
158+
/// Encodes a `BitArray` into a base 16 encoded string.
159+
///
160+
/// If the bit array does not contain a whole number of bytes then it is padded
161+
/// with zero bits prior to being encoded.
162+
///
163+
@external(erlang, "gleam_stdlib", "base16_encode")
144164
@external(javascript, "../gleam_stdlib.mjs", "base16_encode")
145165
pub fn base16_encode(input: BitArray) -> String
146166

167+
/// Decodes a base 16 encoded string into a `BitArray`.
168+
///
147169
@external(erlang, "gleam_stdlib", "base16_decode")
148170
@external(javascript, "../gleam_stdlib.mjs", "base16_decode")
149171
pub fn base16_decode(input: String) -> Result(BitArray, Nil)

src/gleam/bytes_tree.gleam

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
////
2020
//// On Erlang this type is compatible with Erlang's iolists.
2121

22-
// TODO: pad bit arrays to byte boundaries when adding to a tree.
2322
import gleam/bit_array
2423
import gleam/list
2524
import gleam/string_tree.{type StringTree}
@@ -104,7 +103,6 @@ pub fn concat(trees: List(BytesTree)) -> BytesTree {
104103
///
105104
/// Runs in linear time with respect to the number of bit arrays.
106105
///
107-
@external(erlang, "gleam_stdlib", "identity")
108106
pub fn concat_bit_arrays(bits: List(BitArray)) -> BytesTree {
109107
bits
110108
|> list.map(fn(b) { from_bit_array(b) })
@@ -135,8 +133,14 @@ pub fn from_string_tree(tree: string_tree.StringTree) -> BytesTree {
135133
///
136134
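/// Runs in constant time for bit arrays that are a whole number of bytes.
/// Other bit arrays are first padded with zero bits, which creates a new bit
/// array.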
137135
///
138-
@external(erlang, "gleam_stdlib", "wrap_list")
139136
pub fn from_bit_array(bits: BitArray) -> BytesTree {
137+
bits
138+
|> bit_array.pad_to_bytes
139+
|> wrap_list
140+
}
141+
142+
@external(erlang, "gleam_stdlib", "wrap_list")
143+
fn wrap_list(bits: BitArray) -> BytesTree {
140144
Bytes(bits)
141145
}
142146

src/gleam_stdlib.erl

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
decode_float/1, decode_list/1, decode_option/2, decode_field/2, parse_int/1,
66
parse_float/1, less_than/2, string_pop_grapheme/1, string_pop_codeunit/1,
77
string_starts_with/2, wrap_list/1, string_ends_with/2, string_pad/4,
8-
decode_map/1, uri_parse/1, bit_array_int_to_u32/1, bit_array_int_from_u32/1,
8+
decode_map/1, uri_parse/1,
99
decode_result/1, bit_array_slice/3, decode_bit_array/1, compile_regex/2,
1010
regex_scan/2, percent_encode/1, percent_decode/1, regex_check/2,
1111
regex_split/2, base_decode64/1, parse_query/1, bit_array_concat/1,
@@ -14,8 +14,8 @@
1414
tuple_get/2, classify_dynamic/1, print/1, println/1, print_error/1,
1515
println_error/1, inspect/1, float_to_string/1, int_from_base_string/2,
1616
utf_codepoint_list_to_string/1, contains_string/2, crop_string/2,
17-
base16_decode/1, string_replace/3, regex_replace/3, slice/3,
18-
bit_array_to_int_and_size/1
17+
base16_encode/1, base16_decode/1, string_replace/3, regex_replace/3,
18+
slice/3, bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1
1919
]).
2020

2121
%% Taken from OTP's uri_string module
@@ -207,12 +207,21 @@ string_pop_grapheme(String) ->
207207
string_pop_codeunit(<<Cp/integer, Rest/binary>>) -> {Cp, Rest};
208208
string_pop_codeunit(Binary) -> {0, Binary}.
209209

210+
bit_array_pad_to_bytes(Bin) ->
211+
case erlang:bit_size(Bin) rem 8 of
212+
0 -> Bin;
213+
TrailingBits ->
214+
PaddingBits = 8 - TrailingBits,
215+
<<Bin/bits, 0:PaddingBits>>
216+
end.
217+
210218
bit_array_concat(BitArrays) ->
211219
list_to_bitstring(BitArrays).
212220

213221
-if(?OTP_RELEASE >= 26).
214222
bit_array_base64_encode(Bin, Padding) ->
215-
base64:encode(Bin, #{padding => Padding}).
223+
PaddedBin = bit_array_pad_to_bytes(Bin),
224+
base64:encode(PaddedBin, #{padding => Padding}).
216225
-else.
217226
bit_array_base64_encode(_Bin, _Padding) ->
218227
erlang:error(<<"Erlang OTP/26 or higher is required to use base64:encode">>).
@@ -223,16 +232,6 @@ bit_array_slice(Bin, Pos, Len) ->
223232
catch error:badarg -> {error, nil}
224233
end.
225234

226-
bit_array_int_to_u32(I) when 0 =< I, I < 4294967296 ->
227-
{ok, <<I:32>>};
228-
bit_array_int_to_u32(_) ->
229-
{error, nil}.
230-
231-
bit_array_int_from_u32(<<I:32>>) ->
232-
{ok, I};
233-
bit_array_int_from_u32(_) ->
234-
{error, nil}.
235-
236235
compile_regex(String, Options) ->
237236
{options, Caseless, Multiline} = Options,
238237
OptionsList = [
@@ -552,6 +551,10 @@ crop_string(String, Prefix) ->
552551
contains_string(String, Substring) ->
553552
is_bitstring(string:find(String, Substring)).
554553

554+
base16_encode(Bin) ->
555+
PaddedBin = bit_array_pad_to_bytes(Bin),
556+
binary:encode_hex(PaddedBin).
557+
555558
base16_decode(String) ->
556559
try
557560
{ok, binary:decode_hex(String)}

test/gleam/bit_array_test.gleam

Lines changed: 126 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,50 @@ pub fn bit_size_erlang_only_test() {
3636
}
3737

3838
pub fn byte_size_test() {
39-
bit_array.byte_size(bit_array.from_string("hello"))
39+
bit_array.byte_size(<<>>)
40+
|> should.equal(0)
41+
42+
bit_array.byte_size(<<0, 1, 2, 3, 4>>)
4043
|> should.equal(5)
44+
}
4145

42-
bit_array.byte_size(bit_array.from_string(""))
43-
|> should.equal(0)
46+
// This test is target specific since it's using non byte-aligned BitArrays
47+
// and those are not supported on the JavaScript target.
48+
@target(erlang)
49+
pub fn byte_size_erlang_only_test() {
50+
bit_array.byte_size(<<1, 2, 3:6>>)
51+
|> should.equal(3)
52+
}
53+
54+
pub fn pad_to_bytes_test() {
55+
<<>>
56+
|> bit_array.pad_to_bytes
57+
|> should.equal(<<>>)
58+
59+
<<0xAB>>
60+
|> bit_array.pad_to_bytes
61+
|> should.equal(<<0xAB>>)
62+
63+
<<0xAB, 0x12>>
64+
|> bit_array.pad_to_bytes
65+
|> should.equal(<<0xAB, 0x12>>)
66+
}
67+
68+
// This test is target specific since it's using non byte-aligned BitArrays
69+
// and those are not supported on the JavaScript target.
70+
@target(erlang)
71+
pub fn pad_to_bytes_erlang_only_test() {
72+
<<1:1>>
73+
|> bit_array.pad_to_bytes
74+
|> should.equal(<<0x80>>)
75+
76+
<<-1:7>>
77+
|> bit_array.pad_to_bytes
78+
|> should.equal(<<0xFE>>)
79+
80+
<<0xAB, 0x12, 3:3>>
81+
|> bit_array.pad_to_bytes
82+
|> should.equal(<<0xAB, 0x12, 0x60>>)
4483
}
4584

4685
pub fn not_equal_test() {
@@ -85,9 +124,25 @@ pub fn concat_test() {
85124
// and those are not supported on the JavaScript target.
86125
@target(erlang)
87126
pub fn concat_erlang_only_test() {
127+
[<<-1:32>>, <<0:1>>, <<0:0>>]
128+
|> bit_array.concat
129+
|> should.equal(<<255, 255, 255, 255, 0:1>>)
130+
131+
[<<-20:6, 2>>, <<3:4>>, <<7:3>>, <<-1:64>>]
132+
|> bit_array.concat
133+
|> should.equal(<<176, 8, 255, 255, 255, 255, 255, 255, 255, 255, 31:size(5)>>)
134+
88135
[<<1, 2:4>>, <<3>>]
89136
|> bit_array.concat
90137
|> should.equal(<<1, 2:4, 3>>)
138+
139+
[<<-1:32>>, <<0:1>>, <<0:0>>]
140+
|> bit_array.concat
141+
|> should.equal(<<255, 255, 255, 255, 0:1>>)
142+
143+
[<<-20:6, 2>>, <<3:4>>, <<7:3>>, <<-1:64>>]
144+
|> bit_array.concat
145+
|> should.equal(<<176, 8, 255, 255, 255, 255, 255, 255, 255, 255, 31:size(5)>>)
91146
}
92147

93148
pub fn slice_test() {
@@ -133,6 +188,19 @@ pub fn slice_test() {
133188
|> should.equal(Ok(<<"b":utf8>>))
134189
}
135190

191+
// This test is target specific since it's using non byte-aligned BitArrays
192+
// and those are not supported on the JavaScript target.
193+
@target(erlang)
194+
pub fn slice_erlang_only_test() {
195+
<<0, 1, 2:7>>
196+
|> bit_array.slice(0, 3)
197+
|> should.equal(Error(Nil))
198+
199+
<<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15>>
200+
|> bit_array.slice(8, 12)
201+
|> should.equal(Error(Nil))
202+
}
203+
136204
pub fn to_string_test() {
137205
<<>>
138206
|> bit_array.to_string
@@ -155,6 +223,15 @@ pub fn to_string_test() {
155223
|> should.equal(Error(Nil))
156224
}
157225

226+
// This test is target specific since it's using non byte-aligned BitArrays
227+
// and those are not supported on the JavaScript target.
228+
@target(erlang)
229+
pub fn to_string_erlang_only_test() {
230+
<<"ø":utf8, 50:4>>
231+
|> bit_array.to_string
232+
|> should.equal(Error(Nil))
233+
}
234+
158235
pub fn is_utf8_test() {
159236
<<>>
160237
|> bit_array.is_utf8
@@ -207,6 +284,23 @@ pub fn base64_encode_test() {
207284
))
208285
}
209286

287+
// This test is target specific since it's using non byte-aligned BitArrays
288+
// and those are not supported on the JavaScript target.
289+
@target(erlang)
290+
pub fn base64_erlang_only_encode_test() {
291+
<<-1:7>>
292+
|> bit_array.base64_encode(True)
293+
|> should.equal("/g==")
294+
295+
<<0xFA, 5:3>>
296+
|> bit_array.base64_encode(True)
297+
|> should.equal("+qA=")
298+
299+
<<0xFA, 0xBC, 0x6D, 1:1>>
300+
|> bit_array.base64_encode(True)
301+
|> should.equal("+rxtgA==")
302+
}
303+
210304
pub fn base64_decode_test() {
211305
"/3/+/A=="
212306
|> bit_array.base64_decode()
@@ -305,6 +399,27 @@ pub fn base16_test() {
305399
|> should.equal("A1B2C3D4E5F67891")
306400
}
307401

402+
// This test is target specific since it's using non byte-aligned BitArrays
403+
// and those are not supported on the JavaScript target.
404+
@target(erlang)
405+
pub fn base16_encode_erlang_only_test() {
406+
<<-1:7>>
407+
|> bit_array.base16_encode()
408+
|> should.equal("FE")
409+
410+
<<0xFA, 5:3>>
411+
|> bit_array.base16_encode()
412+
|> should.equal("FAA0")
413+
414+
<<0xFA, 5:4>>
415+
|> bit_array.base16_encode()
416+
|> should.equal("FA50")
417+
418+
<<0xFA, 0xBC, 0x6D, 1:1>>
419+
|> bit_array.base16_encode()
420+
|> should.equal("FABC6D80")
421+
}
422+
308423
pub fn base16_decode_test() {
309424
bit_array.base16_decode("")
310425
|> should.equal(Ok(<<>>))
@@ -353,7 +468,7 @@ pub fn inspect_test() {
353468
// This test is target specific since it's using non byte-aligned BitArrays
354469
// and those are not supported on the JavaScript target.
355470
@target(erlang)
356-
pub fn inspect_partial_bytes_test() {
471+
pub fn inspect_erlang_only_test() {
357472
bit_array.inspect(<<4:5>>)
358473
|> should.equal("<<4:size(5)>>")
359474

@@ -365,7 +480,7 @@ pub fn inspect_partial_bytes_test() {
365480
}
366481

367482
@target(erlang)
368-
pub fn compare_different_sizes_test() {
483+
pub fn compare_test() {
369484
bit_array.compare(<<4:5>>, <<4:5>>)
370485
|> should.equal(order.Eq)
371486

@@ -458,4 +573,10 @@ pub fn starts_with_erlang_only_test() {
458573

459574
bit_array.starts_with(<<0:127>>, <<1:127>>)
460575
|> should.be_false
576+
577+
bit_array.starts_with(<<0xFF, 0x81>>, <<0xFF, 1:1>>)
578+
|> should.be_true
579+
580+
bit_array.starts_with(<<0xFF, 0x81>>, <<0xFF, 0:1>>)
581+
|> should.be_false
461582
}

0 commit comments

Comments
 (0)