Skip to content

Commit 6f034f2

Browse files
committed
Add chunked base64 encoding
1 parent 1ca77f7 commit 6f034f2

File tree

2 files changed

+90
-0
lines changed

2 files changed

+90
-0
lines changed

transform/encode.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"encoding/base64"
77
"encoding/binary"
88
"io"
9+
"slices"
910
"strconv"
1011
"strings"
1112

@@ -44,6 +45,68 @@ func EncodeBase64URL(s string) string {
4445
return base64.URLEncoding.EncodeToString([]byte(s))
4546
}
4647

48+
// EncodeBase64Chunks creates a slice of maxiumum size base64 strings, where the maxChunkSize is
49+
// the calculated base64 chunk size and not of the original data. This is useful when you know you
50+
// need to send some data in a chunked manner and the target contains a data size restriction, but
51+
// you do not want to guess at the size of encoded data.
52+
//
53+
// If a chunk size maximum is requested that is larger than the encoded string will be the only
54+
// chunk.
55+
func EncodeBase64Chunks(s string, maxChunkSize uint) []string {
56+
// An example helps demonstrate why this is useful. Take the following string:
57+
//
58+
// 1234567890123456789012345678901234567890
59+
//
60+
// If you need to send this data to a target and the target limits you to 10 bytes of
61+
// base64 data, you cannot just split the string of base64 if the parser is strict, and you
62+
// also can't just split the raw data before encoding in a predictable manner due to
63+
// padding. For example:
64+
//
65+
// $ printf '1234567890' | base64 -w0 | wc -c
66+
// 16
67+
//
68+
// 1/3 is often stated, but misses padding, as you can see in the 10-10/3 example:
69+
//
70+
// $ printf '1234567' | base64 -w0 | wc -c
71+
// 12
72+
//
73+
// The optimal size is actually to ensure the block smaller fits, which 1234, 12345, and
74+
// 123456 all fit into. This means the optimal fit for the first block to use the most
75+
// space possible is 123456:
76+
//
77+
// $ printf '123456' | base64 -w0 | wc -c
78+
// 8
79+
//
80+
// While the n/3-1 rule works for most cases of pre-base64 encoded data, there is the need
81+
// to ensure you minimize requests by figuring out what the best block size is. That's what
82+
// all *this* (hand waving) does.
83+
84+
// corner case, fail exit early
85+
if len(s) == 0 {
86+
return []string{}
87+
}
88+
// calculate the maximum base64 size with padding
89+
maxSize := func(n int) int {
90+
return (((4 * n / 3) + 3) & ^3)
91+
}
92+
// start with a chunk size that is 2/3 the size and subtract one, this normally gives the
93+
// closest fit, but because of "computer numbers" rounding can be iffy so this ensures that
94+
// the chunk size calculation iterates to the best block size.
95+
chunkSize := (len(s) / int(maxChunkSize)) - (len(s) / int(maxChunkSize) / 3) - 1
96+
for {
97+
if maxSize(chunkSize) >= int(maxChunkSize) {
98+
chunkSize--
99+
break
100+
}
101+
chunkSize++
102+
}
103+
var chunks []string
104+
for c := range slices.Chunk([]byte(s), chunkSize) {
105+
chunks = append(chunks, base64.StdEncoding.EncodeToString([]byte(c)))
106+
}
107+
return chunks
108+
}
109+
47110
// DecodeBase64 decodes base64 with standard encoding.
48111
func DecodeBase64(s string) string {
49112
decoded, err := base64.StdEncoding.DecodeString(s)

transform/encode_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,33 @@ func TestEncodeBase64(t *testing.T) {
2828
t.Log(encoded)
2929
}
3030

31+
func TestEncodeBase64Chunks(t *testing.T) {
32+
chunks := EncodeBase64Chunks("1234567890123456789012345678901234567890", 10)
33+
expected := []string{"MTIzNDU2", "Nzg5MDEy", "MzQ1Njc4", "OTAxMjM0", "NTY3ODkw", "MTIzNDU2", "Nzg5MA=="}
34+
for i, c := range chunks {
35+
if c != expected[i] {
36+
t.Fatal(chunks)
37+
}
38+
}
39+
40+
t.Log(chunks)
41+
}
42+
43+
func TestEncodeBase64Chunks_EmptyString(t *testing.T) {
44+
chunks := EncodeBase64Chunks("", 10)
45+
if len(chunks) != 0 {
46+
t.Fatal(len(""))
47+
}
48+
}
49+
50+
func TestEncodeBase64Chunks_SmallerThanMaxsize(t *testing.T) {
51+
chunks := EncodeBase64Chunks("a", 10)
52+
if chunks[0] != "YQ==" || len(chunks) != 1 {
53+
t.Fatal(chunks)
54+
}
55+
t.Log(chunks)
56+
}
57+
3158
func TestEncodeBase64URL(t *testing.T) {
3259
encoded := EncodeBase64URL(urlTestString)
3360

0 commit comments

Comments
 (0)