Skip to content

Commit e2d01c0

Browse files
authored
Merge pull request #277 from vulncheck-oss/transform/chunked-b64
Adds `EncodeBase64Chunks` string chunk generation
2 parents cbdb783 + 250e9db commit e2d01c0

File tree

2 files changed

+111
-0
lines changed

2 files changed

+111
-0
lines changed

transform/encode.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"encoding/base64"
77
"encoding/binary"
88
"io"
9+
"slices"
910
"strconv"
1011
"strings"
1112

@@ -44,6 +45,70 @@ func EncodeBase64URL(s string) string {
4445
return base64.URLEncoding.EncodeToString([]byte(s))
4546
}
4647

48+
// EncodeBase64Chunks creates a slice of maxiumum size base64 strings, where the maxChunkSize is
49+
// the calculated base64 chunk size and not of the original data. This is useful when you know you
50+
// need to send some data in a chunked manner and the target contains a data size restriction, but
51+
// you do not want to guess at the size of encoded data.
52+
//
53+
// If a chunk size maximum is requested that is larger than the encoded string will be the only
54+
// chunk.
55+
func EncodeBase64Chunks(s string, maxChunkSize uint) []string {
56+
// An example helps demonstrate why this is useful. Take the following string:
57+
//
58+
// 1234567890123456789012345678901234567890
59+
//
60+
// If you need to send this data to a target and the target limits you to 10 bytes of
61+
// base64 data, you cannot just split the string of base64 if the parser is strict, and you
62+
// also can't just split the raw data before encoding in a predictable manner due to
63+
// padding. For example:
64+
//
65+
// $ printf '1234567890' | base64 -w0 | wc -c
66+
// 16
67+
//
68+
// 1/3 is often stated, but misses padding, as you can see in the 10-10/3 example:
69+
//
70+
// $ printf '1234567' | base64 -w0 | wc -c
71+
// 12
72+
//
73+
// The optimal size is actually to ensure the block smaller fits, which 1234, 12345, and
74+
// 123456 all fit into. This means the optimal fit for the first block to use the most
75+
// space possible is 123456:
76+
//
77+
// $ printf '123456' | base64 -w0 | wc -c
78+
// 8
79+
//
80+
// While the n/3-1 rule works for most cases of pre-base64 encoded data, there is the need
81+
// to ensure you minimize requests by figuring out what the best block size is. That's what
82+
// all *this* (hand waving) does.
83+
84+
// corner case, fail exit early
85+
if len(s) == 0 {
86+
return []string{}
87+
}
88+
// calculate the maximum base64 size with padding
89+
maxSize := func(n int) int {
90+
return (((4 * n / 3) + 3) & ^3)
91+
}
92+
// start with a chunk size that is 2/3 the size and subtract one, this normally gives the
93+
// closest fit, but because of "computer numbers" rounding can be iffy so this ensures that
94+
// the chunk size calculation iterates to the best block size.
95+
chunkSize := (len(s) / int(maxChunkSize)) - (len(s) / int(maxChunkSize) / 3) - 1
96+
for {
97+
if maxSize(chunkSize) > int(maxChunkSize) {
98+
chunkSize--
99+
100+
break
101+
}
102+
chunkSize++
103+
}
104+
chunks := []string{}
105+
for c := range slices.Chunk([]byte(s), chunkSize) {
106+
chunks = append(chunks, base64.StdEncoding.EncodeToString(c))
107+
}
108+
109+
return chunks
110+
}
111+
47112
// DecodeBase64 decodes base64 with standard encoding.
48113
func DecodeBase64(s string) string {
49114
decoded, err := base64.StdEncoding.DecodeString(s)

transform/encode_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,52 @@ func TestEncodeBase64(t *testing.T) {
2828
t.Log(encoded)
2929
}
3030

31+
func TestEncodeBase64Chunks(t *testing.T) {
32+
chunks := EncodeBase64Chunks("1234567890123456789012345678901234567890", 10)
33+
expected := []string{"MTIzNDU2", "Nzg5MDEy", "MzQ1Njc4", "OTAxMjM0", "NTY3ODkw", "MTIzNDU2", "Nzg5MA=="}
34+
for i, c := range chunks {
35+
if c != expected[i] {
36+
t.Fatal(chunks)
37+
}
38+
}
39+
40+
t.Log(chunks)
41+
42+
chunks = EncodeBase64Chunks("1234567890123456789012345678901234567890", 12)
43+
expected = []string{"MTIzNDU2Nzg5", "MDEyMzQ1Njc4", "OTAxMjM0NTY3", "ODkwMTIzNDU2", "Nzg5MA=="}
44+
for i, c := range chunks {
45+
if c != expected[i] {
46+
t.Fatal(chunks)
47+
}
48+
}
49+
50+
t.Log(chunks)
51+
52+
chunks = EncodeBase64Chunks("1234567890123456789012345678901234567890", 13)
53+
for i, c := range chunks {
54+
if c != expected[i] {
55+
t.Fatal(chunks)
56+
}
57+
}
58+
59+
t.Log(chunks)
60+
}
61+
62+
func TestEncodeBase64Chunks_EmptyString(t *testing.T) {
63+
chunks := EncodeBase64Chunks("", 10)
64+
if len(chunks) != 0 {
65+
t.Fatal(len(""))
66+
}
67+
}
68+
69+
func TestEncodeBase64Chunks_SmallerThanMaxsize(t *testing.T) {
70+
chunks := EncodeBase64Chunks("a", 10)
71+
if chunks[0] != "YQ==" || len(chunks) != 1 {
72+
t.Fatal(chunks)
73+
}
74+
t.Log(chunks)
75+
}
76+
3177
func TestEncodeBase64URL(t *testing.T) {
3278
encoded := EncodeBase64URL(urlTestString)
3379

0 commit comments

Comments
 (0)