Merge pull request #277 from vulncheck-oss/transform/chunked-b64

j-baines · web-flow · commit e2d01c007bea · 2024-11-22T03:58:54.000-05:00
Adds `EncodeBase64Chunks` string chunk generation
diff --git a/transform/encode.go b/transform/encode.go
@@ -6,6 +6,7 @@ import (
 	"encoding/base64"
 	"encoding/binary"
 	"io"
+	"slices"
 	"strconv"
 	"strings"
 
@@ -44,6 +45,70 @@ func EncodeBase64URL(s string) string {
 	return base64.URLEncoding.EncodeToString([]byte(s))
 }
 
+// EncodeBase64Chunks splits s into pieces and returns each piece as a standard, padded base64
+// string. maxChunkSize is the size limit for an encoded chunk, not for the original data. This is
+// useful when you know you need to send some data in a chunked manner and the target contains a
+// data size restriction, but you do not want to guess at the size of encoded data.
+//
+// If the requested maximum chunk size is larger than the encoded string, the encoded string will
+// be the only chunk. An empty s yields an empty, non-nil slice.
+func EncodeBase64Chunks(s string, maxChunkSize uint) []string {
+	// An example helps demonstrate why this is useful. Take the following string:
+	//
+	// 1234567890123456789012345678901234567890
+	//
+	// If you need to send this data to a target and the target limits you to 10 bytes of
+	// base64 data, you cannot just split the base64 string if the parser is strict, and you
+	// also can't just split the raw data before encoding in a predictable manner due to
+	// padding. For example:
+	//
+	// $ printf '1234567890' | base64 -w0 | wc -c
+	// 16
+	//
+	// 1/3 is often stated, but misses padding, as you can see in the 10-10/3 example:
+	//
+	// $ printf '1234567' | base64 -w0 | wc -c
+	// 12
+	//
+	// The optimal size is actually the largest block whose encoding still fits. 1234, 12345,
+	// and 123456 all encode to 8 bytes, so the optimal fit for the first block to use the
+	// most space possible is 123456:
+	//
+	// $ printf '123456' | base64 -w0 | wc -c
+	// 8
+	//
+	// While the n/3-1 rule works for most cases of pre-base64 encoded data, there is the need
+	// to ensure you minimize requests by figuring out what the best block size is. That's what
+	// all *this* (hand waving) does.
+
+	// corner case: nothing to encode, exit early
+	if len(s) == 0 {
+		return []string{}
+	}
+	// calculate the padded base64 length of n raw bytes
+	maxSize := func(n int) int {
+		return (((4 * n / 3) + 3) & ^3)
+	}
+	// start from an estimate (two thirds of len(s)/maxChunkSize, minus one); integer rounding
+	// makes it inexact, so grow it one byte at a time until the padded encoding no longer fits
+	// within maxChunkSize, then step back once.
+	// NOTE(review): a zero maxChunkSize divides by zero on the next line, and an estimate that
+	// already starts over the limit (long s, small limit) is only reduced by one, which can
+	// leave chunks larger than maxChunkSize - TODO confirm and guard.
+	chunkSize := (len(s) / int(maxChunkSize)) - (len(s) / int(maxChunkSize) / 3) - 1
+	for {
+		if maxSize(chunkSize) > int(maxChunkSize) {
+			chunkSize--
+
+			break
+		}
+		chunkSize++
+	}
+	// encode each chunkSize-byte slice of s on its own so every chunk is valid, padded base64.
+	// NOTE(review): slices.Chunk panics when chunkSize is less than 1, which looks reachable
+	// for a maxChunkSize below 4 - TODO confirm.
+	chunks := []string{}
+	for c := range slices.Chunk([]byte(s), chunkSize) {
+		chunks = append(chunks, base64.StdEncoding.EncodeToString(c))
+	}
+
+	return chunks
+}
+
 // DecodeBase64 decodes base64 with standard encoding.
 func DecodeBase64(s string) string {
 	decoded, err := base64.StdEncoding.DecodeString(s)
diff --git a/transform/encode_test.go b/transform/encode_test.go
@@ -28,6 +28,52 @@ func TestEncodeBase64(t *testing.T) {
 	t.Log(encoded)
 }
 
+// TestEncodeBase64Chunks checks the chunks produced for a 40-byte input with encoded-size limits
+// of 10, 12, and 13.
+// NOTE(review): each loop only compares the chunks that were returned, so a result shorter than
+// expected passes unnoticed and a longer one indexes past expected - consider asserting the
+// length as well.
+func TestEncodeBase64Chunks(t *testing.T) {
+	// limit 10: every expected chunk is at most 8 encoded bytes
+	chunks := EncodeBase64Chunks("1234567890123456789012345678901234567890", 10)
+	expected := []string{"MTIzNDU2", "Nzg5MDEy", "MzQ1Njc4", "OTAxMjM0", "NTY3ODkw", "MTIzNDU2", "Nzg5MA=="}
+	for i, c := range chunks {
+		if c != expected[i] {
+			t.Fatal(chunks)
+		}
+	}
+
+	t.Log(chunks)
+
+	// limit 12: every expected chunk is at most 12 encoded bytes
+	chunks = EncodeBase64Chunks("1234567890123456789012345678901234567890", 12)
+	expected = []string{"MTIzNDU2Nzg5", "MDEyMzQ1Njc4", "OTAxMjM0NTY3", "ODkwMTIzNDU2", "Nzg5MA=="}
+	for i, c := range chunks {
+		if c != expected[i] {
+			t.Fatal(chunks)
+		}
+	}
+
+	t.Log(chunks)
+
+	// limit 13: compared against the same expected values as the limit-12 case
+	chunks = EncodeBase64Chunks("1234567890123456789012345678901234567890", 13)
+	for i, c := range chunks {
+		if c != expected[i] {
+			t.Fatal(chunks)
+		}
+	}
+
+	t.Log(chunks)
+}
+
+// TestEncodeBase64Chunks_EmptyString checks that an empty input produces no chunks.
+func TestEncodeBase64Chunks_EmptyString(t *testing.T) {
+	chunks := EncodeBase64Chunks("", 10)
+	if len(chunks) != 0 {
+		// NOTE(review): this reports len(""), which is always 0, rather than len(chunks).
+		t.Fatal(len(""))
+	}
+}
+
+// TestEncodeBase64Chunks_SmallerThanMaxsize checks that an input whose encoding fits within the
+// limit comes back as a single chunk.
+func TestEncodeBase64Chunks_SmallerThanMaxsize(t *testing.T) {
+	chunks := EncodeBase64Chunks("a", 10)
+	// NOTE(review): chunks[0] is read before the length check, so an empty result would panic
+	// with an index error instead of failing through t.Fatal.
+	if chunks[0] != "YQ==" || len(chunks) != 1 {
+		t.Fatal(chunks)
+	}
+	t.Log(chunks)
+}
+
 func TestEncodeBase64URL(t *testing.T) {
 	encoded := EncodeBase64URL(urlTestString)