Skip to content

Commit ef2377a

Browse files
authored
Merge pull request #23 from hatamiarash7/add-b64-decode-encode
Add `base64_encode` / `base64_decode` functions
2 parents 6b68513 + 1d8cb68 commit ef2377a

File tree

8 files changed

+648
-1
lines changed

8 files changed

+648
-1
lines changed

README.md

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Table of Contents
3838
- [IP to Integer / Integer to IP](#ip-to-integer--integer-to-ip)
3939
- [Normalize URL](#normalize-url)
4040
- [Domain Depth](#domain-depth)
41+
- [Base64 Encode / Decode](#base64-encode--decode)
4142
- [Get Extension Version](#get-extension-version)
4243
- [Build Requirements](#build-requirements)
4344
- [Debugging](#debugging)
@@ -700,6 +701,36 @@ D SELECT domain_depth('http://a.b.c.example.co.uk/page') AS depth;
700701
└───────┘
701702
```
702703

704+
### Base64 Encode / Decode
705+
706+
The `base64_encode` function encodes a string into Base64 format. The `base64_decode` function decodes a Base64-encoded string back to its original form. If the input is not valid Base64, `base64_decode` returns the literal string `INVALID_BASE64`.
707+
708+
```sql
709+
D SELECT base64_encode('Hello World') AS encoded;
710+
┌──────────────────┐
711+
│ encoded │
712+
varchar
713+
├──────────────────┤
714+
│ SGVsbG8gV29ybGQ=
715+
└──────────────────┘
716+
717+
D SELECT base64_decode('SGVsbG8gV29ybGQ=') AS decoded;
718+
┌─────────────┐
719+
│ decoded │
720+
varchar
721+
├─────────────┤
722+
│ Hello World │
723+
└─────────────┘
724+
725+
D SELECT base64_decode(base64_encode('https://example.com')) AS roundtrip;
726+
┌─────────────────────┐
727+
│ roundtrip │
728+
varchar
729+
├─────────────────────┤
730+
│ https://example.com
731+
└─────────────────────┘
732+
```
733+
703734
### Get Extension Version
704735

705736
You can use the `netquack_version` function to get the extension version.
@@ -748,7 +779,6 @@ Also, there will be stdout errors for background tasks like CURL.
748779
- [ ] Support internationalized domain names (IDNs)
749780
- [ ] Implement `punycode_encode` / `punycode_decode` functions - Convert internationalized domain names to/from ASCII-compatible encoding
750781
- [ ] Implement `is_valid_domain` function - Validate a domain name against RFC rules
751-
- [ ] Implement `base64_encode` / `base64_decode` functions - Encode and decode Base64 strings
752782
- [ ] Implement `extract_path_segments` table function - Split a URL path into individual segment rows
753783

754784
## Contributing 🤝

docs/SUMMARY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
* [Extract Fragment](functions/extract-fragment.md)
2424
* [Normalize URL](functions/normalize-url.md)
2525
* [Domain Depth](functions/domain-depth.md)
26+
* [Base64 Encode / Decode](functions/base64-functions.md)
2627
* [Tranco](functions/tranco/README.md)
2728
* [Get Tranco Rank](functions/tranco/get-tranco-rank.md)
2829
* [Download / Update Tranco](functions/tranco/download-update-tranco.md)

docs/functions/base64-functions.md

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
---
2+
layout:
3+
title:
4+
visible: true
5+
description:
6+
visible: false
7+
tableOfContents:
8+
visible: true
9+
outline:
10+
visible: true
11+
pagination:
12+
visible: true
13+
---
14+
15+
# Base64 Encode / Decode
16+
17+
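The `base64_encode` function encodes a string into Base64 format. The `base64_decode` function decodes a Base64-encoded string back to its original form. Whitespace (spaces, tabs, and line breaks) in the input to `base64_decode` is ignored, and input that is not valid Base64 returns the literal string `INVALID_BASE64` (see the example below).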
18+
19+
## base64\_encode
20+
21+
```sql
22+
D SELECT base64_encode('Hello World') AS encoded;
23+
┌──────────────────┐
24+
│ encoded │
25+
varchar
26+
├──────────────────┤
27+
│ SGVsbG8gV29ybGQ=
28+
└──────────────────┘
29+
```
30+
31+
```sql
32+
D SELECT base64_encode('https://example.com/path?q=1') AS encoded;
33+
┌──────────────────────────────────────────┐
34+
│ encoded │
35+
varchar
36+
├──────────────────────────────────────────┤
37+
│ aHR0cHM6Ly9leGFtcGxlLmNvbS9wYXRoP3E9MQ==
38+
└──────────────────────────────────────────┘
39+
```
40+
41+
## base64\_decode
42+
43+
```sql
44+
D SELECT base64_decode('SGVsbG8gV29ybGQ=') AS decoded;
45+
┌─────────────┐
46+
│ decoded │
47+
varchar
48+
├─────────────┤
49+
│ Hello World │
50+
└─────────────┘
51+
```
52+
53+
```sql
54+
D SELECT base64_decode('INVALID!') AS decoded;
55+
┌────────────────┐
56+
│ decoded │
57+
varchar
58+
├────────────────┤
59+
│ INVALID_BASE64 │
60+
└────────────────┘
61+
```
62+
63+
## Round-trip
64+
65+
You can combine both functions to verify encoding and decoding:
66+
67+
```sql
68+
D SELECT base64_decode(base64_encode('café ☕')) AS roundtrip;
69+
┌───────────┐
70+
│ roundtrip │
71+
varchar
72+
├───────────┤
73+
│ café ☕ │
74+
└───────────┘
75+
```

src/functions/base64_functions.cpp

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
// Copyright 2026 Arash Hatami
2+
3+
#include "base64_functions.hpp"
4+
5+
#include <array>
6+
7+
namespace duckdb {
8+
9+
// Base64 alphabet: the position of each character in this string is its 6-bit value.
// Declared constexpr so the decode table below is derived from this single definition
// at compile time and the two can never drift apart.
static constexpr char BASE64_CHARS[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

static_assert(sizeof(BASE64_CHARS) == 65, "Base64 alphabet must hold 64 characters plus the terminating NUL");

// Builds the reverse lookup table: maps a byte value to its Base64 index.
// Every byte that is not part of the alphabet (including the '=' padding
// character) maps to 255, which callers treat as "invalid".
static constexpr std::array<uint8_t, 256> BuildBase64DecodeTable() {
    std::array<uint8_t, 256> table = {};
    for (auto &entry : table) {
        entry = 255;
    }
    for (uint8_t index = 0; index < 64; index++) {
        table[static_cast<uint8_t>(BASE64_CHARS[index])] = index;
    }
    return table;
}

// Reverse lookup table, computed once at compile time.
static constexpr auto BASE64_DECODE_TABLE = BuildBase64DecodeTable();
26+
27+
// Scalar function body registered as base64_encode(VARCHAR) -> VARCHAR.
// For every row of the chunk: a NULL input marks the result row invalid; otherwise the
// value's string form is passed to netquack::Base64Encode and the encoded text is stored
// in the result vector. If encoding throws, the row receives an error message string
// instead of propagating the exception.
void Base64EncodeFunction(DataChunk &args, ExpressionState &state, Vector &result) {
    auto &source = args.data[0];
    auto out_rows = FlatVector::GetData<string_t>(result);
    auto &out_validity = FlatVector::Validity(result);

    const idx_t row_count = args.size();
    for (idx_t row = 0; row < row_count; row++) {
        auto cell = source.GetValue(row);
        if (!cell.IsNull()) {
            auto text = cell.ToString();
            try {
                out_rows[row] = StringVector::AddString(result, netquack::Base64Encode(text));
            } catch (const std::exception &ex) {
                out_rows[row] = StringVector::AddString(result, "Error encoding base64: " + std::string(ex.what()));
            }
        } else {
            out_validity.SetInvalid(row);
        }
    }
}
49+
50+
// Scalar function body registered as base64_decode(VARCHAR) -> VARCHAR.
// For every row of the chunk: a NULL input marks the result row invalid; otherwise the
// value's string form is passed to netquack::Base64Decode and the decoded text is stored
// in the result vector. If decoding throws, the row receives an error message string
// instead of propagating the exception.
void Base64DecodeFunction(DataChunk &args, ExpressionState &state, Vector &result) {
    auto &source = args.data[0];
    auto out_rows = FlatVector::GetData<string_t>(result);
    auto &out_validity = FlatVector::Validity(result);

    const idx_t row_count = args.size();
    for (idx_t row = 0; row < row_count; row++) {
        auto cell = source.GetValue(row);
        if (!cell.IsNull()) {
            auto text = cell.ToString();
            try {
                out_rows[row] = StringVector::AddString(result, netquack::Base64Decode(text));
            } catch (const std::exception &ex) {
                out_rows[row] = StringVector::AddString(result, "Error decoding base64: " + std::string(ex.what()));
            }
        } else {
            out_validity.SetInvalid(row);
        }
    }
}
72+
73+
namespace netquack {
74+
75+
std::string Base64Encode(const std::string &input) {
76+
if (input.empty()) {
77+
return "";
78+
}
79+
80+
const auto *data = reinterpret_cast<const uint8_t *>(input.data());
81+
size_t len = input.size();
82+
83+
// Calculate output size: 4 output chars per 3 input bytes, rounded up
84+
size_t output_len = 4 * ((len + 2) / 3);
85+
std::string result;
86+
result.reserve(output_len);
87+
88+
for (size_t i = 0; i < len; i += 3) {
89+
uint32_t octet_a = data[i];
90+
uint32_t octet_b = (i + 1 < len) ? data[i + 1] : 0;
91+
uint32_t octet_c = (i + 2 < len) ? data[i + 2] : 0;
92+
93+
uint32_t triple = (octet_a << 16) | (octet_b << 8) | octet_c;
94+
95+
result += BASE64_CHARS[(triple >> 18) & 0x3F];
96+
result += BASE64_CHARS[(triple >> 12) & 0x3F];
97+
result += (i + 1 < len) ? BASE64_CHARS[(triple >> 6) & 0x3F] : '=';
98+
result += (i + 2 < len) ? BASE64_CHARS[triple & 0x3F] : '=';
99+
}
100+
101+
return result;
102+
}
103+
104+
std::string Base64Decode(const std::string &input) {
105+
if (input.empty()) {
106+
return "";
107+
}
108+
109+
// Strip whitespace (spaces, tabs, newlines, carriage returns)
110+
std::string cleaned;
111+
cleaned.reserve(input.size());
112+
for (char c : input) {
113+
if (c != ' ' && c != '\t' && c != '\n' && c != '\r') {
114+
cleaned += c;
115+
}
116+
}
117+
118+
if (cleaned.empty()) {
119+
return "";
120+
}
121+
122+
// Validate length (must be multiple of 4 for standard base64)
123+
if (cleaned.size() % 4 != 0) {
124+
return "INVALID_BASE64";
125+
}
126+
127+
// Validate characters
128+
for (size_t i = 0; i < cleaned.size(); i++) {
129+
char c = cleaned[i];
130+
if (c == '=') {
131+
// Padding only allowed at the end (last 1-2 chars)
132+
if (i < cleaned.size() - 2) {
133+
return "INVALID_BASE64";
134+
}
135+
} else if (BASE64_DECODE_TABLE[static_cast<uint8_t>(c)] == 255) {
136+
return "INVALID_BASE64";
137+
}
138+
}
139+
140+
// Count padding
141+
size_t padding = 0;
142+
if (!cleaned.empty() && cleaned[cleaned.size() - 1] == '=') {
143+
padding++;
144+
}
145+
if (cleaned.size() > 1 && cleaned[cleaned.size() - 2] == '=') {
146+
padding++;
147+
}
148+
149+
// Calculate output size
150+
size_t output_len = (cleaned.size() / 4) * 3 - padding;
151+
std::string result;
152+
result.reserve(output_len);
153+
154+
for (size_t i = 0; i < cleaned.size(); i += 4) {
155+
uint32_t sextet_a = BASE64_DECODE_TABLE[static_cast<uint8_t>(cleaned[i])];
156+
uint32_t sextet_b = BASE64_DECODE_TABLE[static_cast<uint8_t>(cleaned[i + 1])];
157+
uint32_t sextet_c = (cleaned[i + 2] == '=') ? 0 : BASE64_DECODE_TABLE[static_cast<uint8_t>(cleaned[i + 2])];
158+
uint32_t sextet_d = (cleaned[i + 3] == '=') ? 0 : BASE64_DECODE_TABLE[static_cast<uint8_t>(cleaned[i + 3])];
159+
160+
uint32_t triple = (sextet_a << 18) | (sextet_b << 12) | (sextet_c << 6) | sextet_d;
161+
162+
result += static_cast<char>((triple >> 16) & 0xFF);
163+
if (cleaned[i + 2] != '=') {
164+
result += static_cast<char>((triple >> 8) & 0xFF);
165+
}
166+
if (cleaned[i + 3] != '=') {
167+
result += static_cast<char>(triple & 0xFF);
168+
}
169+
}
170+
171+
return result;
172+
}
173+
174+
} // namespace netquack
175+
} // namespace duckdb

src/functions/base64_functions.hpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Copyright 2026 Arash Hatami
2+
3+
#pragma once
4+
5+
#include "duckdb.hpp"
6+
7+
namespace duckdb {
8+
// Function to encode a string to Base64
9+
void Base64EncodeFunction(DataChunk &args, ExpressionState &state, Vector &result);
10+
11+
// Function to decode a Base64 string
12+
void Base64DecodeFunction(DataChunk &args, ExpressionState &state, Vector &result);
13+
14+
namespace netquack {
15+
// Encode a string to Base64
16+
std::string Base64Encode(const std::string &input);
17+
18+
// Decode a Base64 string
19+
std::string Base64Decode(const std::string &input);
20+
} // namespace netquack
21+
} // namespace duckdb

src/netquack_extension.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
#include "duckdb/common/exception.hpp"
88
#include "duckdb/function/scalar_function.hpp"
9+
#include "functions/base64_functions.hpp"
910
#include "functions/extract_domain.hpp"
1011
#include "functions/domain_depth.hpp"
1112
#include "functions/extract_extension.hpp"
@@ -120,6 +121,14 @@ static void LoadInternal(ExtensionLoader &loader) {
120121
ScalarFunction("normalize_url", {LogicalType::VARCHAR}, LogicalType::VARCHAR, NormalizeURLFunction);
121122
loader.RegisterFunction(normalize_url_function);
122123

124+
auto base64_encode_function =
125+
ScalarFunction("base64_encode", {LogicalType::VARCHAR}, LogicalType::VARCHAR, Base64EncodeFunction);
126+
loader.RegisterFunction(base64_encode_function);
127+
128+
auto base64_decode_function =
129+
ScalarFunction("base64_decode", {LogicalType::VARCHAR}, LogicalType::VARCHAR, Base64DecodeFunction);
130+
loader.RegisterFunction(base64_decode_function);
131+
123132
auto version_function =
124133
TableFunction("netquack_version", {}, netquack::VersionFunc::Scan, netquack::VersionFunc::Bind,
125134
netquack::VersionFunc::InitGlobal, netquack::VersionFunc::InitLocal);

0 commit comments

Comments
 (0)