Skip to content

Commit 67ae7f6

Browse files
authored
Merge pull request ClickHouse#79809 from jh0x/jh-bas32
Add Base32 support
2 parents b58fb41 + c4d8027 commit 67ae7f6

25 files changed

+1490
-405
lines changed

docs/en/sql-reference/functions/string-functions.md

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1140,6 +1140,102 @@ Returns string `s` converted from the encoding `from` to encoding `to`.
11401140
convertCharset(s, from, to)
11411141
```
11421142

1143+
## base32Encode {#base32encode}
1144+
1145+
Encodes a string using [Base32](https://datatracker.ietf.org/doc/html/rfc4648#section-6).
1146+
1147+
**Syntax**
1148+
1149+
```sql
1150+
base32Encode(plaintext)
1151+
```
1152+
1153+
**Arguments**
1154+
1155+
- `plaintext` — [String](../data-types/string.md) column or constant.
1156+
1157+
**Returned value**
1158+
1159+
- A string containing the encoded value of the argument. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
1160+
1161+
**Example**
1162+
1163+
```sql
1164+
SELECT base32Encode('Encoded');
1165+
```
1166+
1167+
Result:
1168+
1169+
```result
1170+
┌─base32Encode('Encoded')─┐
1171+
│ IVXGG33EMVSA==== │
1172+
└─────────────────────────┘
1173+
```
1174+
1175+
## base32Decode {#base32decode}
1176+
1177+
Accepts a string and decodes it using the [Base32](https://datatracker.ietf.org/doc/html/rfc4648#section-6) encoding scheme.
1178+
1179+
**Syntax**
1180+
1181+
```sql
1182+
base32Decode(encoded)
1183+
```
1184+
1185+
**Arguments**
1186+
1187+
- `encoded` — [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). If the string is not a valid Base32-encoded value, an exception is thrown.
1188+
1189+
**Returned value**
1190+
1191+
- A string containing the decoded value of the argument. [String](../data-types/string.md).
1192+
1193+
**Example**
1194+
1195+
```sql
1196+
SELECT base32Decode('IVXGG33EMVSA====');
1197+
```
1198+
1199+
Result:
1200+
1201+
```result
1202+
┌─base32Decode('IVXGG33EMVSA====')─┐
1203+
│ Encoded │
1204+
└──────────────────────────────────┘
1205+
```
1206+
1207+
## tryBase32Decode {#trybase32decode}
1208+
1209+
Like `base32Decode` but returns an empty string in case of error.
1210+
1211+
**Syntax**
1212+
1213+
```sql
1214+
tryBase32Decode(encoded)
1215+
```
1216+
1217+
**Arguments**
1218+
1219+
- `encoded` — [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). If the string is not a valid Base32-encoded value, an empty string is returned.
1220+
1221+
**Returned value**
1222+
1223+
- A string containing the decoded value of the argument.
1224+
1225+
**Examples**
1226+
1227+
Query:
1228+
1229+
```sql
1230+
SELECT tryBase32Decode('IVXGG33EMVSA====') as res, tryBase32Decode('invalid') as res_invalid;
1231+
```
1232+
1233+
```response
1234+
┌─res─────┬─res_invalid─┐
1235+
│ Encoded │ │
1236+
└─────────┴─────────────┘
1237+
```
1238+
11431239
## base58Encode {#base58encode}
11441240

11451241
Encodes a string using [Base58](https://datatracker.ietf.org/doc/html/draft-msporny-base58) in the "Bitcoin" alphabet.

src/Common/Base32.h

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
#pragma once
2+
3+
#include <optional>
4+
#include <base/types.h>
5+
#include "base/defines.h"
6+
7+
namespace DB
8+
{
9+
10+
struct Base32Rfc4648
11+
{
12+
static constexpr char encodeChar(UInt8 c)
13+
{
14+
chassert(c < 32);
15+
if (c < 26)
16+
return 'A' + c;
17+
return '2' + (c - 26);
18+
}
19+
static constexpr UInt8 decodeChar(UInt8 c)
20+
{
21+
if (c >= 'A' && c <= 'Z')
22+
return c - 'A';
23+
24+
// Handle lowercase letters the same as uppercase
25+
if (c >= 'a' && c <= 'z')
26+
return c - 'a';
27+
28+
if (c >= '2' && c <= '7')
29+
return (c - '2') + 26;
30+
31+
return 0xFF;
32+
}
33+
static constexpr Int8 padding_char = '=';
34+
};
35+
36+
template <typename Traits, typename Tag>
37+
struct Base32;
38+
39+
struct Base32NaiveTag;
40+
41+
template <typename Traits>
42+
struct Base32<Traits, Base32NaiveTag>
43+
{
44+
static size_t encodeBase32(const UInt8 * src, size_t src_length, UInt8 * dst)
45+
{
46+
// in: [01010101] [11001100] [11110000]
47+
48+
// out: 01010 | 11100 | 11001 | 11100 | 000
49+
// [ 5b ] [ 5b ] [ 5b ] [ 5b ] ...
50+
51+
size_t ipos = 0;
52+
size_t opos = 0;
53+
uint32_t buffer = 0;
54+
uint8_t bits_left = 0;
55+
56+
while (ipos < src_length)
57+
{
58+
buffer = (buffer << 8) | src[ipos++];
59+
bits_left += 8;
60+
61+
while (bits_left >= 5)
62+
{
63+
dst[opos++] = Traits::encodeChar((buffer >> (bits_left - 5)) & 0x1F);
64+
bits_left -= 5;
65+
}
66+
}
67+
68+
if (bits_left > 0)
69+
{
70+
dst[opos++] = Traits::encodeChar((buffer << (5 - bits_left)) & 0x1F);
71+
}
72+
73+
while (opos % 8 != 0)
74+
{
75+
dst[opos++] = Traits::padding_char;
76+
}
77+
78+
return opos;
79+
}
80+
81+
/// This function might write into dst even if decoding fails (nullopt returned)
82+
static std::optional<size_t> decodeBase32(const UInt8 * src, size_t src_length, UInt8 * dst)
83+
{
84+
if (src_length % 8 != 0)
85+
{
86+
return std::nullopt;
87+
}
88+
89+
size_t dst_pos = 0;
90+
size_t buffer = 0;
91+
int bits = 0;
92+
size_t pad_count = 0;
93+
bool padding_started = false;
94+
95+
for (size_t i = 0; i < src_length; ++i)
96+
{
97+
UInt8 c = src[i];
98+
99+
if (c == Traits::padding_char)
100+
{
101+
padding_started = true;
102+
pad_count++;
103+
continue;
104+
}
105+
106+
if (padding_started)
107+
{
108+
return std::nullopt; // Only padding was expected
109+
}
110+
111+
UInt8 value = Traits::decodeChar(c);
112+
if (value == 0xFF)
113+
{
114+
return std::nullopt; // Invalid symbol
115+
}
116+
117+
// Stuff in decoded bits, write out if there's enough
118+
buffer = (buffer << 5) | value;
119+
bits += 5;
120+
121+
if (bits >= 8)
122+
{
123+
bits -= 8;
124+
dst[dst_pos++] = (buffer >> bits) & 0xFF;
125+
}
126+
}
127+
128+
if (pad_count > 0)
129+
{
130+
if (!(pad_count == 1 || pad_count == 3 || pad_count == 4 || pad_count == 6))
131+
{
132+
return std::nullopt;
133+
}
134+
135+
if (bits > 0 && (buffer & ((1 << bits) - 1)) != 0)
136+
{
137+
return std::nullopt;
138+
}
139+
}
140+
141+
return dst_pos;
142+
}
143+
};
144+
145+
inline size_t encodeBase32(const UInt8 * src, size_t src_length, UInt8 * dst)
146+
{
147+
return Base32<Base32Rfc4648, Base32NaiveTag>::encodeBase32(src, src_length, dst);
148+
}
149+
150+
inline std::optional<size_t> decodeBase32(const UInt8 * src, size_t src_length, UInt8 * dst)
151+
{
152+
return Base32<Base32Rfc4648, Base32NaiveTag>::decodeBase32(src, src_length, dst);
153+
}
154+
155+
}

0 commit comments

Comments
 (0)