Skip to content

Commit 64ae0a1

Browse files
[libc] implement l64a (#129099)
Adds l64a, which generates the base 64 string expected by a64l.
1 parent 6ce41db commit 64ae0a1

File tree

8 files changed

+215
-0
lines changed

8 files changed

+215
-0
lines changed

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ set(TARGET_LIBC_ENTRYPOINTS
185185
libc.src.stdlib.atoll
186186
libc.src.stdlib.bsearch
187187
libc.src.stdlib.div
188+
libc.src.stdlib.l64a
188189
libc.src.stdlib.labs
189190
libc.src.stdlib.ldiv
190191
libc.src.stdlib.llabs

libc/docs/dev/undefined_behavior.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ The C standard does not specify behavior for ``printf("%s", NULL)``. We will
8181
print the string literal ``(null)`` unless using the
8282
``LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS`` option described in :ref:`printf
8383
behavior<printf_behavior>`.
84+
TODO: Move this to printf_behavior.
8485

8586
Unknown Math Rounding Direction
8687
-------------------------------
@@ -143,3 +144,15 @@ More specific flags take precedence over less specific flags (i.e. '+' takes pre
143144
Any conversion with a minimum width is padded with the padding character until it is at least as long as the minimum width.
144145
Modifiers are applied, then the result is padded if necessary.
145146
Any composite conversion will pass along all flags to the component conversions.
147+
148+
a64l and l64a
149+
-------------
150+
These functions convert to and from a POSIX-specified base-64 encoding. There are
151+
a few cases left undefined. For a64l, the behavior is undefined if the input
152+
pointer (s) is a null pointer. For LLVM-libc this will cause a null pointer
153+
dereference. It's also undefined if the string passed to a64l wasn't generated
154+
by l64a. For LLVM-libc, if the user passes a valid base 64 string, it will be
155+
parsed as normal. For l64a it's unspecified what happens if the input value is
156+
negative. For LLVM-libc, all inputs to l64a are treated as unsigned 32-bit integers.
157+
Additionally, the return of l64a is in a thread-local buffer that's overwritten
158+
on each call.

libc/src/stdlib/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,17 @@ add_entrypoint_object(
195195
libc.hdr.types.size_t
196196
)
197197

198+
# l64a entrypoint. l64a.cpp includes src/__support/libc_assert.h (for
# LIBC_ASSERT), so that header library is listed as a dependency alongside
# ctype_utils and size_t.
add_entrypoint_object(
  l64a
  SRCS
    l64a.cpp
  HDRS
    l64a.h
  DEPENDS
    libc.src.__support.ctype_utils
    libc.src.__support.libc_assert
    libc.hdr.types.size_t
)
208+
198209
add_entrypoint_object(
199210
abs
200211
SRCS

libc/src/stdlib/a64l.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ constexpr static int32_t b64_char_to_int(char ch) {
4040

4141
// This function takes a base 64 string and writes it to the low 32 bits of a
4242
// long.
43+
// TODO: use LIBC_ADD_NULL_CHECKS for checking if the input is a null pointer.
4344
LLVM_LIBC_FUNCTION(long, a64l, (const char *s)) {
4445
// the standard says to only use up to 6 characters.
4546
constexpr size_t MAX_LENGTH = 6;

libc/src/stdlib/l64a.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
//===-- Implementation of l64a --------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/stdlib/l64a.h"
10+
#include "hdr/types/size_t.h"
11+
#include "src/__support/common.h"
12+
#include "src/__support/ctype_utils.h"
13+
#include "src/__support/libc_assert.h"
14+
#include "src/__support/macros/config.h"
15+
16+
#include <stdint.h>
17+
18+
namespace LIBC_NAMESPACE_DECL {
19+
20+
// the standard says to only use up to 6 characters. Null terminator is
21+
// unnecessary, but we'll add it for ease-of-use. Also going from 48 -> 56 bits
22+
// probably won't matter since it's likely 32-bit aligned anyways.
23+
constexpr size_t MAX_BASE64_LENGTH = 6;
24+
LIBC_THREAD_LOCAL char BASE64_BUFFER[MAX_BASE64_LENGTH + 1];
25+
26+
constexpr static char b64_int_to_char(uint32_t num) {
27+
// from the standard: "The characters used to represent digits are '.' (dot)
28+
// for 0, '/' for 1, '0' through '9' for [2,11], 'A' through 'Z' for [12,37],
29+
// and 'a' through 'z' for [38,63]."
30+
LIBC_ASSERT(num < 64);
31+
if (num == 0)
32+
return '.';
33+
if (num == 1)
34+
return '/';
35+
if (num < 38)
36+
return static_cast<char>(
37+
internal::toupper(internal::int_to_b36_char(num - 2)));
38+
39+
// this tolower is technically unnecessary, but it provides safety if we
40+
// change the default behavior of int_to_b36_char. Also the compiler
41+
// completely elides it so there's no performance penalty, see:
42+
// https://godbolt.org/z/o5ennv7fc
43+
return static_cast<char>(
44+
internal::tolower(internal::int_to_b36_char(num - 2 - 26)));
45+
}
46+
47+
// This function takes a long and converts the low 32 bits of it into at most 6
48+
// characters. It's returned as a pointer to a static buffer.
49+
LLVM_LIBC_FUNCTION(char *, l64a, (long value)) {
50+
// static cast to uint32_t to get just the low 32 bits in a consistent way.
51+
// The standard says negative values are undefined, so I'm just defining them
52+
// to be treated as unsigned.
53+
uint32_t cur_value = static_cast<uint32_t>(value);
54+
for (size_t i = 0; i < MAX_BASE64_LENGTH; ++i) {
55+
uint32_t cur_char = cur_value % 64;
56+
BASE64_BUFFER[i] = b64_int_to_char(cur_char);
57+
cur_value /= 64;
58+
}
59+
60+
BASE64_BUFFER[MAX_BASE64_LENGTH] = '\0'; // force null termination.
61+
return BASE64_BUFFER;
62+
}
63+
64+
} // namespace LIBC_NAMESPACE_DECL

libc/src/stdlib/l64a.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===-- Implementation header for l64a --------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_STDLIB_L64A_H
#define LLVM_LIBC_SRC_STDLIB_L64A_H

#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Converts the low 32 bits of value to its radix-64 string representation (the
// format read by a64l). The returned pointer refers to a thread-local buffer
// that is overwritten on each call.
char *l64a(long value);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_STDLIB_L64A_H

libc/test/src/stdlib/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,17 @@ add_libc_test(
231231
libc.src.stdlib.a64l
232232
)
233233

234+
# Unit test for l64a, part of the libc-stdlib-tests suite. Depends on the l64a
# entrypoint under test and on the cpp limits header that l64a_test.cpp
# includes.
add_libc_test(
235+
l64a_test
236+
SUITE
237+
libc-stdlib-tests
238+
SRCS
239+
l64a_test.cpp
240+
DEPENDS
241+
libc.src.stdlib.l64a
242+
libc.src.__support.CPP.limits
243+
)
244+
234245
add_libc_test(
235246
abs_test
236247
SUITE

libc/test/src/stdlib/l64a_test.cpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
//===-- Unittests for l64a ------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/__support/CPP/limits.h"
10+
#include "src/stdlib/l64a.h"
11+
#include "test/UnitTest/Test.h"
12+
13+
// Zero encodes as six '.' characters, '.' being the digit for 0.
TEST(LlvmLibcL64aTest, Zero) {
  const char *encoded = LIBC_NAMESPACE::l64a(0);
  ASSERT_STREQ(encoded, "......");
}
16+
// All 32 bits set: five full digits (63 -> 'z') followed by a sixth digit that
// only holds the top two bits (3 -> '1').
TEST(LlvmLibcL64aTest, Max) {
  constexpr uint32_t ALL_BITS_SET =
      LIBC_NAMESPACE::cpp::numeric_limits<uint32_t>::max();
  ASSERT_STREQ(LIBC_NAMESPACE::l64a(ALL_BITS_SET), "zzzzz1");
}
21+
22+
// Reference radix-64 alphabet, indexed by digit value.
constexpr char B64_CHARS[64] = {
    // 0-1: punctuation
    '.', '/',
    // 2-11: decimal digits
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 12-37: uppercase letters
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 38-63: lowercase letters
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
};
29+
30+
// Every value in [0, 63] should land in the first (least significant) digit,
// with the remaining five digits left as '.'.
TEST(LlvmLibcL64aTest, OneCharacter) {
  // Slot 0 is overwritten before every comparison; the literal's terminator
  // keeps the buffer printable as a C string.
  char want[7] = "......";

  for (size_t digit = 0; digit < 64; ++digit) {
    want[0] = B64_CHARS[digit];
    ASSERT_STREQ(LIBC_NAMESPACE::l64a(digit), want);
  }
}
40+
41+
// Exhaustively checks every combination of the two lowest digits; the upper
// four digits stay '.'.
TEST(LlvmLibcL64aTest, TwoCharacters) {
  // Slots 0 and 1 are overwritten before every comparison.
  char want[7] = "......";

  for (size_t low = 0; low < 64; ++low) {
    want[0] = B64_CHARS[low];
    for (size_t high = 0; high < 64; ++high) {
      want[1] = B64_CHARS[high];

      // The first digit is the least significant one.
      ASSERT_STREQ(LIBC_NAMESPACE::l64a(low + (high * 64)), want);
    }
  }
}
53+
54+
// Fills the five low digits with the same value. Only five are used because
// those are the only digits that can take every one of the 64 characters.
TEST(LlvmLibcL64aTest, FiveSameCharacters) {
  // Slots 0-4 are overwritten before every comparison; slot 5 stays '.'.
  char want[7] = "......";

  // Lowest bit of each of the five low digits: bits 0, 6, 12, 18 and 24.
  // Multiplying by a digit value replicates that value into all five digits.
  const long DIGIT_ONES =
      (1L << 0) | (1L << 6) | (1L << 12) | (1L << 18) | (1L << 24);

  for (size_t digit = 0; digit < 64; ++digit) {
    const char digit_char = B64_CHARS[digit];
    for (size_t pos = 0; pos < 5; ++pos)
      want[pos] = digit_char;

    const long input_num = DIGIT_ONES * digit;

    ASSERT_STREQ(LIBC_NAMESPACE::l64a(input_num), want);
  }
}
70+
71+
// Walks a single non-zero digit through each of the six positions while all
// other positions hold the zero digit.
TEST(LlvmLibcL64aTest, OneOfSixCharacters) {
  char want[7] = {'\0', '\0', '\0', '\0', '\0', '\0', '\0'};

  for (size_t pos = 0; pos < 6; ++pos) {
    // Reset every digit to b64(0) before exercising this position.
    for (size_t k = 0; k < 6; ++k)
      want[k] = B64_CHARS[0];

    // Each base64 character holds 6 bits and only 32 bits of input are used,
    // so the 6th character only gets 2 bits and can never exceed 3.
    const size_t digit_limit = (pos == 5) ? 4 : 64;

    for (size_t digit = 0; digit < digit_limit; ++digit) {
      want[pos] = B64_CHARS[digit];

      // Limit the input to 32 bits, since that's what the standard says the
      // function does.
      const long input_num = static_cast<int32_t>(digit << (6 * pos));

      ASSERT_STREQ(LIBC_NAMESPACE::l64a(input_num), want);
    }
  }
}

0 commit comments

Comments
 (0)