diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index eaceb15c47291..22f747f24d92a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -177,6 +177,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdbit.stdc_trailing_zeros_us # stdlib.h entrypoints + libc.src.stdlib.a64l libc.src.stdlib.abs libc.src.stdlib.atof libc.src.stdlib.atoi diff --git a/libc/include/stdlib.yaml b/libc/include/stdlib.yaml index 8d2b3f357e1a9..b308df98a6090 100644 --- a/libc/include/stdlib.yaml +++ b/libc/include/stdlib.yaml @@ -24,6 +24,12 @@ functions: return_type: _Noreturn void arguments: - type: int + - name: a64l + standards: + - posix + return_type: long + arguments: + - type: const char * - name: abort standards: - stdc diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index 73a9fbf1e2ddc..361f2305358c9 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -184,6 +184,17 @@ add_entrypoint_object( libc.src.__support.str_to_integer ) +add_entrypoint_object( + a64l + SRCS + a64l.cpp + HDRS + a64l.h + DEPENDS + libc.src.__support.ctype_utils + libc.hdr.types.size_t +) + add_entrypoint_object( abs SRCS diff --git a/libc/src/stdlib/a64l.cpp b/libc/src/stdlib/a64l.cpp new file mode 100644 index 0000000000000..5c1b819732abf --- /dev/null +++ b/libc/src/stdlib/a64l.cpp @@ -0,0 +1,64 @@ +//===-- Implementation of a64l --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/a64l.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/common.h"
+#include "src/__support/ctype_utils.h"
+#include "src/__support/macros/config.h"
+
+#include <stdint.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Maps one character of the a64l alphabet to its 6-bit digit value. Kept here
+// rather than in ctype_utils because this digit ordering is specific to a64l.
+// Returns -1 on any char without a specified value.
+constexpr static int32_t b64_char_to_int(char ch) {
+  // from the standard: "The characters used to represent digits are '.' (dot)
+  // for 0, '/' for 1, '0' through '9' for [2,11], 'A' through 'Z' for [12,37],
+  // and 'a' through 'z' for [38,63]."
+  if (ch == '.')
+    return 0;
+  if (ch == '/')
+    return 1;
+
+  // Any remaining character that is not alphanumeric has no digit value.
+  if (!internal::isalnum(ch))
+    return -1;
+
+  bool is_lower = internal::islower(ch);
+  // b36_char_to_int is case insensitive, so add 26 for lowercase to restore
+  // case sensitivity; the extra 2 accounts for '.' and '/' coming first.
+  return internal::b36_char_to_int(ch) + 2 + (is_lower ? 26 : 0);
+}
+
+// Parses up to six base-64 digits, least significant digit first, into a 32
+// bit value and returns it sign-extended to a long.
+LLVM_LIBC_FUNCTION(long, a64l, (const char *s)) {
+  // the standard says to only use up to 6 characters.
+  constexpr size_t MAX_LENGTH = 6;
+  uint32_t result = 0;
+
+  for (size_t i = 0; i < MAX_LENGTH && s[i] != '\0'; ++i) {
+    int32_t cur_val = b64_char_to_int(s[i]);
+    // The standard says what happens on an unspecified character is undefined,
+    // here we treat it as the end of the string.
+    if (cur_val == -1)
+      break;
+
+    // Digit i lands in bits [6*i, 6*i+5]. The shift is done on an unsigned
+    // value: the sixth digit overflows 32 bits, which is UB for a signed type.
+    result |= static_cast<uint32_t>(cur_val) << (6 * i);
+  }
+
+  // standard says to sign extend from 32 bits.
+  return static_cast<long>(static_cast<int32_t>(result));
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/a64l.h b/libc/src/stdlib/a64l.h
new file mode 100644
index 0000000000000..024be058f756c
--- /dev/null
+++ b/libc/src/stdlib/a64l.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for a64l --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_A64L_H
+#define LLVM_LIBC_SRC_STDLIB_A64L_H
+
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+long a64l(const char *s);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STDLIB_A64L_H
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index e6c8a629c71fa..848100442c88b 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -221,6 +221,16 @@ add_libc_test(
     ${strfrom_test_copts}
 )
 
+add_libc_test(
+  a64l_test
+  SUITE
+    libc-stdlib-tests
+  SRCS
+    a64l_test.cpp
+  DEPENDS
+    libc.src.stdlib.a64l
+)
+
 add_libc_test(
   abs_test
   SUITE
diff --git a/libc/test/src/stdlib/a64l_test.cpp b/libc/test/src/stdlib/a64l_test.cpp
new file mode 100644
index 0000000000000..acdef5d69543d
--- /dev/null
+++ b/libc/test/src/stdlib/a64l_test.cpp
@@ -0,0 +1,87 @@
+//===-- Unittests for a64l ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/a64l.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcA64lTest, EmptyString) { ASSERT_EQ(LIBC_NAMESPACE::a64l(""), 0l); }
+TEST(LlvmLibcA64lTest, FullString) {
+  // 12 + 39*64 + 14*64^2 + 3*64^3 + 4*64^4 + 1*64^5, least significant first.
+  ASSERT_EQ(LIBC_NAMESPACE::a64l("AbC12/"), 1141696972l);
+}
+
+// The a64l digit alphabet in value order: B64_CHARS[v] is the digit for v.
+constexpr char B64_CHARS[64] = {
+    '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A',
+    'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
+    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a',
+    'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
+    'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+};
+
+TEST(LlvmLibcA64lTest, OneCharacter) {
+  char example_str[2] = {'\0', '\0'};
+
+  for (size_t i = 0; i < 64; ++i) {
+    example_str[0] = B64_CHARS[i];
+    ASSERT_EQ(LIBC_NAMESPACE::a64l(example_str), static_cast<long>(i));
+  }
+}
+
+TEST(LlvmLibcA64lTest, TwoCharacters) {
+  char example_str[3] = {'\0', '\0', '\0'};
+
+  for (size_t first = 0; first < 64; ++first) {
+    example_str[0] = B64_CHARS[first];
+    for (size_t second = 0; second < 64; ++second) {
+      example_str[1] = B64_CHARS[second];
+
+      ASSERT_EQ(LIBC_NAMESPACE::a64l(example_str),
+                static_cast<long>(first + (second * 64)));
+    }
+  }
+}
+
+TEST(LlvmLibcA64lTest, FiveSameCharacters) {
+  // Only five digits (30 bits) are used; a sixth digit would only contribute
+  // its low two bits to the 32-bit result, which is not handled here.
+  char example_str[6] = {'\0', '\0', '\0', '\0', '\0', '\0'};
+
+  // Bits 0, 6, 12, 18 and 24 set, so BASE_NUM * v is digit v repeated 5 times.
+  const long BASE_NUM = 0b1000001000001000001000001;
+
+  for (size_t char_val = 0; char_val < 64; ++char_val) {
+    for (size_t i = 0; i < 5; ++i)
+      example_str[i] = B64_CHARS[char_val];
+
+    const long expected_result = BASE_NUM * char_val;
+
+    ASSERT_EQ(LIBC_NAMESPACE::a64l(example_str), expected_result);
+  }
+}
+
+TEST(LlvmLibcA64lTest, OneOfSixCharacters) {
+  char example_str[7] = {'\0', '\0', '\0', '\0', '\0', '\0', '\0'};
+
+  for (size_t cur_char = 0; cur_char < 6; ++cur_char) {
+    // clear the string, set all the chars to b64(0)
+    for (size_t i = 0; i < 6; ++i)
+      example_str[i] = B64_CHARS[0];
+
+    for (size_t char_val = 0; char_val < 64; ++char_val) {
+      example_str[cur_char] = B64_CHARS[char_val];
+
+      // a64l only produces a 32-bit value, so truncate the expected value to
+      // int32_t before widening it to long.
+      const long expected_result =
+          static_cast<int32_t>(char_val << (6 * cur_char));
+
+      ASSERT_EQ(LIBC_NAMESPACE::a64l(example_str), expected_result);
+    }
+  }
+}