-
Notifications
You must be signed in to change notification settings - Fork 15k
Reduced the number of multiplications #154503
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
llvm#152655 First, I tested the different ways I could do this; I think this was the smallest set of modifications that would result in an improvement in my tests.
|
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using `@` followed by their GitHub username. If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
|
@llvm/pr-subscribers-libcxx Author: mjz (Mjz86) Changes: #152655 Full diff: https://github.com/llvm/llvm-project/pull/154503.diff 1 Files Affected:
diff --git a/libcxx/include/__charconv/to_chars_base_10.h b/libcxx/include/__charconv/to_chars_base_10.h
index d90952ea71f35..94e9255ad79ba 100644
--- a/libcxx/include/__charconv/to_chars_base_10.h
+++ b/libcxx/include/__charconv/to_chars_base_10.h
@@ -38,31 +38,102 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append2(char*
}
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) _NOEXCEPT {
- return __itoa::__append2(__itoa::__append1(__first, __value / 100), __value % 100);
+ const uint32_t __shift = 14;
+ const uint32_t __mul_inv10 = 1639;
+ const uint32_t __mask = (uint32_t(1) << __shift) - 1;
+ const uint32_t __n = __mul_inv10 * __value;
+ __append2(__first, __n >> __shift);
+ __append1(__first + 2, ((__n & __mask) * 5) >> (__shift - 1));
+ return __first + 3;
}
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) _NOEXCEPT {
- return __itoa::__append2(__itoa::__append2(__first, __value / 100), __value % 100);
+ const uint32_t __shift = 19;
+ const uint32_t __mul_inv10 = 5243;
+ const uint32_t __mask = (uint32_t(1) << __shift) - 1;
+ const uint32_t __n = __mul_inv10 * __value;
+ __append2(__first, __n >> __shift);
+ __append2(__first + 2, uint32_t((__n & __mask) * 25) >> (__shift - 2));
+ return __first + 4;
}
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) _NOEXCEPT {
- return __itoa::__append4(__itoa::__append1(__first, __value / 10000), __value % 10000);
+ const uint64_t __inv1000_val = 67109;
+ const uint64_t __shift_val = 26;
+ const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
+ const uint64_t __res = __inv1000_val * __value;
+ __append2(__first, __res >> __shift_val);
+ const uint64_t __second_res = uint64_t(__mask_val & __res) * 25;
+ const uint64_t __second_shift_val = __shift_val - 2;
+ const uint64_t __second_mask_val = (uint64_t(1) << __second_shift_val) - 1;
+ __append2(__first + 2, __second_res >> __second_shift_val);
+ const uint64_t __third_res = (__second_mask_val & (__second_res)) * 5;
+ const uint64_t __third_shift_val = __second_shift_val - 1;
+ __append1(__first + 4, __third_res >> __third_shift_val);
+ return __first + 5;
}
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) _NOEXCEPT {
- return __itoa::__append4(__itoa::__append2(__first, __value / 10000), __value % 10000);
+ const uint64_t __inv10000_val = 1717987;
+ const uint64_t __shift_val = 34;
+ const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
+ const uint64_t __res = __inv10000_val * __value;
+ __append2(__first, __res >> __shift_val);
+ const uint64_t __second_res = (__mask_val & __res) * 25;
+ const uint64_t __second_shift_val = __shift_val - 2;
+ const uint64_t __second_mask_val = (uint64_t(1) << __second_shift_val) - 1;
+ __append2(__first + 2, __second_res >> __second_shift_val);
+ const uint64_t __third_res = (__second_mask_val & (__second_res)) * 25;
+ const uint64_t __third_shift_val = __second_shift_val - 2;
+ __append2(__first + 4, __third_res >> __third_shift_val);
+ return __first + 6;
}
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) _NOEXCEPT {
- return __itoa::__append6(__itoa::__append1(__first, __value / 1000000), __value % 1000000);
+ const uint64_t __inv100000_val = 21990233;
+ const uint64_t __shift_val = 41;
+ const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
+ const uint64_t __res = __inv100000_val * __value;
+ __append2(__first, __res >> __shift_val);
+ const uint64_t __res2 = (__res & __mask_val) * 25;
+ __append2(__first + 2, __res2 >> (__shift_val - 2));
+ const uint64_t __res3 = (__res2 & (__mask_val >> 2)) * 25;
+ __append2(__first + 4, __res3 >> (__shift_val - 4));
+ const uint64_t __res4 = (__res3 & (__mask_val >> 4)) * 5;
+ __append1(__first + 6, __res4 >> (__shift_val - 5));
+ return __first + 7;
}
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) _NOEXCEPT {
- return __itoa::__append6(__itoa::__append2(__first, __value / 1000000), __value % 1000000);
+ const uint64_t __inv1000000_val = 140737489;
+ const uint64_t __shift_val = 47;
+ const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
+ const uint64_t __res = __inv1000000_val * __value;
+ __append2(__first, __res >> __shift_val);
+ const uint64_t __res2 = (__res & __mask_val) * 25;
+ __append2(__first + 2, __res2 >> (__shift_val - 2));
+ const uint64_t __res3 = (__res2 & (__mask_val >> 2)) * 25;
+ __append2(__first + 4, __res3 >> (__shift_val - 4));
+ const uint64_t __res4 = (__res3 & (__mask_val >> 4)) * 25;
+ __append2(__first + 6, __res4 >> (__shift_val - 6));
+ return __first + 8;
}
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) _NOEXCEPT {
- return __itoa::__append8(__itoa::__append1(__first, __value / 100000000), __value % 100000000);
+ const uint64_t __inv10000000_val = 1801439851;
+ const uint64_t __shift_val = 54;
+ const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
+ const uint64_t __res = __inv10000000_val * __value;
+ __append2(__first, __res >> __shift_val);
+ const uint64_t __res2 = (__res & __mask_val) * 25;
+ __append2(__first + 2, __res2 >> (__shift_val - 2));
+ const uint64_t __res3 = (__res2 & (__mask_val >> 2)) * 25;
+ __append2(__first + 4, __res3 >> (__shift_val - 4));
+ const uint64_t __res4 = (__res3 & (__mask_val >> 4)) * 25;
+ __append2(__first + 6, __res4 >> (__shift_val - 6));
+ const uint64_t __res5 = (__res4 & (__mask_val >> 6)) * 5;
+ __append1(__first + 8, __res5 >> (__shift_val - 7));
+ return __first + 9;
}
template <class _Tp>
|
|
I don't know why this gives an error when built. |
You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions h -- libcxx/include/__charconv/to_chars_base_10.h
View the diff from clang-format here.
diff --git a/libcxx/include/__charconv/to_chars_base_10.h b/libcxx/include/__charconv/to_chars_base_10.h
index 27a8d98f3..c2df6845f 100644
--- a/libcxx/include/__charconv/to_chars_base_10.h
+++ b/libcxx/include/__charconv/to_chars_base_10.h
@@ -38,20 +38,20 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append2(char*
}
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) _NOEXCEPT {
- const uint32_t __shift = 14;
+ const uint32_t __shift = 14;
const uint32_t __mul_inv10 = 1639;
- const uint32_t __mask = (uint32_t(1) << __shift) - 1;
- const uint32_t __n = __mul_inv10 * __value;
+ const uint32_t __mask = (uint32_t(1) << __shift) - 1;
+ const uint32_t __n = __mul_inv10 * __value;
__itoa::__append2(__first, __n >> __shift);
__itoa::__append1(__first + 2, ((__n & __mask) * 5) >> (__shift - 1));
return __first + 3;
}
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) _NOEXCEPT {
- const uint32_t __shift = 19;
+ const uint32_t __shift = 19;
const uint32_t __mul_inv10 = 5243;
- const uint32_t __mask = (uint32_t(1) << __shift) - 1;
- const uint32_t __n = __mul_inv10 * __value;
+ const uint32_t __mask = (uint32_t(1) << __shift) - 1;
+ const uint32_t __n = __mul_inv10 * __value;
__itoa::__append2(__first, __n >> __shift);
__itoa::__append2(__first + 2, uint32_t((__n & __mask) * 25) >> (__shift - 2));
return __first + 4;
@@ -59,15 +59,15 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append4(char*
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) _NOEXCEPT {
const uint64_t __inv1000_val = 67109;
- const uint64_t __shift_val = 26;
- const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
- const uint64_t __res = __inv1000_val * __value;
+ const uint64_t __shift_val = 26;
+ const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
+ const uint64_t __res = __inv1000_val * __value;
__itoa::__append2(__first, __res >> __shift_val);
- const uint64_t __second_res = uint64_t(__mask_val & __res) * 25;
+ const uint64_t __second_res = uint64_t(__mask_val & __res) * 25;
const uint64_t __second_shift_val = __shift_val - 2;
- const uint64_t __second_mask_val = (uint64_t(1) << __second_shift_val) - 1;
+ const uint64_t __second_mask_val = (uint64_t(1) << __second_shift_val) - 1;
__itoa::__append2(__first + 2, __second_res >> __second_shift_val);
- const uint64_t __third_res = (__second_mask_val & (__second_res)) * 5;
+ const uint64_t __third_res = (__second_mask_val & (__second_res)) * 5;
const uint64_t __third_shift_val = __second_shift_val - 1;
__itoa::__append1(__first + 4, __third_res >> __third_shift_val);
return __first + 5;
@@ -75,15 +75,15 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append5(char*
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) _NOEXCEPT {
const uint64_t __inv10000_val = 1717987;
- const uint64_t __shift_val = 34;
- const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
- const uint64_t __res = __inv10000_val * __value;
+ const uint64_t __shift_val = 34;
+ const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
+ const uint64_t __res = __inv10000_val * __value;
__itoa::__append2(__first, __res >> __shift_val);
- const uint64_t __second_res = (__mask_val & __res) * 25;
+ const uint64_t __second_res = (__mask_val & __res) * 25;
const uint64_t __second_shift_val = __shift_val - 2;
- const uint64_t __second_mask_val = (uint64_t(1) << __second_shift_val) - 1;
+ const uint64_t __second_mask_val = (uint64_t(1) << __second_shift_val) - 1;
__itoa::__append2(__first + 2, __second_res >> __second_shift_val);
- const uint64_t __third_res = (__second_mask_val & (__second_res)) * 25;
+ const uint64_t __third_res = (__second_mask_val & (__second_res)) * 25;
const uint64_t __third_shift_val = __second_shift_val - 2;
__itoa::__append2(__first + 4, __third_res >> __third_shift_val);
return __first + 6;
@@ -91,9 +91,9 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append6(char*
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) _NOEXCEPT {
const uint64_t __inv100000_val = 21990233;
- const uint64_t __shift_val = 41;
- const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
- const uint64_t __res = __inv100000_val * __value;
+ const uint64_t __shift_val = 41;
+ const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
+ const uint64_t __res = __inv100000_val * __value;
__itoa::__append2(__first, __res >> __shift_val);
const uint64_t __res2 = (__res & __mask_val) * 25;
__itoa::__append2(__first + 2, __res2 >> (__shift_val - 2));
@@ -106,9 +106,9 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append7(char*
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) _NOEXCEPT {
const uint64_t __inv1000000_val = 140737489;
- const uint64_t __shift_val = 47;
- const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
- const uint64_t __res = __inv1000000_val * __value;
+ const uint64_t __shift_val = 47;
+ const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
+ const uint64_t __res = __inv1000000_val * __value;
__itoa::__append2(__first, __res >> __shift_val);
const uint64_t __res2 = (__res & __mask_val) * 25;
__itoa::__append2(__first + 2, __res2 >> (__shift_val - 2));
@@ -121,9 +121,9 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append8(char*
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) _NOEXCEPT {
const uint64_t __inv10000000_val = 1801439851;
- const uint64_t __shift_val = 54;
- const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
- const uint64_t __res = __inv10000000_val * __value;
+ const uint64_t __shift_val = 54;
+ const uint64_t __mask_val = (uint64_t(1) << __shift_val) - 1;
+ const uint64_t __res = __inv10000000_val * __value;
__itoa::__append2(__first, __res >> __shift_val);
const uint64_t __res2 = (__res & __mask_val) * 25;
__itoa::__append2(__first + 2, __res2 >> (__shift_val - 2));
|
Could you please improve the title and the commit message (which is the top comment of this PR) so that they describe what this PR implements? Also, please use GitHub syntax to associate the PR with the issue: "Closes #152655". The title should have the prefix [libc++]. |
#152655
First, I tested the different ways I could do this; I think this was the smallest set of modifications that would result in an
improvement in my tests.