Skip to content

Conversation

@mordante
Copy link
Member

@mordante mordante commented Mar 22, 2025

This is a proof-of-concept patch to implement caching in basic_format_string. This is only intended for this Discourse discussion.

The real patch needs to do things more cleanly.

lit -a -Doptimization=speed libcxx/test/benchmarks/format/parsing.bench.cpp

BEFORE

| ----------------------------------------------------------------------------
| Benchmark                                  Time             CPU   Iterations
| ----------------------------------------------------------------------------
| BM_empty                                6.18 ns         6.17 ns     87897649
| BM_curly_open                           11.2 ns         11.2 ns     62229450
| BM_curly_close                          10.7 ns         10.7 ns     65368952
| BM_pipe                                 12.9 ns         12.9 ns     54477724
| BM_int                                  47.6 ns         47.6 ns     14718165
| BM_int_formatted                        97.4 ns         97.4 ns      6953144
| BM_3_ints                                141 ns          141 ns      4944337
| BM_3_ints_formatted                      283 ns          283 ns      2464184
| BM_prefix_5_and_empty                   16.5 ns         16.5 ns     42569346
| BM_prefix_5_and_curly_open              17.8 ns         17.8 ns     39273542
| BM_prefix_5_and_curly_close             17.8 ns         17.8 ns     39140130
| BM_prefix_5_and_pipe                    19.5 ns         19.5 ns     35803584
| BM_prefix_5_and_int                     51.1 ns         51.0 ns     13436274
| BM_prefix_5_and_int_formatted            102 ns          102 ns      6793303
| BM_prefix_5_and_3_ints                   151 ns          151 ns      4573472
| BM_prefix_5_and_3_ints_formatted         291 ns          291 ns      2351850
| BM_prefix_10_and_empty                  24.9 ns         24.9 ns     28109036
| BM_prefix_10_and_curly_open             27.2 ns         27.2 ns     25713490
| BM_prefix_10_and_curly_close            30.2 ns         30.2 ns     23176332
| BM_prefix_10_and_pipe                   32.7 ns         32.6 ns     21420861
| BM_prefix_10_and_int                    61.6 ns         61.6 ns     11034101
| BM_prefix_10_and_int_formatted           120 ns          120 ns      5610531
| BM_prefix_10_and_3_ints                  156 ns          156 ns      4470738
| BM_prefix_10_and_3_ints_formatted        312 ns          311 ns      2217207
| BM_prefix_20_and_empty                  52.7 ns         52.7 ns     13138425
| BM_prefix_20_and_curly_open             58.8 ns         58.8 ns     11863712
| BM_prefix_20_and_curly_close            56.6 ns         56.6 ns     12407764
| BM_prefix_20_and_pipe                   56.5 ns         56.4 ns     12411003
| BM_prefix_20_and_int                    86.9 ns         86.8 ns      8089463
| BM_prefix_20_and_int_formatted           153 ns          153 ns      4582443
| BM_prefix_20_and_3_ints                  204 ns          204 ns      3354539
| BM_prefix_20_and_3_ints_formatted        349 ns          349 ns      2002457
| BM_prefix_40_and_empty                   116 ns          116 ns      6018618
| BM_prefix_40_and_curly_open              122 ns          122 ns      5655627
| BM_prefix_40_and_curly_close             121 ns          121 ns      5722255
| BM_prefix_40_and_pipe                    120 ns          120 ns      5823239
| BM_prefix_40_and_int                     158 ns          158 ns      4476432
| BM_prefix_40_and_int_formatted           207 ns          207 ns      3306182
| BM_prefix_40_and_3_ints                  255 ns          255 ns      2759277
| BM_prefix_40_and_3_ints_formatted        371 ns          370 ns      1884154

AFTER

| ----------------------------------------------------------------------------
| Benchmark                                  Time             CPU   Iterations
| ----------------------------------------------------------------------------
| BM_empty                                21.6 ns         21.6 ns     32391462
| BM_curly_open                           22.9 ns         22.9 ns     30588441
| BM_curly_close                          22.9 ns         22.9 ns     30592394
| BM_pipe                                 26.0 ns         26.0 ns     26953859
| BM_int                                  60.5 ns         60.5 ns     11577625
| BM_int_formatted                        81.3 ns         81.2 ns      8594596
| BM_3_ints                                156 ns          156 ns      4409940
| BM_3_ints_formatted                      220 ns          220 ns      3187392
| BM_prefix_5_and_empty                   26.0 ns         26.0 ns     26936925
| BM_prefix_5_and_curly_open              26.0 ns         26.0 ns     26965917
| BM_prefix_5_and_curly_close             26.0 ns         26.0 ns     26961356
| BM_prefix_5_and_pipe                    26.0 ns         26.0 ns     26959641
| BM_prefix_5_and_int                     64.1 ns         64.1 ns     10920953
| BM_prefix_5_and_int_formatted           84.4 ns         84.4 ns      8037453
| BM_prefix_5_and_3_ints                   165 ns          165 ns      4217532
| BM_prefix_5_and_3_ints_formatted         247 ns          247 ns      2823553
| BM_prefix_10_and_empty                  26.0 ns         26.0 ns     26949246
| BM_prefix_10_and_curly_open             26.0 ns         26.0 ns     26955524
| BM_prefix_10_and_curly_close            26.0 ns         26.0 ns     26956279
| BM_prefix_10_and_pipe                   26.0 ns         26.0 ns     26947963
| BM_prefix_10_and_int                    59.8 ns         59.8 ns     11674226
| BM_prefix_10_and_int_formatted          87.1 ns         87.0 ns      7814714
| BM_prefix_10_and_3_ints                  165 ns          165 ns      4149378
| BM_prefix_10_and_3_ints_formatted        245 ns          245 ns      2884163
| BM_prefix_20_and_empty                  29.3 ns         29.3 ns     23873593
| BM_prefix_20_and_curly_open             28.5 ns         28.4 ns     24625298
| BM_prefix_20_and_curly_close            27.5 ns         27.5 ns     25438870
| BM_prefix_20_and_pipe                   29.4 ns         29.3 ns     23863557
| BM_prefix_20_and_int                    57.6 ns         57.6 ns     12247413
| BM_prefix_20_and_int_formatted           104 ns          104 ns      6690343
| BM_prefix_20_and_3_ints                  183 ns          183 ns      3828112
| BM_prefix_20_and_3_ints_formatted        244 ns          244 ns      2833578
| BM_prefix_40_and_empty                  37.5 ns         37.5 ns     18650804
| BM_prefix_40_and_curly_open             36.8 ns         36.8 ns     19094990
| BM_prefix_40_and_curly_close            37.1 ns         37.0 ns     18904266
| BM_prefix_40_and_pipe                   36.5 ns         36.5 ns     19147265
| BM_prefix_40_and_int                    81.1 ns         81.0 ns      8611919
| BM_prefix_40_and_int_formatted           102 ns          102 ns      6854642
| BM_prefix_40_and_3_ints                  181 ns          181 ns      3828217
| BM_prefix_40_and_3_ints_formatted        243 ns          243 ns      2814977

This is a proof-of-concept patch to implement caching in
basic_format_string. This is only intended for this Discourse
discussion.

The real patch needs to do things more cleanly.

lit -a -Doptimization=speed libcxx/test/benchmarks/format/parsing.bench.cpp
BEFORE
| ----------------------------------------------------------------------------
| Benchmark                                  Time             CPU   Iterations
| ----------------------------------------------------------------------------
| BM_empty                                6.18 ns         6.17 ns     87897649
| BM_curly_open                           11.2 ns         11.2 ns     62229450
| BM_curly_close                          10.7 ns         10.7 ns     65368952
| BM_pipe                                 12.9 ns         12.9 ns     54477724
| BM_int                                  47.6 ns         47.6 ns     14718165
| BM_int_formatted                        97.4 ns         97.4 ns      6953144
| BM_3_ints                                141 ns          141 ns      4944337
| BM_3_ints_formatted                      283 ns          283 ns      2464184
| BM_prefix_5_and_empty                   16.5 ns         16.5 ns     42569346
| BM_prefix_5_and_curly_open              17.8 ns         17.8 ns     39273542
| BM_prefix_5_and_curly_close             17.8 ns         17.8 ns     39140130
| BM_prefix_5_and_pipe                    19.5 ns         19.5 ns     35803584
| BM_prefix_5_and_int                     51.1 ns         51.0 ns     13436274
| BM_prefix_5_and_int_formatted            102 ns          102 ns      6793303
| BM_prefix_5_and_3_ints                   151 ns          151 ns      4573472
| BM_prefix_5_and_3_ints_formatted         291 ns          291 ns      2351850
| BM_prefix_10_and_empty                  24.9 ns         24.9 ns     28109036
| BM_prefix_10_and_curly_open             27.2 ns         27.2 ns     25713490
| BM_prefix_10_and_curly_close            30.2 ns         30.2 ns     23176332
| BM_prefix_10_and_pipe                   32.7 ns         32.6 ns     21420861
| BM_prefix_10_and_int                    61.6 ns         61.6 ns     11034101
| BM_prefix_10_and_int_formatted           120 ns          120 ns      5610531
| BM_prefix_10_and_3_ints                  156 ns          156 ns      4470738
| BM_prefix_10_and_3_ints_formatted        312 ns          311 ns      2217207
| BM_prefix_20_and_empty                  52.7 ns         52.7 ns     13138425
| BM_prefix_20_and_curly_open             58.8 ns         58.8 ns     11863712
| BM_prefix_20_and_curly_close            56.6 ns         56.6 ns     12407764
| BM_prefix_20_and_pipe                   56.5 ns         56.4 ns     12411003
| BM_prefix_20_and_int                    86.9 ns         86.8 ns      8089463
| BM_prefix_20_and_int_formatted           153 ns          153 ns      4582443
| BM_prefix_20_and_3_ints                  204 ns          204 ns      3354539
| BM_prefix_20_and_3_ints_formatted        349 ns          349 ns      2002457
| BM_prefix_40_and_empty                   116 ns          116 ns      6018618
| BM_prefix_40_and_curly_open              122 ns          122 ns      5655627
| BM_prefix_40_and_curly_close             121 ns          121 ns      5722255
| BM_prefix_40_and_pipe                    120 ns          120 ns      5823239
| BM_prefix_40_and_int                     158 ns          158 ns      4476432
| BM_prefix_40_and_int_formatted           207 ns          207 ns      3306182
| BM_prefix_40_and_3_ints                  255 ns          255 ns      2759277
| BM_prefix_40_and_3_ints_formatted        371 ns          370 ns      1884154

AFTER
| ----------------------------------------------------------------------------
| Benchmark                                  Time             CPU   Iterations
| ----------------------------------------------------------------------------
| BM_empty                                21.6 ns         21.6 ns     32391462
| BM_curly_open                           22.9 ns         22.9 ns     30588441
| BM_curly_close                          22.9 ns         22.9 ns     30592394
| BM_pipe                                 26.0 ns         26.0 ns     26953859
| BM_int                                  60.5 ns         60.5 ns     11577625
| BM_int_formatted                        81.3 ns         81.2 ns      8594596
| BM_3_ints                                156 ns          156 ns      4409940
| BM_3_ints_formatted                      220 ns          220 ns      3187392
| BM_prefix_5_and_empty                   26.0 ns         26.0 ns     26936925
| BM_prefix_5_and_curly_open              26.0 ns         26.0 ns     26965917
| BM_prefix_5_and_curly_close             26.0 ns         26.0 ns     26961356
| BM_prefix_5_and_pipe                    26.0 ns         26.0 ns     26959641
| BM_prefix_5_and_int                     64.1 ns         64.1 ns     10920953
| BM_prefix_5_and_int_formatted           84.4 ns         84.4 ns      8037453
| BM_prefix_5_and_3_ints                   165 ns          165 ns      4217532
| BM_prefix_5_and_3_ints_formatted         247 ns          247 ns      2823553
| BM_prefix_10_and_empty                  26.0 ns         26.0 ns     26949246
| BM_prefix_10_and_curly_open             26.0 ns         26.0 ns     26955524
| BM_prefix_10_and_curly_close            26.0 ns         26.0 ns     26956279
| BM_prefix_10_and_pipe                   26.0 ns         26.0 ns     26947963
| BM_prefix_10_and_int                    59.8 ns         59.8 ns     11674226
| BM_prefix_10_and_int_formatted          87.1 ns         87.0 ns      7814714
| BM_prefix_10_and_3_ints                  165 ns          165 ns      4149378
| BM_prefix_10_and_3_ints_formatted        245 ns          245 ns      2884163
| BM_prefix_20_and_empty                  29.3 ns         29.3 ns     23873593
| BM_prefix_20_and_curly_open             28.5 ns         28.4 ns     24625298
| BM_prefix_20_and_curly_close            27.5 ns         27.5 ns     25438870
| BM_prefix_20_and_pipe                   29.4 ns         29.3 ns     23863557
| BM_prefix_20_and_int                    57.6 ns         57.6 ns     12247413
| BM_prefix_20_and_int_formatted           104 ns          104 ns      6690343
| BM_prefix_20_and_3_ints                  183 ns          183 ns      3828112
| BM_prefix_20_and_3_ints_formatted        244 ns          244 ns      2833578
| BM_prefix_40_and_empty                  37.5 ns         37.5 ns     18650804
| BM_prefix_40_and_curly_open             36.8 ns         36.8 ns     19094990
| BM_prefix_40_and_curly_close            37.1 ns         37.0 ns     18904266
| BM_prefix_40_and_pipe                   36.5 ns         36.5 ns     19147265
| BM_prefix_40_and_int                    81.1 ns         81.0 ns      8611919
| BM_prefix_40_and_int_formatted           102 ns          102 ns      6854642
| BM_prefix_40_and_3_ints                  181 ns          181 ns      3828217
| BM_prefix_40_and_3_ints_formatted        243 ns          243 ns      2814977
@github-actions
Copy link

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff dd97324f119af515944b7ec79fe627cfa70d4a2a e02e5bac0bbdd2e62d8cd9d07c2f0e7810b393b9 --extensions cpp,h -- libcxx/test/benchmarks/format/parsing.bench.cpp libcxx/test/std/utilities/format/format.functions/cache.pass.cpp libcxx/include/__format/format_functions.h
View the diff from clang-format here.
diff --git a/libcxx/include/__format/format_functions.h b/libcxx/include/__format/format_functions.h
index 98e2d97718..d092561743 100644
--- a/libcxx/include/__format/format_functions.h
+++ b/libcxx/include/__format/format_functions.h
@@ -725,8 +725,7 @@ private:
         break;
 
       switch (__element.__type) {
-      case __element_type::__formatter_std:
-      {
+      case __element_type::__formatter_std: {
         std::__visit_format_arg(
             [&](auto __arg) {
               if constexpr (same_as<decltype(__arg), monostate>)
diff --git a/libcxx/test/std/utilities/format/format.functions/cache.pass.cpp b/libcxx/test/std/utilities/format/format.functions/cache.pass.cpp
index a1d8ced4ae..bd6e07a726 100644
--- a/libcxx/test/std/utilities/format/format.functions/cache.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/cache.pass.cpp
@@ -8,7 +8,6 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
 
-
 // TODO FMT This test should not require std::to_chars(floating-point)
 // XFAIL: availability-fp_to_chars-missing
 
@@ -16,7 +15,6 @@
 
 // <format>
 
-
 // This is a test for the new caching mechanism.
 
 #include <format>
@@ -84,8 +82,8 @@ static void test() {
   check(SV("FULL:0x0042"), SV("{:#06x}"), 0x42);
   check(SV("FULL:0x0042=answer"), SV("{:#06x}={}"), 0x42, SV("answer"));
 
-//  check(SV("FULL:hello world"), SV("{} world"), String<char>{"hello"});
-//  check(SV("FULL:hello world"), SV("{0:} world"), String<char>{"hello"});
+  //  check(SV("FULL:hello world"), SV("{} world"), String<char>{"hello"});
+  //  check(SV("FULL:hello world"), SV("{0:} world"), String<char>{"hello"});
 
   // TODO TEST WITH ARG EATER
 }

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant