|
16 | 16 | // under the License. |
17 | 17 |
|
#include <benchmark/benchmark.h>

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

#include "vec/exec/format/file_reader/new_plain_text_line_reader.h"
23 | 24 |
|
24 | 25 | namespace doris { |
25 | 26 |
|
// Builds a benchmark input buffer: `length` copies of `fill_char` followed by
// `delimiter` (which may be empty).
//
// @param length     number of fill characters placed before the delimiter
// @param delimiter  text appended after the fill characters; defaults to ""
// @param fill_char  character used for the fill region; defaults to 'a'
// @return the concatenated string, of size `length + delimiter.size()`
static std::string create_test_data(size_t length, const std::string& delimiter = "",
                                    char fill_char = 'a') {
    std::string buffer;
    // Size the buffer once so neither append below has to reallocate.
    buffer.reserve(length + delimiter.size());
    buffer.append(length, fill_char);
    buffer.append(delimiter);
    return buffer;
}
29 | 31 |
|
30 | 32 | static void BM_FindLfCrlfLineSep(benchmark::State& state) { |
31 | 33 | size_t data_size = state.range(0); |
32 | 34 | size_t delimiter_type = state.range(1); |
33 | | - |
| 35 | + |
34 | 36 | std::string test_data; |
35 | | - switch(delimiter_type) { |
36 | | - case 0: // No delimiter |
37 | | - test_data = create_test_data(data_size); |
38 | | - break; |
39 | | - case 1: // Delimiter is \n |
40 | | - test_data = create_test_data(data_size, "\n"); |
41 | | - break; |
42 | | - case 2: // Delimiter is \r\n |
43 | | - test_data = create_test_data(data_size, "\r\n"); |
44 | | - break; |
45 | | - default: |
46 | | - test_data = create_test_data(data_size); |
47 | | - break; |
| 37 | + switch (delimiter_type) { |
| 38 | + case 0: // No delimiter |
| 39 | + test_data = create_test_data(data_size); |
| 40 | + break; |
| 41 | + case 1: // Delimiter is \n |
| 42 | + test_data = create_test_data(data_size, "\n"); |
| 43 | + break; |
| 44 | + case 2: // Delimiter is \r\n |
| 45 | + test_data = create_test_data(data_size, "\r\n"); |
| 46 | + break; |
| 47 | + default: |
| 48 | + test_data = create_test_data(data_size); |
| 49 | + break; |
48 | 50 | } |
49 | 51 |
|
50 | 52 | PlainTextLineReaderCtx ctx("\n", 1, false); |
51 | 53 | const auto* data = reinterpret_cast<const uint8_t*>(test_data.c_str()); |
52 | 54 | const size_t size = test_data.size(); |
53 | | - |
| 55 | + |
54 | 56 | for (auto _ : state) { |
55 | 57 | const auto* result = ctx.find_lf_crlf_line_sep(data, size); |
56 | 58 | benchmark::DoNotOptimize(result); |
57 | 59 | } |
58 | | - |
| 60 | + |
59 | 61 | state.SetBytesProcessed(state.iterations() * test_data.size()); |
60 | | - |
| 62 | + |
61 | 63 | std::string label = "size_" + std::to_string(data_size); |
62 | 64 | switch (delimiter_type) { |
63 | | - case 0: label += "_delim_no"; break; |
64 | | - case 1: label += "_delim_lf"; break; |
65 | | - case 2: label += "_delim_crlf"; break; |
66 | | - default: label += "_delim_no"; break; |
| 65 | + case 0: |
| 66 | + label += "_delim_no"; |
| 67 | + break; |
| 68 | + case 1: |
| 69 | + label += "_delim_lf"; |
| 70 | + break; |
| 71 | + case 2: |
| 72 | + label += "_delim_crlf"; |
| 73 | + break; |
| 74 | + default: |
| 75 | + label += "_delim_no"; |
| 76 | + break; |
67 | 77 | } |
68 | 78 | state.SetLabel(label); |
69 | 79 | } |
70 | 80 |
|
71 | 81 | BENCHMARK(BM_FindLfCrlfLineSep) |
72 | | - ->Unit(benchmark::kNanosecond) |
73 | | - ->Args({16, 0}) // 16 bytes, no delimiter |
74 | | - ->Args({16, 1}) // 16 bytes, delimiter is \n |
75 | | - ->Args({16, 2}) // 16 bytes, delimiter is \r\n |
76 | | - ->Args({32, 0}) // 32 bytes, no delimiter |
77 | | - ->Args({32, 1}) // 32 bytes, delimiter is \n |
78 | | - ->Args({32, 2}) // 32 bytes, delimiter is \r\n |
79 | | - ->Args({64, 0}) // 64 bytes, no delimiter |
80 | | - ->Args({64, 1}) // 64 bytes, delimiter is \n |
81 | | - ->Args({64, 2}) // 64 bytes, delimiter is \r\n |
82 | | - ->Args({128, 0}) // 128 bytes, no delimiter |
83 | | - ->Args({128, 1}) // 128 bytes, delimiter is \n |
84 | | - ->Args({128, 2}) // 128 bytes, delimiter is \r\n |
85 | | - ->Args({256, 0}) // 256 bytes, no delimiter |
86 | | - ->Args({256, 1}) // 256 bytes, delimiter is \n |
87 | | - ->Args({256, 2}) // 256 bytes, delimiter is \r\n |
88 | | - ->Args({512, 0}) // 512 bytes, no delimiter |
89 | | - ->Args({512, 1}) // 512 bytes, delimiter is \n |
90 | | - ->Args({512, 2}) // 512 bytes, delimiter is \r\n |
91 | | - ->Args({1024, 0}) // 1KB, no delimiter |
92 | | - ->Args({1024, 1}) // 1KB, delimiter is \n |
93 | | - ->Args({1024, 2}) // 1KB, delimiter is \r\n |
94 | | - ->Args({64 * 1024, 0}) // 64KB, no delimiter |
95 | | - ->Args({64 * 1024, 1}) // 64KB, delimiter is \n |
96 | | - ->Args({64 * 1024, 2}) // 64KB, delimiter is \r\n |
97 | | - ->Args({1024 * 1024, 0}) // 1MB, no delimiter |
98 | | - ->Args({1024 * 1024, 1}) // 1MB, delimiter is \n |
99 | | - ->Args({1024 * 1024, 2}) // 1MB, delimiter is \r\n |
100 | | - ->Repetitions(5) |
101 | | - ->DisplayAggregatesOnly(); |
| 82 | + ->Unit(benchmark::kNanosecond) |
| 83 | + ->Args({16, 0}) // 16 bytes, no delimiter |
| 84 | + ->Args({16, 1}) // 16 bytes, delimiter is \n |
| 85 | + ->Args({16, 2}) // 16 bytes, delimiter is \r\n |
| 86 | + ->Args({32, 0}) // 32 bytes, no delimiter |
| 87 | + ->Args({32, 1}) // 32 bytes, delimiter is \n |
| 88 | + ->Args({32, 2}) // 32 bytes, delimiter is \r\n |
| 89 | + ->Args({64, 0}) // 64 bytes, no delimiter |
| 90 | + ->Args({64, 1}) // 64 bytes, delimiter is \n |
| 91 | + ->Args({64, 2}) // 64 bytes, delimiter is \r\n |
| 92 | + ->Args({128, 0}) // 128 bytes, no delimiter |
| 93 | + ->Args({128, 1}) // 128 bytes, delimiter is \n |
| 94 | + ->Args({128, 2}) // 128 bytes, delimiter is \r\n |
| 95 | + ->Args({256, 0}) // 256 bytes, no delimiter |
| 96 | + ->Args({256, 1}) // 256 bytes, delimiter is \n |
| 97 | + ->Args({256, 2}) // 256 bytes, delimiter is \r\n |
| 98 | + ->Args({512, 0}) // 512 bytes, no delimiter |
| 99 | + ->Args({512, 1}) // 512 bytes, delimiter is \n |
| 100 | + ->Args({512, 2}) // 512 bytes, delimiter is \r\n |
| 101 | + ->Args({1024, 0}) // 1KB, no delimiter |
| 102 | + ->Args({1024, 1}) // 1KB, delimiter is \n |
| 103 | + ->Args({1024, 2}) // 1KB, delimiter is \r\n |
| 104 | + ->Args({64 * 1024, 0}) // 64KB, no delimiter |
| 105 | + ->Args({64 * 1024, 1}) // 64KB, delimiter is \n |
| 106 | + ->Args({64 * 1024, 2}) // 64KB, delimiter is \r\n |
| 107 | + ->Args({1024 * 1024, 0}) // 1MB, no delimiter |
| 108 | + ->Args({1024 * 1024, 1}) // 1MB, delimiter is \n |
| 109 | + ->Args({1024 * 1024, 2}) // 1MB, delimiter is \r\n |
| 110 | + ->Repetitions(5) |
| 111 | + ->DisplayAggregatesOnly(); |
102 | 112 |
|
103 | 113 | } // namespace doris |
0 commit comments