1
- #if defined(__linux__) || (__APPLE__ && __aarch64__)
1
+ #if defined(__linux__) || (__APPLE__ && __aarch64__)
2
2
#define USING_COUNTERS
3
3
#include " event_counter.h"
4
4
#endif
22
22
#include < vector>
23
23
#include < locale.h>
24
24
25
-
26
25
template <typename CharT>
27
26
double findmax_fastfloat64 (std::vector<std::basic_string<CharT>> &s) {
28
27
double answer = 0 ;
@@ -55,8 +54,9 @@ event_collector collector{};
55
54
56
55
#ifdef USING_COUNTERS
57
56
template <class T , class CharT >
58
- std::vector<event_count> time_it_ns (std::vector<std::basic_string<CharT>> &lines,
59
- T const &function, size_t repeat) {
57
+ std::vector<event_count>
58
+ time_it_ns (std::vector<std::basic_string<CharT>> &lines, T const &function,
59
+ size_t repeat) {
60
60
std::vector<event_count> aggregate;
61
61
bool printed_bug = false ;
62
62
for (size_t i = 0 ; i < repeat; i++) {
@@ -71,7 +71,8 @@ std::vector<event_count> time_it_ns(std::vector<std::basic_string<CharT>> &lines
71
71
return aggregate;
72
72
}
73
73
74
- void pretty_print (double volume, size_t number_of_floats, std::string name, std::vector<event_count> events) {
74
+ void pretty_print (double volume, size_t number_of_floats, std::string name,
75
+ std::vector<event_count> events) {
75
76
double volumeMB = volume / (1024 . * 1024 .);
76
77
double average_ns{0 };
77
78
double min_ns{DBL_MAX};
@@ -83,7 +84,7 @@ void pretty_print(double volume, size_t number_of_floats, std::string name, std:
83
84
double branches_avg{0 };
84
85
double branch_misses_min{0 };
85
86
double branch_misses_avg{0 };
86
- for (event_count e : events) {
87
+ for (event_count e : events) {
87
88
double ns = e.elapsed_ns ();
88
89
average_ns += ns;
89
90
min_ns = min_ns < ns ? min_ns : ns;
@@ -94,51 +95,46 @@ void pretty_print(double volume, size_t number_of_floats, std::string name, std:
94
95
95
96
double instructions = e.instructions ();
96
97
instructions_avg += instructions;
97
- instructions_min = instructions_min < instructions ? instructions_min : instructions;
98
+ instructions_min =
99
+ instructions_min < instructions ? instructions_min : instructions;
98
100
99
101
double branches = e.branches ();
100
102
branches_avg += branches;
101
103
branches_min = branches_min < branches ? branches_min : branches;
102
104
103
105
double branch_misses = e.missed_branches ();
104
106
branch_misses_avg += branch_misses;
105
- branch_misses_min = branch_misses_min < branch_misses ? branch_misses_min : branch_misses;
107
+ branch_misses_min =
108
+ branch_misses_min < branch_misses ? branch_misses_min : branch_misses;
106
109
}
107
110
cycles_avg /= events.size ();
108
111
instructions_avg /= events.size ();
109
112
average_ns /= events.size ();
110
113
branches_avg /= events.size ();
111
114
printf (" %-40s: %8.2f MB/s (+/- %.1f %%) " , name.data (),
112
- volumeMB * 1000000000 / min_ns,
113
- (average_ns - min_ns) * 100.0 / average_ns);
114
- printf (" %8.2f Mfloat/s " ,
115
- number_of_floats * 1000 / min_ns);
116
- if (instructions_min > 0 ) {
117
- printf (" %8.2f i/B %8.2f i/f (+/- %.1f %%) " ,
118
- instructions_min / volume,
119
- instructions_min / number_of_floats,
115
+ volumeMB * 1000000000 / min_ns,
116
+ (average_ns - min_ns) * 100.0 / average_ns);
117
+ printf (" %8.2f Mfloat/s " , number_of_floats * 1000 / min_ns);
118
+ if (instructions_min > 0 ) {
119
+ printf (" %8.2f i/B %8.2f i/f (+/- %.1f %%) " , instructions_min / volume,
120
+ instructions_min / number_of_floats,
120
121
(instructions_avg - instructions_min) * 100.0 / instructions_avg);
121
122
122
- printf (" %8.2f c/B %8.2f c/f (+/- %.1f %%) " ,
123
- cycles_min / volume,
124
- cycles_min / number_of_floats,
123
+ printf (" %8.2f c/B %8.2f c/f (+/- %.1f %%) " , cycles_min / volume,
124
+ cycles_min / number_of_floats,
125
125
(cycles_avg - cycles_min) * 100.0 / cycles_avg);
126
- printf (" %8.2f i/c " ,
127
- instructions_min /cycles_min);
128
- printf (" %8.2f b/f " ,
129
- branches_avg /number_of_floats);
130
- printf (" %8.2f bm/f " ,
131
- branch_misses_avg /number_of_floats);
132
- printf (" %8.2f GHz " ,
133
- cycles_min / min_ns);
126
+ printf (" %8.2f i/c " , instructions_min / cycles_min);
127
+ printf (" %8.2f b/f " , branches_avg / number_of_floats);
128
+ printf (" %8.2f bm/f " , branch_misses_avg / number_of_floats);
129
+ printf (" %8.2f GHz " , cycles_min / min_ns);
134
130
}
135
131
printf (" \n " );
136
-
137
132
}
138
133
#else
139
134
template <class T , class CharT >
140
- std::pair<double , double > time_it_ns (std::vector<std::basic_string<CharT>> &lines,
141
- T const &function, size_t repeat) {
135
+ std::pair<double , double >
136
+ time_it_ns (std::vector<std::basic_string<CharT>> &lines, T const &function,
137
+ size_t repeat) {
142
138
std::chrono::high_resolution_clock::time_point t1, t2;
143
139
double average = 0 ;
144
140
double min_value = DBL_MAX;
@@ -160,21 +156,16 @@ std::pair<double, double> time_it_ns(std::vector<std::basic_string<CharT>> &line
160
156
return std::make_pair (min_value, average);
161
157
}
162
158
163
-
164
-
165
-
166
- void pretty_print (double volume, size_t number_of_floats, std::string name, std::pair<double ,double > result) {
159
+ void pretty_print (double volume, size_t number_of_floats, std::string name,
160
+ std::pair<double , double > result) {
167
161
double volumeMB = volume / (1024 . * 1024 .);
168
162
printf (" %-40s: %8.2f MB/s (+/- %.1f %%) " , name.data (),
169
- volumeMB * 1000000000 / result.first ,
170
- (result.second - result.first ) * 100.0 / result.second );
171
- printf (" %8.2f Mfloat/s " ,
172
- number_of_floats * 1000 / result.first );
173
- printf (" %8.2f ns/f \n " ,
174
- double (result.first ) /number_of_floats );
163
+ volumeMB * 1000000000 / result.first ,
164
+ (result.second - result.first ) * 100.0 / result.second );
165
+ printf (" %8.2f Mfloat/s " , number_of_floats * 1000 / result.first );
166
+ printf (" %8.2f ns/f \n " , double (result.first ) / number_of_floats);
175
167
}
176
- #endif
177
-
168
+ #endif
178
169
179
170
// this is okay, all chars are ASCII
180
171
inline std::u16string widen (std::string line) {
@@ -195,21 +186,23 @@ std::vector<std::u16string> widen(const std::vector<std::string> &lines) {
195
186
return u16lines;
196
187
}
197
188
198
-
199
189
void process (std::vector<std::string> &lines, size_t volume) {
200
190
size_t repeat = 100 ;
201
191
double volumeMB = volume / (1024 . * 1024 .);
202
192
std::cout << " ASCII volume = " << volumeMB << " MB " << std::endl;
203
- pretty_print (volume, lines.size (), " fastfloat (64)" , time_it_ns (lines, findmax_fastfloat64<char >, repeat));
204
- pretty_print (volume, lines.size (), " fastfloat (32)" , time_it_ns (lines, findmax_fastfloat32<char >, repeat));
193
+ pretty_print (volume, lines.size (), " fastfloat (64)" ,
194
+ time_it_ns (lines, findmax_fastfloat64<char >, repeat));
195
+ pretty_print (volume, lines.size (), " fastfloat (32)" ,
196
+ time_it_ns (lines, findmax_fastfloat32<char >, repeat));
205
197
206
198
std::vector<std::u16string> lines16 = widen (lines);
207
199
volume = 2 * volume;
208
200
volumeMB = volume / (1024 . * 1024 .);
209
201
std::cout << " UTF-16 volume = " << volumeMB << " MB " << std::endl;
210
- pretty_print (volume, lines.size (), " fastfloat (64)" , time_it_ns (lines16, findmax_fastfloat64<char16_t >, repeat));
211
- pretty_print (volume, lines.size (), " fastfloat (32)" , time_it_ns (lines16, findmax_fastfloat32<char16_t >, repeat));
212
-
202
+ pretty_print (volume, lines.size (), " fastfloat (64)" ,
203
+ time_it_ns (lines16, findmax_fastfloat64<char16_t >, repeat));
204
+ pretty_print (volume, lines.size (), " fastfloat (32)" ,
205
+ time_it_ns (lines16, findmax_fastfloat32<char16_t >, repeat));
213
206
}
214
207
215
208
void fileload (std::string filename) {
@@ -233,13 +226,14 @@ void fileload(std::string filename) {
233
226
process (lines, volume);
234
227
}
235
228
236
-
237
229
int main (int argc, char **argv) {
238
- if (collector.has_events ()) {
230
+ if (collector.has_events ()) {
239
231
std::cout << " # Using hardware counters" << std::endl;
240
232
} else {
241
- #if defined(__linux__) || (__APPLE__ && __aarch64__)
242
- std::cout << " # Hardware counters not available, try to run in privileged mode (e.g., sudo)." << std::endl;
233
+ #if defined(__linux__) || (__APPLE__ && __aarch64__)
234
+ std::cout << " # Hardware counters not available, try to run in privileged "
235
+ " mode (e.g., sudo)."
236
+ << std::endl;
243
237
#endif
244
238
}
245
239
fileload (std::string (BENCHMARK_DATA_DIR) + " /canada.txt" );
0 commit comments