33
33
#include " pti_assert.h"
34
34
#include < inttypes.h>
35
35
36
-
37
36
// File-scope tuning constants for metric collection.
constexpr static uint64_t min_dummy_instance_id = 1024 * 1024 ; // min dummy instance id if idle sampling is enabled
38
- constexpr static uint32_t max_metric_size = 512 ;
39
- static uint32_t max_metric_samples = 32768 ;
37
// NOTE(review): no remaining use of max_metric_samples is visible in this excerpt
// (the old buffer-size formula and the notifyEveryNReports adjustment are removed) — confirm it is still needed.
+ constexpr static uint32_t max_metric_samples = 32768 ;
40
38
41
- #define MAX_METRIC_BUFFER (max_metric_samples * max_metric_size* 2 )
39
// Fixed capacity, in bytes (8 MiB), of the raw metric buffer. It is used both as the
// malloc size for raw_metrics and as the largest chunk size accepted when the
// intermediate metrics file is read back.
+ #define MAX_METRIC_BUFFER (8ULL * 1024ULL * 1024ULL )
42
40
43
41
inline void PrintDeviceList () {
44
42
ze_result_t status = zeInit (ZE_INIT_FLAG_GPU_ONLY);
@@ -203,7 +201,6 @@ struct ZeDeviceDescriptor {
203
201
std::atomic<ZeProfilerState> profiling_state_;
204
202
std::string metric_file_name_;
205
203
std::ofstream metric_file_stream_;
206
- std::vector<uint8_t > metric_data_;
207
204
bool stall_sampling_;
208
205
};
209
206
@@ -526,7 +523,7 @@ class ZeMetricProfiler {
526
523
}
527
524
528
525
void ComputeMetrics () {
529
- uint8_t *raw_metrics = ( uint8_t *) malloc (sizeof (uint8_t ) * ( MAX_METRIC_BUFFER + 512 ));
526
+ auto *raw_metrics = static_cast < uint8_t *>( malloc (sizeof (uint8_t )* MAX_METRIC_BUFFER));
530
527
UniMemory::ExitIfOutOfMemory ((void *)raw_metrics);
531
528
532
529
for (auto it = device_descriptors_.begin (); it != device_descriptors_.end (); ++it) {
@@ -611,8 +608,27 @@ class ZeMetricProfiler {
611
608
}
612
609
613
610
while (!inf.eof ()) {
614
- inf.read (reinterpret_cast <char *>(raw_metrics), MAX_METRIC_BUFFER + 512 );
611
+ // Read metric data in two stages, first actual size (in bytes), followed by actual metrics
612
+ uint64_t data_size;
613
+ inf.read (reinterpret_cast <char *>(&data_size), sizeof (data_size));
614
+ if (inf.eof ()) {
615
+ // If we reached EOF, we can stop processing
616
+ break ;
617
+ }
618
+ if (inf.gcount () != sizeof (data_size)) {
619
+ std::cerr << " [WARNING] Intermediate metrics file is invalid. Cannot find the size of the next data segment. Output likely to be incomplete." << std::endl;
620
+ break ;
621
+ }
622
+ if (data_size > MAX_METRIC_BUFFER) {
623
+ std::cerr << " [WARNING] Intermediate metrics file is invalid. Next chunk cannot be larger than the allocated buffer. Output likely to be incomplete." << std::endl;
624
+ break ;
625
+ }
626
+ inf.read (reinterpret_cast <char *>(raw_metrics), data_size);
615
627
int raw_size = inf.gcount ();
628
+ if (raw_size < data_size) {
629
+ std::cerr << " [WARNING] Intermediate metrics file is incomplete. Expecting " << data_size << " bytes but only " << raw_size << " bytes were found. Output likely to be incomplete." << std::endl;
630
+ break ;
631
+ }
616
632
if (raw_size > 0 ) {
617
633
uint32_t num_samples = 0 ;
618
634
uint32_t num_metrics = 0 ;
@@ -838,8 +854,27 @@ class ZeMetricProfiler {
838
854
uint64_t cur_sampling_ts = 0 ;
839
855
auto kit = kinfo.begin ();
840
856
while (!inf.eof ()) {
841
- inf.read (reinterpret_cast <char *>(raw_metrics), MAX_METRIC_BUFFER + 512 );
857
+ // Read metric data in two stages, first actual size (in bytes), followed by actual metrics
858
+ uint64_t data_size;
859
+ inf.read (reinterpret_cast <char *>(&data_size), sizeof (data_size));
860
+ if (inf.eof ()) {
861
+ // If we reached EOF, we can stop processing
862
+ break ;
863
+ }
864
+ if (inf.gcount () != sizeof (data_size)) {
865
+ std::cerr << " [WARNING] Intermediate metrics file is invalid. Cannot find the size of the next data segment. Output likely to be incomplete." << std::endl;
866
+ break ;
867
+ }
868
+ if (data_size > MAX_METRIC_BUFFER) {
869
+ std::cerr << " [WARNING] Intermediate metrics file is invalid. Next chunk cannot be larger than the allocated buffer. Output likely to be incomplete." << std::endl;
870
+ break ;
871
+ }
872
+ inf.read (reinterpret_cast <char *>(raw_metrics), data_size);
842
873
int raw_size = inf.gcount ();
874
+ if (raw_size < data_size) {
875
+ std::cerr << " [WARNING] Intermediate metrics file is incomplete. Expecting " << data_size << " bytes but only " << raw_size << " bytes were found. Output likely to be incomplete." << std::endl;
876
+ break ;
877
+ }
843
878
if (raw_size > 0 ) {
844
879
uint32_t num_samples = 0 ;
845
880
uint32_t num_metrics = 0 ;
@@ -951,7 +986,7 @@ class ZeMetricProfiler {
951
986
inf.close ();
952
987
}
953
988
}
954
- free (raw_metrics);
989
+ free (raw_metrics);
955
990
}
956
991
957
992
private:
@@ -1054,6 +1089,7 @@ class ZeMetricProfiler {
1054
1089
PTI_ASSERT (status == ZE_RESULT_SUCCESS);
1055
1090
}
1056
1091
else {
1092
+ // if (status == ZE_RESULT_NOT_READY)
1057
1093
return 0 ;
1058
1094
}
1059
1095
@@ -1075,12 +1111,12 @@ class ZeMetricProfiler {
1075
1111
ze_event_pool_desc_t event_pool_desc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr , ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1 };
1076
1112
status = zeEventPoolCreate (context, &event_pool_desc, 1 , &device, &event_pool);
1077
1113
PTI_ASSERT (status == ZE_RESULT_SUCCESS);
1078
-
1114
+
1079
1115
ze_event_handle_t event = nullptr ;
1080
1116
ze_event_desc_t event_desc = {ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr , 0 , ZE_EVENT_SCOPE_FLAG_HOST, ZE_EVENT_SCOPE_FLAG_HOST};
1081
1117
status = zeEventCreate (event_pool, &event_desc, &event);
1082
1118
PTI_ASSERT (status == ZE_RESULT_SUCCESS);
1083
-
1119
+
1084
1120
zet_metric_streamer_handle_t streamer = nullptr ;
1085
1121
uint32_t interval = std::stoi (utils::GetEnv (" UNITRACE_SamplingInterval" )) * 1000 ; // convert us to ns
1086
1122
@@ -1102,37 +1138,36 @@ class ZeMetricProfiler {
1102
1138
desc->profiling_state_ .store (PROFILER_ENABLED, std::memory_order_release);
1103
1139
return ;
1104
1140
}
1105
-
1106
- if (streamer_desc.notifyEveryNReports > max_metric_samples) {
1107
- max_metric_samples = streamer_desc.notifyEveryNReports ;
1108
- }
1109
-
1141
+
1110
1142
std::vector<std::string> metrics_list;
1111
1143
metrics_list = GetMetricList (group);
1112
1144
PTI_ASSERT (!metrics_list.empty ());
1113
1145
1114
- uint8_t *raw_metrics = ( uint8_t *) malloc (sizeof (uint8_t ) * ( MAX_METRIC_BUFFER + 512 ));
1146
+ auto *raw_metrics = static_cast < uint8_t *>( malloc (sizeof (uint8_t )* MAX_METRIC_BUFFER));
1115
1147
UniMemory::ExitIfOutOfMemory ((void *)raw_metrics);
1116
1148
1149
// Appends one length-prefixed record to the stream `f`.
//   buffer: start of the raw metric bytes to store
//   size:   number of bytes to take from `buffer`
//   f:      output stream receiving the record
// The record is `sizeof(size)` bytes holding `size` in its in-memory (host) representation,
// followed by `size` bytes of payload. The reading loops in this file consume the same
// layout: a uint64_t size first, then that many bytes.
// The stream state is not inspected after the writes, so a failed write is not reported here.
+ auto dump_metrics = [](uint8_t *buffer, uint64_t size, std::ofstream *f) {
1150
+ // Write metric data in two stages, first actual size (in bytes), followed by actual metrics
1151
+ f->write (reinterpret_cast <char *>(&size), sizeof (size));
1152
+ f->write (reinterpret_cast <char *>(buffer), size);
1153
+ };
1154
+
1117
1155
desc->profiling_state_ .store (PROFILER_ENABLED, std::memory_order_release);
1118
1156
while (desc->profiling_state_ .load (std::memory_order_acquire) != PROFILER_DISABLED) {
1119
- uint64_t size = EventBasedReadMetrics (event, streamer, raw_metrics, (MAX_METRIC_BUFFER + 512 ));
1120
- if (size == 0 ) {
1121
- if (!desc->metric_data_ .empty ()) {
1122
- desc->metric_file_stream_ .write (reinterpret_cast <char *>(desc->metric_data_ .data ()), desc->metric_data_ .size ());
1123
- desc->metric_data_ .clear ();
1124
- }
1125
- continue ;
1157
+ auto size = EventBasedReadMetrics (event, streamer, raw_metrics, MAX_METRIC_BUFFER);
1158
+ if (size > 0 ) {
1159
+ // If we have data, dump it to the intermediate file
1160
+ dump_metrics (raw_metrics, size, &desc->metric_file_stream_ );
1126
1161
}
1127
- desc->metric_data_ .insert (desc->metric_data_ .end (), raw_metrics, raw_metrics + size);
1128
1162
}
1129
- auto size = ReadMetrics (streamer, raw_metrics, (MAX_METRIC_BUFFER + 512 ));
1130
- desc->metric_data_ .insert (desc->metric_data_ .end (), raw_metrics, raw_metrics + size);
1131
- if (!desc->metric_data_ .empty ()) {
1132
- desc->metric_file_stream_ .write (reinterpret_cast <char *>(desc->metric_data_ .data ()), desc->metric_data_ .size ());
1133
- desc->metric_data_ .clear ();
1163
+
1164
+ // Flush the remaining metrics after the profiler has stopped
1165
+ auto size = ReadMetrics (streamer, raw_metrics, MAX_METRIC_BUFFER);
1166
+ while (size > 0 ) {
1167
+ dump_metrics (raw_metrics, size, &desc->metric_file_stream_ );
1168
+ size = ReadMetrics (streamer, raw_metrics, MAX_METRIC_BUFFER);
1134
1169
}
1135
- free (raw_metrics);
1170
+ free (raw_metrics);
1136
1171
1137
1172
status = zetMetricStreamerClose (streamer);
1138
1173
PTI_ASSERT (status == ZE_RESULT_SUCCESS);
0 commit comments