|
2 | 2 | #include "Context/Context.h" |
3 | 3 | #include "Data/Metric.h" |
4 | 4 | #include "Device.h" |
5 | | -#include "nlohmann/json.hpp" |
6 | 5 |
|
7 | 6 | #include <limits> |
8 | 7 | #include <map> |
9 | 8 | #include <mutex> |
10 | 9 | #include <set> |
11 | 10 | #include <stdexcept> |
12 | 11 |
|
13 | | -using json = nlohmann::json; |
14 | | - |
15 | 12 | namespace proton { |
16 | 13 |
|
17 | 14 | class TreeData::Tree { |
@@ -106,6 +103,134 @@ class TreeData::Tree { |
106 | 103 | std::map<size_t, TreeNode> treeNodeMap; |
107 | 104 | }; |
108 | 105 |
|
| 106 | +json TreeData::buildHatchetJson(TreeData::Tree *tree) const { |
| 107 | + std::map<size_t, json *> jsonNodes; |
| 108 | + json output = json::array(); |
| 109 | + output.push_back(json::object()); |
| 110 | + jsonNodes[TreeData::Tree::TreeNode::RootId] = &(output.back()); |
| 111 | + std::set<std::string> inclusiveValueNames; |
| 112 | + std::map<uint64_t, std::set<uint64_t>> deviceIds; |
| 113 | + tree->template walk<TreeData::Tree::WalkPolicy::PreOrder>( |
| 114 | + [&](TreeData::Tree::TreeNode &treeNode) { |
| 115 | + const auto contextName = treeNode.name; |
| 116 | + auto contextId = treeNode.id; |
| 117 | + json *jsonNode = jsonNodes[contextId]; |
| 118 | + (*jsonNode)["frame"] = {{"name", contextName}, {"type", "function"}}; |
| 119 | + (*jsonNode)["metrics"] = json::object(); |
| 120 | + for (auto [metricKind, metric] : treeNode.metrics) { |
| 121 | + if (metricKind == MetricKind::Kernel) { |
| 122 | + std::shared_ptr<KernelMetric> kernelMetric = |
| 123 | + std::dynamic_pointer_cast<KernelMetric>(metric); |
| 124 | + uint64_t duration = std::get<uint64_t>( |
| 125 | + kernelMetric->getValue(KernelMetric::Duration)); |
| 126 | + uint64_t invocations = std::get<uint64_t>( |
| 127 | + kernelMetric->getValue(KernelMetric::Invocations)); |
| 128 | + uint64_t deviceId = std::get<uint64_t>( |
| 129 | + kernelMetric->getValue(KernelMetric::DeviceId)); |
| 130 | + uint64_t deviceType = std::get<uint64_t>( |
| 131 | + kernelMetric->getValue(KernelMetric::DeviceType)); |
| 132 | + std::string deviceTypeName = |
| 133 | + getDeviceTypeString(static_cast<DeviceType>(deviceType)); |
| 134 | + (*jsonNode)["metrics"] |
| 135 | + [kernelMetric->getValueName(KernelMetric::Duration)] = |
| 136 | + duration; |
| 137 | + (*jsonNode)["metrics"] |
| 138 | + [kernelMetric->getValueName(KernelMetric::Invocations)] = |
| 139 | + invocations; |
| 140 | + (*jsonNode)["metrics"] |
| 141 | + [kernelMetric->getValueName(KernelMetric::DeviceId)] = |
| 142 | + std::to_string(deviceId); |
| 143 | + (*jsonNode)["metrics"] |
| 144 | + [kernelMetric->getValueName(KernelMetric::DeviceType)] = |
| 145 | + deviceTypeName; |
| 146 | + inclusiveValueNames.insert( |
| 147 | + kernelMetric->getValueName(KernelMetric::Duration)); |
| 148 | + inclusiveValueNames.insert( |
| 149 | + kernelMetric->getValueName(KernelMetric::Invocations)); |
| 150 | + deviceIds[deviceType].insert(deviceId); |
| 151 | + } else if (metricKind == MetricKind::PCSampling) { |
| 152 | + auto pcSamplingMetric = |
| 153 | + std::dynamic_pointer_cast<PCSamplingMetric>(metric); |
| 154 | + for (size_t i = 0; i < PCSamplingMetric::Count; i++) { |
| 155 | + auto valueName = pcSamplingMetric->getValueName(i); |
| 156 | + inclusiveValueNames.insert(valueName); |
| 157 | + std::visit( |
| 158 | + [&](auto &&value) { |
| 159 | + (*jsonNode)["metrics"][valueName] = value; |
| 160 | + }, |
| 161 | + pcSamplingMetric->getValues()[i]); |
| 162 | + } |
| 163 | + } else if (metricKind == MetricKind::Cycle) { |
| 164 | + auto cycleMetric = std::dynamic_pointer_cast<CycleMetric>(metric); |
| 165 | + uint64_t duration = std::get<uint64_t>( |
| 166 | + cycleMetric->getValue(CycleMetric::Duration)); |
| 167 | + double normalizedDuration = std::get<double>( |
| 168 | + cycleMetric->getValue(CycleMetric::NormalizedDuration)); |
| 169 | + uint64_t deviceId = std::get<uint64_t>( |
| 170 | + cycleMetric->getValue(CycleMetric::DeviceId)); |
| 171 | + uint64_t deviceType = std::get<uint64_t>( |
| 172 | + cycleMetric->getValue(CycleMetric::DeviceType)); |
| 173 | + (*jsonNode)["metrics"] |
| 174 | + [cycleMetric->getValueName(CycleMetric::Duration)] = |
| 175 | + duration; |
| 176 | + (*jsonNode)["metrics"][cycleMetric->getValueName( |
| 177 | + CycleMetric::NormalizedDuration)] = normalizedDuration; |
| 178 | + (*jsonNode)["metrics"] |
| 179 | + [cycleMetric->getValueName(CycleMetric::DeviceId)] = |
| 180 | + std::to_string(deviceId); |
| 181 | + (*jsonNode)["metrics"] |
| 182 | + [cycleMetric->getValueName(CycleMetric::DeviceType)] = |
| 183 | + std::to_string(deviceType); |
| 184 | + deviceIds[deviceType].insert(deviceId); |
| 185 | + } else if (metricKind == MetricKind::Flexible) { |
| 186 | + // Flexible metrics are handled in a different way |
| 187 | + } else { |
| 188 | + throw std::runtime_error("MetricKind not supported"); |
| 189 | + } |
| 190 | + } |
| 191 | + for (auto [_, flexibleMetric] : treeNode.flexibleMetrics) { |
| 192 | + auto valueName = flexibleMetric.getValueName(0); |
| 193 | + if (!flexibleMetric.isExclusive(0)) |
| 194 | + inclusiveValueNames.insert(valueName); |
| 195 | + std::visit( |
| 196 | + [&](auto &&value) { (*jsonNode)["metrics"][valueName] = value; }, |
| 197 | + flexibleMetric.getValues()[0]); |
| 198 | + } |
| 199 | + (*jsonNode)["children"] = json::array(); |
| 200 | + auto children = treeNode.children; |
| 201 | + for (auto _ : children) { |
| 202 | + (*jsonNode)["children"].push_back(json::object()); |
| 203 | + } |
| 204 | + auto idx = 0; |
| 205 | + for (auto child : children) { |
| 206 | + auto [index, childId] = child; |
| 207 | + jsonNodes[childId] = &(*jsonNode)["children"][idx]; |
| 208 | + idx++; |
| 209 | + } |
| 210 | + }); |
| 211 | + for (auto valueName : inclusiveValueNames) { |
| 212 | + output[TreeData::Tree::TreeNode::RootId]["metrics"][valueName] = 0; |
| 213 | + } |
| 214 | + output.push_back(json::object()); |
| 215 | + auto &deviceJson = output.back(); |
| 216 | + for (auto [deviceType, deviceIdSet] : deviceIds) { |
| 217 | + auto deviceTypeName = |
| 218 | + getDeviceTypeString(static_cast<DeviceType>(deviceType)); |
| 219 | + if (!deviceJson.contains(deviceTypeName)) |
| 220 | + deviceJson[deviceTypeName] = json::object(); |
| 221 | + for (auto deviceId : deviceIdSet) { |
| 222 | + Device device = getDevice(static_cast<DeviceType>(deviceType), deviceId); |
| 223 | + deviceJson[deviceTypeName][std::to_string(deviceId)] = { |
| 224 | + {"clock_rate", device.clockRate}, |
| 225 | + {"memory_clock_rate", device.memoryClockRate}, |
| 226 | + {"bus_width", device.busWidth}, |
| 227 | + {"arch", device.arch}, |
| 228 | + {"num_sms", device.numSms}}; |
| 229 | + } |
| 230 | + } |
| 231 | + return output; |
| 232 | +} |
| 233 | + |
109 | 234 | void TreeData::enterScope(const Scope &scope) { |
110 | 235 | // enterOp and addMetric maybe called from different threads |
111 | 236 | std::unique_lock<std::shared_mutex> lock(mutex); |
@@ -201,136 +326,16 @@ void TreeData::clear() { |
201 | 326 | } |
202 | 327 |
|
203 | 328 | void TreeData::dumpHatchet(std::ostream &os) const { |
204 | | - std::map<size_t, json *> jsonNodes; |
205 | | - json output = json::array(); |
206 | | - output.push_back(json::object()); |
207 | | - jsonNodes[Tree::TreeNode::RootId] = &(output.back()); |
208 | | - std::set<std::string> inclusiveValueNames; |
209 | | - std::map<uint64_t, std::set<uint64_t>> deviceIds; |
210 | | - this->tree->template walk<Tree::WalkPolicy::PreOrder>([&](Tree::TreeNode |
211 | | - &treeNode) { |
212 | | - const auto contextName = treeNode.name; |
213 | | - auto contextId = treeNode.id; |
214 | | - json *jsonNode = jsonNodes[contextId]; |
215 | | - (*jsonNode)["frame"] = {{"name", contextName}, {"type", "function"}}; |
216 | | - (*jsonNode)["metrics"] = json::object(); |
217 | | - for (auto [metricKind, metric] : treeNode.metrics) { |
218 | | - if (metricKind == MetricKind::Kernel) { |
219 | | - std::shared_ptr<KernelMetric> kernelMetric = |
220 | | - std::dynamic_pointer_cast<KernelMetric>(metric); |
221 | | - uint64_t duration = |
222 | | - std::get<uint64_t>(kernelMetric->getValue(KernelMetric::Duration)); |
223 | | - uint64_t invocations = std::get<uint64_t>( |
224 | | - kernelMetric->getValue(KernelMetric::Invocations)); |
225 | | - uint64_t deviceId = |
226 | | - std::get<uint64_t>(kernelMetric->getValue(KernelMetric::DeviceId)); |
227 | | - uint64_t deviceType = std::get<uint64_t>( |
228 | | - kernelMetric->getValue(KernelMetric::DeviceType)); |
229 | | - std::string deviceTypeName = |
230 | | - getDeviceTypeString(static_cast<DeviceType>(deviceType)); |
231 | | - (*jsonNode)["metrics"] |
232 | | - [kernelMetric->getValueName(KernelMetric::Duration)] = |
233 | | - duration; |
234 | | - (*jsonNode)["metrics"] |
235 | | - [kernelMetric->getValueName(KernelMetric::Invocations)] = |
236 | | - invocations; |
237 | | - (*jsonNode)["metrics"] |
238 | | - [kernelMetric->getValueName(KernelMetric::DeviceId)] = |
239 | | - std::to_string(deviceId); |
240 | | - (*jsonNode)["metrics"] |
241 | | - [kernelMetric->getValueName(KernelMetric::DeviceType)] = |
242 | | - deviceTypeName; |
243 | | - inclusiveValueNames.insert( |
244 | | - kernelMetric->getValueName(KernelMetric::Duration)); |
245 | | - inclusiveValueNames.insert( |
246 | | - kernelMetric->getValueName(KernelMetric::Invocations)); |
247 | | - deviceIds[deviceType].insert(deviceId); |
248 | | - } else if (metricKind == MetricKind::PCSampling) { |
249 | | - auto pcSamplingMetric = |
250 | | - std::dynamic_pointer_cast<PCSamplingMetric>(metric); |
251 | | - for (size_t i = 0; i < PCSamplingMetric::Count; i++) { |
252 | | - auto valueName = pcSamplingMetric->getValueName(i); |
253 | | - inclusiveValueNames.insert(valueName); |
254 | | - std::visit( |
255 | | - [&](auto &&value) { (*jsonNode)["metrics"][valueName] = value; }, |
256 | | - pcSamplingMetric->getValues()[i]); |
257 | | - } |
258 | | - } else if (metricKind == MetricKind::Cycle) { |
259 | | - auto cycleMetric = std::dynamic_pointer_cast<CycleMetric>(metric); |
260 | | - uint64_t duration = |
261 | | - std::get<uint64_t>(cycleMetric->getValue(CycleMetric::Duration)); |
262 | | - double normalizedDuration = std::get<double>( |
263 | | - cycleMetric->getValue(CycleMetric::NormalizedDuration)); |
264 | | - uint64_t deviceId = |
265 | | - std::get<uint64_t>(cycleMetric->getValue(CycleMetric::DeviceId)); |
266 | | - uint64_t deviceType = |
267 | | - std::get<uint64_t>(cycleMetric->getValue(CycleMetric::DeviceType)); |
268 | | - (*jsonNode)["metrics"] |
269 | | - [cycleMetric->getValueName(CycleMetric::Duration)] = |
270 | | - duration; |
271 | | - (*jsonNode)["metrics"][cycleMetric->getValueName( |
272 | | - CycleMetric::NormalizedDuration)] = normalizedDuration; |
273 | | - (*jsonNode)["metrics"] |
274 | | - [cycleMetric->getValueName(CycleMetric::DeviceId)] = |
275 | | - std::to_string(deviceId); |
276 | | - (*jsonNode)["metrics"] |
277 | | - [cycleMetric->getValueName(CycleMetric::DeviceType)] = |
278 | | - std::to_string(deviceType); |
279 | | - deviceIds[deviceType].insert(deviceId); |
280 | | - } else if (metricKind == MetricKind::Flexible) { |
281 | | - // Flexible metrics are handled in a different way |
282 | | - } else { |
283 | | - throw std::runtime_error("MetricKind not supported"); |
284 | | - } |
285 | | - } |
286 | | - for (auto [_, flexibleMetric] : treeNode.flexibleMetrics) { |
287 | | - auto valueName = flexibleMetric.getValueName(0); |
288 | | - if (!flexibleMetric.isExclusive(0)) |
289 | | - inclusiveValueNames.insert(valueName); |
290 | | - std::visit( |
291 | | - [&](auto &&value) { (*jsonNode)["metrics"][valueName] = value; }, |
292 | | - flexibleMetric.getValues()[0]); |
293 | | - } |
294 | | - (*jsonNode)["children"] = json::array(); |
295 | | - auto children = treeNode.children; |
296 | | - for (auto _ : children) { |
297 | | - (*jsonNode)["children"].push_back(json::object()); |
298 | | - } |
299 | | - auto idx = 0; |
300 | | - for (auto child : children) { |
301 | | - auto [index, childId] = child; |
302 | | - jsonNodes[childId] = &(*jsonNode)["children"][idx]; |
303 | | - idx++; |
304 | | - } |
305 | | - }); |
306 | | - // Hints for all inclusive metrics |
307 | | - for (auto valueName : inclusiveValueNames) { |
308 | | - output[Tree::TreeNode::RootId]["metrics"][valueName] = 0; |
309 | | - } |
310 | | - // Prepare the device information |
311 | | - // Note that this is done from the application thread, |
312 | | - // query device information from the tool thread (e.g., CUPTI) will have |
313 | | - // problems |
314 | | - output.push_back(json::object()); |
315 | | - auto &deviceJson = output.back(); |
316 | | - for (auto [deviceType, deviceIdSet] : deviceIds) { |
317 | | - auto deviceTypeName = |
318 | | - getDeviceTypeString(static_cast<DeviceType>(deviceType)); |
319 | | - if (!deviceJson.contains(deviceTypeName)) |
320 | | - deviceJson[deviceTypeName] = json::object(); |
321 | | - for (auto deviceId : deviceIdSet) { |
322 | | - Device device = getDevice(static_cast<DeviceType>(deviceType), deviceId); |
323 | | - deviceJson[deviceTypeName][std::to_string(deviceId)] = { |
324 | | - {"clock_rate", device.clockRate}, |
325 | | - {"memory_clock_rate", device.memoryClockRate}, |
326 | | - {"bus_width", device.busWidth}, |
327 | | - {"arch", device.arch}, |
328 | | - {"num_sms", device.numSms}}; |
329 | | - } |
330 | | - } |
| 329 | + auto output = buildHatchetJson(tree.get()); |
331 | 330 | os << std::endl << output.dump(4) << std::endl; |
332 | 331 | } |
333 | 332 |
|
| 333 | +std::string TreeData::toJsonString() const { |
| 334 | + std::shared_lock<std::shared_mutex> lock(mutex); |
| 335 | + auto output = buildHatchetJson(tree.get()); |
| 336 | + return output.dump(); |
| 337 | +} |
| 338 | + |
334 | 339 | void TreeData::doDump(std::ostream &os, OutputFormat outputFormat) const { |
335 | 340 | if (outputFormat == OutputFormat::Hatchet) { |
336 | 341 | dumpHatchet(os); |
|
0 commit comments