Skip to content

Commit 360a3e6

Browse files
gianni-corjpgaribotti
authored andcommitted
vk profiling triplet
1 parent 8f6f46d commit 360a3e6

File tree

1 file changed

+196
-38
lines changed

1 file changed

+196
-38
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 196 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,51 +1253,32 @@ class vk_memory_logger {
12531253
#define VK_LOG_MEMORY(msg) ((void) 0)
12541254
#endif // GGML_VULKAN_MEMORY_DEBUG
12551255

1256-
class vk_perf_logger {
1257-
public:
1258-
void print_timings() {
1259-
if (timings.empty()) {
1260-
return;
1261-
}
1262-
std::stringstream ss;
1263-
uint64_t total_all_op_times = 0;
1264-
ss << "----------------\nVulkan Timings:" << std::endl;
1265-
for (const auto & t : timings) {
1266-
uint64_t total_op_times = 0;
1267-
for (const auto & time : t.second) {
1268-
total_op_times += time;
1269-
}
1270-
ss << t.first << ": " << t.second.size() << " x " << (total_op_times / t.second.size() / 1000.0)
1271-
<< " us";
1272-
1273-
// If we have as many flops entries as timing entries for the op, then compute and log the flops/S.
1274-
auto it = flops.find(t.first);
1275-
if (it != flops.end() && (it->second).size() == t.second.size()) {
1276-
uint64_t total_op_flops = 0;
1277-
for (const auto & elem : it->second) {
1278-
total_op_flops += elem;
1279-
}
1280-
ss << " ("
1281-
<< (double(total_op_flops) / (1000.0 * 1000.0 * 1000.0)) /
1282-
(double(total_op_times) / (1000.0 * 1000.0 * 1000.0))
1283-
<< " GFLOPS/s)";
1284-
}
1256+
struct triplet_timing_info {
1257+
std::string operation_type; // Specific Vulkan matmul operation name
1258+
std::string triplet;
1259+
std::string size_combination;
1260+
std::vector<uint64_t> times;
1261+
uint64_t total_time = 0;
1262+
size_t count = 0;
1263+
double avg_time_us = 0.0;
1264+
};
12851265

1286-
total_all_op_times += total_op_times;
1266+
class vk_perf_logger {
1267+
public:
1268+
// Simplified approach without context storage
12871269

1288-
ss << std::endl;
1289-
}
1270+
void print_timings() {
1271+
GGML_LOG_DEBUG("================\nVulkan Profiling Results:\n================\n\n");
1272+
print_legacy_timings();
1273+
print_triplet_timings();
12901274

1291-
if (timings.size() > 0) {
1292-
ss << "Total time: " << total_all_op_times / 1000.0 << " us." << std::endl;
1293-
}
1294-
auto ssStr = ss.str();
1295-
GGML_LOG_DEBUG("%s", ssStr.c_str());
12961275
timings.clear();
12971276
flops.clear();
1277+
triplet_timings.clear();
12981278
}
12991279

13001280
void log_timing(const ggml_tensor * node, uint64_t time) {
1281+
// Legacy timing tracking (for backward compatibility)
13011282
if (node->op == GGML_OP_UNARY) {
13021283
timings[ggml_unary_op_name(ggml_get_unary_op(node))].push_back(time);
13031284
return;
@@ -1320,6 +1301,13 @@ class vk_perf_logger {
13201301
}
13211302
timings[name].push_back(time);
13221303
flops[name].push_back(m * n * (k + (k - 1)) * batch);
1304+
1305+
// Enhanced triplet tracking for matrix operations
1306+
if (time > 0) { // Only log when we have actual timing data
1307+
// Determine operation type based on tensor characteristics
1308+
std::string op_type = determine_matmul_operation_type(node);
1309+
log_triplet_timing(node, time, op_type);
1310+
}
13231311
return;
13241312
}
13251313
if (node->op == GGML_OP_CONV_2D) {
@@ -1351,11 +1339,181 @@ class vk_perf_logger {
13511339
}
13521340
timings[ggml_op_name(node->op)].push_back(time);
13531341
}
1354-
private:
1342+
1343+
private:
13551344
std::map<std::string, std::vector<uint64_t>> timings;
13561345
std::map<std::string, std::vector<uint64_t>> flops;
1346+
std::map<std::string, triplet_timing_info> triplet_timings;
1347+
1348+
void log_triplet_timing(const ggml_tensor * node, uint64_t time, const std::string& operation_type = "generic_matmul") {
1349+
if (!node || !node->src[0] || !node->src[1] || time == 0) return;
1350+
1351+
try {
1352+
// Extract tensor type information
1353+
std::string src0_type = ggml_type_name(node->src[0]->type);
1354+
std::string src1_type = ggml_type_name(node->src[1]->type);
1355+
std::string dst_type = ggml_type_name(node->type);
1356+
1357+
// Create triplet string (using safe ASCII characters)
1358+
std::string triplet = src0_type + " x " + src1_type + " -> " + dst_type;
1359+
1360+
// Extract dimensions
1361+
const uint64_t m = node->src[0]->ne[1];
1362+
const uint64_t n = node->src[1]->ne[1];
1363+
const uint64_t k = node->src[1]->ne[0];
1364+
1365+
// Create size combination string
1366+
std::string size_combo = "[" + std::to_string(k) + "x" + std::to_string(m) + "] x " +
1367+
"[" + std::to_string(k) + "x" + std::to_string(n) + "] -> " +
1368+
"[" + std::to_string(m) + "x" + std::to_string(n) + "]";
1369+
1370+
// Create combined key including operation type
1371+
std::string full_key = operation_type + " | " + triplet + " | " + size_combo;
1372+
1373+
// Update triplet timing info
1374+
auto& info = triplet_timings[full_key];
1375+
info.operation_type = operation_type;
1376+
info.triplet = triplet;
1377+
info.size_combination = size_combo;
1378+
info.times.push_back(time);
1379+
info.total_time += time;
1380+
info.count++;
1381+
info.avg_time_us = (info.total_time / info.count) / 1000.0;
1382+
} catch (...) {
1383+
// Silently ignore errors to prevent crashes
1384+
}
1385+
}
1386+
1387+
// Determine operation type based on tensor characteristics
1388+
std::string determine_matmul_operation_type(const ggml_tensor * node) {
1389+
if (!node || !node->src[0] || !node->src[1]) return "unknown_matmul";
1390+
1391+
try {
1392+
const uint64_t n = node->src[1]->ne[1]; // Number of columns in second tensor
1393+
const bool is_permuted_src0 = ggml_is_permuted(node->src[0]);
1394+
const bool is_permuted_src1 = ggml_is_permuted(node->src[1]);
1395+
const bool is_contiguous_src0 = ggml_is_contiguous(node->src[0]);
1396+
const ggml_type src0_type = node->src[0]->type;
1397+
// const ggml_type src1_type = node->src[1]->type; // unused for now
1398+
1399+
// Logic based on ggml_vk_mul_mat function selection
1400+
if (src0_type == GGML_TYPE_F16 && is_permuted_src0 && is_permuted_src1 && n == 1) {
1401+
return "ggml_vk_mul_mat_vec_p021_f16_f32";
1402+
} else if (src0_type == GGML_TYPE_F16 && !is_contiguous_src0 && !ggml_is_transposed(node->src[1]) && n == 1 &&
1403+
!is_permuted_src0 && !is_permuted_src1) {
1404+
return "ggml_vk_mul_mat_vec_nc_f16_f32";
1405+
} else if (n == 1 && (src0_type == GGML_TYPE_F32 || src0_type == GGML_TYPE_F16 ||
1406+
src0_type == GGML_TYPE_BF16 || ggml_is_quantized(src0_type))) {
1407+
return "ggml_vk_mul_mat_vec_q_f16";
1408+
} else if (node->op == GGML_OP_MUL_MAT_ID) {
1409+
if (n == 1) {
1410+
return "ggml_vk_mul_mat_vec_id_q_f16";
1411+
} else {
1412+
return "ggml_vk_mul_mat_id_q_f16";
1413+
}
1414+
} else {
1415+
return "ggml_vk_mul_mat_q_f16";
1416+
}
1417+
} catch (...) {
1418+
return "unknown_matmul";
1419+
}
1420+
}
1421+
1422+
void print_legacy_timings() {
1423+
if (timings.empty()) {
1424+
return;
1425+
}
1426+
std::stringstream ss;
1427+
uint64_t total_all_op_times = 0;
1428+
ss << "Legacy Timing Summary:\n---------------------" << std::endl;
1429+
for (const auto & t : timings) {
1430+
uint64_t total_op_times = 0;
1431+
for (const auto & time : t.second) {
1432+
total_op_times += time;
1433+
}
1434+
ss << t.first << ": " << t.second.size() << " x " << (total_op_times / t.second.size() / 1000.0)
1435+
<< " us";
1436+
1437+
// If we have as many flops entries as timing entries for the op, then compute and log the flops/S.
1438+
auto it = flops.find(t.first);
1439+
if (it != flops.end() && (it->second).size() == t.second.size()) {
1440+
uint64_t total_op_flops = 0;
1441+
for (const auto & elem : it->second) {
1442+
total_op_flops += elem;
1443+
}
1444+
ss << " ("
1445+
<< (double(total_op_flops) / (1000.0 * 1000.0 * 1000.0)) /
1446+
(double(total_op_times) / (1000.0 * 1000.0 * 1000.0))
1447+
<< " GFLOPS/s)";
1448+
}
1449+
1450+
total_all_op_times += total_op_times;
1451+
1452+
ss << std::endl;
1453+
}
1454+
1455+
if (timings.size() > 0) {
1456+
ss << "Total time: " << total_all_op_times / 1000.0 << " us." << std::endl;
1457+
}
1458+
GGML_LOG_DEBUG("%s", ss.str().c_str());
1459+
}
1460+
1461+
void print_triplet_timings() {
1462+
std::stringstream ss;
1463+
try {
1464+
if (triplet_timings.empty()) {
1465+
ss << "No triplet timing data available." << std::endl;
1466+
return;
1467+
}
1468+
1469+
ss << "Enhanced Triplet Timing Analysis by Operation Type:" << std::endl;
1470+
ss << "====================================================" << std::endl;
1471+
1472+
// Group by operation type
1473+
std::map<std::string, std::vector<triplet_timing_info*>> operation_groups;
1474+
for (auto& [key, info] : triplet_timings) {
1475+
operation_groups[info.operation_type].push_back(&info);
1476+
}
1477+
1478+
// Print results organized by operation type
1479+
for (const auto& [operation_type, entries] : operation_groups) {
1480+
ss << "\n" << operation_type << ":" << std::endl;
1481+
ss << std::string(operation_type.length() + 1, '-') << std::endl;
1482+
1483+
uint64_t operation_total_time = 0;
1484+
size_t operation_total_count = 0;
1485+
1486+
for (const auto& info : entries) {
1487+
if (info->count > 0) {
1488+
ss << " " << info->triplet << " | " << info->size_combination
1489+
<< ": " << info->count << " ops, "
1490+
<< std::fixed << std::setprecision(2) << info->avg_time_us << " us avg" << std::endl;
1491+
1492+
operation_total_time += info->total_time;
1493+
operation_total_count += info->count;
1494+
}
1495+
}
1496+
1497+
if (operation_total_count > 0) {
1498+
double operation_avg_us = (operation_total_time / operation_total_count) / 1000.0;
1499+
ss << " → " << operation_type << " Summary: " << operation_total_count
1500+
<< " total ops, " << std::fixed << std::setprecision(2) << operation_avg_us << " us avg" << std::endl;
1501+
}
1502+
}
1503+
1504+
ss << "\nOverall Statistics:" << std::endl;
1505+
ss << "Total operation types: " << operation_groups.size() << std::endl;
1506+
ss << "Total variations: " << triplet_timings.size() << std::endl;
1507+
ss << std::endl;
1508+
} catch (...) {
1509+
GGML_LOG_DEBUG("%s", ss.str().c_str());
1510+
GGML_LOG_DEBUG("Error in triplet timing analysis - analysis skipped.\n");
1511+
}
1512+
}
13571513
};
13581514

1515+
// Thread-local storage definition is now handled above the class
1516+
13591517
struct ggml_backend_vk_context {
13601518
std::string name;
13611519

0 commit comments

Comments
 (0)