@@ -1253,51 +1253,32 @@ class vk_memory_logger {
12531253#define VK_LOG_MEMORY(msg) ((void) 0)
12541254#endif // GGML_VULKAN_MEMORY_DEBUG
12551255
1256- class vk_perf_logger {
1257- public:
1258- void print_timings() {
1259- if (timings.empty()) {
1260- return;
1261- }
1262- std::stringstream ss;
1263- uint64_t total_all_op_times = 0;
1264- ss << "----------------\nVulkan Timings:" << std::endl;
1265- for (const auto & t : timings) {
1266- uint64_t total_op_times = 0;
1267- for (const auto & time : t.second) {
1268- total_op_times += time;
1269- }
1270- ss << t.first << ": " << t.second.size() << " x " << (total_op_times / t.second.size() / 1000.0)
1271- << " us";
1272-
1273- // If we have as many flops entries as timing entries for the op, then compute and log the flops/S.
1274- auto it = flops.find(t.first);
1275- if (it != flops.end() && (it->second).size() == t.second.size()) {
1276- uint64_t total_op_flops = 0;
1277- for (const auto & elem : it->second) {
1278- total_op_flops += elem;
1279- }
1280- ss << " ("
1281- << (double(total_op_flops) / (1000.0 * 1000.0 * 1000.0)) /
1282- (double(total_op_times) / (1000.0 * 1000.0 * 1000.0))
1283- << " GFLOPS/s)";
1284- }
1256+ struct triplet_timing_info {
1257+ std::string operation_type; // Specific Vulkan matmul operation name
1258+ std::string triplet;
1259+ std::string size_combination;
1260+ std::vector<uint64_t> times;
1261+ uint64_t total_time = 0;
1262+ size_t count = 0;
1263+ double avg_time_us = 0.0;
1264+ };
12851265
1286- total_all_op_times += total_op_times;
1266+ class vk_perf_logger {
1267+ public:
1268+ // Simplified approach without context storage
12871269
1288- ss << std::endl;
1289- }
1270+ void print_timings() {
1271+ GGML_LOG_DEBUG("================\nVulkan Profiling Results:\n================\n\n");
1272+ print_legacy_timings();
1273+ print_triplet_timings();
12901274
1291- if (timings.size() > 0) {
1292- ss << "Total time: " << total_all_op_times / 1000.0 << " us." << std::endl;
1293- }
1294- auto ssStr = ss.str();
1295- GGML_LOG_DEBUG("%s", ssStr.c_str());
12961275 timings.clear();
12971276 flops.clear();
1277+ triplet_timings.clear();
12981278 }
12991279
13001280 void log_timing(const ggml_tensor * node, uint64_t time) {
1281+ // Legacy timing tracking (for backward compatibility)
13011282 if (node->op == GGML_OP_UNARY) {
13021283 timings[ggml_unary_op_name(ggml_get_unary_op(node))].push_back(time);
13031284 return;
@@ -1320,6 +1301,13 @@ class vk_perf_logger {
13201301 }
13211302 timings[name].push_back(time);
13221303 flops[name].push_back(m * n * (k + (k - 1)) * batch);
1304+
1305+ // Enhanced triplet tracking for matrix operations
1306+ if (time > 0) { // Only log when we have actual timing data
1307+ // Determine operation type based on tensor characteristics
1308+ std::string op_type = determine_matmul_operation_type(node);
1309+ log_triplet_timing(node, time, op_type);
1310+ }
13231311 return;
13241312 }
13251313 if (node->op == GGML_OP_CONV_2D) {
@@ -1351,11 +1339,181 @@ class vk_perf_logger {
13511339 }
13521340 timings[ggml_op_name(node->op)].push_back(time);
13531341 }
1354- private:
1342+
1343+ private:
13551344 std::map<std::string, std::vector<uint64_t>> timings;
13561345 std::map<std::string, std::vector<uint64_t>> flops;
1346+ std::map<std::string, triplet_timing_info> triplet_timings;
1347+
1348+ void log_triplet_timing(const ggml_tensor * node, uint64_t time, const std::string& operation_type = "generic_matmul") {
1349+ if (!node || !node->src[0] || !node->src[1] || time == 0) return;
1350+
1351+ try {
1352+ // Extract tensor type information
1353+ std::string src0_type = ggml_type_name(node->src[0]->type);
1354+ std::string src1_type = ggml_type_name(node->src[1]->type);
1355+ std::string dst_type = ggml_type_name(node->type);
1356+
1357+ // Create triplet string (using safe ASCII characters)
1358+ std::string triplet = src0_type + " x " + src1_type + " -> " + dst_type;
1359+
1360+ // Extract dimensions
1361+ const uint64_t m = node->src[0]->ne[1];
1362+ const uint64_t n = node->src[1]->ne[1];
1363+ const uint64_t k = node->src[1]->ne[0];
1364+
1365+ // Create size combination string
1366+ std::string size_combo = "[" + std::to_string(k) + "x" + std::to_string(m) + "] x " +
1367+ "[" + std::to_string(k) + "x" + std::to_string(n) + "] -> " +
1368+ "[" + std::to_string(m) + "x" + std::to_string(n) + "]";
1369+
1370+ // Create combined key including operation type
1371+ std::string full_key = operation_type + " | " + triplet + " | " + size_combo;
1372+
1373+ // Update triplet timing info
1374+ auto& info = triplet_timings[full_key];
1375+ info.operation_type = operation_type;
1376+ info.triplet = triplet;
1377+ info.size_combination = size_combo;
1378+ info.times.push_back(time);
1379+ info.total_time += time;
1380+ info.count++;
1381+ info.avg_time_us = (info.total_time / info.count) / 1000.0;
1382+ } catch (...) {
1383+ // Silently ignore errors to prevent crashes
1384+ }
1385+ }
1386+
1387+ // Determine operation type based on tensor characteristics
1388+ std::string determine_matmul_operation_type(const ggml_tensor * node) {
1389+ if (!node || !node->src[0] || !node->src[1]) return "unknown_matmul";
1390+
1391+ try {
1392+ const uint64_t n = node->src[1]->ne[1]; // Number of columns in second tensor
1393+ const bool is_permuted_src0 = ggml_is_permuted(node->src[0]);
1394+ const bool is_permuted_src1 = ggml_is_permuted(node->src[1]);
1395+ const bool is_contiguous_src0 = ggml_is_contiguous(node->src[0]);
1396+ const ggml_type src0_type = node->src[0]->type;
1397+ // const ggml_type src1_type = node->src[1]->type; // unused for now
1398+
1399+ // Logic based on ggml_vk_mul_mat function selection
1400+ if (src0_type == GGML_TYPE_F16 && is_permuted_src0 && is_permuted_src1 && n == 1) {
1401+ return "ggml_vk_mul_mat_vec_p021_f16_f32";
1402+ } else if (src0_type == GGML_TYPE_F16 && !is_contiguous_src0 && !ggml_is_transposed(node->src[1]) && n == 1 &&
1403+ !is_permuted_src0 && !is_permuted_src1) {
1404+ return "ggml_vk_mul_mat_vec_nc_f16_f32";
1405+ } else if (n == 1 && (src0_type == GGML_TYPE_F32 || src0_type == GGML_TYPE_F16 ||
1406+ src0_type == GGML_TYPE_BF16 || ggml_is_quantized(src0_type))) {
1407+ return "ggml_vk_mul_mat_vec_q_f16";
1408+ } else if (node->op == GGML_OP_MUL_MAT_ID) {
1409+ if (n == 1) {
1410+ return "ggml_vk_mul_mat_vec_id_q_f16";
1411+ } else {
1412+ return "ggml_vk_mul_mat_id_q_f16";
1413+ }
1414+ } else {
1415+ return "ggml_vk_mul_mat_q_f16";
1416+ }
1417+ } catch (...) {
1418+ return "unknown_matmul";
1419+ }
1420+ }
1421+
1422+ void print_legacy_timings() {
1423+ if (timings.empty()) {
1424+ return;
1425+ }
1426+ std::stringstream ss;
1427+ uint64_t total_all_op_times = 0;
1428+ ss << "Legacy Timing Summary:\n---------------------" << std::endl;
1429+ for (const auto & t : timings) {
1430+ uint64_t total_op_times = 0;
1431+ for (const auto & time : t.second) {
1432+ total_op_times += time;
1433+ }
1434+ ss << t.first << ": " << t.second.size() << " x " << (total_op_times / t.second.size() / 1000.0)
1435+ << " us";
1436+
1437+ // If we have as many flops entries as timing entries for the op, then compute and log the flops/S.
1438+ auto it = flops.find(t.first);
1439+ if (it != flops.end() && (it->second).size() == t.second.size()) {
1440+ uint64_t total_op_flops = 0;
1441+ for (const auto & elem : it->second) {
1442+ total_op_flops += elem;
1443+ }
1444+ ss << " ("
1445+ << (double(total_op_flops) / (1000.0 * 1000.0 * 1000.0)) /
1446+ (double(total_op_times) / (1000.0 * 1000.0 * 1000.0))
1447+ << " GFLOPS/s)";
1448+ }
1449+
1450+ total_all_op_times += total_op_times;
1451+
1452+ ss << std::endl;
1453+ }
1454+
1455+ if (timings.size() > 0) {
1456+ ss << "Total time: " << total_all_op_times / 1000.0 << " us." << std::endl;
1457+ }
1458+ GGML_LOG_DEBUG("%s", ss.str().c_str());
1459+ }
1460+
1461+ void print_triplet_timings() {
1462+ std::stringstream ss;
1463+ try {
1464+ if (triplet_timings.empty()) {
1465+ ss << "No triplet timing data available." << std::endl;
1466+ return;
1467+ }
1468+
1469+ ss << "Enhanced Triplet Timing Analysis by Operation Type:" << std::endl;
1470+ ss << "====================================================" << std::endl;
1471+
1472+ // Group by operation type
1473+ std::map<std::string, std::vector<triplet_timing_info*>> operation_groups;
1474+ for (auto& [key, info] : triplet_timings) {
1475+ operation_groups[info.operation_type].push_back(&info);
1476+ }
1477+
1478+ // Print results organized by operation type
1479+ for (const auto& [operation_type, entries] : operation_groups) {
1480+ ss << "\n" << operation_type << ":" << std::endl;
1481+ ss << std::string(operation_type.length() + 1, '-') << std::endl;
1482+
1483+ uint64_t operation_total_time = 0;
1484+ size_t operation_total_count = 0;
1485+
1486+ for (const auto& info : entries) {
1487+ if (info->count > 0) {
1488+ ss << " " << info->triplet << " | " << info->size_combination
1489+ << ": " << info->count << " ops, "
1490+ << std::fixed << std::setprecision(2) << info->avg_time_us << " us avg" << std::endl;
1491+
1492+ operation_total_time += info->total_time;
1493+ operation_total_count += info->count;
1494+ }
1495+ }
1496+
1497+ if (operation_total_count > 0) {
1498+ double operation_avg_us = (operation_total_time / operation_total_count) / 1000.0;
1499+ ss << " → " << operation_type << " Summary: " << operation_total_count
1500+ << " total ops, " << std::fixed << std::setprecision(2) << operation_avg_us << " us avg" << std::endl;
1501+ }
1502+ }
1503+
1504+ ss << "\nOverall Statistics:" << std::endl;
1505+ ss << "Total operation types: " << operation_groups.size() << std::endl;
1506+ ss << "Total variations: " << triplet_timings.size() << std::endl;
1507+ ss << std::endl;
1508+ } catch (...) {
1509+ GGML_LOG_DEBUG("%s", ss.str().c_str());
1510+ GGML_LOG_DEBUG("Error in triplet timing analysis - analysis skipped.\n");
1511+ }
1512+ }
13571513};
13581514
1515+ // Thread-local storage definition is now handled above the class
1516+
13591517struct ggml_backend_vk_context {
13601518 std::string name;
13611519
0 commit comments