@@ -94,8 +94,13 @@ static bool is_pow2(uint32_t x) { return x > 1 && (x & (x-1)) == 0; }
9494 } \
9595 } while (0)
9696
97- #ifdef GGML_VULKAN_DEBUG
98- #define VK_LOG_DEBUG(msg) std::cerr << msg << std::endl
97+ #if defined(GGML_VULKAN_DEBUG)
98+ #define VK_LOG_DEBUG(msg) \
99+ do { \
100+ std::stringstream ss; \
101+ ss << msg; \
102+ GGML_LOG_DEBUG("%s", ss.str().c_str()); \
103+ } while (0)
99104#else
100105#define VK_LOG_DEBUG(msg) ((void) 0)
101106#endif // GGML_VULKAN_DEBUG
@@ -1238,51 +1243,32 @@ class vk_memory_logger {
12381243#define VK_LOG_MEMORY(msg) ((void) 0)
12391244#endif // GGML_VULKAN_MEMORY_DEBUG
12401245
1241- class vk_perf_logger {
1242- public:
1243- void print_timings() {
1244- if (timings.empty()) {
1245- return;
1246- }
1247- std::stringstream ss;
1248- uint64_t total_all_op_times = 0;
1249- ss << "----------------\nVulkan Timings:" << std::endl;
1250- for (const auto & t : timings) {
1251- uint64_t total_op_times = 0;
1252- for (const auto & time : t.second) {
1253- total_op_times += time;
1254- }
1255- ss << t.first << ": " << t.second.size() << " x " << (total_op_times / t.second.size() / 1000.0)
1256- << " us";
1257-
1258- // If we have as many flops entries as timing entries for the op, then compute and log the flops/S.
1259- auto it = flops.find(t.first);
1260- if (it != flops.end() && (it->second).size() == t.second.size()) {
1261- uint64_t total_op_flops = 0;
1262- for (const auto & elem : it->second) {
1263- total_op_flops += elem;
1264- }
1265- ss << " ("
1266- << (double(total_op_flops) / (1000.0 * 1000.0 * 1000.0)) /
1267- (double(total_op_times) / (1000.0 * 1000.0 * 1000.0))
1268- << " GFLOPS/s)";
1269- }
1246+ struct triplet_timing_info {
1247+ std::string operation_type; // Specific Vulkan matmul operation name
1248+ std::string triplet;
1249+ std::string size_combination;
1250+ std::vector<uint64_t> times;
1251+ uint64_t total_time = 0;
1252+ size_t count = 0;
1253+ double avg_time_us = 0.0;
1254+ };
12701255
1271- total_all_op_times += total_op_times;
1256+ class vk_perf_logger {
1257+ public:
1258+ // Simplified approach without context storage
12721259
1273- ss << std::endl;
1274- }
1260+ void print_timings() {
1261+ GGML_LOG_DEBUG("================\nVulkan Profiling Results:\n================\n\n");
1262+ print_legacy_timings();
1263+ print_triplet_timings();
12751264
1276- if (timings.size() > 0) {
1277- ss << "Total time: " << total_all_op_times / 1000.0 << " us." << std::endl;
1278- }
1279- auto ssStr = ss.str();
1280- GGML_LOG_DEBUG("%s", ssStr.c_str());
12811265 timings.clear();
12821266 flops.clear();
1267+ triplet_timings.clear();
12831268 }
12841269
12851270 void log_timing(const ggml_tensor * node, uint64_t time) {
1271+ // Legacy timing tracking (for backward compatibility)
12861272 if (node->op == GGML_OP_UNARY) {
12871273 timings[ggml_unary_op_name(ggml_get_unary_op(node))].push_back(time);
12881274 return;
@@ -1305,6 +1291,13 @@ class vk_perf_logger {
13051291 }
13061292 timings[name].push_back(time);
13071293 flops[name].push_back(m * n * (k + (k - 1)) * batch);
1294+
1295+ // Enhanced triplet tracking for matrix operations
1296+ if (time > 0) { // Only log when we have actual timing data
1297+ // Determine operation type based on tensor characteristics
1298+ std::string op_type = determine_matmul_operation_type(node);
1299+ log_triplet_timing(node, time, op_type);
1300+ }
13081301 return;
13091302 }
13101303 if (node->op == GGML_OP_CONV_2D) {
@@ -1336,11 +1329,182 @@ class vk_perf_logger {
13361329 }
13371330 timings[ggml_op_name(node->op)].push_back(time);
13381331 }
1339- private:
1332+
1333+ private:
13401334 std::map<std::string, std::vector<uint64_t>> timings;
13411335 std::map<std::string, std::vector<uint64_t>> flops;
1336+ std::map<std::string, triplet_timing_info> triplet_timings;
1337+
1338+ void log_triplet_timing(const ggml_tensor * node, uint64_t time, const std::string& operation_type = "generic_matmul") {
1339+ if (!node || !node->src[0] || !node->src[1] || time == 0) return;
1340+
1341+ try {
1342+ // Extract tensor type information
1343+ std::string src0_type = ggml_type_name(node->src[0]->type);
1344+ std::string src1_type = ggml_type_name(node->src[1]->type);
1345+ std::string dst_type = ggml_type_name(node->type);
1346+
1347+ // Create triplet string (using safe ASCII characters)
1348+ std::string triplet = src0_type + " x " + src1_type + " -> " + dst_type;
1349+
1350+ // Extract dimensions
1351+ const uint64_t m = node->src[0]->ne[1];
1352+ const uint64_t n = node->src[1]->ne[1];
1353+ const uint64_t k = node->src[1]->ne[0];
1354+
1355+ // Create size combination string
1356+ std::string size_combo = "[" + std::to_string(k) + "x" + std::to_string(m) + "] x " +
1357+ "[" + std::to_string(k) + "x" + std::to_string(n) + "] -> " +
1358+ "[" + std::to_string(m) + "x" + std::to_string(n) + "]";
1359+
1360+ // Create combined key including operation type
1361+ std::string full_key = operation_type + " | " + triplet + " | " + size_combo;
1362+
1363+ // Update triplet timing info
1364+ auto& info = triplet_timings[full_key];
1365+ info.operation_type = operation_type;
1366+ info.triplet = triplet;
1367+ info.size_combination = size_combo;
1368+ info.times.push_back(time);
1369+ info.total_time += time;
1370+ info.count++;
1371+ info.avg_time_us = (info.total_time / info.count) / 1000.0;
1372+ } catch (...) {
1373+ // Silently ignore errors to prevent crashes
1374+ }
1375+ }
1376+
1377+ // Determine operation type based on tensor characteristics
1378+ std::string determine_matmul_operation_type(const ggml_tensor * node) {
1379+ if (!node || !node->src[0] || !node->src[1]) return "unknown_matmul";
1380+
1381+ try {
1382+ const uint64_t n = node->src[1]->ne[1]; // Number of columns in second tensor
1383+ const bool is_permuted_src0 = ggml_is_permuted(node->src[0]);
1384+ const bool is_permuted_src1 = ggml_is_permuted(node->src[1]);
1385+ const bool is_contiguous_src0 = ggml_is_contiguous(node->src[0]);
1386+ const ggml_type src0_type = node->src[0]->type;
1387+ // const ggml_type src1_type = node->src[1]->type; // unused for now
1388+
1389+ // Logic based on ggml_vk_mul_mat function selection
1390+ if (src0_type == GGML_TYPE_F16 && is_permuted_src0 && is_permuted_src1 && n == 1) {
1391+ return "ggml_vk_mul_mat_vec_p021_f16_f32";
1392+ } else if (src0_type == GGML_TYPE_F16 && !is_contiguous_src0 && !ggml_is_transposed(node->src[1]) && n == 1 &&
1393+ !is_permuted_src0 && !is_permuted_src1) {
1394+ return "ggml_vk_mul_mat_vec_nc_f16_f32";
1395+ } else if (n == 1 && (src0_type == GGML_TYPE_F32 || src0_type == GGML_TYPE_F16 ||
1396+ src0_type == GGML_TYPE_BF16 || ggml_is_quantized(src0_type))) {
1397+ return "ggml_vk_mul_mat_vec_q_f16";
1398+ } else if (node->op == GGML_OP_MUL_MAT_ID) {
1399+ if (n == 1) {
1400+ return "ggml_vk_mul_mat_vec_id_q_f16";
1401+ } else {
1402+ return "ggml_vk_mul_mat_id_q_f16";
1403+ }
1404+ } else {
1405+ return "ggml_vk_mul_mat_q_f16";
1406+ }
1407+ } catch (...) {
1408+ return "unknown_matmul";
1409+ }
1410+ }
1411+
1412+ void print_legacy_timings() {
1413+ if (timings.empty()) {
1414+ return;
1415+ }
1416+ std::stringstream ss;
1417+ uint64_t total_all_op_times = 0;
1418+ ss << "Legacy Timing Summary:\n---------------------" << std::endl;
1419+ for (const auto & t : timings) {
1420+ uint64_t total_op_times = 0;
1421+ for (const auto & time : t.second) {
1422+ total_op_times += time;
1423+ }
1424+ ss << t.first << ": " << t.second.size() << " x " << (total_op_times / t.second.size() / 1000.0)
1425+ << " us";
1426+
1427+ // If we have as many flops entries as timing entries for the op, then compute and log the flops/S.
1428+ auto it = flops.find(t.first);
1429+ if (it != flops.end() && (it->second).size() == t.second.size()) {
1430+ uint64_t total_op_flops = 0;
1431+ for (const auto & elem : it->second) {
1432+ total_op_flops += elem;
1433+ }
1434+ ss << " ("
1435+ << (double(total_op_flops) / (1000.0 * 1000.0 * 1000.0)) /
1436+ (double(total_op_times) / (1000.0 * 1000.0 * 1000.0))
1437+ << " GFLOPS/s)";
1438+ }
1439+
1440+ total_all_op_times += total_op_times;
1441+
1442+ ss << std::endl;
1443+ }
1444+
1445+ if (timings.size() > 0) {
1446+ ss << "Total time: " << total_all_op_times / 1000.0 << " us." << std::endl;
1447+ }
1448+ ss << std::endl;
1449+ GGML_LOG_DEBUG("%s", ss.str().c_str());
1450+ }
1451+
1452+ void print_triplet_timings() {
1453+ std::stringstream ss;
1454+ try {
1455+ if (triplet_timings.empty()) {
1456+ ss << "No triplet timing data available." << std::endl;
1457+ return;
1458+ }
1459+
1460+ ss << "Enhanced Triplet Timing Analysis by Operation Type:" << std::endl;
1461+ ss << "====================================================" << std::endl;
1462+
1463+ // Group by operation type
1464+ std::map<std::string, std::vector<triplet_timing_info*>> operation_groups;
1465+ for (auto& [key, info] : triplet_timings) {
1466+ operation_groups[info.operation_type].push_back(&info);
1467+ }
1468+
1469+ // Print results organized by operation type
1470+ for (const auto& [operation_type, entries] : operation_groups) {
1471+ ss << "\n" << operation_type << ":" << std::endl;
1472+ ss << std::string(operation_type.length() + 1, '-') << std::endl;
1473+
1474+ uint64_t operation_total_time = 0;
1475+ size_t operation_total_count = 0;
1476+
1477+ for (const auto& info : entries) {
1478+ if (info->count > 0) {
1479+ ss << " " << info->triplet << " | " << info->size_combination
1480+ << ": " << info->count << " ops, "
1481+ << std::fixed << std::setprecision(2) << info->avg_time_us << " us avg" << std::endl;
1482+
1483+ operation_total_time += info->total_time;
1484+ operation_total_count += info->count;
1485+ }
1486+ }
1487+
1488+ if (operation_total_count > 0) {
1489+ double operation_avg_us = (operation_total_time / operation_total_count) / 1000.0;
1490+ ss << " → " << operation_type << " Summary: " << operation_total_count
1491+ << " total ops, " << std::fixed << std::setprecision(2) << operation_avg_us << " us avg" << std::endl;
1492+ }
1493+ }
1494+
1495+ ss << "\nOverall Statistics:" << std::endl;
1496+ ss << "Total operation types: " << operation_groups.size() << std::endl;
1497+ ss << "Total variations: " << triplet_timings.size() << std::endl;
1498+ ss << std::endl;
1499+ GGML_LOG_DEBUG("%s", ss.str().c_str());
1500+ } catch (...) {
1501+ GGML_LOG_DEBUG("Error in triplet timing analysis - analysis skipped.\n");
1502+ }
1503+ }
13421504};
13431505
1506+ // Thread-local storage definition is now handled above the class
1507+
13441508struct ggml_backend_vk_context {
13451509 std::string name;
13461510
0 commit comments