55
66#include < array>
77#include < cassert>
8+ #include < cinttypes>
89#include < stdexcept>
910
1011#include " cache.h"
@@ -272,10 +273,10 @@ static TLoad& coop_tload_find_partner(Hart& hart, const TLoad& tload)
272273 LOG_HART (ERR, hart, " coop tload: tensor_coop does not match: expected 0x%08x, found 0x%08x" , tload.tcoop , other.tcoop );
273274 }
274275 if (tload.value != other.value ) {
275- WARN_HART (tensors, hart, " coop tload: CSR does not match: expected 0x%016lx , found 0x%016lx " , tload.value , other.value );
276+ WARN_HART (tensors, hart, " coop tload: CSR does not match: expected 0x%016 " PRIx64 " , found 0x%016 " PRIx64 , tload.value , other.value );
276277 }
277278 if (tload.stride != other.stride ) {
278- WARN_HART (tensors, hart, " coop tload: x31 does not match: expected 0x%016lx , found 0x%016lx " , tload.stride , other.stride );
279+ WARN_HART (tensors, hart, " coop tload: x31 does not match: expected 0x%016 " PRIx64 " , found 0x%016 " PRIx64 , tload.stride , other.stride );
279280 }
280281 return other;
281282}
@@ -354,7 +355,7 @@ void tensor_load_start(Hart& cpu, uint64_t control)
354355 LOG_REG (" :" , 31 );
355356
356357 const auto uuid = (tload.uuid = ++(cpu.core ->tensor_uuid ));
357- LOG_HART (DEBUG, cpu, " \t (TL-H%u-%lu ) Start TensorLoad with msk: %d, coop: %d, cmd: %d, "
358+ LOG_HART (DEBUG, cpu, " \t (TL-H%u-%" PRIu64 " ) Start TensorLoad with msk: %d, coop: %d, cmd: %d, "
358359 " start: %d, tenb: %d, addr: 0x%" PRIx64 " , boffset: %u, rows: %d, "
359360 " stride: 0x%" PRIx64 " , id: %d" , cpu.mhartid , uuid, int (msk), int (coop),
360361 cmd, start, tenb, addr, boffset, rows, stride, id);
@@ -503,7 +504,7 @@ void tensor_load_execute(Hart& cpu, int tlid, bool tenb)
503504
504505 switch (cmd) {
505506 case tload_cmd_load:
506- LOG_HART (DEBUG, cpu, " (TL-H%u-%lu ) Execute TensorLoad with msk: %d, coop: %d, "
507+ LOG_HART (DEBUG, cpu, " (TL-H%u-%" PRIu64 " ) Execute TensorLoad with msk: %d, coop: %d, "
507508 " start: %d, tenb: %d, addr: 0x%" PRIx64 " , boffset: %u, "
508509 " rows: %d, stride: 0x%" PRIx64 " , id: %d, tmask: 0x%lx" ,
509510 cpu.mhartid , tload.uuid , int (msk), int (coop), start, tenb, addr, boffset,
@@ -530,7 +531,7 @@ void tensor_load_execute(Hart& cpu, int tlid, bool tenb)
530531 break ;
531532 case tload_cmd_interleave8:
532533 boffset *= 16 ;
533- LOG_HART (DEBUG, cpu, " (TL-H%u-%lu ) Execute TensorLoadInterleave8 with msk: %d, "
534+ LOG_HART (DEBUG, cpu, " (TL-H%u-%" PRIu64 " ) Execute TensorLoadInterleave8 with msk: %d, "
534535 " coop: %d, start: %d, tenb: %d, addr: 0x%" PRIx64 " , boffset: "
535536 " %u, rows: %d, stride: 0x%" PRIx64 " , id: %d, tmask: 0x%lx" ,
536537 cpu.mhartid , tload.uuid , int (msk), int (coop), start, tenb, addr, boffset,
@@ -567,7 +568,7 @@ void tensor_load_execute(Hart& cpu, int tlid, bool tenb)
567568 break ;
568569 case tload_cmd_interleave16:
569570 boffset = (boffset & 0x2 ) * 16 ;
570- LOG_HART (DEBUG, cpu, " (TL-H%u-%lu ) Execute TensorLoadInterleave16 with msk: %d, "
571+ LOG_HART (DEBUG, cpu, " (TL-H%u-%" PRIu64 " ) Execute TensorLoadInterleave16 with msk: %d, "
571572 " coop: %d, start: %d, tenb: %d, addr: 0x%" PRIx64 " , boffset: "
572573 " %u, rows: %d, stride: 0x%" PRIx64 " , id: %d, tmask: 0x%lx" ,
573574 cpu.mhartid , tload.uuid , int (msk), int (coop), start, tenb, addr, boffset,
@@ -604,7 +605,7 @@ void tensor_load_execute(Hart& cpu, int tlid, bool tenb)
604605 break ;
605606 case tload_cmd_transpose8:
606607 boffset *= 16 ;
607- LOG_HART (DEBUG, cpu, " (TL-H%u-%lu ) Execute TensorLoadTranspose8 with msk: %d, "
608+ LOG_HART (DEBUG, cpu, " (TL-H%u-%" PRIu64 " ) Execute TensorLoadTranspose8 with msk: %d, "
608609 " coop: %d, start: %d, tenb: %d, addr: 0x%" PRIx64 " , boffset: "
609610 " %u, rows: %d, stride: 0x%" PRIx64 " , id: %d, tmask: 0x%lx" ,
610611 cpu.mhartid , tload.uuid , int (msk), int (coop), start, tenb, addr, boffset,
@@ -638,7 +639,7 @@ void tensor_load_execute(Hart& cpu, int tlid, bool tenb)
638639 break ;
639640 case tload_cmd_transpose16:
640641 boffset = (boffset & 0x2 ) * 8 ;
641- LOG_HART (DEBUG, cpu, " (TL-H%u-%lu ) Execute TensorLoadTranspose16 with msk: %d, "
642+ LOG_HART (DEBUG, cpu, " (TL-H%u-%" PRIu64 " ) Execute TensorLoadTranspose16 with msk: %d, "
642643 " coop: %d, start: %d, tenb: %d, addr: 0x%" PRIx64 " , boffset: "
643644 " %u, rows: %d, stride: 0x%" PRIx64 " , id: %d, tmask: 0x%lx" ,
644645 cpu.mhartid , tload.uuid , int (msk), int (coop), start, tenb, addr, boffset,
@@ -671,7 +672,7 @@ void tensor_load_execute(Hart& cpu, int tlid, bool tenb)
671672 }
672673 break ;
673674 case tload_cmd_transpose32:
674- LOG_HART (DEBUG, cpu, " (TL-H%u-%lu ) Execute TensorLoadTranspose32 with msk: %d, "
675+ LOG_HART (DEBUG, cpu, " (TL-H%u-%" PRIu64 " ) Execute TensorLoadTranspose32 with msk: %d, "
675676 " coop: %d, start: %d, tenb: %d, addr: 0x%" PRIx64 " , boffset: "
676677 " %u, rows: %d, stride: 0x%" PRIx64 " , id: %d, tmask: 0x%lx" ,
677678 cpu.mhartid , tload.uuid , int (msk), int (coop), start, tenb, addr, boffset,
@@ -789,7 +790,7 @@ void tensor_quant_start(Hart& cpu, uint64_t value)
789790 set_rounding_mode (cpu, FRM);
790791
791792 const auto uuid = (cpu.core ->tquant .uuid = ++(cpu.core ->tensor_uuid ));
792- LOG_HART (DEBUG, cpu, " \t (TQ-H%u-%lu ) Start TensorQuant with start %u, arows: %u, "
793+ LOG_HART (DEBUG, cpu, " \t (TQ-H%u-%" PRIu64 " ) Start TensorQuant with start %u, arows: %u, "
793794 " acols: %u, freg: %u, frm: %s" , cpu.mhartid , uuid, start, arows, acols, freg,
794795 get_rounding_mode (cpu, FRM));
795796
@@ -857,7 +858,7 @@ void tensor_quant_execute(Hart& cpu)
857858 start = start % L1_SCP_ENTRIES;
858859
859860 const auto uuid = cpu.core ->tquant .uuid ;
860- LOG_HART (DEBUG, cpu, " (TQ-H%u-%lu ) Execute TensorQuant with start: %u, arows: %u, "
861+ LOG_HART (DEBUG, cpu, " (TQ-H%u-%" PRIu64 " ) Execute TensorQuant with start: %u, arows: %u, "
861862 " acols: %u, freg: %u, frm: %s" , cpu.mhartid , uuid, start, arows, acols, freg,
862863 get_rounding_mode (cpu, cpu.core ->tquant .frm ));
863864
@@ -1096,7 +1097,7 @@ void tensor_store_start(Hart& cpu, uint64_t tstorereg)
10961097 LOG_REG (" :" , 31 );
10971098
10981099 const auto uuid = (cpu.core ->tstore .uuid = ++(cpu.core ->tensor_uuid ));
1099- LOG_HART (DEBUG, cpu, " \t (TS-H%u-%lu ) Start TensorStore with addr: %016" PRIx64 " , "
1100+ LOG_HART (DEBUG, cpu, " \t (TS-H%u-%" PRIu64 " ) Start TensorStore with addr: %016" PRIx64 " , "
11001101 " stride: %016" PRIx64 " , regstart: %d, rows: %d, cols: %d, "
11011102 " srcinc: %d, coop: %d" , cpu.mhartid , uuid, addr, stride, regstart, rows,
11021103 cols, srcinc, coop);
@@ -1167,7 +1168,7 @@ void tensor_store_execute(Hart& cpu)
11671168 notify_tensor_store (cpu, false , rows, cols, coop);
11681169
11691170 const auto uuid = cpu.core ->tstore .uuid ;
1170- LOG_HART (DEBUG, cpu, " (TS-H%u-%lu ) Execute TensorStore with addr: %016" PRIx64 " , "
1171+ LOG_HART (DEBUG, cpu, " (TS-H%u-%" PRIu64 " ) Execute TensorStore with addr: %016" PRIx64 " , "
11711172 " stride: %016" PRIx64 " , regstart: %d, rows: %d, cols: %d, srcinc: %d, "
11721173 " coop: %d" , cpu.mhartid , uuid, addr, stride, regstart, rows, cols, srcinc, coop);
11731174
@@ -1238,7 +1239,7 @@ static void tensor_fma32_execute(Hart& cpu)
12381239 }
12391240
12401241 const auto uuid = cpu.core ->tmul .uuid ;
1241- LOG_HART (DEBUG, cpu, " (TM-H%u-%lu ) Execute TensorFMA32 with msk: %d, bcols: %d, "
1242+ LOG_HART (DEBUG, cpu, " (TM-H%u-%" PRIu64 " ) Execute TensorFMA32 with msk: %d, bcols: %d, "
12421243 " arows: %d, acols: %d, aoffset: %d, tenb: %d, bstart: %d, "
12431244 " astart: %d, mul: %d, rm: %s, tmask: 0x%lx" , cpu.mhartid , uuid, usemsk,
12441245 bcols, arows, acols, aoffset, tenb, bstart, astart, first_pass,
@@ -1337,7 +1338,7 @@ static void tensor_fma16a32_execute(Hart& cpu)
13371338 }
13381339
13391340 const auto uuid = cpu.core ->tmul .uuid ;
1340- LOG_HART (DEBUG, cpu, " (TM-H%u-%lu ) Execute TensorFMA16A32 with msk: %d, bcols: %d, "
1341+ LOG_HART (DEBUG, cpu, " (TM-H%u-%" PRIu64 " ) Execute TensorFMA16A32 with msk: %d, bcols: %d, "
13411342 " arows: %d, acols: %d, aoffset: %d, tenb: %d, bstart: %d, "
13421343 " astart: %d, mul: %d, rm: rtz, tmask: 0x%lx" , cpu.mhartid , uuid, usemsk,
13431344 bcols, arows, acols, aoffset, tenb, bstart, astart, first_pass,
@@ -1442,7 +1443,7 @@ static void tensor_ima8a32_execute(Hart& cpu)
14421443 }
14431444
14441445 const auto uuid = cpu.core ->tmul .uuid ;
1445- LOG_HART (DEBUG, cpu, " (TM-H%u-%lu ) Execute TensorIMA8A32 with msk: %d, bcols: %d, "
1446+ LOG_HART (DEBUG, cpu, " (TM-H%u-%" PRIu64 " ) Execute TensorIMA8A32 with msk: %d, bcols: %d, "
14461447 " arows: %d, acols: %d, aoffset: %d, dst: %d, ub: %d, ua: %d, "
14471448 " tenb: %d, bstart: %d, astart: %d mul: %d, tmask: 0x%lx" , cpu.mhartid , uuid,
14481449 usemsk, bcols, arows, acols, aoffset, tenc2rf, ub, ua, tenb,
@@ -1635,20 +1636,20 @@ void tensor_fma_start(Hart& cpu, uint64_t control)
16351636 case tfma_type_fp32:
16361637 // Illegal instruction exception has higher priority than other errors
16371638 set_rounding_mode (cpu, FRM);
1638- LOG_HART (DEBUG, cpu, " \t (TM-H%u-%lu ) Start TensorFMA32 with msk: %d, bcols: %d, "
1639+ LOG_HART (DEBUG, cpu, " \t (TM-H%u-%" PRIu64 " ) Start TensorFMA32 with msk: %d, bcols: %d, "
16391640 " arows: %d, acols: %d, aoffset: %d, tenb: %d, bstart: %d, "
16401641 " astart: %d, mul: %d, rm: %s" , cpu.mhartid , uuid, msk, bcols, arows, acols,
16411642 aoffset, tenb, bstart, astart, mul,
16421643 get_rounding_mode (cpu, FRM));
16431644 break ;
16441645 case tfma_type_fp16:
1645- LOG_HART (DEBUG, cpu, " \t (TM-H%u-%lu ) Start TensorFMA16A32 with msk: %d, bcols: %d, "
1646+ LOG_HART (DEBUG, cpu, " \t (TM-H%u-%" PRIu64 " ) Start TensorFMA16A32 with msk: %d, bcols: %d, "
16461647 " arows: %d, acols: %d, aoffset: %d, tenb: %d, bstart: %d, "
16471648 " astart: %d, mul: %d, rm: rtz" , cpu.mhartid , uuid, msk, bcols, arows, acols*2 ,
16481649 aoffset*2 , tenb, bstart, astart, mul);
16491650 break ;
16501651 case tfma_type_int8:
1651- LOG_HART (DEBUG, cpu, " \t (TM-H%u-%lu ) Start TensorIMA8A32 with msk: %d, bcols: %d, "
1652+ LOG_HART (DEBUG, cpu, " \t (TM-H%u-%" PRIu64 " ) Start TensorIMA8A32 with msk: %d, bcols: %d, "
16521653 " arows: %d, acols: %d, aoffset: %d, dst: %d, ub: %d, ua: %d, "
16531654 " tenb: %d, bstart: %d, astart: %d mul: %d" , cpu.mhartid , uuid, msk, bcols,
16541655 arows, acols*4 , aoffset*4 , dst, ub, ua, tenb, bstart, astart, mul);
@@ -1861,7 +1862,7 @@ void tensor_reduce_start(Hart& cpu, uint64_t value)
18611862 }
18621863
18631864 const auto uuid = (reduce.uuid = ++(cpu.core ->tensor_uuid ));
1864- LOG_HART (DEBUG, cpu, " \t (TR-H%u-%lu ) Start %s(%s) with partner: H%u, freg: %u, "
1865+ LOG_HART (DEBUG, cpu, " \t (TR-H%u-%" PRIu64 " ) Start %s(%s) with partner: H%u, freg: %u, "
18651866 " count: %u" , cpu.mhartid , uuid, reducecmd[static_cast <int >(command)],
18661867 ((reduce.state == TReduce::State::waiting_to_receive)
18671868 ? " recv" : " send" ), reduce.hart ->mhartid , reduce.freg ,
@@ -2051,7 +2052,7 @@ void tensor_reduce_execute(Hart& cpu)
20512052#ifndef ZSIM
20522053 const auto uuid = cpu.core ->reduce .uuid ;
20532054 LOG_HART (DEBUG, cpu,
2054- " (TR-H%u-%lu ) Execute tensor reduce with sender=H%u funct=%s count=%u rmode=%s" ,
2055+ " (TR-H%u-%" PRIu64 " ) Execute tensor reduce with sender=H%u funct=%s count=%u rmode=%s" ,
20552056 cpu.mhartid , uuid,
20562057 snd_cpu.mhartid , fnctnm[cpu.core ->reduce .funct ],
20572058 cpu.core ->reduce .count ,
0 commit comments