@@ -629,6 +629,7 @@ inline int32_t getShuffleBlockSize(int epilogueTileM)
629629inline bool checkAndUpdateGemmOptions (
630630 GemmOptions& options, tg::CudaArch cudaArch, int tpGrpSize, bool updateOptions = true )
631631{
632+ std::cout << " Checking GemmOptions..." << std::endl;
632633 options.mWorldSize = tpGrpSize;
633634
634635 bool isBlackwell = tg::isArchBlackwell (cudaArch);
@@ -641,9 +642,11 @@ inline bool checkAndUpdateGemmOptions(
641642 }
642643 else
643644 {
645+ std::cout << " failed at dtypeB" << std::endl;
644646 return false ;
645647 }
646648 }
649+ std::cout << " ckpt 0" << std::endl;
647650
648651 // If not specified, use the input dtypes as MMA dtypes (no cast required).
649652 if (options.mDtypeMmaA == tg::Dtype::Void)
@@ -654,6 +657,7 @@ inline bool checkAndUpdateGemmOptions(
654657 }
655658 else
656659 {
660+ std::cout << " failed at dtypeMmaA" << std::endl;
657661 return false ;
658662 }
659663 }
@@ -665,6 +669,7 @@ inline bool checkAndUpdateGemmOptions(
665669 }
666670 else
667671 {
672+ std::cout << " failed at dtypeMmaB" << std::endl;
668673 return false ;
669674 }
670675 }
@@ -686,8 +691,13 @@ inline bool checkAndUpdateGemmOptions(
686691 // It must not exceed the padded dimensions.
687692 if (options.mValidM > options.mM || options.mValidN > options.mN || options.mValidK > options.mK )
688693 {
694+ std::cout << " test validM/N/K start" << std::endl;
695+ std::cout << " options.mValidM=" << options.mValidM << " , options.mM=" << options.mM << std::endl;
696+ std::cout << " options.mValidN=" << options.mValidN << " , options.mN=" << options.mN << std::endl;
697+ std::cout << " options.mValidK=" << options.mValidK << " , options.mK=" << options.mK << std::endl;
689698 TLLM_LOG_WARNING (options.mValidK <= options.mK ,
690699 " ValidM, ValidN, and ValidK must be less than or equal to M, N, and K respectively." );
700+ std::cout << " test validM/N/K start2" << std::endl;
691701 if (updateOptions)
692702 {
693703 options.mValidM = std::min (options.mValidM , options.mM );
@@ -696,6 +706,7 @@ inline bool checkAndUpdateGemmOptions(
696706 }
697707 else
698708 {
709+ std::cout << " failed at validM/N/K" << std::endl;
699710 return false ;
700711 }
701712 }
@@ -706,10 +717,12 @@ inline bool checkAndUpdateGemmOptions(
706717 bool hasValidParams = (options.mValidM != -1 && options.mValidM != options.mM )
707718 || (options.mValidN != -1 && options.mValidN != options.mN )
708719 || (options.mValidK != -1 && options.mValidK != options.mK );
720+ std::cout << " test BlockMajorK start" << std::endl;
709721 TLLM_CHECK_ERROR (!hasValidParams,
710722 " BlockMajorK layout does not support validM/validN/validK parameters due to swizzled layout. "
711723 " Found validM=" ,
712724 options.mValidM , " validN=" , options.mValidN , " validK=" , options.mValidK );
725+ std::cout << " test BlockMajorK start2" << std::endl;
713726 }
714727
715728#ifdef TLLM_PUBLIC_RELEASE
@@ -718,7 +731,7 @@ inline bool checkAndUpdateGemmOptions(
718731 TLLM_CHECK_ERROR (false , " E2m1 x E4m3 is not supported for JIT compile. Use cubins instead." );
719732 }
720733#endif // TLLM_PUBLIC_RELEASE
721-
734+ std::cout << " ckpt 1 " << std::endl;
722735 // Check that the A cast is supported.
723736 // Currently, we only support {MxFp4, NvFp4} -> Bf16.
724737 TLLM_CHECK_ERROR ((options.mDtypeA == options.mDtypeMmaA )
@@ -762,7 +775,7 @@ inline bool checkAndUpdateGemmOptions(
762775 TLLM_CHECK_ERROR (options.mDtypeMmaB == tg::Dtype::E4m3 || options.mDtypeMmaB == tg::Dtype::E2m1,
763776 " For dtypeMmaA = E4m3/E2m1 A, dtypeMmaB must also be E4m3/E2m1." );
764777 }
765-
778+ std::cout << " ckpt 2 " << std::endl;
766779 // kind::mxf8f6f4
767780 if (options.mDtypeMmaA == tg::Dtype::MxE4m3 || options.mDtypeMmaA == tg::Dtype::MxE2m1)
768781 {
@@ -774,7 +787,7 @@ inline bool checkAndUpdateGemmOptions(
774787 TLLM_CHECK_ERROR (options.mDtypeMmaA == tg::Dtype::MxE4m3 || options.mDtypeMmaA == tg::Dtype::MxE2m1,
775788 " For dtypeMmaB = MxE4m3 or MxE2m1, dtypeMmaA must also be MxE4m3 or MxE2m1." );
776789 }
777-
790+ std::cout << " ckpt 3 " << std::endl;
778791 // kind::f16
779792 if (options.mDtypeMmaA == tg::Dtype::Fp16 || options.mDtypeMmaA == tg::Dtype::Bfloat16)
780793 {
@@ -806,6 +819,7 @@ inline bool checkAndUpdateGemmOptions(
806819 }
807820 else
808821 {
822+ std::cout << " failed at mmaKind" << std::endl;
809823 return false ;
810824 }
811825 }
@@ -822,6 +836,7 @@ inline bool checkAndUpdateGemmOptions(
822836 }
823837 else
824838 {
839+ std::cout << " failed at mmaK" << std::endl;
825840 return false ;
826841 }
827842 }
@@ -852,7 +867,7 @@ inline bool checkAndUpdateGemmOptions(
852867 " Hopper does not use TMEM. The register layout corresponds to 16dp256bit. Got " , options.mEpilogueLdtmDps ,
853868 " dp" , options.mEpilogueLdtmBits , " bit." );
854869 }
855-
870+ std::cout << " ckpt 4 " << std::endl;
856871 // Constraints for NvFp4 and MxFp8.
857872 if ((options.mMmaKind == tg::MmaKind::MxFp4NvFp4 || options.mMmaKind == tg::MmaKind::MxFp8Fp6Fp4
858873 || options.mDtypeC == tg::Dtype::MxE4m3)
@@ -872,6 +887,7 @@ inline bool checkAndUpdateGemmOptions(
872887 }
873888 else
874889 {
890+ std::cout << " failed at mmaM" << std::endl;
875891 return false ;
876892 }
877893 }
@@ -916,6 +932,7 @@ inline bool checkAndUpdateGemmOptions(
916932 }
917933 else
918934 {
935+ std::cout << " failed at mmaK" << std::endl;
919936 return false ;
920937 }
921938 }
@@ -1022,6 +1039,7 @@ inline bool checkAndUpdateGemmOptions(
10221039 }
10231040 else
10241041 {
1042+ std::cout << " failed at dtypeC" << std::endl;
10251043 return false ;
10261044 }
10271045 }
@@ -1037,6 +1055,7 @@ inline bool checkAndUpdateGemmOptions(
10371055 }
10381056 else
10391057 {
1058+ std::cout << " failed at epilogueTileM" << std::endl;
10401059 return false ;
10411060 }
10421061 }
@@ -1051,6 +1070,7 @@ inline bool checkAndUpdateGemmOptions(
10511070 }
10521071 else
10531072 {
1073+ std::cout << " failed at epilogueTileN" << std::endl;
10541074 return false ;
10551075 }
10561076 }
@@ -1066,6 +1086,7 @@ inline bool checkAndUpdateGemmOptions(
10661086 }
10671087 else
10681088 {
1089+ std::cout << " failed at epilogueTileM/N" << std::endl;
10691090 return false ;
10701091 }
10711092 }
@@ -1080,6 +1101,7 @@ inline bool checkAndUpdateGemmOptions(
10801101 }
10811102 else
10821103 {
1104+ std::cout << " failed at epilogueTileM" << std::endl;
10831105 return false ;
10841106 }
10851107 }
@@ -1200,6 +1222,7 @@ inline bool checkAndUpdateGemmOptions(
12001222 }
12011223 else
12021224 {
1225+ std::cout << " failed at epilogueTileM/N" << std::endl;
12031226 return false ;
12041227 }
12051228 }
@@ -1223,6 +1246,7 @@ inline bool checkAndUpdateGemmOptions(
12231246 }
12241247 else
12251248 {
1249+ std::cout << " failed at mmaStages" << std::endl;
12261250 return false ;
12271251 }
12281252 }
@@ -1234,6 +1258,7 @@ inline bool checkAndUpdateGemmOptions(
12341258 }
12351259 else
12361260 {
1261+ std::cout << " failed at mmaStages" << std::endl;
12371262 return false ;
12381263 }
12391264 }
@@ -1245,6 +1270,7 @@ inline bool checkAndUpdateGemmOptions(
12451270 }
12461271 else
12471272 {
1273+ std::cout << " failed at mmaStages" << std::endl;
12481274 return false ;
12491275 }
12501276 }
@@ -1341,6 +1367,7 @@ inline bool checkAndUpdateGemmOptions(
13411367 }
13421368 else
13431369 {
1370+ std::cout << " failed at tileM" << std::endl;
13441371 return false ;
13451372 }
13461373 }
@@ -1355,6 +1382,7 @@ inline bool checkAndUpdateGemmOptions(
13551382 }
13561383 else
13571384 {
1385+ std::cout << " failed at numSlicesForSliceK" << std::endl;
13581386 return false ;
13591387 }
13601388 }
@@ -1399,6 +1427,7 @@ inline bool checkAndUpdateGemmOptions(
13991427 }
14001428 else
14011429 {
1430+ std::cout << " failed at unrollLoop2xForMma" << std::endl;
14021431 return false ;
14031432 }
14041433 }
@@ -1419,6 +1448,7 @@ inline bool checkAndUpdateGemmOptions(
14191448 }
14201449 else
14211450 {
1451+ std::cout << " failed at tileScheduler" << std::endl;
14221452 return false ;
14231453 }
14241454 }
@@ -1434,6 +1464,7 @@ inline bool checkAndUpdateGemmOptions(
14341464 }
14351465 else
14361466 {
1467+ std::cout << " failed at earlyExit" << std::endl;
14371468 return false ;
14381469 }
14391470 }
@@ -1521,6 +1552,7 @@ inline bool checkAndUpdateGemmOptions(
15211552 }
15221553 else
15231554 {
1555+ std::cout << " failed at blockK" << std::endl;
15241556 return false ;
15251557 }
15261558 }
0 commit comments