@@ -72,7 +72,7 @@ void DeviceSplitter::ComputeGD(vector<RegTree> &vTree, vector<vector<KeyValue> >
7272 KernelConf conf;
7373 // start prediction
7474// ################### for my future experiments
75- bool bOptimisePred = true ;
75+ bool bOptimisePred = false ;
7676 if (nNumofTree > 0 && numofUsedFea >0 && bOptimisePred == false )// numofUsedFea > 0 means the tree has more than one node.
7777 {
7878 uint startPos = 0 ;
@@ -85,14 +85,11 @@ bool bOptimisePred = true;
8585 int *pNumofFea = manager.m_pDNumofFea + startInsId;
8686 int numofInsToFill = nNumofIns;
8787
88- dim3 dimGridThreadForEachIns;
89- conf.ComputeBlock (numofInsToFill, dimGridThreadForEachIns);
90- int sharedMemSizeEachIns = 1 ;
9188 // memset dense instances
9289 real *pTempDense = manager.m_pdDenseInsEachBag + bagId * bagManager.m_maxNumUsedFeaATree * bagManager.m_numIns ;
9390 checkCudaErrors (cudaMemset (pTempDense, -1 , sizeof (real) * bagManager.m_maxNumUsedFeaATree * bagManager.m_numIns ));
9491 GETERROR (" before FillMultiDense" );
95- FillMultiDense<<<dimGridThreadForEachIns, sharedMemSizeEachIns , 0 , (*(cudaStream_t*)pStream)>>> (
92+ FillMultiDense<<<numofInsToFill, 1 , 0 , (*(cudaStream_t*)pStream)>>> (
9693 pDevInsValue, pInsStartPos, pDevFeaId, pNumofFea,
9794 pTempDense,
9895 manager.m_pSortedUsedFeaIdBag + bagId * bagManager.m_maxNumUsedFeaATree ,
@@ -106,10 +103,7 @@ bool bOptimisePred = true;
106103 {
107104 assert (pLastTree != NULL );
108105 if (bOptimisePred == false ){
109- dim3 dimGridThreadForEachIns;
110- conf.ComputeBlock (nNumofIns, dimGridThreadForEachIns);
111- int sharedMemSizeEachIns = 1 ;
112- PredMultiTarget<<<dimGridThreadForEachIns, sharedMemSizeEachIns, 0 , (*(cudaStream_t*)pStream)>>> (
106+ PredMultiTarget<<<nNumofIns, 1 , 0 , (*(cudaStream_t*)pStream)>>> (
113107 bagManager.m_pTargetValueEachBag + bagId * bagManager.m_numIns ,
114108 nNumofIns, pLastTree,
115109 manager.m_pdDenseInsEachBag + bagId * bagManager.m_maxNumUsedFeaATree * bagManager.m_numIns ,
0 commit comments