Skip to content

Commit 38f6081

Browse files
committed
Fix bugs in ComputeGD when using prediction
1 parent 799cbaf commit 38f6081

File tree

7 files changed

+18
-46
lines changed

7 files changed

+18
-46
lines changed

Device/DevicePredictorHelper.cu

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,7 @@ __global__ void PredMultiTarget(real *pdTargetValue, int numofDenseIns, const Tr
4343

4444
int pid = 0; //node id
4545
const TreeNode *curNode = pAllTreeNode + pid;
46-
if(curNode->nodeId != 0)
47-
{
48-
printf("id of root node is %d should be 0\n", curNode->nodeId);
49-
return;
50-
}
46+
CONCHECKER(curNode->nodeId == 0);
5147
int counter = 0;
5248
while(curNode->featureId != -1)//!curNode->isLeaf()
5349
{
@@ -69,11 +65,7 @@ __global__ void PredMultiTarget(real *pdTargetValue, int numofDenseIns, const Tr
6965
curNode = pAllTreeNode + pid;
7066

7167
counter++;
72-
if(counter > maxDepth)//for skipping from deadlock
73-
{
74-
printf("%s has bugs; fid=%d\n", __PRETTY_FUNCTION__, fid);
75-
break;
76-
}
68+
CONCHECKER(counter <= maxDepth);
7769
}
7870
pdTargetValue[targetId] += pAllTreeNode[pid].predValue;
7971
}

Device/FindSplit/ComputeGD.cu

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ void DeviceSplitter::ComputeGD(vector<RegTree> &vTree, vector<vector<KeyValue> >
7272
KernelConf conf;
7373
//start prediction
7474
//################### for my future experiments
75-
bool bOptimisePred = true;
75+
bool bOptimisePred = false;
7676
if(nNumofTree > 0 && numofUsedFea >0 && bOptimisePred == false)//numofUsedFea > 0 means the tree has more than one node.
7777
{
7878
uint startPos = 0;
@@ -85,14 +85,11 @@ bool bOptimisePred = true;
8585
int *pNumofFea = manager.m_pDNumofFea + startInsId;
8686
int numofInsToFill = nNumofIns;
8787

88-
dim3 dimGridThreadForEachIns;
89-
conf.ComputeBlock(numofInsToFill, dimGridThreadForEachIns);
90-
int sharedMemSizeEachIns = 1;
9188
//memset dense instances
9289
real *pTempDense = manager.m_pdDenseInsEachBag + bagId * bagManager.m_maxNumUsedFeaATree * bagManager.m_numIns;
9390
checkCudaErrors(cudaMemset(pTempDense, -1, sizeof(real) * bagManager.m_maxNumUsedFeaATree * bagManager.m_numIns));
9491
GETERROR("before FillMultiDense");
95-
FillMultiDense<<<dimGridThreadForEachIns, sharedMemSizeEachIns, 0, (*(cudaStream_t*)pStream)>>>(
92+
FillMultiDense<<<numofInsToFill, 1, 0, (*(cudaStream_t*)pStream)>>>(
9693
pDevInsValue, pInsStartPos, pDevFeaId, pNumofFea,
9794
pTempDense,
9895
manager.m_pSortedUsedFeaIdBag + bagId * bagManager.m_maxNumUsedFeaATree,
@@ -106,10 +103,7 @@ bool bOptimisePred = true;
106103
{
107104
assert(pLastTree != NULL);
108105
if(bOptimisePred == false){
109-
dim3 dimGridThreadForEachIns;
110-
conf.ComputeBlock(nNumofIns, dimGridThreadForEachIns);
111-
int sharedMemSizeEachIns = 1;
112-
PredMultiTarget<<<dimGridThreadForEachIns, sharedMemSizeEachIns, 0, (*(cudaStream_t*)pStream)>>>(
106+
PredMultiTarget<<<nNumofIns, 1, 0, (*(cudaStream_t*)pStream)>>>(
113107
bagManager.m_pTargetValueEachBag + bagId * bagManager.m_numIns,
114108
nNumofIns, pLastTree,
115109
manager.m_pdDenseInsEachBag + bagId * bagManager.m_maxNumUsedFeaATree * bagManager.m_numIns,

Device/FindSplit/FindFeaCsr.cu

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ void DeviceSplitter::FeaFinderAllNode2(void *pStream, int bagId)
3131
BagManager bagManager;
3232
BagCsrManager csrManager(bagManager.m_numFea, bagManager.m_maxNumSplittable, bagManager.m_numFeaValue);
3333
int numofSNode = bagManager.m_curNumofSplitableEachBag_h[bagId];
34-
printf("numof sn=%d\n", numofSNode);
3534

3635
IndexComputer indexComp;
3736
indexComp.AllocMem(bagManager.m_numFea, numofSNode, bagManager.m_maxNumSplittable);
@@ -259,13 +258,6 @@ void DeviceSplitter::FeaFinderAllNode2(void *pStream, int bagId)
259258
cudaStreamSynchronize((*(cudaStream_t*)pStream));
260259
GETERROR("after ComputeGainDense");
261260

262-
//change the gain of the first feature value to 0
263-
// int blockSizeFirstGain;
264-
// dim3 dimNumofBlockFirstGain;
265-
// conf.ConfKernel(numSeg, blockSizeFirstGain, dimNumofBlockFirstGain);
266-
// FirstFeaGain<<<dimNumofBlockFirstGain, blockSizeFirstGain, 0, (*(cudaStream_t*)pStream)>>>(
267-
// csrManager.pEachCsrFeaStartPos, numSeg, pGain_d, csrManager.curNumCsr);
268-
269261
// cout << "searching" << endl;
270262
cudaDeviceSynchronize();
271263
clock_t start_search = clock();
@@ -294,7 +286,6 @@ void DeviceSplitter::FeaFinderAllNode2(void *pStream, int bagId)
294286
bagManager.m_pRChildStatEachBag + bagId * bagManager.m_maxNumSplittable,
295287
bagManager.m_pLChildStatEachBag + bagId * bagManager.m_maxNumSplittable);
296288
cudaStreamSynchronize((*(cudaStream_t*)pStream));
297-
// printf("completed splitting node.....................................\n");
298289
checkCudaErrors(cudaFree(pMaxGain_d));
299290
checkCudaErrors(cudaFree(pMaxGainKey_d));
300291
}

Device/FindSplit/FindFeaKernel.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,6 @@ __global__ void ComputeGainDense(const nodeStat *pSNodeStat, const int *pid2SNPo
8282
}
8383
else
8484
pGainOnEachFeaValue[gTid] = all2Left;
85-
// if(7773118 == gTid)
86-
// printf("gain=%f, totalHess=%f, totalMissHess=%f, last+1=%f, last-1=%f, last=%f, last-2=%f, last=%u\n",
87-
// pGainOnEachFeaValue[gTid], totalHess, totalMissingHess, pHessPrefixSumOnEachFeaValue[lastFvaluePos+1],
88-
// pHessPrefixSumOnEachFeaValue[lastFvaluePos],
89-
// pHessPrefixSumOnEachFeaValue[lastFvaluePos-1], pHessPrefixSumOnEachFeaValue[lastFvaluePos-2], lastFvaluePos);
9085
}
9186
else{
9287
//if the previous fea value is the same as the current fea value, gain is 0 for the current fea value.
@@ -172,10 +167,6 @@ __global__ void FindSplitInfo(const uint *pEachFeaStartPosEachNode, const T *pEa
172167
uint lastFvaluePos = segStartPos + segLen - 1;
173168
if(key == 0 || (key > 0 && pnKey[key] != pnKey[key - 1])){//first element of the feature
174169
const real gap = fabs(pDenseFeaValue[key]) + DeviceSplitter::rt_eps;
175-
//printf("############## %u v.s. %u; %u\n", bestFeaId, bestFeaId, pPrefixSumHess[key]);
176-
// printf("missing %f all to one node: fid=%u, pnKey[%u]=%u != pnKey[%u]=%u, segLen=%u, parentHess=%f, startPos=%u..........................\n",
177-
// pPrefixSumHess[key], bestFeaId, key, pnKey[key], key-1, pnKey[key-1], pEachFeaLenEachNode[segId],
178-
// snNodeStat[snPos].sum_hess, pEachFeaStartPosEachNode[segId]);
179170
if(pDefault2Right[key] == true){//all non-missing to left
180171
pBestSplitPoint[snPos].m_bDefault2Right = true;
181172
pBestSplitPoint[snPos].m_fSplitValue = pDenseFeaValue[lastFvaluePos] + gap;

SharedUtility/initCuda.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ int GetMaxMemDevice(int count){
6363

6464
bool InitCUDA(CUcontext &context, char gpuType = 'T')
6565
{
66-
// UseDevice(0, context);
67-
// return true;
66+
UseDevice(1, context);
67+
return true;
6868

6969
int count;
7070

auto-test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
fname="stderr.test"
33
dataset=["abalone", "cadata", "covetype", "e2006", "higgs", "car ins",
44
"log1p", "new20", "realsim", "susy", "year pre", "3d"]
5-
result=["2.26841", "72842", "0.365866", "0.3716", "0.446322", "38.9215",
5+
result=["2.26841", "72842", "0.348459", "0.371375", "0.446233", "38.9214",
66
"0.37719", "0.784022", "0.705245", "0.376718", "9.34178", "3.42847e+07"]
77
new_result=[]
88

gbdt_main.cu

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,14 +101,18 @@ int main(int argc, char *argv[])
101101
printf("max numof used fea is %d\n", maxNumofUsedFeature);
102102

103103
//decide if want to use csr
104-
if(numFeaValue > pow(2, 28) || (numFeaValue > pow(2, 20) && numFea < numIns / 1000) || numFea / 10 < numIns){
104+
if(numFeaValue > pow(2, 28) || (numFeaValue > pow(2, 20) && numFea < numIns / 1000) || numFea / 10 < numIns)
105105
CsrCompressor::bUseCsr = true;
106-
cerr << "use CSR" << endl;
107-
}
108-
else{
106+
else
109107
CsrCompressor::bUseCsr = false;
110-
cerr << "use non-CSR" << endl;
111-
}
108+
109+
//CsrCompressor::bUseCsr = false;
110+
111+
if(CsrCompressor::bUseCsr == true)
112+
cerr << "use CSR" << endl;
113+
else
114+
cerr << "use non-CSR" << endl;
115+
112116

113117
//fill the bags
114118
BagManager bagManager;

0 commit comments

Comments
 (0)