@@ -395,19 +395,16 @@ void Ax_single_gpu(CUPDLPwork *w, cusparseDnVecDescr_t vecX,
395395
396396 switch (w -> problem -> data -> matrix_format ) {
397397 case CSR_CSC :
398- // cuda_csc_Ax(w->cusparsehandle, w->problem->data->csc_matrix->cuda_csc,
399- // vecX, vecAx, w->dBuffer, alpha, beta);
400-
401- cuda_csr_Ax (w -> cusparsehandle , w -> problem -> data -> csr_matrix -> cuda_csr ,
402- vecX , vecAx , w -> dBuffer , alpha , beta );
398+ cuda_csr_Ax (w -> cusparsehandle , w -> problem -> data -> csr_matrix -> cuda_csr ,
399+ vecX , vecAx , w -> dBuffer_csr_Ax , alpha , beta );
403400 break ;
404401 case CSC :
405- cuda_csc_Ax ( w -> cusparsehandle , w -> problem -> data -> csc_matrix -> cuda_csc ,
406- vecX , vecAx , w -> dBuffer , alpha , beta );
402+ cupdlp_printf ( "Error: Ax_single_gpu requires CSR matrix\n" );
403+ exit ( 1 );
407404 break ;
408405 case CSR :
409406 cuda_csr_Ax (w -> cusparsehandle , w -> problem -> data -> csr_matrix -> cuda_csr ,
410- vecX , vecAx , w -> dBuffer , alpha , beta );
407+ vecX , vecAx , w -> dBuffer_csr_Ax , alpha , beta );
411408 break ;
412409 default :
413410 cupdlp_printf ("Error: Unknown matrix format in Ax_single_gpu\n" );
@@ -430,18 +427,16 @@ void ATy_single_gpu(CUPDLPwork *w, cusparseDnVecDescr_t vecY,
430427
431428 switch (w -> problem -> data -> matrix_format ) {
432429 case CSR_CSC :
433- // cuda_csr_ATy(w->cusparsehandle, w->problem->data->csr_matrix->cuda_csr,
434- // vecY, vecATy, w->dBuffer, alpha, beta);
435- cuda_csc_ATy (w -> cusparsehandle , w -> problem -> data -> csc_matrix -> cuda_csc ,
436- vecY , vecATy , w -> dBuffer , alpha , beta );
430+ cuda_csc_ATy (w -> cusparsehandle , w -> problem -> data -> csc_matrix -> cuda_csc ,
431+ vecY , vecATy , w -> dBuffer_csc_ATy , alpha , beta );
437432 break ;
438433 case CSC :
439434 cuda_csc_ATy (w -> cusparsehandle , w -> problem -> data -> csc_matrix -> cuda_csc ,
440- vecY , vecATy , w -> dBuffer , alpha , beta );
435+ vecY , vecATy , w -> dBuffer_csc_ATy , alpha , beta );
441436 break ;
442437 case CSR :
443- cuda_csr_ATy ( w -> cusparsehandle , w -> problem -> data -> csr_matrix -> cuda_csr ,
444- vecY , vecATy , w -> dBuffer , alpha , beta );
438+ cupdlp_printf ( "Error: ATy_single_gpu requires CSC matrix\n" );
439+ exit ( 1 );
445440 break ;
446441 default :
447442 printf ("Error: Unknown matrix format in Ax_single_gpu\n" );
@@ -477,7 +472,7 @@ void Ax(CUPDLPwork *w, CUPDLPvec *ax, const CUPDLPvec *x) {
477472 break ;
478473 case MULTI_GPU :
479474#ifndef CUPDLP_CPU
480- Ax_multi_gpu (d , ax , x );
475+ Ax_multi_gpu (d , ax -> data , x -> data );
481476#else
482477 printf ("GPU not supported in CPU build\n" );
483478 exit (1 );
@@ -757,6 +752,7 @@ void cupdlp_initvec(cupdlp_float *x, const cupdlp_float val,
757752#endif
758753}
759754
755+ /*
760756void cupdlp_sub(cupdlp_float *xout, const cupdlp_float *x1,
761757 const cupdlp_float *x2, const cupdlp_int len) {
762758#ifndef CUPDLP_CPU
@@ -767,6 +763,7 @@ void cupdlp_sub(cupdlp_float *xout, const cupdlp_float *x1,
767763 cupdlp_axpy(NULL, len, &alpha, x2, xout);
768764#endif
769765}
766+ */
770767
771768void cupdlp_compute_interaction_and_movement (CUPDLPwork * w ,
772769 cupdlp_float * dMovement ,
@@ -778,15 +775,25 @@ void cupdlp_compute_interaction_and_movement(CUPDLPwork *w,
778775 cupdlp_float dX = 0.0 ;
779776 cupdlp_float dY = 0.0 ;
780777
781- cupdlp_sub (w -> buffer2 , iterates -> x -> data , iterates -> xUpdate -> data , nCols );
782- cupdlp_twoNorm (w , nCols , w -> buffer2 , & dX );
783- cupdlp_sub (w -> buffer3 , iterates -> y -> data , iterates -> yUpdate -> data , nRows );
784- cupdlp_twoNorm (w , nRows , w -> buffer3 , & dY );
785-
786- * dMovement = pow (dX , 2.0 ) * 0.5 * beta + pow (dY , 2.0 ) / (2.0 * beta );
787-
788- cupdlp_sub (w -> buffer3 , iterates -> aty -> data , iterates -> atyUpdate -> data , nCols );
789- cupdlp_dot (w , nCols , w -> buffer2 , w -> buffer3 , dInteraction );
778+ cupdlp_int iter = w -> timers -> nIter ;
779+ CUPDLPvec * x = iterates -> x [iter % 2 ];
780+ CUPDLPvec * y = iterates -> y [iter % 2 ];
781+ CUPDLPvec * aty = iterates -> aty [iter % 2 ];
782+ CUPDLPvec * xUpdate = iterates -> x [(iter + 1 ) % 2 ];
783+ CUPDLPvec * yUpdate = iterates -> y [(iter + 1 ) % 2 ];
784+ CUPDLPvec * atyUpdate = iterates -> aty [(iter + 1 ) % 2 ];
785+
786+ #if !(CUPDLP_CPU ) && USE_KERNELS
787+ cupdlp_movement_interaction_cuda (& dX , & dY , dInteraction , w -> buffer2 ,
788+ xUpdate -> data , x -> data , yUpdate -> data , y -> data , atyUpdate -> data , aty -> data , nRows , nCols );
789+ #else
790+ cupdlp_diffTwoNormSquared (w , x -> data , xUpdate -> data , nCols , & dX );
791+ cupdlp_diffTwoNormSquared (w , y -> data , yUpdate -> data , nRows , & dY );
792+ // Δx' (A'Δy)
793+ cupdlp_diffDotDiff (w , x -> data , xUpdate -> data , aty -> data , atyUpdate -> data ,
794+ nCols , dInteraction );
795+ #endif
796+ * dMovement = dX * 0.5 * beta + dY / (2.0 * beta );
790797}
791798
792799// WIP iinfnormabslocaltermination
0 commit comments