Skip to content

Commit 586b428

Browse files
Merge pull request madgraph5#1049 from valassi/hack_ihel3_sep25_pr
Kernel splitting ihel1/2/3: helicity streams, color sum kernel, color sum BLAS
2 parents 5769f88 + 6ecfe01 commit 586b428

File tree

1,741 files changed

+107434
-62019
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,741 files changed

+107434
-62019
lines changed

.github/workflows/archiver.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Copyright (C) 2020-2025 CERN and UCLouvain.
22
# Licensed under the GNU Lesser General Public License (version 3 or later).
33
# Created by: A. Valassi (Sep 2024) for the MG5aMC CUDACPP plugin.
4-
# Further modified by: D. Massaro, A. Valassi (2024) for the MG5aMC CUDACPP plugin.
4+
# Further modified by: D. Massaro, A. Valassi (2024-2025) for the MG5aMC CUDACPP plugin.
55

66
#----------------------------------------------------------------------------------------------------------------------------------
77

.github/workflows/c-cpp.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# Copyright (C) 2020-2025 CERN and UCLouvain.
2+
# Licensed under the GNU Lesser General Public License (version 3 or later).
3+
# Created by: S. Hageboeck (Nov 2020) for the MG5aMC CUDACPP plugin.
4+
# Further modified by: S. Hageboeck, D. Massaro, S. Roiser, A. Valassi, Z. Wettersten (2024-2025) for the MG5aMC CUDACPP plugin.
5+
16
name: C/C++ CI
27

38
on:
Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,23 @@
1-
// Copyright (C) 2020-2024 CERN and UCLouvain.
1+
// Copyright (C) 2020-2025 CERN and UCLouvain.
22
// Licensed under the GNU Lesser General Public License (version 3 or later).
33
// Created by: J. Teig (Jul 2023) for the MG5aMC CUDACPP plugin.
4-
// Further modified by: J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin.
4+
// Further modified by: J. Teig, A. Valassi (2020-2025) for the MG5aMC CUDACPP plugin.
55

66
#ifndef MG5AMC_GPUABSTRACTION_H
77
#define MG5AMC_GPUABSTRACTION_H 1
88

9+
#include "mgOnGpuConfig.h"
10+
911
#include <cassert>
1012

1113
//--------------------------------------------------------------------------
1214

1315
#ifdef __CUDACC__ // this must be __CUDACC__ (not MGONGPUCPP_GPUIMPL)
1416

17+
#ifndef MGONGPU_HAS_NO_BLAS
18+
#include "cublas_v2.h"
19+
#endif
20+
1521
#define gpuError_t cudaError_t
1622
#define gpuPeekAtLastError cudaPeekAtLastError
1723
#define gpuGetErrorString cudaGetErrorString
@@ -21,24 +27,61 @@
2127
#define gpuMalloc( ptr, size ) checkGpu( cudaMalloc( ptr, size ) )
2228

2329
#define gpuMemcpy( dstData, srcData, srcBytes, func ) checkGpu( cudaMemcpy( dstData, srcData, srcBytes, func ) )
30+
#define gpuMemset( data, value, bytes ) checkGpu( cudaMemset( data, value, bytes ) )
2431
#define gpuMemcpyHostToDevice cudaMemcpyHostToDevice
2532
#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
33+
#define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice
2634
#define gpuMemcpyToSymbol( type1, type2, size ) checkGpu( cudaMemcpyToSymbol( type1, type2, size ) )
2735

2836
#define gpuFree( ptr ) checkGpu( cudaFree( ptr ) )
2937
#define gpuFreeHost( ptr ) checkGpu( cudaFreeHost( ptr ) )
3038

39+
#define gpuGetSymbolAddress( devPtr, symbol ) checkGpu( cudaGetSymbolAddress( devPtr, symbol ) )
40+
3141
#define gpuSetDevice cudaSetDevice
3242
#define gpuDeviceSynchronize cudaDeviceSynchronize
3343
#define gpuDeviceReset cudaDeviceReset
3444

3545
#define gpuLaunchKernel( kernel, blocks, threads, ... ) kernel<<<blocks, threads>>>( __VA_ARGS__ )
36-
#define gpuLaunchKernelSharedMem( kernel, blocks, threads, sharedMem, ... ) kernel<<<blocks, threads, sharedMem>>>( __VA_ARGS__ )
46+
//#define gpuLaunchKernelSharedMem( kernel, blocks, threads, sharedMem, ... ) kernel<<<blocks, threads, sharedMem>>>( __VA_ARGS__ )
47+
#define gpuLaunchKernelStream( kernel, blocks, threads, stream, ... ) kernel<<<blocks, threads, 0, stream>>>( __VA_ARGS__ )
48+
49+
#define gpuStream_t cudaStream_t
50+
#define gpuStreamCreate( pStream ) checkGpu( cudaStreamCreate( pStream ) )
51+
#define gpuStreamDestroy( stream ) checkGpu( cudaStreamDestroy( stream ) )
52+
53+
#define gpuBlasStatus_t cublasStatus_t
54+
#define GPUBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS
55+
#ifndef MGONGPU_HAS_NO_BLAS
56+
#define gpuBlasHandle_t cublasHandle_t
57+
#else
58+
#define gpuBlasHandle_t void // hack to keep the same API also in noBLAS builds
59+
#endif
60+
#define gpuBlasCreate cublasCreate
61+
#define gpuBlasDestroy cublasDestroy
62+
#define gpuBlasSetStream cublasSetStream
63+
64+
#define gpuBlasSaxpy cublasSaxpy
65+
#define gpuBlasSdot cublasSdot
66+
#define gpuBlasSgemv cublasSgemv
67+
#define gpuBlasSgemm cublasSgemm
68+
#define gpuBlasSgemmStridedBatched cublasSgemmStridedBatched
69+
#define gpuBlasDaxpy cublasDaxpy
70+
#define gpuBlasDdot cublasDdot
71+
#define gpuBlasDgemv cublasDgemv
72+
#define gpuBlasDgemm cublasDgemm
73+
#define gpuBlasDgemmStridedBatched cublasDgemmStridedBatched
74+
#define GPUBLAS_OP_N CUBLAS_OP_N
75+
#define GPUBLAS_OP_T CUBLAS_OP_T
3776

3877
//--------------------------------------------------------------------------
3978

4079
#elif defined __HIPCC__
4180

81+
#ifndef MGONGPU_HAS_NO_BLAS
82+
#include "hipblas/hipblas.h"
83+
#endif
84+
4285
#define gpuError_t hipError_t
4386
#define gpuPeekAtLastError hipPeekAtLastError
4487
#define gpuGetErrorString hipGetErrorString
@@ -48,22 +91,69 @@
4891
#define gpuMalloc( ptr, size ) checkGpu( hipMalloc( ptr, size ) )
4992

5093
#define gpuMemcpy( dstData, srcData, srcBytes, func ) checkGpu( hipMemcpy( dstData, srcData, srcBytes, func ) )
94+
#define gpuMemset( data, value, bytes ) checkGpu( hipMemset( data, value, bytes ) )
5195
#define gpuMemcpyHostToDevice hipMemcpyHostToDevice
5296
#define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost
97+
#define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice
5398
#define gpuMemcpyToSymbol( type1, type2, size ) checkGpu( hipMemcpyToSymbol( type1, type2, size ) )
5499

55100
#define gpuFree( ptr ) checkGpu( hipFree( ptr ) )
56101
#define gpuFreeHost( ptr ) checkGpu( hipHostFree( ptr ) )
57102

103+
#define gpuGetSymbolAddress( devPtr, symbol ) checkGpu( hipGetSymbolAddress( devPtr, symbol ) )
104+
58105
#define gpuSetDevice hipSetDevice
59106
#define gpuDeviceSynchronize hipDeviceSynchronize
60107
#define gpuDeviceReset hipDeviceReset
61108

62109
#define gpuLaunchKernel( kernel, blocks, threads, ... ) kernel<<<blocks, threads>>>( __VA_ARGS__ )
63-
#define gpuLaunchKernelSharedMem( kernel, blocks, threads, sharedMem, ... ) kernel<<<blocks, threads, sharedMem>>>( __VA_ARGS__ )
110+
//#define gpuLaunchKernelSharedMem( kernel, blocks, threads, sharedMem, ... ) kernel<<<blocks, threads, sharedMem>>>( __VA_ARGS__ )
111+
#define gpuLaunchKernelStream( kernel, blocks, threads, stream, ... ) kernel<<<blocks, threads, 0, stream>>>( __VA_ARGS__ )
112+
113+
#define gpuStream_t hipStream_t
114+
#define gpuStreamCreate( pStream ) checkGpu( hipStreamCreate( pStream ) )
115+
#define gpuStreamDestroy( stream ) checkGpu( hipStreamDestroy( stream ) )
116+
117+
#define gpuBlasStatus_t hipblasStatus_t
118+
#define GPUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS
119+
#ifndef MGONGPU_HAS_NO_BLAS
120+
#define gpuBlasHandle_t hipblasHandle_t
121+
#else
122+
#define gpuBlasHandle_t void // hack to keep the same API also in noBLAS builds
123+
#endif
124+
#define gpuBlasCreate hipblasCreate
125+
#define gpuBlasDestroy hipblasDestroy
126+
#define gpuBlasSetStream hipblasSetStream
127+
128+
#define gpuBlasSaxpy hipblasSaxpy
129+
#define gpuBlasSdot hipblasSdot
130+
#define gpuBlasSgemv hipblasSgemv
131+
#define gpuBlasSgemm hipblasSgemm
132+
#define gpuBlasSgemmStridedBatched hipblasSgemmStridedBatched
133+
#define gpuBlasDaxpy hipblasDaxpy
134+
#define gpuBlasDdot hipblasDdot
135+
#define gpuBlasDgemv hipblasDgemv
136+
#define gpuBlasDgemm hipblasDgemm
137+
#define gpuBlasDgemmStridedBatched hipblasDgemmStridedBatched
138+
#define GPUBLAS_OP_N HIPBLAS_OP_N
139+
#define GPUBLAS_OP_T HIPBLAS_OP_T
140+
141+
#endif
64142

65143
//--------------------------------------------------------------------------
66144

145+
#ifdef MGONGPU_FPTYPE2_FLOAT // FPTYPE2 is float: map the type-generic gpuBlasT* names onto the S (single-precision) gpuBlas wrappers
146+
#define gpuBlasTaxpy gpuBlasSaxpy
147+
#define gpuBlasTdot gpuBlasSdot
148+
#define gpuBlasTgemv gpuBlasSgemv
149+
#define gpuBlasTgemm gpuBlasSgemm
150+
#define gpuBlasTgemmStridedBatched gpuBlasSgemmStridedBatched
151+
#else // FPTYPE2 is not float: map the type-generic gpuBlasT* names onto the D (double-precision) gpuBlas wrappers
152+
#define gpuBlasTaxpy gpuBlasDaxpy
153+
#define gpuBlasTdot gpuBlasDdot
154+
#define gpuBlasTgemv gpuBlasDgemv
155+
#define gpuBlasTgemm gpuBlasDgemm
156+
#define gpuBlasTgemmStridedBatched gpuBlasDgemmStridedBatched
67157
#endif // MGONGPU_FPTYPE2_FLOAT
68158

69159
#endif // MG5AMC_GPUABSTRACTION_H

epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (C) 2020-2024 CERN and UCLouvain.
1+
// Copyright (C) 2020-2025 CERN and UCLouvain.
22
// Licensed under the GNU Lesser General Public License (version 3 or later).
33
// Created by: J. Teig (Jun 2023, based on earlier work by S. Roiser) for the MG5aMC CUDACPP plugin.
44
// Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2025) for the MG5aMC CUDACPP plugin.
@@ -30,6 +30,22 @@ inline void assertGpu( gpuError_t code, const char* file, int line, bool abort =
3030

3131
//--------------------------------------------------------------------------
3232

33+
#ifdef MGONGPUCPP_GPUIMPL /* clang-format off */
34+
#ifndef MGONGPU_HAS_NO_BLAS
35+
#define checkGpuBlas( code ){ assertGpuBlas( code, __FILE__, __LINE__ ); }
36+
inline void assertGpuBlas( gpuBlasStatus_t code, const char *file, int line, bool abort = true ) // report a non-success BLAS status together with its call site (file:line); if 'abort' is true (default) also assert on it
37+
{
38+
if ( code != GPUBLAS_STATUS_SUCCESS ) // on success nothing is printed and nothing is asserted
39+
{
40+
printf( "ERROR! assertGpuBlas: '%d' in %s:%d\n", code, file, line ); // prints the numeric status value (no textual description of the error)
41+
if( abort ) assert( code == GPUBLAS_STATUS_SUCCESS ); // condition is always false in this branch, so the assert fires; NOTE(review): assert is a no-op when NDEBUG is defined - confirm build flags
42+
}
43+
}
44+
#endif
45+
#endif /* clang-format on */
46+
47+
//--------------------------------------------------------------------------
48+
3349
#ifdef MGONGPUCPP_GPUIMPL
3450
namespace mg5amcGpu
3551
{

0 commit comments

Comments
 (0)