@@ -46,6 +46,43 @@ static bool isAssumedSize(mlir::ValueRange shape) {
4646 return false ;
4747}
4848
49+ static void createSharedMemoryGlobal (fir::FirOpBuilder &builder,
50+ mlir::Location loc, llvm::StringRef prefix,
51+ llvm::StringRef suffix,
52+ mlir::gpu::GPUModuleOp gpuMod,
53+ mlir::Type sharedMemType, unsigned size,
54+ unsigned align, bool isDynamic) {
55+ std::string sharedMemGlobalName =
56+ isDynamic ? (prefix + llvm::Twine (cudaSharedMemSuffix)).str ()
57+ : (prefix + llvm::Twine (cudaSharedMemSuffix) + suffix).str ();
58+
59+ mlir::OpBuilder::InsertionGuard guard (builder);
60+ builder.setInsertionPointToEnd (gpuMod.getBody ());
61+
62+ mlir::StringAttr linkage = isDynamic ? builder.createExternalLinkage ()
63+ : builder.createInternalLinkage ();
64+ llvm::SmallVector<mlir::NamedAttribute> attrs;
65+ auto globalOpName = mlir::OperationName (fir::GlobalOp::getOperationName (),
66+ gpuMod.getContext ());
67+ attrs.push_back (mlir::NamedAttribute (
68+ fir::GlobalOp::getDataAttrAttrName (globalOpName),
69+ cuf::DataAttributeAttr::get (gpuMod.getContext (),
70+ cuf::DataAttribute::Shared)));
71+
72+ mlir::DenseElementsAttr init = {};
73+ mlir::Type i8Ty = builder.getI8Type ();
74+ if (size > 0 ) {
75+ auto vecTy = mlir::VectorType::get (
76+ static_cast <fir::SequenceType::Extent>(size), i8Ty);
77+ mlir::Attribute zero = mlir::IntegerAttr::get (i8Ty, 0 );
78+ init = mlir::DenseElementsAttr::get (vecTy, llvm::ArrayRef (zero));
79+ }
80+ auto sharedMem =
81+ fir::GlobalOp::create (builder, loc, sharedMemGlobalName, false , false ,
82+ sharedMemType, init, linkage, attrs);
83+ sharedMem.setAlignment (align);
84+ }
85+
4986struct CUFComputeSharedMemoryOffsetsAndSize
5087 : public fir::impl::CUFComputeSharedMemoryOffsetsAndSizeBase<
5188 CUFComputeSharedMemoryOffsetsAndSize> {
@@ -108,18 +145,23 @@ struct CUFComputeSharedMemoryOffsetsAndSize
108145 crtDynOffset, dynSize);
109146 else
110147 crtDynOffset = dynSize;
111-
112- continue ;
148+ } else {
149+ // Static shared memory.
150+ auto [size, align] = fir::getTypeSizeAndAlignmentOrCrash (
151+ loc, sharedOp.getInType (), *dl, kindMap);
152+ createSharedMemoryGlobal (
153+ builder, sharedOp.getLoc (), funcOp.getName (),
154+ *sharedOp.getBindcName (), gpuMod,
155+ fir::SequenceType::get (size, i8Ty), size,
156+ sharedOp.getAlignment () ? *sharedOp.getAlignment () : align,
157+ /* isDynamic=*/ false );
158+ mlir::Value zero = builder.createIntegerConstant (loc, i32Ty, 0 );
159+ sharedOp.getOffsetMutable ().assign (zero);
160+ if (!sharedOp.getAlignment ())
161+ sharedOp.setAlignment (align);
162+ sharedOp.setIsStatic (true );
163+ ++nbStaticSharedVariables;
113164 }
114- auto [size, align] = fir::getTypeSizeAndAlignmentOrCrash (
115- sharedOp.getLoc (), sharedOp.getInType (), *dl, kindMap);
116- ++nbStaticSharedVariables;
117- mlir::Value offset = builder.createIntegerConstant (
118- loc, i32Ty, llvm::alignTo (sharedMemSize, align));
119- sharedOp.getOffsetMutable ().assign (offset);
120- sharedMemSize =
121- llvm::alignTo (sharedMemSize, align) + llvm::alignTo (size, align);
122- alignment = std::max (alignment, align);
123165 }
124166
125167 if (nbDynamicSharedVariables == 0 && nbStaticSharedVariables == 0 )
@@ -130,35 +172,13 @@ struct CUFComputeSharedMemoryOffsetsAndSize
130172 funcOp.getLoc (),
131173 " static and dynamic shared variables in a single kernel" );
132174
133- mlir::DenseElementsAttr init = {};
134- if (sharedMemSize > 0 ) {
135- auto vecTy = mlir::VectorType::get (sharedMemSize, i8Ty);
136- mlir::Attribute zero = mlir::IntegerAttr::get (i8Ty, 0 );
137- init = mlir::DenseElementsAttr::get (vecTy, llvm::ArrayRef (zero));
138- }
175+ if (nbStaticSharedVariables > 0 )
176+ continue ;
139177
140- // Create the shared memory global where each shared variable will point
141- // to.
142178 auto sharedMemType = fir::SequenceType::get (sharedMemSize, i8Ty);
143- std::string sharedMemGlobalName =
144- (funcOp.getName () + llvm::Twine (cudaSharedMemSuffix)).str ();
145- // Dynamic shared memory needs an external linkage while static shared
146- // memory needs an internal linkage.
147- mlir::StringAttr linkage = nbDynamicSharedVariables > 0
148- ? builder.createExternalLinkage ()
149- : builder.createInternalLinkage ();
150- builder.setInsertionPointToEnd (gpuMod.getBody ());
151- llvm::SmallVector<mlir::NamedAttribute> attrs;
152- auto globalOpName = mlir::OperationName (fir::GlobalOp::getOperationName (),
153- gpuMod.getContext ());
154- attrs.push_back (mlir::NamedAttribute (
155- fir::GlobalOp::getDataAttrAttrName (globalOpName),
156- cuf::DataAttributeAttr::get (gpuMod.getContext (),
157- cuf::DataAttribute::Shared)));
158- auto sharedMem = fir::GlobalOp::create (
159- builder, funcOp.getLoc (), sharedMemGlobalName, false , false ,
160- sharedMemType, init, linkage, attrs);
161- sharedMem.setAlignment (alignment);
179+ createSharedMemoryGlobal (builder, funcOp.getLoc (), funcOp.getName (), " " ,
180+ gpuMod, sharedMemType, sharedMemSize, alignment,
181+ /* isDynamic=*/ true );
162182 }
163183 }
164184};
0 commit comments