@@ -18,6 +18,8 @@ SPDX-License-Identifier: MIT
1818#include < llvmWrapper/Support/Alignment.h>
1919#include < llvmWrapper/IR/DerivedTypes.h>
2020
21+ #include < cstddef>
22+
2123using namespace llvm ;
2224using namespace IGC ;
2325
@@ -213,6 +215,22 @@ void WIFuncResolution::visitCallInst(CallInst& CI)
213215
214216*************************************************************************************************/
215217
218+ #pragma pack(push, 1)
219+ namespace packed
220+ {
221+ #include " implicit_args_struct.h"
222+ }
223+ #pragma pack(pop)
224+
225+ #undef IMPLICIT_ARGS_STRUCT_H_
226+ #include " implicit_args_struct.h"
227+
228+ // According to the ABI specification, the implicit_args struct must be naturally aligned.
229+ // To ensure that the offsets of struct members are compiler-independent, each member's
230+ // offset must be divisible by the size of that member. In some cases this requires
231+ // inserting an additional padding member.
232+ static_assert (sizeof (packed::implicit_args) == sizeof (implicit_args), " Implicit args struct is not properly aligned!" );
233+
216234// Structure of side buffer generated by NEO:
217235// struct implicit_args {
218236// uint8_t struct_size;
@@ -233,6 +251,8 @@ void WIFuncResolution::visitCallInst(CallInst& CI)
233251// uint32_t group_count_x;
234252// uint32_t group_count_y;
235253// uint32_t group_count_z;
254+ // uint32_t padding0;
255+ // uint64_t rt_global_buffer_ptr;
236256// };
237257
238258// For SIMD8:
@@ -259,46 +279,6 @@ void WIFuncResolution::visitCallInst(CallInst& CI)
259279// uint16_t lz[32];
260280// };
261281
262-
263- class GLOBAL_STATE_FIELD_OFFSETS
264- {
265- public:
266- // This class holds offsets of various fields in side buffer
267- static const uint32_t STRUCT_SIZE = 0 ;
268-
269- static const uint32_t VERSION = STRUCT_SIZE + sizeof (uint8_t );
270-
271- static const uint32_t NUM_WORK_DIM = VERSION + sizeof (uint8_t );
272-
273- static const uint32_t SIMDSIZE = NUM_WORK_DIM + sizeof (uint8_t );
274-
275- static const uint32_t LOCAL_SIZES = SIMDSIZE + sizeof (uint8_t );
276- static const uint32_t LOCAL_SIZE_X = LOCAL_SIZES;
277- static const uint32_t LOCAL_SIZE_Y = LOCAL_SIZE_X + sizeof (uint32_t );
278- static const uint32_t LOCAL_SIZE_Z = LOCAL_SIZE_Y + sizeof (uint32_t );
279-
280- static const uint32_t GLOBAL_SIZES = LOCAL_SIZE_Z + sizeof (uint32_t );
281- static const uint32_t GLOBAL_SIZE_X = GLOBAL_SIZES;
282- static const uint32_t GLOBAL_SIZE_Y = GLOBAL_SIZE_X + sizeof (uint64_t );
283- static const uint32_t GLOBAL_SIZE_Z = GLOBAL_SIZE_Y + sizeof (uint64_t );
284-
285- static const uint32_t PRINTF_BUFFER = GLOBAL_SIZE_Z + sizeof (uint64_t );
286-
287- static const uint32_t GLOBAL_OFFSETS = PRINTF_BUFFER + sizeof (uint64_t );
288- static const uint32_t GLOBAL_OFFSET_X = GLOBAL_OFFSETS;
289- static const uint32_t GLOBAL_OFFSET_Y = GLOBAL_OFFSET_X + sizeof (uint64_t );
290- static const uint32_t GLOBAL_OFFSET_Z = GLOBAL_OFFSET_Y + sizeof (uint64_t );
291-
292- static const uint32_t LOCAL_IDS = GLOBAL_OFFSET_Z + sizeof (uint64_t );
293-
294- static const uint32_t GROUP_COUNTS = LOCAL_IDS + sizeof (uint64_t );
295- static const uint32_t GROUP_COUNT_X = GROUP_COUNTS;
296- static const uint32_t GROUP_COUNT_Y = GROUP_COUNT_X + sizeof (uint32_t );
297- static const uint32_t GROUP_COUNT_Z = GROUP_COUNT_Y + sizeof (uint32_t );
298-
299- static const uint32_t TOTAL_SIZE = GROUP_COUNT_Z + sizeof (uint32_t );
300- };
301-
302282llvm::Value* LowerImplicitArgIntrinsics::BuildLoadInst (llvm::CallInst& CI, unsigned int Offset, llvm::Type* DataType)
303283{
304284 // This function computes a type-aligned address that includes Offset.
@@ -357,9 +337,9 @@ llvm::Value* LowerImplicitArgIntrinsics::BuildLoadInst(llvm::CallInst& CI, unsig
357337
358338 if (Offset != AlignedOffset)
359339 {
360- auto ByteType = Type::getInt8Ty (Builder.getContext () );
361- auto BitCastToByte = Builder.CreateBitCast (LoadedData, ByteType );
362- Value* NewVector = UndefValue::get (IGCLLVM::FixedVectorType::get (ByteType , Size));
340+ auto ByteVectorType = IGCLLVM::FixedVectorType::get (Builder.getInt8Ty (), LoadByteSize );
341+ auto BitCastToByte = Builder.CreateBitCast (LoadedData, ByteVectorType );
342+ Value* NewVector = UndefValue::get (IGCLLVM::FixedVectorType::get (Builder. getInt8Ty () , Size));
363343 for (unsigned int I = Offset; I != (Offset + Size); ++I)
364344 {
365345 auto Elem = Builder.CreateExtractElement (BitCastToByte, I - AlignedOffset);
@@ -790,7 +770,7 @@ void LowerImplicitArgIntrinsics::visitCallInst(CallInst& CI)
790770
791771 // Get Local ID Base Ptr
792772 auto DataTypeI64 = Type::getInt64Ty (F->getParent ()->getContext ());
793- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::LOCAL_IDS ;
773+ unsigned int Offset = offsetof (implicit_args, local_id_table_ptr) ;
794774 auto LocalIDBase = BuildLoadInst (CI, Offset, DataTypeI64);
795775
796776 // Get local thread id
@@ -842,7 +822,7 @@ void LowerImplicitArgIntrinsics::visitCallInst(CallInst& CI)
842822 // Assume local size and enqueued local size are the same
843823 auto ElemTypeD = Type::getInt32Ty (F->getParent ()->getContext ());
844824 auto VecTyD = IGCLLVM::FixedVectorType::get (ElemTypeD, 3 );
845- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::LOCAL_SIZE_X ;
825+ unsigned int Offset = offsetof (implicit_args, local_size_x) ;
846826 auto LoadInst = BuildLoadInst (CI, Offset, VecTyD);
847827 V = LoadInst;
848828 break ;
@@ -851,7 +831,7 @@ void LowerImplicitArgIntrinsics::visitCallInst(CallInst& CI)
851831 {
852832 // global_offset is loaded from PayloadHeader[0:2]
853833 // currently there are no other uses for payload header.
854- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::GLOBAL_OFFSET_X ;
834+ unsigned int Offset = offsetof (implicit_args, global_offset_x) ;
855835 auto ElemTypeD = Type::getInt32Ty (F->getParent ()->getContext ());
856836 auto VecTyQ = IGCLLVM::FixedVectorType::get (Type::getInt64Ty (F->getParent ()->getContext ()), 3 );
857837 auto LoadInst = BuildLoadInst (CI, Offset, VecTyQ);
@@ -868,7 +848,7 @@ void LowerImplicitArgIntrinsics::visitCallInst(CallInst& CI)
868848 case GenISAIntrinsic::GenISA_getGlobalSize:
869849 case GenISAIntrinsic::GenISA_getStageInGridSize:
870850 {
871- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::GLOBAL_SIZE_X ;
851+ unsigned int Offset = offsetof (implicit_args, global_size_x) ;
872852 auto VecTyQ = IGCLLVM::FixedVectorType::get (Type::getInt64Ty (F->getParent ()->getContext ()), 3 );
873853 auto ElemTypeD = Type::getInt32Ty (F->getParent ()->getContext ());
874854 auto LoadInst = BuildLoadInst (CI, Offset, VecTyQ);
@@ -886,15 +866,15 @@ void LowerImplicitArgIntrinsics::visitCallInst(CallInst& CI)
886866 {
887867 auto ElemTypeUD = Type::getInt32Ty (F->getParent ()->getContext ());
888868 auto VecTyUD = IGCLLVM::FixedVectorType::get (ElemTypeUD, 3 );
889- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::GROUP_COUNT_X ;
869+ unsigned int Offset = offsetof (implicit_args, group_count_x) ;
890870 auto LoadInst = BuildLoadInst (CI, Offset, VecTyUD);
891871 V = LoadInst;
892872 break ;
893873 }
894874 case GenISAIntrinsic::GenISA_getWorkDim:
895875 {
896876 unsigned int Size = 4 ;
897- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::NUM_WORK_DIM / Size;
877+ unsigned int Offset = offsetof (implicit_args, num_work_dim) / Size;
898878 auto TypeUD = Type::getInt32Ty (F->getParent ()->getContext ());
899879 auto LoadInst = BuildLoadInst (CI, Offset, TypeUD);
900880 auto LShr = Builder.CreateLShr (LoadInst, (uint64_t )16 );
@@ -906,12 +886,20 @@ void LowerImplicitArgIntrinsics::visitCallInst(CallInst& CI)
906886 {
907887 // This function is invoked when expanding printf call to retrieve printf buffer ptr.
908888 auto DataTypeI64 = Type::getInt64Ty (CI.getFunction ()->getParent ()->getContext ());
909- unsigned int Offset = GLOBAL_STATE_FIELD_OFFSETS::PRINTF_BUFFER ;
889+ unsigned int Offset = offsetof (implicit_args, printf_buffer_ptr) ;
910890 auto Result = BuildLoadInst (CI, Offset, DataTypeI64);
911891 Result = Builder.CreateIntToPtr (Result, CI.getType ());
912892 V = Result;
913893 break ;
914894 }
895+ case GenISAIntrinsic::GenISA_getRtGlobalBufferPtr:
896+ {
897+ unsigned int Offset = offsetof (implicit_args, rt_global_buffer_ptr);
898+ auto Result = BuildLoadInst (CI, Offset, Builder.getInt64Ty ());
899+ Result = Builder.CreateIntToPtr (Result, CI.getType ());
900+ V = Result;
901+ break ;
902+ }
915903 default :
916904 break ;
917905 }
0 commit comments