@@ -141,6 +141,9 @@ def llvm_shared_cluster_ptr_ty : LLVMQualPointerType<7>; // (shared_cluster)ptr
141141
142142defvar WARP_SIZE = 32;
143143
144+ // Note: the maximum grid size in the x-dimension is the lower value of 65535
145+ // on sm_20. We conservatively use the larger value here as it is required for
146+ // sm_30+ and is also correct for sm_20.
144147defvar MAX_GRID_SIZE_X = 0x7fffffff;
145148defvar MAX_GRID_SIZE_Y = 0xffff;
146149defvar MAX_GRID_SIZE_Z = 0xffff;
@@ -4768,6 +4771,7 @@ class PTXReadSRegIntrinsic_r32<string name,
47684771
47694772multiclass PTXReadSRegIntrinsic_v4i32<string regname,
47704773 list<list<IntrinsicProperty>> properties = [[], [], [], []]> {
4774+ assert !eq(!size(properties), 4), "properties must be a list of 4 lists";
47714775// FIXME: Do we need the 128-bit integer type version?
47724776// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem, IntrSpeculatable]>;
47734777
@@ -4781,6 +4785,7 @@ multiclass PTXReadSRegIntrinsic_v4i32<string regname,
47814785// Same, but without automatic clang builtins. It will be used for
47824786// registers that require particular GPU or PTX version.
47834787multiclass PTXReadSRegIntrinsicNB_v4i32<list<list<IntrinsicProperty>> properties = [[], [], [], []]> {
4788+ assert !eq(!size(properties), 4), "properties must be a list of 4 lists";
47844789 defvar suffixes = ["_x", "_y", "_z", "_w"];
47854790 foreach i = !range(suffixes) in
47864791 def suffixes[i] : PTXReadSRegIntrinsicNB_r32<properties[i]>;
0 commit comments