11// RUN: triton-opt -split-input-file %s --convert-triton-amdgpu-to-llvm='arch=gfx942' -verify-diagnostics
22
33// Invalid size
4- #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ], CTAsPerCGA = [ 1 , 1 ], CTASplitNum = [ 1 , 1 ], CTAOrder = [ 0 , 1 ] }>
4+ #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ]}>
55tt.func @invalid_size_input (%arg0: tensor <256 x128 xi32 , #blocked1 > {tt.divisibility = 16 : i32 }) {
66 // expected-error @+1 {{result shape must be multiple of shapePerCTATile}}
77 %1 = amdg.extract_slice %arg0 [0 ,0 ] : tensor <256 x128 xi32 , #blocked1 > to tensor <256 x2 xi32 , #blocked1 >
@@ -11,7 +11,7 @@ tt.func @invalid_size_input(%arg0: tensor<256x128xi32, #blocked1> {tt.divisibili
1111// -----
1212
1313// Invalid offset, not a multiple of shapePerCTATile
14- #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ], CTAsPerCGA = [ 1 , 1 ], CTASplitNum = [ 1 , 1 ], CTAOrder = [ 0 , 1 ] }>
14+ #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ]}>
1515tt.func @invalid_offset_input (%arg0: tensor <256 x128 xi32 , #blocked1 > {tt.divisibility = 16 : i32 }) {
1616 // expected-error @+1 {{offset must be multiple of shapePerCTATile}}
1717 %1 = amdg.extract_slice %arg0 [0 ,5 ] : tensor <256 x128 xi32 , #blocked1 > to tensor <256 x16 xi32 , #blocked1 >
@@ -21,7 +21,7 @@ tt.func @invalid_offset_input(%arg0: tensor<256x128xi32, #blocked1> {tt.divisibi
2121// -----
2222
2323// Invalid offset, out of bounds for dimension
24- #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ], CTAsPerCGA = [ 1 , 1 ], CTASplitNum = [ 1 , 1 ], CTAOrder = [ 0 , 1 ] }>
24+ #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ]}>
2525tt.func @invalid_offset_input (%arg0: tensor <256 x128 xi32 , #blocked1 > {tt.divisibility = 16 : i32 }) {
2626 // expected-error @+1 {{invalid offset at dimension 1}}
2727 %1 = amdg.extract_slice %arg0 [0 ,128 ] : tensor <256 x128 xi32 , #blocked1 > to tensor <256 x16 xi32 , #blocked1 >
@@ -31,8 +31,8 @@ tt.func @invalid_offset_input(%arg0: tensor<256x128xi32, #blocked1> {tt.divisibi
3131// -----
3232
3333// Invalid result layout
34- #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ], CTAsPerCGA = [ 1 , 1 ], CTASplitNum = [ 1 , 1 ], CTAOrder = [ 0 , 1 ] }>
35- #blocked2 = #ttg.blocked <{sizePerThread = [4 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ], CTAsPerCGA = [ 1 , 1 ], CTASplitNum = [ 1 , 1 ], CTAOrder = [ 0 , 1 ] }>
34+ #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ]}>
35+ #blocked2 = #ttg.blocked <{sizePerThread = [4 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ]}>
3636tt.func @invalid_result_layout (%arg0: tensor <256 x128 xi32 , #blocked1 > {tt.divisibility = 16 : i32 }) {
3737 // expected-error @+1 {{CTA tile shapes must match between source and destination tensors.}}
3838 %1 = amdg.extract_slice %arg0 [0 ,0 ] : tensor <256 x128 xi32 , #blocked1 > to tensor <256 x16 xi32 , #blocked2 >
@@ -41,7 +41,7 @@ tt.func @invalid_result_layout(%arg0: tensor<256x128xi32, #blocked1> {tt.divisib
4141// -----
4242
4343// Invalid result element type
44- #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ], CTAsPerCGA = [ 1 , 1 ], CTASplitNum = [ 1 , 1 ], CTAOrder = [ 0 , 1 ] }>
44+ #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ]}>
4545tt.func @invalid_result_element_type (%arg0: tensor <256 x128 xi32 , #blocked1 > {tt.divisibility = 16 : i32 }) {
4646 // expected-error @+1 {{result element type must match source element type}}
4747 %1 = amdg.extract_slice %arg0 [0 ,0 ] : tensor <256 x128 xi32 , #blocked1 > to tensor <256 x16 xi64 , #blocked1 >
@@ -51,7 +51,7 @@ tt.func @invalid_result_element_type(%arg0: tensor<256x128xi32, #blocked1> {tt.d
5151// -----
5252
5353// Invalid result rank
54- #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ], CTAsPerCGA = [ 1 , 1 ], CTASplitNum = [ 1 , 1 ], CTAOrder = [ 0 , 1 ] }>
54+ #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ]}>
5555tt.func @invalid_result_rank (%arg0: tensor <256 x128 xi32 , #blocked1 > {tt.divisibility = 16 : i32 }) {
5656 // expected-error @+1 {{result rank must be equal to source rank}}
5757 %1 = amdg.extract_slice %arg0 [0 ,0 ] : tensor <256 x128 xi32 , #blocked1 > to tensor <256 x16 x2 xi32 , #blocked1 >
@@ -61,7 +61,7 @@ tt.func @invalid_result_rank(%arg0: tensor<256x128xi32, #blocked1> {tt.divisibil
6161// -----
6262
6363// Invalid result shape
64- #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ], CTAsPerCGA = [ 1 , 1 ], CTASplitNum = [ 1 , 1 ], CTAOrder = [ 0 , 1 ] }>
64+ #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ]}>
6565tt.func @invalid_result_rank (%arg0: tensor <256 x128 xi32 , #blocked1 > {tt.divisibility = 16 : i32 }) {
6666 // expected-error @+1 {{result shape cannot exceed source shape at dimension 1}}
6767 %1 = amdg.extract_slice %arg0 [0 ,0 ] : tensor <256 x128 xi32 , #blocked1 > to tensor <256 x256 xi32 , #blocked1 >
@@ -71,7 +71,7 @@ tt.func @invalid_result_rank(%arg0: tensor<256x128xi32, #blocked1> {tt.divisibil
7171// -----
7272
7373// Invalid non-static offset
74- #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ], CTAsPerCGA = [ 1 , 1 ], CTASplitNum = [ 1 , 1 ], CTAOrder = [ 0 , 1 ] }>
74+ #blocked1 = #ttg.blocked <{sizePerThread = [8 , 1 ], threadsPerWarp = [4 , 16 ], warpsPerCTA = [8 , 1 ], order = [1 , 0 ]}>
7575tt.func @invalid_non_static_offset (%arg0: tensor <256 x128 xi32 , #blocked1 > {tt.divisibility = 16 : i32 }, %arg1: i32 ) {
7676 // expected-error @+2 {{expected ']'}}
7777 // expected-error @+1 {{expected integer value}}
0 commit comments