Rebase to triton main (#122)

quintinwang5 · web-flow · commit da1bc1fb7a39 · 2023-11-14T08:19:42.000+08:00
* Fix nightly script

* Update triton hash

* Change triton::gpu::{CmpIOp, CmpFOp, SelectOp} to mlir::arith::{CmpIOp, CmpFOp, SelectOp}

* Update sed rules to work with the new Triton

* Update the todo list of failing unit tests (UTs) to reflect unit test changes
diff --git a/.github/scripts/case_update.sh b/.github/scripts/case_update.sh
@@ -10,6 +10,7 @@ sed -i '/import torch/ a\import intel_extension_for_pytorch' ${HOME}/${JOB_WORKS
 sed -i '/import torch/ a\import intel_extension_for_pytorch' ${HOME}/${JOB_WORKSPACE}/triton_src/python/test/unit/language/test_core.py
 sed -i '/def test_abs_fp8(in_dtype, device):/ a\    pytest.skip("fp8 is not supported for xpu")' ${HOME}/${JOB_WORKSPACE}/triton_src/python/test/unit/language/test_core.py
 sed -i 's/MmaLayout(/# &/' ${HOME}/${JOB_WORKSPACE}/triton_src/python/test/unit/language/test_core.py
+sed -i 's/instr_shape=\[/# &/' ${HOME}/${JOB_WORKSPACE}/triton_src/python/test/unit/language/test_core.py
 sed -i 's/f.name/&, device_type=device/' ${HOME}/${JOB_WORKSPACE}/triton_src/python/test/unit/language/test_core.py
 sed -i '/pytest.skip("float8e4nv is only supported on NVGPU with cc >= 90")/ a\    if dtype in  [tl.float8e4b15, tl.float8e4b15x4, tl.float8e4nv, tl.float8e5, "float8e4b15", "tl.float8e4b15x4", "float8e4nv", "float8e5"]:\n        pytest.skip("fp8 is not supported yet")' ${HOME}/${JOB_WORKSPACE}/triton_src/python/test/unit/language/test_core.py
 sed -i "/ptx = pgm.asm\['ptx'\]/,/^$/d" ${HOME}/${JOB_WORKSPACE}/triton_src/python/test/unit/language/test_core.py
diff --git a/.github/tests/triton_todo_failure_tests.log b/.github/tests/triton_todo_failure_tests.log
@@ -1,91 +1,157 @@
+test_core.py::test_constexpr_propagation
+test_core.py::test_enable_fp_fusion[False]
+test_core.py::test_enable_fp_fusion[True]
+test_core.py::test_locality[2-1024-128-max]
+test_core.py::test_locality[2-1024-128-min]
+test_core.py::test_locality[2-1024-128-sum]
+test_core.py::test_locality[2-1024-32-max]
+test_core.py::test_locality[2-1024-32-min]
+test_core.py::test_locality[2-1024-32-sum]
+test_core.py::test_locality[2-1024-64-max]
+test_core.py::test_locality[2-1024-64-min]
+test_core.py::test_locality[2-1024-64-sum]
+test_core.py::test_locality[2-2048-128-max]
+test_core.py::test_locality[2-2048-128-min]
+test_core.py::test_locality[2-2048-128-sum]
+test_core.py::test_locality[2-2048-32-max]
+test_core.py::test_locality[2-2048-32-min]
+test_core.py::test_locality[2-2048-32-sum]
+test_core.py::test_locality[2-2048-64-max]
+test_core.py::test_locality[2-2048-64-min]
+test_core.py::test_locality[2-2048-64-sum]
+test_core.py::test_locality[2-512-128-max]
+test_core.py::test_locality[2-512-128-min]
+test_core.py::test_locality[2-512-128-sum]
+test_core.py::test_locality[2-512-32-max]
+test_core.py::test_locality[2-512-32-min]
+test_core.py::test_locality[2-512-32-sum]
+test_core.py::test_locality[2-512-64-max]
+test_core.py::test_locality[2-512-64-min]
+test_core.py::test_locality[2-512-64-sum]
+test_core.py::test_locality[4-1024-128-max]
+test_core.py::test_locality[4-1024-128-min]
+test_core.py::test_locality[4-1024-128-sum]
+test_core.py::test_locality[4-1024-32-max]
+test_core.py::test_locality[4-1024-32-min]
+test_core.py::test_locality[4-1024-32-sum]
+test_core.py::test_locality[4-1024-64-max]
+test_core.py::test_locality[4-1024-64-min]
+test_core.py::test_locality[4-1024-64-sum]
+test_core.py::test_locality[4-2048-128-max]
+test_core.py::test_locality[4-2048-128-min]
+test_core.py::test_locality[4-2048-128-sum]
+test_core.py::test_locality[4-2048-32-max]
+test_core.py::test_locality[4-2048-32-min]
+test_core.py::test_locality[4-2048-32-sum]
+test_core.py::test_locality[4-2048-64-max]
+test_core.py::test_locality[4-2048-64-min]
+test_core.py::test_locality[4-2048-64-sum]
+test_core.py::test_locality[4-512-128-max]
+test_core.py::test_locality[4-512-128-min]
+test_core.py::test_locality[4-512-128-sum]
+test_core.py::test_locality[4-512-32-max]
+test_core.py::test_locality[4-512-32-min]
+test_core.py::test_locality[4-512-32-sum]
+test_core.py::test_locality[4-512-64-max]
+test_core.py::test_locality[4-512-64-min]
+test_core.py::test_locality[4-512-64-sum]
+test_core.py::test_math_op[float32-cos-3.0]
 test_core.py::test_math_op[float32-exp-3.0]
 test_core.py::test_math_op[float32-log-3.0]
-test_core.py::test_math_op[float32-cos-3.0]
 test_core.py::test_math_op[float32-sin-3.0]
+test_core.py::test_math_op[float64-cos-3.0]
 test_core.py::test_math_op[float64-exp-3.0]
 test_core.py::test_math_op[float64-log-3.0]
-test_core.py::test_math_op[float64-cos-3.0]
 test_core.py::test_math_op[float64-sin-3.0]
+test_core.py::test_ptx_cast[float16]
+test_core.py::test_ptx_cast[int16]
+test_core.py::test_reduce[1-argmax-float32-shape129-0]
+test_core.py::test_reduce[1-argmax-float32-shape130-1]
+test_core.py::test_reduce[1-argmax-float32-shape132-0]
+test_core.py::test_reduce[1-argmax-float32-shape133-1]
+test_core.py::test_reduce[1-argmax-float32-shape135-0]
+test_core.py::test_reduce[1-argmax-float32-shape136-1]
+test_core.py::test_reduce[1-argmin-float32-shape120-0]
+test_core.py::test_reduce[1-argmin-float32-shape121-1]
+test_core.py::test_reduce[1-argmin-float32-shape123-0]
+test_core.py::test_reduce[1-argmin-float32-shape124-1]
+test_core.py::test_reduce[1-argmin-float32-shape126-0]
+test_core.py::test_reduce[1-argmin-float32-shape127-1]
+test_core.py::test_reduce[1-max-float32-shape102-0]
 test_core.py::test_reduce[1-max-float32-shape103-1]
-test_core.py::test_scan2d[get_first_element-float32-shape71-0-4]
+test_core.py::test_reduce[1-max-float32-shape105-0]
+test_core.py::test_reduce[1-max-float32-shape106-1]
+test_core.py::test_reduce[1-max-float32-shape108-0]
+test_core.py::test_reduce[1-max-float32-shape109-1]
+test_core.py::test_reduce[1-min-float32-shape100-1]
+test_core.py::test_reduce[1-min-float32-shape93-0]
+test_core.py::test_reduce[1-min-float32-shape94-1]
+test_core.py::test_reduce[1-min-float32-shape96-0]
+test_core.py::test_reduce[1-min-float32-shape97-1]
+test_core.py::test_reduce[1-min-float32-shape99-0]
+test_core.py::test_reduce[1-sum-float32-shape111-0]
+test_core.py::test_reduce[1-sum-float32-shape112-1]
+test_core.py::test_reduce[1-sum-float32-shape114-0]
+test_core.py::test_reduce[1-sum-float32-shape115-1]
+test_core.py::test_reduce[1-sum-float32-shape117-0]
+test_core.py::test_reduce[1-sum-float32-shape118-1]
+test_core.py::test_scan2d[get_first_element-float32-shape128-1-16]
+test_core.py::test_scan2d[get_first_element-float32-shape131-1-16]
+test_core.py::test_scan2d[get_first_element-float32-shape134-1-16]
 test_core.py::test_scan2d[get_first_element-float32-shape137-1-16]
-test_core.py::test_scan2d[get_first_element-float32-shape53-1-4]
-test_core.py::test_scan2d[get_first_element-int32-shape104-1-16]
-test_core.py::test_scan2d[get_first_element-int32-shape5-1-4]
-test_core.py::test_reduce[1-argmin-float32-shape123-0]
-test_core.py::test_scan2d[get_first_element-float32-shape74-0-4]
-test_core.py::test_scan2d[get_first_element-int32-shape8-1-4]
-test_core.py::test_scan2d[get_first_element-int32-shape107-0-16]
-test_core.py::test_scan2d[get_first_element-int32-shape11-1-4]
-test_core.py::test_scan2d[get_first_element-float32-shape77-0-4]
-test_core.py::test_scan2d[get_first_element-int32-shape110-0-16]
 test_core.py::test_scan2d[get_first_element-float32-shape143-1-16]
-test_core.py::test_scan2d[get_first_element-int32-shape113-0-16]
 test_core.py::test_scan2d[get_first_element-float32-shape146-1-16]
-test_core.py::test_scan2d[get_first_element-int32-shape116-0-16]
 test_core.py::test_scan2d[get_first_element-float32-shape149-0-16]
-test_core.py::test_scan2d[get_first_element-int32-shape119-0-16]
 test_core.py::test_scan2d[get_first_element-float32-shape152-0-16]
-test_core.py::test_scan2d[get_first_element-int32-shape17-1-4]
 test_core.py::test_scan2d[get_first_element-float32-shape155-0-16]
-test_core.py::test_scan2d[get_first_element-int32-shape20-1-4]
 test_core.py::test_scan2d[get_first_element-float32-shape158-0-16]
 test_core.py::test_scan2d[get_first_element-float32-shape161-0-16]
+test_core.py::test_scan2d[get_first_element-float32-shape164-0-16]
+test_core.py::test_scan2d[get_first_element-float32-shape44-1-4]
+test_core.py::test_scan2d[get_first_element-float32-shape47-1-4]
+test_core.py::test_scan2d[get_first_element-float32-shape50-1-4]
+test_core.py::test_scan2d[get_first_element-float32-shape53-1-4]
 test_core.py::test_scan2d[get_first_element-float32-shape59-1-4]
 test_core.py::test_scan2d[get_first_element-float32-shape62-1-4]
 test_core.py::test_scan2d[get_first_element-float32-shape65-0-4]
 test_core.py::test_scan2d[get_first_element-float32-shape68-0-4]
+test_core.py::test_scan2d[get_first_element-float32-shape71-0-4]
+test_core.py::test_scan2d[get_first_element-float32-shape74-0-4]
+test_core.py::test_scan2d[get_first_element-float32-shape77-0-4]
 test_core.py::test_scan2d[get_first_element-float32-shape80-0-4]
+test_core.py::test_scan2d[get_first_element-int32-shape101-1-16]
+test_core.py::test_scan2d[get_first_element-int32-shape104-1-16]
+test_core.py::test_scan2d[get_first_element-int32-shape107-0-16]
+test_core.py::test_scan2d[get_first_element-int32-shape110-0-16]
+test_core.py::test_scan2d[get_first_element-int32-shape11-1-4]
+test_core.py::test_scan2d[get_first_element-int32-shape113-0-16]
+test_core.py::test_scan2d[get_first_element-int32-shape116-0-16]
+test_core.py::test_scan2d[get_first_element-int32-shape119-0-16]
 test_core.py::test_scan2d[get_first_element-int32-shape122-0-16]
-test_core.py::test_scan2d[get_first_element-float32-shape164-0-16]
-test_core.py::test_scan2d[get_first_element-float32-shape128-1-16]
+test_core.py::test_scan2d[get_first_element-int32-shape17-1-4]
+test_core.py::test_scan2d[get_first_element-int32-shape20-1-4]
+test_core.py::test_scan2d[get_first_element-int32-shape2-1-4]
 test_core.py::test_scan2d[get_first_element-int32-shape23-0-4]
-test_core.py::test_scan2d[get_first_element-float32-shape131-1-16]
 test_core.py::test_scan2d[get_first_element-int32-shape26-0-4]
-test_core.py::test_scan2d[get_first_element-float32-shape134-1-16]
 test_core.py::test_scan2d[get_first_element-int32-shape29-0-4]
-test_core.py::test_scan2d[get_first_element-int32-shape86-1-16]
 test_core.py::test_scan2d[get_first_element-int32-shape32-0-4]
-test_core.py::test_scan2d[get_first_element-int32-shape89-1-16]
 test_core.py::test_scan2d[get_first_element-int32-shape35-0-4]
-test_core.py::test_scan2d[get_first_element-int32-shape92-1-16]
 test_core.py::test_scan2d[get_first_element-int32-shape38-0-4]
+test_core.py::test_scan2d[get_first_element-int32-shape5-1-4]
+test_core.py::test_scan2d[get_first_element-int32-shape8-1-4]
+test_core.py::test_scan2d[get_first_element-int32-shape86-1-16]
+test_core.py::test_scan2d[get_first_element-int32-shape89-1-16]
+test_core.py::test_scan2d[get_first_element-int32-shape92-1-16]
 test_core.py::test_scan2d[get_first_element-int32-shape95-1-16]
-test_core.py::test_scan2d[get_first_element-float32-shape44-1-4]
-test_core.py::test_scan2d[get_first_element-float32-shape47-1-4]
-test_core.py::test_scan2d[get_first_element-int32-shape101-1-16]
-test_core.py::test_scan2d[get_first_element-float32-shape50-1-4]
-test_core.py::test_reduce[1-min-float32-shape93-0]
-test_core.py::test_enable_fp_fusion[False]
-test_core.py::test_enable_fp_fusion[True]
-test_core.py::test_reduce[1-max-float32-shape105-0]
-test_core.py::test_reduce[1-max-float32-shape106-1]
-test_core.py::test_reduce[1-max-float32-shape109-1]
-test_core.py::test_reduce[1-sum-float32-shape112-1]
-test_core.py::test_reduce[1-argmax-float32-shape136-1]
-test_core.py::test_reduce[1-sum-float32-shape115-1]
-test_core.py::test_reduce[1-argmin-float32-shape121-1]
-test_core.py::test_reduce[1-argmin-float32-shape124-1]
-test_core.py::test_reduce[1-max-float32-shape108-0]
-test_core.py::test_reduce[1-argmax-float32-shape129-0]
-test_core.py::test_reduce[1-argmax-float32-shape132-0]
-test_core.py::test_reduce[1-argmin-float32-shape120-0]
-test_core.py::test_reduce[1-argmax-float32-shape133-1]
-test_core.py::test_scan2d[get_first_element-int32-shape2-1-4]
-test_core.py::test_reduce[1-sum-float32-shape118-1]
-test_core.py::test_reduce[1-max-float32-shape102-0]
-test_core.py::test_reduce[1-min-float32-shape96-0]
-test_core.py::test_reduce[1-sum-float32-shape114-0]
-test_core.py::test_reduce[1-sum-float32-shape111-0]
-test_core.py::test_reduce[1-sum-float32-shape117-0]
-test_core.py::test_reduce[1-min-float32-shape94-1]
-test_core.py::test_reduce[1-min-float32-shape100-1]
-test_core.py::test_reduce[1-argmax-float32-shape130-1]
-test_core.py::test_reduce[1-min-float32-shape99-0]
-test_core.py::test_reduce[1-argmin-float32-shape126-0]
-test_core.py::test_reduce[1-min-float32-shape97-1]
-test_core.py::test_reduce[1-argmax-float32-shape135-0]
-test_core.py::test_reduce[1-argmin-float32-shape127-1]
-test_core.py::test_ptx_cast[int16]
-test_core.py::test_ptx_cast[float16]
-test_core.py::test_constexpr_propagation
+test_core.py::test_slice
+test_core.py::test_tensor_atomic_cas[1-acq_rel]
+test_core.py::test_tensor_atomic_cas[1-acquire]
+test_core.py::test_tensor_atomic_cas[1-None]
+test_core.py::test_tensor_atomic_cas[1-relaxed]
+test_core.py::test_tensor_atomic_cas[1-release]
+test_core.py::test_unsigned_name_mangling
+test_line_info.py::test_line_info[autotune]
+test_subprocess.py::test_print[device_print_large-int32]
+test_subprocess.py::test_print[device_print_multiple_args-int32]
+test_subprocess.py::test_print[print_multiple_args-int32]
+test_subprocess.py::test_print[print_no_arg-int32]
diff --git a/.github/workflows/triton_xpu_backend_nightly.yml b/.github/workflows/triton_xpu_backend_nightly.yml
@@ -55,7 +55,7 @@ jobs:
         run: |
           source ${HOME}/miniconda3/bin/activate triton-nightly-test
           cd ${HOME}/triton-nightly
-          rm -rf triton
+          rm -rf triton_src
           git clone https://github.com/openai/triton triton_src
           cd triton_src
           triton_commit=`git rev-parse HEAD`
diff --git a/lib/Conversion/TritonGPUToSPIRV/ElementwiseOpToSPIRV.cpp b/lib/Conversion/TritonGPUToSPIRV/ElementwiseOpToSPIRV.cpp
@@ -504,15 +504,15 @@ static bool isBoolScalarOrVector(Type type) {
 }
 
 struct CmpIOpSPIRVConversion
-    : public ElementwiseOpSPIRVConversionBase<triton::gpu::CmpIOp,
+    : public ElementwiseOpSPIRVConversionBase<mlir::arith::CmpIOp,
                                               CmpIOpSPIRVConversion> {
-  using Base = ElementwiseOpSPIRVConversionBase<triton::gpu::CmpIOp,
+  using Base = ElementwiseOpSPIRVConversionBase<mlir::arith::CmpIOp,
                                                 CmpIOpSPIRVConversion>;
   using Base::Base;
   using Adaptor = typename Base::OpAdaptor;
 
   // An interface to support variant DestOp builder.
-  Value createDestOp(triton::gpu::CmpIOp op, OpAdaptor adaptor,
+  Value createDestOp(mlir::arith::CmpIOp op, OpAdaptor adaptor,
                      ConversionPatternRewriter &rewriter, Type elemTy,
                      ValueRange operands, Location loc) const {
 
@@ -557,15 +557,15 @@ struct CmpIOpSPIRVConversion
 };
 
 struct CmpFOpSPIRVConversion
-    : public ElementwiseOpSPIRVConversionBase<triton::gpu::CmpFOp,
+    : public ElementwiseOpSPIRVConversionBase<mlir::arith::CmpFOp,
                                               CmpFOpSPIRVConversion> {
-  using Base = ElementwiseOpSPIRVConversionBase<triton::gpu::CmpFOp,
+  using Base = ElementwiseOpSPIRVConversionBase<mlir::arith::CmpFOp,
                                                 CmpFOpSPIRVConversion>;
   using Base::Base;
   using Adaptor = typename Base::OpAdaptor;
 
   // An interface to support variant DestOp builder.
-  Value createDestOp(triton::gpu::CmpFOp op, OpAdaptor adaptor,
+  Value createDestOp(mlir::arith::CmpFOp op, OpAdaptor adaptor,
                      ConversionPatternRewriter &rewriter, Type elemTy,
                      ValueRange operands, Location loc) const {
     switch (op.getPredicate()) {
@@ -991,7 +991,7 @@ void populateElementwiseOpToSPIRVPatterns(
 #define POPULATE_TERNARY_OP(SRC_OP, DST_OP)                                    \
   patterns.add<ElementwiseOpSPIRVConversion<SRC_OP, DST_OP>>(                  \
       typeConverter, context, benefit);
-  POPULATE_TERNARY_OP(triton::gpu::SelectOp, spirv::SelectOp)
+  POPULATE_TERNARY_OP(mlir::arith::SelectOp, spirv::SelectOp)
 #undef POPULATE_TERNARY_OP
 
 #define POPULATE_BINARY_OP(SRC_OP, DST_OP)                                     \
diff --git a/triton_hash.txt b/triton_hash.txt
@@ -1 +1 @@
-30186f401ec52d9addac79a60f418792875f7d11
+f168b148ecdd067205c6066bc3e6939fd67ab893

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-30186f401ec52d9addac79a60f418792875f7d11`
	`1`	`+f168b148ecdd067205c6066bc3e6939fd67ab893`