|
| 1 | +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ |
| 3 | +// RUN: -emit-llvm -disable-llvm-passes -o - | \ |
| 4 | +// RUN: FileCheck %s --check-prefixes=CHECK |
| 5 | + |
| 6 | + |
| 7 | +// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_( |
| 8 | +// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { |
| 9 | +// CHECK-NEXT: [[ENTRY:.*:]] |
| 10 | +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8 |
| 11 | +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8 |
| 12 | +// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8 |
| 13 | +// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8 |
| 14 | +// CHECK-NEXT: [[A_LOAD:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8 |
| 15 | +// CHECK-NEXT: [[B_LOAD:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8 |
| 16 | +// CHECK-NEXT: [[LowA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 0 |
| 17 | +// CHECK-NEXT: [[HighA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 1 |
| 18 | +// CHECK-NEXT: [[LowB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 0 |
| 19 | +// CHECK-NEXT: [[HighB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 1 |
| 20 | +// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]]) |
| 21 | +// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1 |
| 22 | +// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0 |
| 23 | +// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32 |
| 24 | +// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]] |
| 25 | +// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]] |
| 26 | +// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[LowSum]], i64 0 |
| 27 | +// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1 |
| 28 | +// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]] |
| 29 | +// |
| 30 | +uint2 test_AddUint64_uint2(uint2 a, uint2 b) { // Codegen test: AddUint64 on uint2 operands, each used as a {low, high} pair of 32-bit words. |
| 31 | + return AddUint64(a, b); // Expected IR: llvm.uadd.with.overflow.i32 on element 0; the carry bit is zero-extended and added into the element 1 sum. |
| 32 | +} |
| 33 | + |
| 34 | +// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_( |
| 35 | +// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { |
| 36 | +// CHECK-NEXT: [[ENTRY:.*:]] |
| 37 | +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 |
| 38 | +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 |
| 39 | +// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16 |
| 40 | +// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16 |
| 41 | +// CHECK-NEXT: [[A_LOAD:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16 |
| 42 | +// CHECK-NEXT: [[B_LOAD:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16 |
| 43 | +// CHECK-NEXT: [[LowA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 0 |
| 44 | +// CHECK-NEXT: [[HighA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 1 |
| 45 | +// CHECK-NEXT: [[LowB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 0 |
| 46 | +// CHECK-NEXT: [[HighB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 1 |
| 47 | +// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]]) |
| 48 | +// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1 |
| 49 | +// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0 |
| 50 | +// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32 |
| 51 | +// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]] |
| 52 | +// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]] |
| 53 | +// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[LowSum]], i64 0 |
| 54 | +// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO1:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1 |
| 55 | +// CHECK-NEXT: [[LowA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 2 |
| 56 | +// CHECK-NEXT: [[HighA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 3 |
| 57 | +// CHECK-NEXT: [[LowB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 2 |
| 58 | +// CHECK-NEXT: [[HighB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 3 |
| 59 | +// CHECK-NEXT: [[UAddc1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA1]], i32 [[LowB1]]) |
| 60 | +// CHECK-NEXT: [[Carry1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 1 |
| 61 | +// CHECK-NEXT: [[LowSum1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 0 |
| 62 | +// CHECK-NEXT: [[CarryZExt1:%.*]] = zext i1 [[Carry1]] to i32 |
| 63 | +// CHECK-NEXT: [[HighSum1:%.*]] = add i32 [[HighA1]], [[HighB1]] |
| 64 | +// CHECK-NEXT: [[HighSumPlusCarry1:%.*]] = add i32 [[HighSum1]], [[CarryZExt1]] |
| 65 | +// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO2:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO1]], i32 [[LowSum1]], i64 2 |
| 66 | +// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO2]], i32 [[HighSumPlusCarry1]], i64 3 |
| 67 | +// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]] |
| 68 | +// |
| 69 | +uint4 test_AddUint64_uint4(uint4 a, uint4 b) { // Codegen test: AddUint64 on uint4 operands, handled as two {low, high} pairs: elements (0,1) and (2,3). |
| 70 | + return AddUint64(a, b); // Expected IR: one llvm.uadd.with.overflow.i32 per pair; each pair's carry goes only into that pair's high-word sum. |
| 71 | +} |
0 commit comments