1111// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
1212// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
1313// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
14- // CHECK-NEXT: [[A_LOAD:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
15- // CHECK-NEXT: [[B_LOAD:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
16- // CHECK-NEXT: [[LowA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 0
17- // CHECK-NEXT: [[HighA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 1
18- // CHECK-NEXT: [[LowB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 0
19- // CHECK-NEXT: [[HighB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 1
20- // CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
21- // CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
22- // CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
23- // CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
24- // CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
25- // CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
26- // CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[LowSum]], i64 0
27- // CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
14+ // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
15+ // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
16+ // CHECK-NEXT: [[LOWA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
17+ // CHECK-NEXT: [[HIGHA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
18+ // CHECK-NEXT: [[LOWB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
19+ // CHECK-NEXT: [[HIGHB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
20+ // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LOWA]], i32 [[LOWB]])
21+ // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
22+ // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
23+ // CHECK-NEXT: [[CARRYZEXT:%.*]] = zext i1 [[TMP3]] to i32
24+ // CHECK-NEXT: [[HIGHSUM:%.*]] = add i32 [[HIGHA]], [[HIGHB]]
25+ // CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add i32 [[HIGHSUM]], [[CARRYZEXT]]
26+ // CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0
27+ // CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HIGHSUMPLUSCARRY]], i64 1
2828// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]]
2929//
3030uint2 test_AddUint64_uint2 (uint2 a, uint2 b) {
@@ -38,32 +38,19 @@ uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
3838// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
3939// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
4040// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
41- // CHECK-NEXT: [[A_LOAD:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
42- // CHECK-NEXT: [[B_LOAD:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
43- // CHECK-NEXT: [[LowA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 0
44- // CHECK-NEXT: [[HighA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 1
45- // CHECK-NEXT: [[LowB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 0
46- // CHECK-NEXT: [[HighB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 1
47- // CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
48- // CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
49- // CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
50- // CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
51- // CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
52- // CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
53- // CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[LowSum]], i64 0
54- // CHECK-NEXT: [[HLSL_ADDUINT64_UPTO1:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
55- // CHECK-NEXT: [[LowA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 2
56- // CHECK-NEXT: [[HighA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 3
57- // CHECK-NEXT: [[LowB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 2
58- // CHECK-NEXT: [[HighB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 3
59- // CHECK-NEXT: [[UAddc1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA1]], i32 [[LowB1]])
60- // CHECK-NEXT: [[Carry1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 1
61- // CHECK-NEXT: [[LowSum1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 0
62- // CHECK-NEXT: [[CarryZExt1:%.*]] = zext i1 [[Carry1]] to i32
63- // CHECK-NEXT: [[HighSum1:%.*]] = add i32 [[HighA1]], [[HighB1]]
64- // CHECK-NEXT: [[HighSumPlusCarry1:%.*]] = add i32 [[HighSum1]], [[CarryZExt1]]
65- // CHECK-NEXT: [[HLSL_ADDUINT64_UPTO2:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO1]], i32 [[LowSum1]], i64 2
66- // CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO2]], i32 [[HighSumPlusCarry1]], i64 3
41+ // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
42+ // CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
43+ // CHECK-NEXT: [[LOWA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
44+ // CHECK-NEXT: [[HIGHA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
45+ // CHECK-NEXT: [[LOWB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
46+ // CHECK-NEXT: [[HIGHB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
47+ // CHECK-NEXT: [[TMP2:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[LOWA]], <2 x i32> [[LOWB]])
48+ // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 1
49+ // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 0
50+ // CHECK-NEXT: [[CARRYZEXT:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
51+ // CHECK-NEXT: [[HIGHSUM:%.*]] = add <2 x i32> [[HIGHA]], [[HIGHB]]
52+ // CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add <2 x i32> [[HIGHSUM]], [[CARRYZEXT]]
53+ // CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[HIGHSUMPLUSCARRY]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
6754// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]]
6855//
6956uint4 test_AddUint64_uint4 (uint4 a, uint4 b) {
0 commit comments