@@ -55,7 +55,7 @@ define half @round_f16(half %h) {
5555; AVX512F-NEXT: vmovd %eax, %xmm0
5656; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
5757; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
58- ; AVX512F-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
58+ ; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
5959; AVX512F-NEXT: vaddss %xmm1, %xmm0, %xmm0
6060; AVX512F-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
6161; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -67,7 +67,7 @@ define half @round_f16(half %h) {
6767; AVX512FP16: ## %bb.0: ## %entry
6868; AVX512FP16-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
6969; AVX512FP16-NEXT: vpbroadcastw {{.*#+}} xmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1]
70- ; AVX512FP16-NEXT: vpternlogq $248, %xmm1, %xmm0, %xmm2
70+ ; AVX512FP16-NEXT: vpternlogq {{.*#+}} xmm2 = xmm2 | (xmm0 & xmm1)
7171; AVX512FP16-NEXT: vaddsh %xmm2, %xmm0, %xmm0
7272; AVX512FP16-NEXT: vrndscalesh $11, %xmm0, %xmm0, %xmm0
7373; AVX512FP16-NEXT: retq
@@ -103,15 +103,15 @@ define float @round_f32(float %x) {
103103; AVX512F-LABEL: round_f32:
104104; AVX512F: # %bb.0:
105105; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
106- ; AVX512F-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
106+ ; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
107107; AVX512F-NEXT: vaddss %xmm1, %xmm0, %xmm0
108108; AVX512F-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
109109; AVX512F-NEXT: retq
110110;
111111; AVX512FP16-LABEL: round_f32:
112112; AVX512FP16: ## %bb.0:
113113; AVX512FP16-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
114- ; AVX512FP16-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
114+ ; AVX512FP16-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
115115; AVX512FP16-NEXT: vaddss %xmm1, %xmm0, %xmm0
116116; AVX512FP16-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
117117; AVX512FP16-NEXT: retq
@@ -147,15 +147,15 @@ define double @round_f64(double %x) {
147147; AVX512F-LABEL: round_f64:
148148; AVX512F: # %bb.0:
149149; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
150- ; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
150+ ; AVX512F-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
151151; AVX512F-NEXT: vaddsd %xmm1, %xmm0, %xmm0
152152; AVX512F-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
153153; AVX512F-NEXT: retq
154154;
155155; AVX512FP16-LABEL: round_f64:
156156; AVX512FP16: ## %bb.0:
157157; AVX512FP16-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
158- ; AVX512FP16-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
158+ ; AVX512FP16-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
159159; AVX512FP16-NEXT: vaddsd %xmm1, %xmm0, %xmm0
160160; AVX512FP16-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
161161; AVX512FP16-NEXT: retq
@@ -213,15 +213,15 @@ define <4 x float> @round_v4f32(<4 x float> %x) {
213213; AVX512F-LABEL: round_v4f32:
214214; AVX512F: # %bb.0:
215215; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
216- ; AVX512F-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
216+ ; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
217217; AVX512F-NEXT: vaddps %xmm1, %xmm0, %xmm0
218218; AVX512F-NEXT: vroundps $11, %xmm0, %xmm0
219219; AVX512F-NEXT: retq
220220;
221221; AVX512FP16-LABEL: round_v4f32:
222222; AVX512FP16: ## %bb.0:
223223; AVX512FP16-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
224- ; AVX512FP16-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
224+ ; AVX512FP16-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
225225; AVX512FP16-NEXT: vaddps %xmm1, %xmm0, %xmm0
226226; AVX512FP16-NEXT: vroundps $11, %xmm0, %xmm0
227227; AVX512FP16-NEXT: retq
@@ -267,15 +267,15 @@ define <2 x double> @round_v2f64(<2 x double> %x) {
267267; AVX512F-LABEL: round_v2f64:
268268; AVX512F: # %bb.0:
269269; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
270- ; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
270+ ; AVX512F-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
271271; AVX512F-NEXT: vaddpd %xmm1, %xmm0, %xmm0
272272; AVX512F-NEXT: vroundpd $11, %xmm0, %xmm0
273273; AVX512F-NEXT: retq
274274;
275275; AVX512FP16-LABEL: round_v2f64:
276276; AVX512FP16: ## %bb.0:
277277; AVX512FP16-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
278- ; AVX512FP16-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
278+ ; AVX512FP16-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
279279; AVX512FP16-NEXT: vaddpd %xmm1, %xmm0, %xmm0
280280; AVX512FP16-NEXT: vroundpd $11, %xmm0, %xmm0
281281; AVX512FP16-NEXT: retq
@@ -361,15 +361,15 @@ define <8 x float> @round_v8f32(<8 x float> %x) {
361361; AVX512F-LABEL: round_v8f32:
362362; AVX512F: # %bb.0:
363363; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
364- ; AVX512F-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm1
364+ ; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
365365; AVX512F-NEXT: vaddps %ymm1, %ymm0, %ymm0
366366; AVX512F-NEXT: vroundps $11, %ymm0, %ymm0
367367; AVX512F-NEXT: retq
368368;
369369; AVX512FP16-LABEL: round_v8f32:
370370; AVX512FP16: ## %bb.0:
371371; AVX512FP16-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
372- ; AVX512FP16-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm1
372+ ; AVX512FP16-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
373373; AVX512FP16-NEXT: vaddps %ymm1, %ymm0, %ymm0
374374; AVX512FP16-NEXT: vroundps $11, %ymm0, %ymm0
375375; AVX512FP16-NEXT: retq
@@ -431,15 +431,15 @@ define <4 x double> @round_v4f64(<4 x double> %x) {
431431; AVX512F-LABEL: round_v4f64:
432432; AVX512F: # %bb.0:
433433; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
434- ; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm1
434+ ; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
435435; AVX512F-NEXT: vaddpd %ymm1, %ymm0, %ymm0
436436; AVX512F-NEXT: vroundpd $11, %ymm0, %ymm0
437437; AVX512F-NEXT: retq
438438;
439439; AVX512FP16-LABEL: round_v4f64:
440440; AVX512FP16: ## %bb.0:
441441; AVX512FP16-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
442- ; AVX512FP16-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm1
442+ ; AVX512FP16-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
443443; AVX512FP16-NEXT: vaddpd %ymm1, %ymm0, %ymm0
444444; AVX512FP16-NEXT: vroundpd $11, %ymm0, %ymm0
445445; AVX512FP16-NEXT: retq
@@ -587,15 +587,15 @@ define <16 x float> @round_v16f32(<16 x float> %x) {
587587; AVX512F-LABEL: round_v16f32:
588588; AVX512F: # %bb.0:
589589; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
590- ; AVX512F-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm1
590+ ; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
591591; AVX512F-NEXT: vaddps %zmm1, %zmm0, %zmm0
592592; AVX512F-NEXT: vrndscaleps $11, %zmm0, %zmm0
593593; AVX512F-NEXT: retq
594594;
595595; AVX512FP16-LABEL: round_v16f32:
596596; AVX512FP16: ## %bb.0:
597597; AVX512FP16-NEXT: vpbroadcastd {{.*#+}} zmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
598- ; AVX512FP16-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm1
598+ ; AVX512FP16-NEXT: vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
599599; AVX512FP16-NEXT: vaddps %zmm1, %zmm0, %zmm0
600600; AVX512FP16-NEXT: vrndscaleps $11, %zmm0, %zmm0
601601; AVX512FP16-NEXT: retq
@@ -695,15 +695,15 @@ define <8 x double> @round_v8f64(<8 x double> %x) {
695695; AVX512F-LABEL: round_v8f64:
696696; AVX512F: # %bb.0:
697697; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
698- ; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
698+ ; AVX512F-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
699699; AVX512F-NEXT: vaddpd %zmm1, %zmm0, %zmm0
700700; AVX512F-NEXT: vrndscalepd $11, %zmm0, %zmm0
701701; AVX512F-NEXT: retq
702702;
703703; AVX512FP16-LABEL: round_v8f64:
704704; AVX512FP16: ## %bb.0:
705705; AVX512FP16-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
706- ; AVX512FP16-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
706+ ; AVX512FP16-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
707707; AVX512FP16-NEXT: vaddpd %zmm1, %zmm0, %zmm0
708708; AVX512FP16-NEXT: vrndscalepd $11, %zmm0, %zmm0
709709; AVX512FP16-NEXT: retq
0 commit comments