@@ -26,6 +26,28 @@ entry:
2626 ret i32 %and
2727}
2828
; i32 table-mask test: returns %x ANDed with the i32 loaded from
; @fill_table32 at index %y. %x is the first operand of the 'and' and the
; loaded table value is the second.
; NOTE(review): the "_commute" suffix presumably marks this as the
; swapped-operand twin of the existing f32_bzhi test, whose body is not
; visible in this hunk - confirm against the full file.
; The expected assembly recorded below, for both the 64-bit (X64 prefix) and
; 32-bit (X86 prefix) checks, is a plain AND with an indexed memory operand on
; the table. The table contents are defined outside this hunk.
29+ define i32 @f32_bzhi_commute (i32 %x , i32 %y ) local_unnamed_addr {
30+ ; X64-LABEL: f32_bzhi_commute:
31+ ; X64: # %bb.0: # %entry
32+ ; X64-NEXT: movl %edi, %eax
33+ ; X64-NEXT: movslq %esi, %rcx
34+ ; X64-NEXT: andl fill_table32(,%rcx,4), %eax
35+ ; X64-NEXT: retq
36+ ;
37+ ; X86-LABEL: f32_bzhi_commute:
38+ ; X86: # %bb.0: # %entry
39+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
40+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
41+ ; X86-NEXT: andl fill_table32(,%ecx,4), %eax
42+ ; X86-NEXT: retl
43+ entry:
44+ %idxprom = sext i32 %y to i64 ; sign-extend the i32 index to the i64 used by the GEP
45+ %arrayidx = getelementptr inbounds [32 x i32 ], ptr @fill_table32 , i64 0 , i64 %idxprom ; address of entry %idxprom, table typed as [32 x i32]
46+ %0 = load i32 , ptr %arrayidx , align 4 ; table entry used as the mask operand
47+ %and = and i32 %x , %0 ; loaded value is the right-hand operand here
48+ ret i32 %and
49+ }
50+
2951define i32 @f32_bzhi_partial (i32 %x , i32 %y ) local_unnamed_addr {
3052; X64-LABEL: f32_bzhi_partial:
3153; X64: # %bb.0: # %entry
@@ -45,6 +67,28 @@ entry:
4567 ret i32 %and
4668}
4769
; i32 table-mask test against @fill_table32_partial: returns %x ANDed with
; the i32 loaded from that table at index %y. %x is the first operand of the
; 'and' and the loaded table value is the second.
; The GEP types the table as [17 x i32].
; NOTE(review): "partial" presumably means the table covers only part of the
; possible index range, and "_commute" presumably marks the swapped-operand
; twin of f32_bzhi_partial - confirm against the global's definition and the
; full file, neither of which is visible in this hunk.
; The expected assembly recorded below, for both check prefixes, is a plain
; AND with an indexed memory operand on the table.
70+ define i32 @f32_bzhi_partial_commute (i32 %x , i32 %y ) local_unnamed_addr {
71+ ; X64-LABEL: f32_bzhi_partial_commute:
72+ ; X64: # %bb.0: # %entry
73+ ; X64-NEXT: movl %edi, %eax
74+ ; X64-NEXT: movslq %esi, %rcx
75+ ; X64-NEXT: andl fill_table32_partial(,%rcx,4), %eax
76+ ; X64-NEXT: retq
77+ ;
78+ ; X86-LABEL: f32_bzhi_partial_commute:
79+ ; X86: # %bb.0: # %entry
80+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
81+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
82+ ; X86-NEXT: andl fill_table32_partial(,%ecx,4), %eax
83+ ; X86-NEXT: retl
84+ entry:
85+ %idxprom = sext i32 %y to i64 ; sign-extend the i32 index to the i64 used by the GEP
86+ %arrayidx = getelementptr inbounds [17 x i32 ], ptr @fill_table32_partial , i64 0 , i64 %idxprom ; address of entry %idxprom, table typed as [17 x i32]
87+ %0 = load i32 , ptr %arrayidx , align 4 ; table entry used as the mask operand
88+ %and = and i32 %x , %0 ; loaded value is the right-hand operand here
89+ ret i32 %and
90+ }
91+
4892define i64 @f64_bzhi (i64 %x , i64 %y ) local_unnamed_addr {
4993; X64-LABEL: f64_bzhi:
5094; X64: # %bb.0: # %entry
@@ -66,6 +110,28 @@ entry:
66110 ret i64 %and
67111}
68112
; i64 table-mask test: returns %x ANDed with the i64 loaded from
; @fill_table64 at index %y. %x is the first operand of the 'and' and the
; loaded table value is the second. %y is already i64, so it indexes the GEP
; directly with no extension.
; NOTE(review): the "_commute" suffix presumably marks this as the
; swapped-operand twin of the existing f64_bzhi test, whose body is not
; visible in this hunk - confirm against the full file.
; Expected assembly recorded below: the 64-bit (X64 prefix) checks use a
; single 64-bit AND with an indexed memory operand; the 32-bit (X86 prefix)
; checks perform the operation as two 32-bit ANDs on the table entry at
; offsets +0 and +4, into %eax and %edx.
113+ define i64 @f64_bzhi_commute (i64 %x , i64 %y ) local_unnamed_addr {
114+ ; X64-LABEL: f64_bzhi_commute:
115+ ; X64: # %bb.0: # %entry
116+ ; X64-NEXT: movq %rdi, %rax
117+ ; X64-NEXT: andq fill_table64(,%rsi,8), %rax
118+ ; X64-NEXT: retq
119+ ;
120+ ; X86-LABEL: f64_bzhi_commute:
121+ ; X86: # %bb.0: # %entry
122+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
123+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
124+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
125+ ; X86-NEXT: andl fill_table64(,%ecx,8), %eax
126+ ; X86-NEXT: andl fill_table64+4(,%ecx,8), %edx
127+ ; X86-NEXT: retl
128+ entry:
129+ %arrayidx = getelementptr inbounds [64 x i64 ], ptr @fill_table64 , i64 0 , i64 %y ; address of entry %y, table typed as [64 x i64]
130+ %0 = load i64 , ptr %arrayidx , align 8 ; table entry used as the mask operand
131+ %and = and i64 %x , %0 ; loaded value is the right-hand operand here
132+ ret i64 %and
133+ }
134+
69135define i64 @f64_bzhi_partial (i64 %x , i64 %y ) local_unnamed_addr {
70136; X64-LABEL: f64_bzhi_partial:
71137; X64: # %bb.0: # %entry
@@ -87,3 +153,24 @@ entry:
87153 ret i64 %and
88154}
89155
; i64 table-mask test against @fill_table64_partial: returns %x ANDed with
; the i64 loaded from that table at index %y. %x is the first operand of the
; 'and' and the loaded table value is the second. %y is already i64, so it
; indexes the GEP directly with no extension.
; The GEP types the table as [51 x i64].
; NOTE(review): "partial" presumably means the table covers only part of the
; possible index range, and "_commute" presumably marks the swapped-operand
; twin of f64_bzhi_partial - confirm against the global's definition and the
; full file, neither of which is visible in this hunk.
; Expected assembly recorded below: the 64-bit (X64 prefix) checks use a
; single 64-bit AND with an indexed memory operand; the 32-bit (X86 prefix)
; checks perform the operation as two 32-bit ANDs on the table entry at
; offsets +0 and +4, into %eax and %edx.
156+ define i64 @f64_bzhi_partial_commute (i64 %x , i64 %y ) local_unnamed_addr {
157+ ; X64-LABEL: f64_bzhi_partial_commute:
158+ ; X64: # %bb.0: # %entry
159+ ; X64-NEXT: movq %rdi, %rax
160+ ; X64-NEXT: andq fill_table64_partial(,%rsi,8), %rax
161+ ; X64-NEXT: retq
162+ ;
163+ ; X86-LABEL: f64_bzhi_partial_commute:
164+ ; X86: # %bb.0: # %entry
165+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
166+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
167+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
168+ ; X86-NEXT: andl fill_table64_partial(,%ecx,8), %eax
169+ ; X86-NEXT: andl fill_table64_partial+4(,%ecx,8), %edx
170+ ; X86-NEXT: retl
171+ entry:
172+ %arrayidx = getelementptr inbounds [51 x i64 ], ptr @fill_table64_partial , i64 0 , i64 %y ; address of entry %y, table typed as [51 x i64]
173+ %0 = load i64 , ptr %arrayidx , align 8 ; table entry used as the mask operand
174+ %and = and i64 %x , %0 ; loaded value is the right-hand operand here
175+ ret i64 %and
176+ }
0 commit comments