Skip to content

Commit 91fff70

Browse files
authored
[clang][X86] Replace vprot/vprol/vpror/vshld/vshrd intrinsics with __builtin_elementwise_fshl/fshr (llvm#153229)
Replaces the XOP/AVX512 per-element rotation/funnel shift builtins with the generic __builtin_elementwise_fshl/fshr. We still have uniform immediate variants to handle next. Part of llvm#153152.
1 parent a9a0978 commit 91fff70

File tree

7 files changed

+61
-206
lines changed

7 files changed

+61
-206
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 0 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -945,10 +945,6 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
945945
def vphsubwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">;
946946
def vphsubdq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">;
947947
def vpperm : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
948-
def vprotb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
949-
def vprotw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
950-
def vprotd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
951-
def vprotq : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
952948
def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
953949
def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
954950
def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
@@ -1882,78 +1878,6 @@ let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVect
18821878
def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">;
18831879
}
18841880

1885-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1886-
def vpshldvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
1887-
}
1888-
1889-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1890-
def vpshldvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
1891-
}
1892-
1893-
let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1894-
def vpshldvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
1895-
}
1896-
1897-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1898-
def vpshldvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
1899-
}
1900-
1901-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1902-
def vpshldvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
1903-
}
1904-
1905-
let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1906-
def vpshldvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
1907-
}
1908-
1909-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1910-
def vpshldvw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
1911-
}
1912-
1913-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1914-
def vpshldvw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">;
1915-
}
1916-
1917-
let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1918-
def vpshldvw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
1919-
}
1920-
1921-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1922-
def vpshrdvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
1923-
}
1924-
1925-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1926-
def vpshrdvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
1927-
}
1928-
1929-
let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1930-
def vpshrdvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
1931-
}
1932-
1933-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1934-
def vpshrdvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
1935-
}
1936-
1937-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1938-
def vpshrdvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
1939-
}
1940-
1941-
let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1942-
def vpshrdvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
1943-
}
1944-
1945-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1946-
def vpshrdvw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
1947-
}
1948-
1949-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1950-
def vpshrdvw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">;
1951-
}
1952-
1953-
let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1954-
def vpshrdvw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
1955-
}
1956-
19571881
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
19581882
def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
19591883
}
@@ -2165,28 +2089,10 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
21652089
}
21662090

21672091
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
2168-
def prolvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
2169-
def prolvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
21702092
def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
21712093
def prorq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
21722094
}
21732095

2174-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
2175-
def prolvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
2176-
}
2177-
2178-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
2179-
def prolvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
2180-
}
2181-
2182-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
2183-
def prolvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
2184-
}
2185-
2186-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
2187-
def prolvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
2188-
}
2189-
21902096
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
21912097
def prord128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
21922098
}
@@ -2203,27 +2109,6 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
22032109
def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
22042110
}
22052111

2206-
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
2207-
def prorvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
2208-
def prorvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
2209-
}
2210-
2211-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
2212-
def prorvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
2213-
}
2214-
2215-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
2216-
def prorvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
2217-
}
2218-
2219-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
2220-
def prorvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
2221-
}
2222-
2223-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
2224-
def prorvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
2225-
}
2226-
22272112
let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
22282113
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
22292114
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;

clang/lib/CodeGen/TargetBuiltins/X86.cpp

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1932,10 +1932,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
19321932
return SI;
19331933
}
19341934
// Rotate is a special case of funnel shift - 1st 2 args are the same.
1935-
case X86::BI__builtin_ia32_vprotb:
1936-
case X86::BI__builtin_ia32_vprotw:
1937-
case X86::BI__builtin_ia32_vprotd:
1938-
case X86::BI__builtin_ia32_vprotq:
19391935
case X86::BI__builtin_ia32_vprotbi:
19401936
case X86::BI__builtin_ia32_vprotwi:
19411937
case X86::BI__builtin_ia32_vprotdi:
@@ -1946,25 +1942,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
19461942
case X86::BI__builtin_ia32_prolq128:
19471943
case X86::BI__builtin_ia32_prolq256:
19481944
case X86::BI__builtin_ia32_prolq512:
1949-
case X86::BI__builtin_ia32_prolvd128:
1950-
case X86::BI__builtin_ia32_prolvd256:
1951-
case X86::BI__builtin_ia32_prolvd512:
1952-
case X86::BI__builtin_ia32_prolvq128:
1953-
case X86::BI__builtin_ia32_prolvq256:
1954-
case X86::BI__builtin_ia32_prolvq512:
19551945
return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
19561946
case X86::BI__builtin_ia32_prord128:
19571947
case X86::BI__builtin_ia32_prord256:
19581948
case X86::BI__builtin_ia32_prord512:
19591949
case X86::BI__builtin_ia32_prorq128:
19601950
case X86::BI__builtin_ia32_prorq256:
19611951
case X86::BI__builtin_ia32_prorq512:
1962-
case X86::BI__builtin_ia32_prorvd128:
1963-
case X86::BI__builtin_ia32_prorvd256:
1964-
case X86::BI__builtin_ia32_prorvd512:
1965-
case X86::BI__builtin_ia32_prorvq128:
1966-
case X86::BI__builtin_ia32_prorvq256:
1967-
case X86::BI__builtin_ia32_prorvq512:
19681952
return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
19691953
case X86::BI__builtin_ia32_selectb_128:
19701954
case X86::BI__builtin_ia32_selectb_256:
@@ -2357,29 +2341,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
23572341
// Ops 0 and 1 are swapped.
23582342
return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
23592343

2360-
case X86::BI__builtin_ia32_vpshldvd128:
2361-
case X86::BI__builtin_ia32_vpshldvd256:
2362-
case X86::BI__builtin_ia32_vpshldvd512:
2363-
case X86::BI__builtin_ia32_vpshldvq128:
2364-
case X86::BI__builtin_ia32_vpshldvq256:
2365-
case X86::BI__builtin_ia32_vpshldvq512:
2366-
case X86::BI__builtin_ia32_vpshldvw128:
2367-
case X86::BI__builtin_ia32_vpshldvw256:
2368-
case X86::BI__builtin_ia32_vpshldvw512:
2369-
return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
2370-
2371-
case X86::BI__builtin_ia32_vpshrdvd128:
2372-
case X86::BI__builtin_ia32_vpshrdvd256:
2373-
case X86::BI__builtin_ia32_vpshrdvd512:
2374-
case X86::BI__builtin_ia32_vpshrdvq128:
2375-
case X86::BI__builtin_ia32_vpshrdvq256:
2376-
case X86::BI__builtin_ia32_vpshrdvq512:
2377-
case X86::BI__builtin_ia32_vpshrdvw128:
2378-
case X86::BI__builtin_ia32_vpshrdvw256:
2379-
case X86::BI__builtin_ia32_vpshrdvw512:
2380-
// Ops 0 and 1 are swapped.
2381-
return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
2382-
23832344
// Reductions
23842345
case X86::BI__builtin_ia32_reduce_fadd_pd512:
23852346
case X86::BI__builtin_ia32_reduce_fadd_ps512:

clang/lib/Headers/avx512fintrin.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4926,7 +4926,7 @@ _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
49264926
static __inline__ __m512i __DEFAULT_FN_ATTRS512
49274927
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
49284928
{
4929-
return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4929+
return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
49304930
}
49314931

49324932
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -4948,7 +4948,7 @@ _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
49484948
static __inline__ __m512i __DEFAULT_FN_ATTRS512
49494949
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
49504950
{
4951-
return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4951+
return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
49524952
}
49534953

49544954
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -5038,7 +5038,7 @@ _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
50385038
static __inline__ __m512i __DEFAULT_FN_ATTRS512
50395039
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
50405040
{
5041-
return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
5041+
return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
50425042
}
50435043

50445044
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -5060,7 +5060,7 @@ _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
50605060
static __inline__ __m512i __DEFAULT_FN_ATTRS512
50615061
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
50625062
{
5063-
return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5063+
return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
50645064
}
50655065

50665066
static __inline__ __m512i __DEFAULT_FN_ATTRS512

clang/lib/Headers/avx512vbmi2intrin.h

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,8 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
215215
static __inline__ __m512i __DEFAULT_FN_ATTRS
216216
_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
217217
{
218-
return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B,
219-
(__v8di)__C);
218+
return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__B,
219+
(__v8du)__C);
220220
}
221221

222222
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -238,8 +238,8 @@ _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
238238
static __inline__ __m512i __DEFAULT_FN_ATTRS
239239
_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C)
240240
{
241-
return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B,
242-
(__v16si)__C);
241+
return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__B,
242+
(__v16su)__C);
243243
}
244244

245245
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -261,8 +261,8 @@ _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
261261
static __inline__ __m512i __DEFAULT_FN_ATTRS
262262
_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C)
263263
{
264-
return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B,
265-
(__v32hi)__C);
264+
return (__m512i)__builtin_elementwise_fshl((__v32hu)__A, (__v32hu)__B,
265+
(__v32hu)__C);
266266
}
267267

268268
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -284,8 +284,9 @@ _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
284284
static __inline__ __m512i __DEFAULT_FN_ATTRS
285285
_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C)
286286
{
287-
return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B,
288-
(__v8di)__C);
287+
// Ops __A and __B are swapped.
288+
return (__m512i)__builtin_elementwise_fshr((__v8du)__B, (__v8du)__A,
289+
(__v8du)__C);
289290
}
290291

291292
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -307,8 +308,9 @@ _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
307308
static __inline__ __m512i __DEFAULT_FN_ATTRS
308309
_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C)
309310
{
310-
return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B,
311-
(__v16si)__C);
311+
// Ops __A and __B are swapped.
312+
return (__m512i)__builtin_elementwise_fshr((__v16su)__B, (__v16su)__A,
313+
(__v16su)__C);
312314
}
313315

314316
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -330,8 +332,9 @@ _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
330332
static __inline__ __m512i __DEFAULT_FN_ATTRS
331333
_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C)
332334
{
333-
return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B,
334-
(__v32hi)__C);
335+
// Ops __A and __B are swapped.
336+
return (__m512i)__builtin_elementwise_fshr((__v32hu)__B, (__v32hu)__A,
337+
(__v32hu)__C);
335338
}
336339

337340
static __inline__ __m512i __DEFAULT_FN_ATTRS

clang/lib/Headers/avx512vlintrin.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4310,7 +4310,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
43104310
static __inline__ __m128i __DEFAULT_FN_ATTRS128
43114311
_mm_rolv_epi32 (__m128i __A, __m128i __B)
43124312
{
4313-
return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4313+
return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
43144314
}
43154315

43164316
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -4332,7 +4332,7 @@ _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
43324332
static __inline__ __m256i __DEFAULT_FN_ATTRS256
43334333
_mm256_rolv_epi32 (__m256i __A, __m256i __B)
43344334
{
4335-
return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4335+
return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__A, (__v8su)__B);
43364336
}
43374337

43384338
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -4354,7 +4354,7 @@ _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
43544354
static __inline__ __m128i __DEFAULT_FN_ATTRS128
43554355
_mm_rolv_epi64 (__m128i __A, __m128i __B)
43564356
{
4357-
return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4357+
return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
43584358
}
43594359

43604360
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -4376,7 +4376,7 @@ _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
43764376
static __inline__ __m256i __DEFAULT_FN_ATTRS256
43774377
_mm256_rolv_epi64 (__m256i __A, __m256i __B)
43784378
{
4379-
return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4379+
return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__A, (__v4du)__B);
43804380
}
43814381

43824382
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -4578,7 +4578,7 @@ _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
45784578
static __inline__ __m128i __DEFAULT_FN_ATTRS128
45794579
_mm_rorv_epi32 (__m128i __A, __m128i __B)
45804580
{
4581-
return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4581+
return (__m128i)__builtin_elementwise_fshr((__v4su)__A, (__v4su)__A, (__v4su)__B);
45824582
}
45834583

45844584
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -4600,7 +4600,7 @@ _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
46004600
static __inline__ __m256i __DEFAULT_FN_ATTRS256
46014601
_mm256_rorv_epi32 (__m256i __A, __m256i __B)
46024602
{
4603-
return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4603+
return (__m256i)__builtin_elementwise_fshr((__v8su)__A, (__v8su)__A, (__v8su)__B);
46044604
}
46054605

46064606
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -4622,7 +4622,7 @@ _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
46224622
static __inline__ __m128i __DEFAULT_FN_ATTRS128
46234623
_mm_rorv_epi64 (__m128i __A, __m128i __B)
46244624
{
4625-
return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4625+
return (__m128i)__builtin_elementwise_fshr((__v2du)__A, (__v2du)__A, (__v2du)__B);
46264626
}
46274627

46284628
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -4644,7 +4644,7 @@ _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
46444644
static __inline__ __m256i __DEFAULT_FN_ATTRS256
46454645
_mm256_rorv_epi64 (__m256i __A, __m256i __B)
46464646
{
4647-
return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4647+
return (__m256i)__builtin_elementwise_fshr((__v4du)__A, (__v4du)__A, (__v4du)__B);
46484648
}
46494649

46504650
static __inline__ __m256i __DEFAULT_FN_ATTRS256

0 commit comments

Comments
 (0)