Skip to content

Commit c6145db

Browse files
committed
[WebAssembly SIMD] Support vector comparisons on Intel
https://bugs.webkit.org/show_bug.cgi?id=248568 rdar://103089559 Reviewed by Yusuke Suzuki. Implements support for integer and floating point vector comparisons for the Intel x86_64 macro assembler. * Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h: (JSC::MacroAssemblerX86_64::compareFloatingPointVector): (JSC::MacroAssemblerX86_64::compareIntegerVector): * Source/JavaScriptCore/assembler/X86Assembler.h: (JSC::X86Assembler::vpcmpeqq_rr): (JSC::X86Assembler::vpcmpgtb_rr): (JSC::X86Assembler::vpcmpgtw_rr): (JSC::X86Assembler::vpcmpgtd_rr): (JSC::X86Assembler::vpcmpgtq_rr): (JSC::X86Assembler::vcmpps_rr): (JSC::X86Assembler::vcmppd_rr): * Source/JavaScriptCore/wasm/WasmAirIRGenerator.cpp: (JSC::Wasm::AirIRGenerator::addSIMDRelOp): Canonical link: https://commits.webkit.org/257532@main
1 parent 066e5bd commit c6145db

File tree

3 files changed

+295
-24
lines changed

3 files changed

+295
-24
lines changed

Source/JavaScriptCore/assembler/MacroAssemblerX86_64.h

Lines changed: 165 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2254,23 +2254,47 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
22542254

22552255
void compareFloatingPointVector(DoubleCondition cond, SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
22562256
{
2257+
RELEASE_ASSERT(supportsAVXForSIMD());
22572258
RELEASE_ASSERT(scalarTypeIsFloatingPoint(simdInfo.lane));
2258-
UNUSED_PARAM(left); UNUSED_PARAM(right); UNUSED_PARAM(dest);
2259+
2260+
using PackedCompareCondition = X86Assembler::PackedCompareCondition;
22592261

22602262
switch (cond) {
22612263
case DoubleEqualAndOrdered:
2264+
if (simdInfo.lane == SIMDLane::f32x4)
2265+
m_assembler.vcmpps_rr(PackedCompareCondition::Equal, left, right, dest);
2266+
else
2267+
m_assembler.vcmppd_rr(PackedCompareCondition::Equal, left, right, dest);
22622268
break;
22632269
case DoubleNotEqualOrUnordered:
2270+
if (simdInfo.lane == SIMDLane::f32x4)
2271+
m_assembler.vcmpps_rr(PackedCompareCondition::NotEqual, left, right, dest);
2272+
else
2273+
m_assembler.vcmppd_rr(PackedCompareCondition::NotEqual, left, right, dest);
22642274
break;
22652275
case DoubleGreaterThanAndOrdered:
2276+
if (simdInfo.lane == SIMDLane::f32x4)
2277+
m_assembler.vcmpps_rr(PackedCompareCondition::GreaterThan, left, right, dest);
2278+
else
2279+
m_assembler.vcmppd_rr(PackedCompareCondition::GreaterThan, left, right, dest);
22662280
break;
22672281
case DoubleGreaterThanOrEqualAndOrdered:
2282+
if (simdInfo.lane == SIMDLane::f32x4)
2283+
m_assembler.vcmpps_rr(PackedCompareCondition::GreaterThanOrEqual, left, right, dest);
2284+
else
2285+
m_assembler.vcmppd_rr(PackedCompareCondition::GreaterThanOrEqual, left, right, dest);
22682286
break;
22692287
case DoubleLessThanAndOrdered:
2270-
// a < b => b > a
2288+
if (simdInfo.lane == SIMDLane::f32x4)
2289+
m_assembler.vcmpps_rr(PackedCompareCondition::LessThan, left, right, dest);
2290+
else
2291+
m_assembler.vcmppd_rr(PackedCompareCondition::LessThan, left, right, dest);
22712292
break;
22722293
case DoubleLessThanOrEqualAndOrdered:
2273-
// a <= b => b >= a
2294+
if (simdInfo.lane == SIMDLane::f32x4)
2295+
m_assembler.vcmpps_rr(PackedCompareCondition::LessThanOrEqual, left, right, dest);
2296+
else
2297+
m_assembler.vcmppd_rr(PackedCompareCondition::LessThanOrEqual, left, right, dest);
22742298
break;
22752299
default:
22762300
RELEASE_ASSERT_NOT_REACHED();
@@ -2279,33 +2303,166 @@ class MacroAssemblerX86_64 : public MacroAssemblerX86Common {
22792303

22802304
void compareIntegerVector(RelationalCondition cond, SIMDInfo simdInfo, FPRegisterID left, FPRegisterID right, FPRegisterID dest)
22812305
{
2306+
RELEASE_ASSERT(supportsAVXForSIMD());
22822307
RELEASE_ASSERT(scalarTypeIsIntegral(simdInfo.lane));
2283-
UNUSED_PARAM(left); UNUSED_PARAM(right); UNUSED_PARAM(dest);
22842308

22852309
switch (cond) {
22862310
case Equal:
2311+
switch (simdInfo.lane) {
2312+
case SIMDLane::i8x16:
2313+
m_assembler.vpcmpeqb_rr(left, right, dest);
2314+
break;
2315+
case SIMDLane::i16x8:
2316+
m_assembler.vpcmpeqw_rr(left, right, dest);
2317+
break;
2318+
case SIMDLane::i32x4:
2319+
m_assembler.vpcmpeqd_rr(left, right, dest);
2320+
break;
2321+
case SIMDLane::i64x2:
2322+
m_assembler.vpcmpeqq_rr(left, right, dest);
2323+
break;
2324+
default:
2325+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Unsupported SIMD lane for comparison");
2326+
}
22872327
break;
22882328
case NotEqual:
2329+
// NotEqual comparisons are implemented by negating Equal on Intel, which should be
2330+
// handled before we ever reach this point.
2331+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Shouldn't emit integer vector NotEqual comparisons directly.");
22892332
break;
22902333
case Above:
2334+
// Above comparisons are implemented by negating BelowOrEqual on Intel, which should be
2335+
// handled before we ever reach this point.
2336+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Shouldn't emit integer vector Above comparisons directly.");
22912337
break;
22922338
case AboveOrEqual:
2339+
switch (simdInfo.lane) {
2340+
case SIMDLane::i8x16:
2341+
m_assembler.vpmaxub_rr(left, right, dest);
2342+
m_assembler.vpcmpeqb_rr(left, dest, dest);
2343+
break;
2344+
case SIMDLane::i16x8:
2345+
m_assembler.vpmaxuw_rr(left, right, dest);
2346+
m_assembler.vpcmpeqw_rr(left, dest, dest);
2347+
break;
2348+
case SIMDLane::i32x4:
2349+
m_assembler.vpmaxud_rr(left, right, dest);
2350+
m_assembler.vpcmpeqd_rr(left, dest, dest);
2351+
break;
2352+
case SIMDLane::i64x2:
2353+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("i64x2 unsigned comparisons are not supported.");
2354+
break;
2355+
default:
2356+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Unsupported SIMD lane for comparison");
2357+
}
22932358
break;
22942359
case Below:
2295-
// a < b => b > a
2360+
// Below comparisons are implemented by negating AboveOrEqual on Intel, which should be
2361+
// handled before we ever reach this point.
2362+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Shouldn't emit integer vector Below comparisons directly.");
22962363
break;
22972364
case BelowOrEqual:
2298-
// a <= b => b >= a
2365+
switch (simdInfo.lane) {
2366+
case SIMDLane::i8x16:
2367+
m_assembler.vpminub_rr(left, right, dest);
2368+
m_assembler.vpcmpeqb_rr(left, dest, dest);
2369+
break;
2370+
case SIMDLane::i16x8:
2371+
m_assembler.vpminuw_rr(left, right, dest);
2372+
m_assembler.vpcmpeqw_rr(left, dest, dest);
2373+
break;
2374+
case SIMDLane::i32x4:
2375+
m_assembler.vpminud_rr(left, right, dest);
2376+
m_assembler.vpcmpeqd_rr(left, dest, dest);
2377+
break;
2378+
case SIMDLane::i64x2:
2379+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("i64x2 unsigned comparisons are not supported.");
2380+
break;
2381+
default:
2382+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Unsupported SIMD lane for comparison");
2383+
}
22992384
break;
23002385
case GreaterThan:
2386+
switch (simdInfo.lane) {
2387+
case SIMDLane::i8x16:
2388+
m_assembler.vpcmpgtb_rr(left, right, dest);
2389+
break;
2390+
case SIMDLane::i16x8:
2391+
m_assembler.vpcmpgtw_rr(left, right, dest);
2392+
break;
2393+
case SIMDLane::i32x4:
2394+
m_assembler.vpcmpgtd_rr(left, right, dest);
2395+
break;
2396+
case SIMDLane::i64x2:
2397+
m_assembler.vpcmpgtq_rr(left, right, dest);
2398+
break;
2399+
default:
2400+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Unsupported SIMD lane for comparison");
2401+
}
23012402
break;
23022403
case GreaterThanOrEqual:
2404+
switch (simdInfo.lane) {
2405+
case SIMDLane::i8x16:
2406+
m_assembler.vpmaxsb_rr(left, right, dest);
2407+
m_assembler.vpcmpeqb_rr(left, dest, dest);
2408+
break;
2409+
case SIMDLane::i16x8:
2410+
m_assembler.vpmaxsw_rr(left, right, dest);
2411+
m_assembler.vpcmpeqw_rr(left, dest, dest);
2412+
break;
2413+
case SIMDLane::i32x4:
2414+
m_assembler.vpmaxsd_rr(left, right, dest);
2415+
m_assembler.vpcmpeqd_rr(left, dest, dest);
2416+
break;
2417+
case SIMDLane::i64x2:
2418+
// Intel doesn't support 64-bit packed maximum/minimum without AVX512, so this condition should have been transformed
2419+
// into a negated LessThan prior to reaching the macro assembler.
2420+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Shouldn't emit integer vector GreaterThanOrEqual comparisons directly.");
2421+
break;
2422+
default:
2423+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Unsupported SIMD lane for comparison");
2424+
}
23032425
break;
23042426
case LessThan:
2305-
// a < b => b > a
2427+
switch (simdInfo.lane) {
2428+
case SIMDLane::i8x16:
2429+
m_assembler.vpcmpgtb_rr(right, left, dest);
2430+
break;
2431+
case SIMDLane::i16x8:
2432+
m_assembler.vpcmpgtw_rr(right, left, dest);
2433+
break;
2434+
case SIMDLane::i32x4:
2435+
m_assembler.vpcmpgtd_rr(right, left, dest);
2436+
break;
2437+
case SIMDLane::i64x2:
2438+
m_assembler.vpcmpgtq_rr(right, left, dest);
2439+
break;
2440+
default:
2441+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Unsupported SIMD lane for comparison");
2442+
}
23062443
break;
23072444
case LessThanOrEqual:
2308-
// a <= b => b >= a
2445+
switch (simdInfo.lane) {
2446+
case SIMDLane::i8x16:
2447+
m_assembler.vpminsb_rr(left, right, dest);
2448+
m_assembler.vpcmpeqb_rr(left, dest, dest);
2449+
break;
2450+
case SIMDLane::i16x8:
2451+
m_assembler.vpminsw_rr(left, right, dest);
2452+
m_assembler.vpcmpeqw_rr(left, dest, dest);
2453+
break;
2454+
case SIMDLane::i32x4:
2455+
m_assembler.vpminsd_rr(left, right, dest);
2456+
m_assembler.vpcmpeqd_rr(left, dest, dest);
2457+
break;
2458+
case SIMDLane::i64x2:
2459+
// Intel doesn't support 64-bit packed maximum/minimum without AVX512, so this condition should have been transformed
2460+
// into a negated GreaterThan prior to reaching the macro assembler.
2461+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Shouldn't emit integer vector LessThanOrEqual comparisons directly.");
2462+
break;
2463+
default:
2464+
RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("Unsupported SIMD lane for comparison");
2465+
}
23092466
break;
23102467
default:
23112468
RELEASE_ASSERT_NOT_REACHED();

Source/JavaScriptCore/assembler/X86Assembler.h

Lines changed: 88 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,6 @@ class X86Assembler {
297297
OP2_PSHUFHW_VdqWdqIb = 0x70,
298298
OP2_PSLLQ_UdqIb = 0x73,
299299
OP2_PSRLQ_UdqIb = 0x73,
300-
OP2_PCMPEQW_VdqWdq = 0x75,
301300
OP2_MOVD_EdVd = 0x7E,
302301
OP2_JCC_rel32 = 0x80,
303302
OP_SETCC = 0x90,
@@ -327,19 +326,15 @@ class X86Assembler {
327326
OP2_BSWAP = 0xC8,
328327
OP2_PSUBUSB_VdqWdq = 0xD8,
329328
OP2_PSUBUSW_VdqWdq = 0xD9,
330-
OP2_PMINUB_VdqWdq = 0xDA,
331329
OP2_PADDUSB_VdqWdq = 0xDC,
332330
OP2_PADDUSW_VdqWdq = 0xDD,
333-
OP2_PMAXUB_VdqWdq = 0xDE,
334331
OP2_PAVGB_VdqWdq = 0xE0,
335332
OP2_PAVGW_VdqWdq = 0xE3,
336333
OP2_PSUBSB_VdqWdq = 0xE8,
337334
OP2_PSUBSW_VdqWdq = 0xE9,
338-
OP2_PMINSW_VdqWdq = 0xEA,
339335
OP2_POR_VdqWdq = 0XEB,
340336
OP2_PADDSB_VdqWdq = 0xEC,
341337
OP2_PADDSW_VdqWdq = 0xED,
342-
OP2_PMAXSW_VdqWdq = 0xEE,
343338
OP2_PXOR_VdqWdq = 0xEF,
344339
OP2_PADDB_VdqWdq = 0xFC,
345340
OP2_PADDW_VdqWdq = 0xFD,
@@ -360,7 +355,19 @@ class X86Assembler {
360355
OP2_DIVPD_VpdWpd = 0x5E,
361356
OP2_SQRTPS_VpsWps = 0x51,
362357
OP2_SQRTPD_VpdWpd = 0x51,
363-
OP2_PMADDWD_VdqWdq = 0xF5
358+
OP2_PMADDWD_VdqWdq = 0xF5,
359+
OP2_PCMPEQB_VdqWdq = 0x74,
360+
OP2_PCMPEQW_VdqWdq = 0x75,
361+
OP2_PCMPEQD_VdqWdq = 0x76,
362+
OP2_PCMPGTB_VdqWdq = 0x64,
363+
OP2_PCMPGTW_VdqWdq = 0x65,
364+
OP2_PCMPGTD_VdqWdq = 0x66,
365+
OP2_CMPPS_VpsWpsIb = 0xC2,
366+
OP2_CMPPD_VpdWpdIb = 0xC2,
367+
OP2_PMAXSW_VdqWdq = 0xEE,
368+
OP2_PMAXUB_VdqWdq = 0xDE,
369+
OP2_PMINSW_VdqWdq = 0xEA,
370+
OP2_PMINUB_VdqWdq = 0xDA
364371
} TwoByteOpcodeID;
365372

366373
typedef enum {
@@ -377,21 +384,23 @@ class X86Assembler {
377384
OP3_INSERTPS_VpsUpsIb = 0x21,
378385
OP3_PINSRB = 0x20,
379386
OP3_PINSRD = 0x22,
380-
OP3_PMINSB_VdqWdq = 0x38,
381-
OP3_PMINSD_VdqWdq = 0x39,
382-
OP3_PMINUW_VdqWdq = 0x3A,
383-
OP3_PMINUD_VdqWdq = 0x3B,
384-
OP3_PMAXSB_VdqWdq = 0x3C,
385-
OP3_PMAXSD_VdqWdq = 0x3D,
386-
OP3_PMAXUW_VdqWdq = 0x3E,
387-
OP3_PMAXUD_VdqWdq = 0x3F,
388387
OP3_BLENDVPD_VpdWpdXMM0 = 0x4B,
389388
OP3_LFENCE = 0xE8,
390389
OP3_MFENCE = 0xF0,
391390
OP3_SFENCE = 0xF8,
392391
OP3_ROUNDPS_VpsWpsIb = 0x08,
393392
OP3_ROUNDPD_VpdWpdIb = 0x09,
394-
OP3_PMULLD_VdqWdq = 0x40
393+
OP3_PMULLD_VdqWdq = 0x40,
394+
OP3_PCMPEQQ_VdqWdq = 0x29,
395+
OP3_PCMPGTQ_VdqWdq = 0x37,
396+
OP3_PMAXSB_VdqWdq = 0x3C,
397+
OP3_PMAXSD_VdqWdq = 0x3D,
398+
OP3_PMAXUW_VdqWdq = 0x3E,
399+
OP3_PMAXUD_VdqWdq = 0x3F,
400+
OP3_PMINSB_VdqWdq = 0x38,
401+
OP3_PMINSD_VdqWdq = 0x39,
402+
OP3_PMINUW_VdqWdq = 0x3A,
403+
OP3_PMINUD_VdqWdq = 0x3B
395404
} ThreeByteOpcodeID;
396405

397406
struct VexPrefix {
@@ -3223,6 +3232,70 @@ class X86Assembler {
32233232
m_formatter.vexNdsLigWigCommutativeTwoByteOp(PRE_OPERAND_SIZE, OP2_PMADDWD_VdqWdq, (RegisterID)dest, (RegisterID)a, (RegisterID)b);
32243233
}
32253234

3235+
// Emits OP2_PCMPEQB_VdqWdq (0x74) through the commutative VEX two-byte-opcode
// formatter path with the PRE_OPERAND_SIZE prefix. The three registers are
// forwarded to the formatter in (a, b, dest) order.
void vpcmpeqb_rr(XMMRegisterID a, XMMRegisterID b, XMMRegisterID dest)
{
    const auto lhs = static_cast<RegisterID>(a);
    const auto rhs = static_cast<RegisterID>(b);
    const auto target = static_cast<RegisterID>(dest);
    m_formatter.vexNdsLigWigCommutativeTwoByteOp(PRE_OPERAND_SIZE, OP2_PCMPEQB_VdqWdq, lhs, rhs, target);
}
3239+
3240+
// Emits OP2_PCMPEQD_VdqWdq (0x76) through the commutative VEX two-byte-opcode
// formatter path with the PRE_OPERAND_SIZE prefix. The three registers are
// forwarded to the formatter in (a, b, dest) order.
void vpcmpeqd_rr(XMMRegisterID a, XMMRegisterID b, XMMRegisterID dest)
{
    const auto lhs = static_cast<RegisterID>(a);
    const auto rhs = static_cast<RegisterID>(b);
    const auto target = static_cast<RegisterID>(dest);
    m_formatter.vexNdsLigWigCommutativeTwoByteOp(PRE_OPERAND_SIZE, OP2_PCMPEQD_VdqWdq, lhs, rhs, target);
}
3244+
3245+
// Emits OP3_PCMPEQQ_VdqWdq (0x29) through the VEX three-byte-opcode formatter
// path, selecting the ThreeBytesOp38 implied-bytes map and the PRE_OPERAND_SIZE
// prefix. The three registers are forwarded in (a, b, dest) order.
void vpcmpeqq_rr(XMMRegisterID a, XMMRegisterID b, XMMRegisterID dest)
{
    const auto lhs = static_cast<RegisterID>(a);
    const auto rhs = static_cast<RegisterID>(b);
    const auto target = static_cast<RegisterID>(dest);
    m_formatter.vexNdsLigWigThreeByteOp(PRE_OPERAND_SIZE, VexImpliedBytes::ThreeBytesOp38, OP3_PCMPEQQ_VdqWdq, lhs, rhs, target);
}
3249+
3250+
// Emits OP2_PCMPGTB_VdqWdq (0x64) through the non-commutative VEX
// two-byte-opcode formatter path with the PRE_OPERAND_SIZE prefix. The three
// registers are forwarded to the formatter in (a, b, dest) order.
void vpcmpgtb_rr(XMMRegisterID a, XMMRegisterID b, XMMRegisterID dest)
{
    const auto lhs = static_cast<RegisterID>(a);
    const auto rhs = static_cast<RegisterID>(b);
    const auto target = static_cast<RegisterID>(dest);
    m_formatter.vexNdsLigWigTwoByteOp(PRE_OPERAND_SIZE, OP2_PCMPGTB_VdqWdq, lhs, rhs, target);
}
3254+
3255+
// Emits OP2_PCMPGTW_VdqWdq (0x65) through the non-commutative VEX
// two-byte-opcode formatter path with the PRE_OPERAND_SIZE prefix. The three
// registers are forwarded to the formatter in (a, b, dest) order.
void vpcmpgtw_rr(XMMRegisterID a, XMMRegisterID b, XMMRegisterID dest)
{
    const auto lhs = static_cast<RegisterID>(a);
    const auto rhs = static_cast<RegisterID>(b);
    const auto target = static_cast<RegisterID>(dest);
    m_formatter.vexNdsLigWigTwoByteOp(PRE_OPERAND_SIZE, OP2_PCMPGTW_VdqWdq, lhs, rhs, target);
}
3259+
3260+
// Emits OP2_PCMPGTD_VdqWdq (0x66) through the non-commutative VEX
// two-byte-opcode formatter path with the PRE_OPERAND_SIZE prefix. The three
// registers are forwarded to the formatter in (a, b, dest) order.
void vpcmpgtd_rr(XMMRegisterID a, XMMRegisterID b, XMMRegisterID dest)
{
    const auto lhs = static_cast<RegisterID>(a);
    const auto rhs = static_cast<RegisterID>(b);
    const auto target = static_cast<RegisterID>(dest);
    m_formatter.vexNdsLigWigTwoByteOp(PRE_OPERAND_SIZE, OP2_PCMPGTD_VdqWdq, lhs, rhs, target);
}
3264+
3265+
// Emits OP3_PCMPGTQ_VdqWdq (0x37) through the VEX three-byte-opcode formatter
// path, selecting the ThreeBytesOp38 implied-bytes map and the PRE_OPERAND_SIZE
// prefix. The three registers are forwarded in (a, b, dest) order.
void vpcmpgtq_rr(XMMRegisterID a, XMMRegisterID b, XMMRegisterID dest)
{
    const auto lhs = static_cast<RegisterID>(a);
    const auto rhs = static_cast<RegisterID>(b);
    const auto target = static_cast<RegisterID>(dest);
    m_formatter.vexNdsLigWigThreeByteOp(PRE_OPERAND_SIZE, VexImpliedBytes::ThreeBytesOp38, OP3_PCMPGTQ_VdqWdq, lhs, rhs, target);
}
3269+
3270+
// Comparison predicates passed as the imm8 operand of VCMPPS / VCMPPD.
//
// Each value is the predicate encoding from the Intel manual's CMPPS/CMPPD
// table; the manual's mnemonic is given next to each enumerator.
//
// GreaterThanOrEqual and GreaterThan deliberately use the ordered predicates
// GE_OS (0x0D) and GT_OS (0x0E) rather than NLT_US (5) and NLE_US (6). The
// "not less than" forms evaluate to true whenever either operand is NaN, which
// is wrong for callers that need an "and ordered" greater-than comparison.
// Predicates above 7 are only encodable with the VEX form of the instruction,
// so this enum must only be used with the VEX-encoded emitters.
enum class PackedCompareCondition : uint8_t {
    Equal = 0, // EQ_OQ: false if either operand is NaN
    LessThan = 1, // LT_OS: false if either operand is NaN
    LessThanOrEqual = 2, // LE_OS: false if either operand is NaN
    Unordered = 3, // UNORD_Q: true if either operand is NaN
    NotEqual = 4, // NEQ_UQ: true if either operand is NaN
    GreaterThanOrEqual = 13, // GE_OS: false if either operand is NaN
    GreaterThan = 14, // GT_OS: false if either operand is NaN
    Ordered = 7 // ORD_Q: true if neither operand is NaN
};
3280+
3281+
// Emits OP2_CMPPS_VpsWpsIb (0xC2) with no legacy prefix byte, followed by the
// comparison predicate as an 8-bit immediate.
//
// Equal and NotEqual are routed through the commutative formatter path; every
// other predicate uses the non-commutative path. The three registers are
// forwarded to the formatter in (a, b, dest) order.
void vcmpps_rr(PackedCompareCondition condition, XMMRegisterID a, XMMRegisterID b, XMMRegisterID dest)
{
    const auto noPrefix = static_cast<OneByteOpcodeID>(0);
    const auto lhs = static_cast<RegisterID>(a);
    const auto rhs = static_cast<RegisterID>(b);
    const auto target = static_cast<RegisterID>(dest);

    const bool isSymmetricPredicate = condition == PackedCompareCondition::Equal
        || condition == PackedCompareCondition::NotEqual;

    if (!isSymmetricPredicate)
        m_formatter.vexNdsLigWigTwoByteOp(noPrefix, OP2_CMPPS_VpsWpsIb, lhs, rhs, target);
    else
        m_formatter.vexNdsLigWigCommutativeTwoByteOp(noPrefix, OP2_CMPPS_VpsWpsIb, lhs, rhs, target);

    // The predicate always trails the instruction as its imm8 operand.
    m_formatter.immediate8(static_cast<uint8_t>(condition));
}
3289+
3290+
// Emits OP2_CMPPD_VpdWpdIb (0xC2) with the PRE_OPERAND_SIZE prefix, followed by
// the comparison predicate as an 8-bit immediate.
//
// Equal and NotEqual are routed through the commutative formatter path; every
// other predicate uses the non-commutative path. The three registers are
// forwarded to the formatter in (a, b, dest) order.
void vcmppd_rr(PackedCompareCondition condition, XMMRegisterID a, XMMRegisterID b, XMMRegisterID dest)
{
    const auto lhs = static_cast<RegisterID>(a);
    const auto rhs = static_cast<RegisterID>(b);
    const auto target = static_cast<RegisterID>(dest);

    const bool isSymmetricPredicate = condition == PackedCompareCondition::Equal
        || condition == PackedCompareCondition::NotEqual;

    if (!isSymmetricPredicate)
        m_formatter.vexNdsLigWigTwoByteOp(PRE_OPERAND_SIZE, OP2_CMPPD_VpdWpdIb, lhs, rhs, target);
    else
        m_formatter.vexNdsLigWigCommutativeTwoByteOp(PRE_OPERAND_SIZE, OP2_CMPPD_VpdWpdIb, lhs, rhs, target);

    // The predicate always trails the instruction as its imm8 operand.
    m_formatter.immediate8(static_cast<uint8_t>(condition));
}
3298+
32263299
void movl_rr(RegisterID src, RegisterID dst)
32273300
{
32283301
m_formatter.oneByteOp(OP_MOV_EvGv, src, dst);

0 commit comments

Comments
 (0)