Skip to content

Commit 22e08fa

Browse files
committed
Add basic implementations of ROUNDxx, BLENDPS/BLENDPD, and PBLENDW
1 parent f006a4f commit 22e08fa

File tree

3 files changed

+167
-1
lines changed

3 files changed

+167
-1
lines changed

blink/cvt.c

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,38 @@
3636
#define kOpCvt0f5b 16
3737
#define kOpCvt0fE6 20
3838

39+
static float SseRoundSingle(struct Machine *m, float x) {
40+
switch ((m->mxcsr & kMxcsrRc) >> 13) {
41+
case 0:
42+
return rint(x);
43+
case 1:
44+
return floor(x);
45+
case 2:
46+
return ceil(x);
47+
case 3:
48+
return trunc(x);
49+
default:
50+
__builtin_unreachable();
51+
}
52+
}
53+
54+
static float SseRoundSingleImm(struct Machine *m, float x, int imm) {
55+
int roundmode = imm & 3;
56+
if((imm >> 2) & 1) roundmode = (m->mxcsr & kMxcsrRc) >> 13;
57+
switch (roundmode) {
58+
case 0:
59+
return rint(x);
60+
case 1:
61+
return floor(x);
62+
case 2:
63+
return ceil(x);
64+
case 3:
65+
return trunc(x);
66+
default:
67+
__builtin_unreachable();
68+
}
69+
}
70+
3971
static double SseRoundDouble(struct Machine *m, double x) {
4072
switch ((m->mxcsr & kMxcsrRc) >> 13) {
4173
case 0:
@@ -51,6 +83,73 @@ static double SseRoundDouble(struct Machine *m, double x) {
5183
}
5284
}
5385

86+
static double SseRoundDoubleImm(struct Machine *m, double x, int imm) {
87+
int roundmode = imm & 3;
88+
if((imm >> 2) & 1) roundmode = (m->mxcsr & kMxcsrRc) >> 13;
89+
switch (roundmode) {
90+
case 0:
91+
return rint(x);
92+
case 1:
93+
return floor(x);
94+
case 2:
95+
return ceil(x);
96+
case 3:
97+
return trunc(x);
98+
default:
99+
__builtin_unreachable();
100+
}
101+
}
102+
103+
static void OpPpiWpdRoundps(P) {
104+
u8 *p;
105+
unsigned i;
106+
i32 n[4];
107+
union FloatPun f[2];
108+
p = GetModrmRegisterXmmPointerRead16(A);
109+
f[0].i = Read32(p + 0);
110+
f[1].i = Read32(p + 4);
111+
f[2].i = Read32(p + 8);
112+
f[3].i = Read32(p + 12);
113+
for (i = 0; i < 4; ++i) n[i] = SseRoundSingleImm(m, f[i].f, uimm0);
114+
Put32(XmmReg(m, rde) + 0, n[0]);
115+
Put32(XmmReg(m, rde) + 4, n[1]);
116+
Put32(XmmReg(m, rde) + 8, n[2]);
117+
Put32(XmmReg(m, rde) + 12, n[3]);
118+
}
119+
120+
static void OpPpiWpdRoundpd(P) {
121+
u8 *p;
122+
unsigned i;
123+
i64 n[2];
124+
union DoublePun d[2];
125+
p = GetModrmRegisterXmmPointerRead16(A);
126+
d[0].i = Read64(p + 0);
127+
d[1].i = Read64(p + 8);
128+
for (i = 0; i < 2; ++i) n[i] = SseRoundDoubleImm(m, d[i].f, uimm0);
129+
Put64(XmmReg(m, rde) + 0, n[0]);
130+
Put64(XmmReg(m, rde) + 8, n[1]);
131+
}
132+
133+
static void OpPpiWpdRoundss(P) {
134+
u8 *p;
135+
i32 n;
136+
union FloatPun f;
137+
p = GetModrmRegisterXmmPointerRead16(A);
138+
f.i = Read32(p);
139+
n = SseRoundSingleImm(m, f.f, uimm0);
140+
Put32(XmmReg(m, rde), n);
141+
}
142+
143+
static void OpPpiWpdRoundsd(P) {
144+
u8 *p;
145+
i32 n;
146+
union DoublePun d;
147+
p = GetModrmRegisterXmmPointerRead16(A);
148+
d.i = Read64(p);
149+
n = SseRoundDoubleImm(m, d.f, uimm0);
150+
Put64(XmmReg(m, rde), n);
151+
}
152+
54153
static void OpGdqpWssCvttss2si(P) {
55154
i64 n;
56155
union FloatPun f;
@@ -73,7 +172,7 @@ static void OpGdqpWssCvtss2si(P) {
73172
i64 n;
74173
union FloatPun f;
75174
f.i = Read32(GetModrmRegisterXmmPointerRead4(A));
76-
n = rintf(f.f);
175+
n = SseRoundSingle(f.f);
77176
if (!Rexw(rde)) n &= 0xffffffff;
78177
Put64(RegRexrReg(m, rde), n);
79178
}

blink/ssefloat.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,54 @@ void OpShufpsd(P) {
228228
}
229229
}
230230

231+
static void OpBlendps(P) {
232+
u8 *p;
233+
union FloatPun x[4], y[4], z[4];
234+
p = GetModrmRegisterXmmPointerRead16(A);
235+
IGNORE_RACES_START();
236+
y[0].i = Read32(p + 0 * 4);
237+
y[1].i = Read32(p + 1 * 4);
238+
y[2].i = Read32(p + 2 * 4);
239+
y[3].i = Read32(p + 3 * 4);
240+
p = XmmRexrReg(m, rde);
241+
x[0].i = Read32(p + 0 * 4);
242+
x[1].i = Read32(p + 1 * 4);
243+
x[2].i = Read32(p + 2 * 4);
244+
x[3].i = Read32(p + 3 * 4);
245+
if(uimm0 & 1) z[0].f = y[0].f;
246+
else z[0].f = x[0].f;
247+
if(uimm0 & 2) z[1].f = y[1].f;
248+
else z[1].f = x[1].f;
249+
if(uimm0 & 4) z[2].f = y[2].f;
250+
else z[2].f = x[2].f;
251+
if(uimm0 & 8) z[3].f = y[3].f;
252+
else z[3].f = x[3].f;
253+
Write32(p + 0 * 4, z[0].i);
254+
Write32(p + 1 * 4, z[1].i);
255+
Write32(p + 2 * 4, z[2].i);
256+
Write32(p + 3 * 4, z[3].i);
257+
IGNORE_RACES_END();
258+
}
259+
260+
static void OpBlendpd(P) {
261+
u8 *p;
262+
union DoublePun x[2], y[2], z[2];
263+
p = GetModrmRegisterXmmPointerRead16(A);
264+
IGNORE_RACES_START();
265+
y[0].i = Read64(p + 0 * 8);
266+
y[1].i = Read64(p + 1 * 8);
267+
p = XmmRexrReg(m, rde);
268+
x[0].i = Read64(p + 0 * 8);
269+
x[1].i = Read64(p + 1 * 8);
270+
if(uimm0 & 1) z[0].f = y[0].f;
271+
else z[0].f = x[0].f;
272+
if(uimm0 & 2) z[1].f = y[1].f;
273+
else z[1].f = x[1].f;
274+
Write64(p + 0 * 8, z[0].i);
275+
Write64(p + 1 * 8, z[1].i);
276+
IGNORE_RACES_END();
277+
}
278+
231279
static void Movmskps(P) {
232280
u8 *p = GetModrmRegisterXmmPointerRead16(A);
233281
IGNORE_RACES_START();

blink/ssemov.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,25 @@ void OpMovntiMdqpGdqp(P) {
8989
IGNORE_RACES_END();
9090
}
9191

92+
void OpPBlendW(P) {
93+
u8* p;
94+
unsigned i;
95+
u16 x[8], y[8], z[8];
96+
97+
IGNORE_RACES_START();
98+
for(i = 0; i < 8; i++)
99+
{
100+
p = GetModrmRegisterXmmPointerRead16(A);
101+
y[i] = Read16(p + i * 2);
102+
p = XmmRexrReg(m, rde);
103+
x[i] = Read16(p + i * 2);
104+
if(uimm0 & (1 << i)) z[i] = y[i];
105+
else z[i] = x[i];
106+
Write16(p + i * 2, z[i]);
107+
}
108+
IGNORE_RACES_END();
109+
}
110+
92111
static void MovdqaVdqWdq(P) {
93112
IGNORE_RACES_START();
94113
memcpy(XmmRexrReg(m, rde), GetXmmAddress(A), 16);

0 commit comments

Comments
 (0)