Skip to content

Commit 3255fe5

Browse files
authored
Merge pull request #1522 from w4kfu/x86_fix_and_instr_add
X86 fix and instr add
2 parents 2a15c60 + 11d5c1f commit 3255fe5

2 files changed

Lines changed: 206 additions & 5 deletions

File tree

miasm/arch/x86/arch.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4285,7 +4285,8 @@ class ia32_aaa(mn_x86):
42854285
rmmod(xmm_reg, rm_arg_xmm_m128) + [u08])
42864286
addop("pshufhw", [bs8(0x0f), bs8(0x70), pref_f3] +
42874287
rmmod(xmm_reg, rm_arg_xmm_m128) + [u08])
4288-
4288+
addop("ptest", [bs8(0x0f), bs8(0x38), bs8(0x17), pref_66] +
4289+
rmmod(xmm_reg, rm_arg_xmm_m128))
42894290

42904291
### Convert
42914292
### SS = single precision
@@ -4345,6 +4346,23 @@ class ia32_aaa(mn_x86):
43454346
addop("palignr", [bs8(0x0f), bs8(0x3a), bs8(0x0f), pref_66] +
43464347
rmmod(xmm_reg, rm_arg_xmm_m128) + [u08], [xmm_reg, rm_arg_xmm_m128, u08])
43474348

4349+
addop("pclmulqdq", [bs8(0x0f), bs8(0x3a), bs8(0x44), pref_66] +
4350+
rmmod(xmm_reg, rm_arg_xmm_m128) + [u08],
4351+
[xmm_reg, rm_arg_xmm_m128, u08])
4352+
4353+
addop("psignb", [bs8(0x0f), bs8(0x38), bs8(0x08), no_xmm_pref] +
4354+
rmmod(mm_reg, rm_arg_mm_m64))
4355+
addop("psignb", [bs8(0x0f), bs8(0x38), bs8(0x08), pref_66] +
4356+
rmmod(xmm_reg, rm_arg_xmm_m128))
4357+
addop("psignw", [bs8(0x0f), bs8(0x38), bs8(0x09), no_xmm_pref] +
4358+
rmmod(mm_reg, rm_arg_mm_m64))
4359+
addop("psignw", [bs8(0x0f), bs8(0x38), bs8(0x09), pref_66] +
4360+
rmmod(xmm_reg, rm_arg_xmm_m128))
4361+
addop("psignd", [bs8(0x0f), bs8(0x38), bs8(0x0a), no_xmm_pref] +
4362+
rmmod(mm_reg, rm_arg_mm_m64))
4363+
addop("psignd", [bs8(0x0f), bs8(0x38), bs8(0x0a), pref_66] +
4364+
rmmod(xmm_reg, rm_arg_xmm_m128))
4365+
43484366
addop("psrlq", [bs8(0x0f), bs8(0x73), no_xmm_pref] +
43494367
rmmod(d2, rm_arg_mm) + [u08], [rm_arg_mm, u08])
43504368
addop("psrlq", [bs8(0x0f), bs8(0x73), pref_66] +
@@ -4447,6 +4465,9 @@ class ia32_aaa(mn_x86):
44474465
addop("pmaxud", [bs8(0x0f), bs8(0x38), bs8(0x3f), pref_66] +
44484466
rmmod(xmm_reg, rm_arg_xmm))
44494467

4468+
addop("pmaxsd", [bs8(0x0f), bs8(0x38), bs8(0x3d), pref_66] +
4469+
rmmod(xmm_reg, rm_arg_xmm))
4470+
44504471
addop("pmaxsw", [bs8(0x0f), bs8(0xee), no_xmm_pref] +
44514472
rmmod(mm_reg, rm_arg_mm_m64))
44524473
addop("pmaxsw", [bs8(0x0f), bs8(0xee), pref_66] +
@@ -4460,9 +4481,14 @@ class ia32_aaa(mn_x86):
44604481
addop("pminuw", [bs8(0x0f), bs8(0x38), bs8(0x3a), pref_66] +
44614482
rmmod(xmm_reg, rm_arg_xmm))
44624483

4484+
addop("phminposuw", [bs8(0x0f), bs8(0x38), bs8(0x41), pref_66] +
4485+
rmmod(xmm_reg, rm_arg_xmm_m128))
4486+
44634487
addop("pminud", [bs8(0x0f), bs8(0x38), bs8(0x3b), pref_66] +
44644488
rmmod(xmm_reg, rm_arg_xmm))
44654489

4490+
addop("pminsd", [bs8(0x0f), bs8(0x38), bs8(0x39), pref_66] +
4491+
rmmod(xmm_reg, rm_arg_xmm))
44664492

44674493
addop("pcmpeqb", [bs8(0x0f), bs8(0x74), no_xmm_pref] +
44684494
rmmod(mm_reg, rm_arg_mm))
@@ -4566,9 +4592,8 @@ class ia32_aaa(mn_x86):
45664592
rmmod(xmm_reg, rm_arg_reg_m08) + [u08], [rm_arg_reg_m08, xmm_reg, u08])
45674593
addop("pextrd", [bs8(0x0f), bs8(0x3a), bs8(0x16), pref_66, bs_opmode32] +
45684594
rmmod(xmm_reg, rm_arg) + [u08], [rm_arg, xmm_reg, u08])
4569-
addop("pextrq", [bs8(0x0f), bs8(0x3a), bs8(0x16), pref_66] +
4570-
rmmod(xmm_reg, rm_arg_m64) + [bs_opmode64] + [u08], [rm_arg_m64, xmm_reg, u08])
4571-
4595+
addop("pextrq", [bs8(0x0F), bs8(0x3A), bs8(0x16), pref_66, bs_opmode64] +
4596+
rmmod(xmm_reg, rm_arg) + [u08], [rm_arg, xmm_reg, u08])
45724597

45734598
addop("pextrw", [bs8(0x0f), bs8(0x3a), bs8(0x15), pref_66] +
45744599
rmmod(xmm_reg, rm_arg_reg_m16) + [u08], [rm_arg_reg_m16, xmm_reg, u08])
@@ -4592,6 +4617,18 @@ class ia32_aaa(mn_x86):
45924617
addop("pmovmskb", [bs8(0x0f), bs8(0xd7), pref_66] +
45934618
rmmod(reg_modrm, rm_arg_xmm_reg))
45944619

4620+
addop("pmovsxwd", [bs8(0x0f), bs8(0x38), bs8(0x23), pref_66] +
4621+
rmmod(xmm_reg, rm_arg_xmm_m64))
4622+
4623+
addop("pmovsxwq", [bs8(0x0f), bs8(0x38), bs8(0x24), pref_66] +
4624+
rmmod(xmm_reg, rm_arg_xmm_m32))
4625+
4626+
addop("pmovsxbd", [bs8(0x0f), bs8(0x38), bs8(0x21), pref_66] +
4627+
rmmod(xmm_reg, rm_arg_xmm_m32))
4628+
4629+
addop("pmovsxdq", [bs8(0x0f), bs8(0x38), bs8(0x25), pref_66] +
4630+
rmmod(xmm_reg, rm_arg_xmm_m64))
4631+
45954632
addop("shufps", [bs8(0x0f), bs8(0xc6), no_xmm_pref] +
45964633
rmmod(xmm_reg, rm_arg_xmm) + [u08])
45974634
addop("shufpd", [bs8(0x0f), bs8(0xc6), pref_66] +
@@ -4617,6 +4654,9 @@ class ia32_aaa(mn_x86):
46174654
addop("packuswb", [bs8(0x0f), bs8(0x67), pref_66] +
46184655
rmmod(xmm_reg, rm_arg_xmm_m128))
46194656

4657+
addop("packusdw", [bs8(0x0f), bs8(0x38), bs8(0x2b), pref_66] +
4658+
rmmod(xmm_reg, rm_arg_xmm_m128))
4659+
46204660
addop("pmullw", [bs8(0x0f), bs8(0xd5), no_xmm_pref] +
46214661
rmmod(mm_reg, rm_arg_mm_m64))
46224662
addop("pmullw", [bs8(0x0f), bs8(0xd5), pref_66] +
@@ -4629,6 +4669,8 @@ class ia32_aaa(mn_x86):
46294669
rmmod(mm_reg, rm_arg_mm_m64))
46304670
addop("pmulhw", [bs8(0x0f), bs8(0xe5), pref_66] +
46314671
rmmod(xmm_reg, rm_arg_xmm_m128))
4672+
addop("pmuldq", [bs8(0x0f), bs8(0x38), bs8(0x28), pref_66] +
4673+
rmmod(xmm_reg, rm_arg_xmm_m128))
46324674
addop("pmuludq", [bs8(0x0f), bs8(0xf4), no_xmm_pref] +
46334675
rmmod(mm_reg, rm_arg_mm_m64))
46344676
addop("pmuludq", [bs8(0x0f), bs8(0xf4), pref_66] +

miasm/arch/x86/sem.py

Lines changed: 160 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3895,6 +3895,21 @@ def _float_min_max(expr):
38953895
pmulhd = vec_vertical_instr('*', 32, lambda x: _keep_mul_high(x, signed=True))
38963896
pmulhq = vec_vertical_instr('*', 64, lambda x: _keep_mul_high(x, signed=True))
38973897

3898+
def pmuldq(ir, instr, dst, src):
    """Multiply the sign-extended low dword of each qword of @src and @dst.

    For each 64-bit lane (bit offsets 0 and 64), the low 32 bits of @src and
    of @dst are sign-extended to 64 bits, multiplied, and the product is
    assigned to the matching 64-bit lane of @dst.

    Raises RuntimeError when @dst is not 128 bits wide.
    Returns the list of assignments and an empty list of extra blocks.
    """
    if dst.size != 128:
        raise RuntimeError("Unsupported size %d" % dst.size)

    assignments = []
    for low in (0, 64):
        product = (
            src[low:low + 32].signExtend(64) *
            dst[low:low + 32].signExtend(64)
        )
        assignments.append(m2_expr.ExprAssign(dst[low:low + 64], product))
    return assignments, []
3912+
38983913
def pmuludq(ir, instr, dst, src):
38993914
e = []
39003915
if dst.size == 64:
@@ -3976,9 +3991,11 @@ def _average(expr):
39763991
pminub = vec_vertical_instr('min', 8, lambda x: _min_max(x, signed=False))
39773992
pminuw = vec_vertical_instr('min', 16, lambda x: _min_max(x, signed=False))
39783993
pminud = vec_vertical_instr('min', 32, lambda x: _min_max(x, signed=False))
3994+
pminsd = vec_vertical_instr('min', 32, lambda x: _min_max(x, signed=True))
39793995
pmaxub = vec_vertical_instr('max', 8, lambda x: _min_max(x, signed=False))
39803996
pmaxuw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=False))
39813997
pmaxud = vec_vertical_instr('max', 32, lambda x: _min_max(x, signed=False))
3998+
pmaxsd = vec_vertical_instr('max', 32, lambda x: _min_max(x, signed=True))
39823999
pmaxsw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=True))
39834000

39844001
# Floating-point arithmetic
@@ -4417,6 +4434,36 @@ def pshufhw(_, instr, dst, src, imm):
44174434
out.append(src[shift + 64: shift + 16 + 64])
44184435
return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], []
44194436

4437+
def ptest(_, instr, dst, src):
    """Build the flag assignments for PTEST.

    zf receives FLAG_EQ applied to (@dst & @src) and cf receives FLAG_EQ
    applied to (@src & ~@dst). of, af, pf and nf are each assigned the
    1-bit constant 0. Neither operand is written.
    """
    flag_updates = [
        m2_expr.ExprAssign(zf, m2_expr.ExprOp('FLAG_EQ', dst & src)),
        m2_expr.ExprAssign(cf, m2_expr.ExprOp('FLAG_EQ', src & ~dst)),
    ]
    # Remaining arithmetic flags are unconditionally cleared.
    for cleared_flag in (of, af, pf, nf):
        flag_updates.append(
            m2_expr.ExprAssign(cleared_flag, m2_expr.ExprInt(0, 1))
        )
    return flag_updates, []
4446+
4447+
def _clmul64_to_128(a64, b64):
    """Return the 128-bit carry-less product expression of two 64-bit exprs.

    For every bit position i of @b64, a term is built that equals @a64
    (zero-extended to 128 bits) shifted left by i when that bit is set,
    and 0 otherwise. All terms are XOR-ed together and the result is passed
    through expr_simp.
    """
    assert a64.size == 64
    assert b64.size == 64

    zero128 = m2_expr.ExprInt(0, 128)
    wide_a = a64.zeroExtend(128)

    accumulator = zero128
    for shift in range(64):
        partial = m2_expr.ExprCond(
            b64[shift:shift + 1],
            wide_a << m2_expr.ExprInt(shift, 128),
            zero128,
        )
        accumulator = m2_expr.ExprOp('^', accumulator, partial)

    return expr_simp(accumulator)
4460+
4461+
def pclmulqdq(_, instr, dst, src, imm8):
    """Carry-less multiply one qword of @dst by one qword of @src.

    Bit 0 of @imm8 selects the high (set) or low (clear) 64 bits of @dst;
    bit 4 selects the high or low 64 bits of @src. The 128-bit result of
    _clmul64_to_128 on the selected halves is assigned to @dst.
    """
    selector = int(imm8)

    if selector & 0x01:
        lhs = dst[64:128]
    else:
        lhs = dst[:64]

    if selector & 0x10:
        rhs = src[64:128]
    else:
        rhs = src[:64]

    return [m2_expr.ExprAssign(dst, _clmul64_to_128(lhs, rhs))], []
44204467

44214468
def ps_rl_ll(ir, instr, dst, src, op, size):
44224469
mask = {16: 0xF,
@@ -4594,7 +4641,6 @@ def punpckldq(ir, instr, dst, src):
45944641
def punpcklqdq(ir, instr, dst, src):
    """Forward to punpck() with the constant trailing arguments 64 and 0.

    NOTE(review): 64 and 0 are presumably the element size in bits and the
    starting offset -- confirm against punpck().
    """
    unpacked = punpck(ir, instr, dst, src, 64, 0)
    return unpacked
45964643

4597-
45984644
def pinsr(_, instr, dst, src, imm, size):
45994645
e = []
46004646

@@ -4682,6 +4728,52 @@ def unpcklpd(_, instr, dst, src):
46824728
e.append(m2_expr.ExprAssign(dst, src))
46834729
return e, []
46844730

4731+
def pmovsxwd(ir, instr, dst, src):
    """Sign-extend four 16-bit words of @src to 32 bits each.

    The words are taken from bits 0..63 of @src in ascending order, each is
    sign-extended to 32 bits, and their composition is assigned to @dst.
    """
    widened = [
        src[offset:offset + 16].signExtend(32)
        for offset in range(0, 64, 16)
    ]
    return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*widened))], []
4737+
4738+
def pmovsxwq(ir, instr, dst, src):
    """Sign-extend two 16-bit words of @src to 64 bits each.

    The words at bits 0..15 and 16..31 of @src are sign-extended to 64 bits
    and their composition is assigned to @dst.

    Raises RuntimeError when @dst is not 128 bits wide.
    """
    if dst.size != 128:
        raise RuntimeError("Unsupported size %d" % dst.size)

    widened = [
        src[offset:offset + 16].signExtend(64)
        for offset in (0, 16)
    ]
    return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*widened))], []
4750+
4751+
def pmovmskb(ir, instr, dst, src):
    """Gather the most significant bit of every byte of @src into @dst.

    Bit i of the mask is bit 7 of byte i of @src, so a 64-bit source yields
    an 8-bit mask and a 128-bit source a 16-bit mask. The mask is
    zero-extended to the size of @dst before being assigned.

    The previous body assigned `src.zeroExtend(dst.size)`, which copied the
    source itself instead of building the byte-sign mask.

    Returns the list of assignments and an empty list of extra blocks.
    """
    sign_bits = [
        src[8 * index + 7:8 * (index + 1)]
        for index in range(src.size // 8)
    ]
    mask = m2_expr.ExprCompose(*sign_bits)
    return [m2_expr.ExprAssign(dst, mask.zeroExtend(dst.size))], []
4755+
4756+
def pmovsxbd(ir, instr, dst, src):
    """Sign-extend four bytes of @src to 32 bits each.

    The bytes at bits 0..31 of @src are taken in ascending order, each is
    sign-extended to 32 bits, and their composition is assigned to @dst.

    Raises RuntimeError when @dst is not 128 bits wide.
    """
    if dst.size != 128:
        raise RuntimeError("Unsupported size %d" % dst.size)

    widened = [
        src[offset:offset + 8].signExtend(32)
        for offset in range(0, 32, 8)
    ]
    return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*widened))], []
4766+
4767+
def pmovsxdq(_, instr, dst, src):
    """Sign-extend two 32-bit dwords of @src to 64 bits each.

    The dwords at bits 0..31 and 32..63 of @src are sign-extended to
    64 bits and their composition is assigned to @dst.

    Raises RuntimeError when @dst is not 128 bits wide.
    """
    if dst.size != 128:
        raise RuntimeError("Unsupported size %d" % dst.size)

    widened = [
        src[offset:offset + 32].signExtend(64)
        for offset in (0, 32)
    ]
    return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*widened))], []
46854777

46864778
def movlpd(_, instr, dst, src):
46874779
e = []
@@ -4805,6 +4897,36 @@ def palignr(ir, instr, dst, src, imm):
48054897

48064898
return [m2_expr.ExprAssign(dst, result)], []
48074899

4900+
def psign(ir, instr, dst, src, lane_size):
    """Shared semantic for psignb / psignw / psignd.

    @dst and @src are walked in lanes of @lane_size bits. For each lane:
    - if the top bit of the @src lane is set, the result is the @dst lane
      negated, computed as (lane ^ lane.mask) + 1;
    - otherwise, if the @src lane is non-zero, the @dst lane is kept;
    - otherwise the result is 0.
    The composition of all lanes is assigned to @dst.

    Raises RuntimeError unless @dst is 64 or 128 bits wide and @src has the
    same size.
    """
    sizes_supported = dst.size in [64, 128] and src.size == dst.size
    if not sizes_supported:
        raise RuntimeError("Unsupported size dst=%d src=%d" % (dst.size, src.size))

    lanes = []
    for low in range(0, dst.size, lane_size):
        high = low + lane_size
        value = dst[low:high]
        selector = src[low:high]

        negated = (value ^ value.mask) + m2_expr.ExprInt(1, value.size)
        # Non-negative selector: keep the value unless the selector is zero.
        kept_or_zero = m2_expr.ExprCond(
            selector,
            value,
            m2_expr.ExprInt(0, value.size),
        )
        lanes.append(m2_expr.ExprCond(selector.msb(), negated, kept_or_zero))

    return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*lanes))], []
4921+
4922+
def psignb(ir, instr, dst, src):
    """psign() applied with 8-bit lanes."""
    return psign(ir, instr, dst, src, lane_size=8)
4924+
4925+
def psignw(ir, instr, dst, src):
    """psign() applied with 16-bit lanes."""
    return psign(ir, instr, dst, src, lane_size=16)
4927+
4928+
def psignd(ir, instr, dst, src):
    """psign() applied with 32-bit lanes."""
    return psign(ir, instr, dst, src, lane_size=32)
48084930

48094931
def _signed_to_signed_saturation(expr, dst_size):
48104932
"""Saturate the expr @expr for @dst_size bit
@@ -4873,7 +4995,21 @@ def _signed_to_unsigned_saturation(expr, dst_size):
48734995
)
48744996
)
48754997

4998+
def phminposuw(ir, instr, dst, src):
    """Find the unsigned minimum 16-bit word of @src and its index.

    The eight words of @src are scanned in ascending order; a later word
    replaces the running minimum only when it is strictly lower (unsigned),
    so the first occurrence of the minimum wins. @dst receives the
    composition of the minimum word, its index as a 16-bit value, and
    96 zero bits.

    Raises RuntimeError unless both @dst and @src are 128 bits wide.
    """
    if not (dst.size == 128 and src.size == 128):
        raise RuntimeError("Unsupported size dst=%d src=%d" % (dst.size, src.size))

    best = src[:16]
    best_pos = m2_expr.ExprInt(0, 16)

    for pos in range(1, 8):
        candidate = src[pos * 16:(pos + 1) * 16]
        is_lower = m2_expr.expr_is_unsigned_lower(candidate, best)
        # Both updates use the comparison against the previous minimum.
        best, best_pos = (
            m2_expr.ExprCond(is_lower, candidate, best),
            m2_expr.ExprCond(is_lower, m2_expr.ExprInt(pos, 16), best_pos),
        )

    packed = m2_expr.ExprCompose(best, best_pos, m2_expr.ExprInt(0, 96))
    return [m2_expr.ExprAssign(dst, packed)], []
48775013

48785014
def packsswb(ir, instr, dst, src):
48795015
out = []
@@ -4898,6 +5034,12 @@ def packuswb(ir, instr, dst, src):
48985034
out.append(_signed_to_unsigned_saturation(source[start:start + 16], 8))
48995035
return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], []
49005036

5037+
def packusdw(ir, instr, dst, src):
    """Pack 32-bit lanes of @dst then @src into 16-bit lanes of @dst.

    Every 32-bit lane (all lanes of @dst first, then all lanes of @src) is
    passed through _signed_to_unsigned_saturation with a 16-bit target, and
    the composition of the results is assigned to @dst.
    """
    packed = [
        _signed_to_unsigned_saturation(operand[offset:offset + 32], 16)
        for operand in (dst, src)
        for offset in range(0, dst.size, 32)
    ]
    return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*packed))], []
49015043

49025044
def _saturation_sub_unsigned(expr):
49035045
assert expr.is_op("+") and len(expr.args) == 2 and expr.args[-1].is_op("-")
@@ -5533,6 +5675,7 @@ def fxrstor(_ir, _instr, _dst):
55335675
"pmulhw": pmulhw,
55345676
"pmulhd": pmulhd,
55355677
"pmulhq": pmulhq,
5678+
"pmuldq": pmuldq,
55365679
"pmuludq": pmuludq,
55375680

55385681
# Mix
@@ -5633,6 +5776,9 @@ def fxrstor(_ir, _instr, _dst):
56335776
"pshufd": pshufd,
56345777
"pshuflw": pshuflw,
56355778
"pshufhw": pshufhw,
5779+
"ptest": ptest,
5780+
"ptest": ptest,
5781+
"pclmulqdq": pclmulqdq,
56365782

56375783
"psrlw": psrlw,
56385784
"psrld": psrld,
@@ -5647,14 +5793,20 @@ def fxrstor(_ir, _instr, _dst):
56475793

56485794
"palignr": palignr,
56495795

5796+
"psignb": psignb,
5797+
"psignw": psignw,
5798+
"psignd": psignd,
5799+
56505800
"pmaxub": pmaxub,
56515801
"pmaxuw": pmaxuw,
56525802
"pmaxud": pmaxud,
5803+
"pmaxsd": pmaxsd,
56535804
"pmaxsw": pmaxsw,
56545805

56555806
"pminub": pminub,
56565807
"pminuw": pminuw,
56575808
"pminud": pminud,
5809+
"pminsd": pminsd,
56585810

56595811
"pcmpeqb": pcmpeqb,
56605812
"pcmpeqw": pcmpeqw,
@@ -5707,10 +5859,17 @@ def fxrstor(_ir, _instr, _dst):
57075859
"sqrtss": sqrtss,
57085860

57095861
"pmovmskb": pmovmskb,
5862+
"pmovsxwd": pmovsxwd,
5863+
"pmovsxwq": pmovsxwq,
5864+
"pmovsxbd": pmovsxbd,
5865+
"pmovsxdq": pmovsxdq,
5866+
5867+
"phminposuw": phminposuw,
57105868

57115869
"packsswb": packsswb,
57125870
"packssdw": packssdw,
57135871
"packuswb": packuswb,
5872+
"packusdw": packusdw,
57145873

57155874
"psubusb": psubusb,
57165875
"psubusw": psubusw,

0 commit comments

Comments
 (0)